xref: /petsc/src/mat/impls/maij/maij.c (revision 9c30b7d2697335155d7490a7e085415ee7b4a02a)
1 /*
2     Defines the basic matrix operations for the MAIJ  matrix storage format.
3   This format is used for restriction and interpolation operations for
4   multicomponent problems. It interpolates each component the same way
5   independently.
6 
7      We provide:
8          MatMult()
9          MatMultTranspose()
10          MatMultTransposeAdd()
11          MatMultAdd()
12           and
13          MatCreateMAIJ(Mat,dof,Mat*)
14 
15      This single directory handles both the sequential and parallel codes
16 */
17 
18 #include "src/mat/impls/maij/maij.h"
19 #include "vecimpl.h"
20 
21 #undef __FUNCT__
22 #define __FUNCT__ "MatMAIJGetAIJ"
23 int MatMAIJGetAIJ(Mat A,Mat *B)
24 {
25   int         ierr;
26   PetscTruth  ismpimaij,isseqmaij;
27 
28   PetscFunctionBegin;
29   ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr);
30   ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr);
31   if (ismpimaij) {
32     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
33 
34     *B = b->A;
35   } else if (isseqmaij) {
36     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
37 
38     *B = b->AIJ;
39   } else {
40     *B = A;
41   }
42   PetscFunctionReturn(0);
43 }
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "MatMAIJRedimension"
47 int MatMAIJRedimension(Mat A,int dof,Mat *B)
48 {
49   int ierr;
50   Mat Aij;
51 
52   PetscFunctionBegin;
53   ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr);
54   ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr);
55   PetscFunctionReturn(0);
56 }
57 
58 #undef __FUNCT__
59 #define __FUNCT__ "MatDestroy_SeqMAIJ"
60 int MatDestroy_SeqMAIJ(Mat A)
61 {
62   int         ierr;
63   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
64 
65   PetscFunctionBegin;
66   if (b->AIJ) {
67     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
68   }
69   ierr = PetscFree(b);CHKERRQ(ierr);
70   PetscFunctionReturn(0);
71 }
72 
73 #undef __FUNCT__
74 #define __FUNCT__ "MatDestroy_MPIMAIJ"
75 int MatDestroy_MPIMAIJ(Mat A)
76 {
77   int         ierr;
78   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
79 
80   PetscFunctionBegin;
81   if (b->AIJ) {
82     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
83   }
84   if (b->OAIJ) {
85     ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr);
86   }
87   if (b->A) {
88     ierr = MatDestroy(b->A);CHKERRQ(ierr);
89   }
90   if (b->ctx) {
91     ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr);
92   }
93   if (b->w) {
94     ierr = VecDestroy(b->w);CHKERRQ(ierr);
95   }
96   ierr = PetscFree(b);CHKERRQ(ierr);
97   PetscFunctionReturn(0);
98 }
99 
100 /*MC
101   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
102   multicomponent problems, interpolating or restricting each component the same way independently.
103   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
104 
105   Operations provided:
106 . MatMult
107 . MatMultTranspose
108 . MatMultAdd
109 . MatMultTransposeAdd
110 
111   Level: advanced
112 
113 .seealso: MatCreateSeqDense
114 M*/
115 
116 EXTERN_C_BEGIN
117 #undef __FUNCT__
118 #define __FUNCT__ "MatCreate_MAIJ"
119 int MatCreate_MAIJ(Mat A)
120 {
121   int         ierr;
122   Mat_MPIMAIJ *b;
123 
124   PetscFunctionBegin;
125   ierr     = PetscNew(Mat_MPIMAIJ,&b);CHKERRQ(ierr);
126   A->data  = (void*)b;
127   ierr = PetscMemzero(b,sizeof(Mat_MPIMAIJ));CHKERRQ(ierr);
128   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
129   A->factor           = 0;
130   A->mapping          = 0;
131 
132   b->AIJ  = 0;
133   b->dof  = 0;
134   b->OAIJ = 0;
135   b->ctx  = 0;
136   b->w    = 0;
137   PetscFunctionReturn(0);
138 }
139 EXTERN_C_END
140 
141 /* --------------------------------------------------------------------------------------*/
142 #undef __FUNCT__
143 #define __FUNCT__ "MatMult_SeqMAIJ_2"
144 int MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
145 {
146   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
147   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
148   PetscScalar   *x,*y,*v,sum1, sum2;
149   int           ierr,m = b->AIJ->m,*idx,*ii;
150   int           n,i,jrow,j;
151 
152   PetscFunctionBegin;
153   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
154   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
155   idx  = a->j;
156   v    = a->a;
157   ii   = a->i;
158 
159   for (i=0; i<m; i++) {
160     jrow = ii[i];
161     n    = ii[i+1] - jrow;
162     sum1  = 0.0;
163     sum2  = 0.0;
164     for (j=0; j<n; j++) {
165       sum1 += v[jrow]*x[2*idx[jrow]];
166       sum2 += v[jrow]*x[2*idx[jrow]+1];
167       jrow++;
168      }
169     y[2*i]   = sum1;
170     y[2*i+1] = sum2;
171   }
172 
173   PetscLogFlops(4*a->nz - 2*m);
174   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
175   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
176   PetscFunctionReturn(0);
177 }
178 
179 #undef __FUNCT__
180 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2"
181 int MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
182 {
183   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
184   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
185   PetscScalar   *x,*y,*v,alpha1,alpha2,zero = 0.0;
186   int           ierr,m = b->AIJ->m,n,i,*idx;
187 
188   PetscFunctionBegin;
189   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
190   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
191   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
192 
193   for (i=0; i<m; i++) {
194     idx    = a->j + a->i[i] ;
195     v      = a->a + a->i[i] ;
196     n      = a->i[i+1] - a->i[i];
197     alpha1 = x[2*i];
198     alpha2 = x[2*i+1];
199     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
200   }
201   PetscLogFlops(4*a->nz - 2*b->AIJ->n);
202   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
203   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
204   PetscFunctionReturn(0);
205 }
206 
207 #undef __FUNCT__
208 #define __FUNCT__ "MatMultAdd_SeqMAIJ_2"
209 int MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
210 {
211   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
212   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
213   PetscScalar   *x,*y,*v,sum1, sum2;
214   int           ierr,m = b->AIJ->m,*idx,*ii;
215   int           n,i,jrow,j;
216 
217   PetscFunctionBegin;
218   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
219   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
220   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
221   idx  = a->j;
222   v    = a->a;
223   ii   = a->i;
224 
225   for (i=0; i<m; i++) {
226     jrow = ii[i];
227     n    = ii[i+1] - jrow;
228     sum1  = 0.0;
229     sum2  = 0.0;
230     for (j=0; j<n; j++) {
231       sum1 += v[jrow]*x[2*idx[jrow]];
232       sum2 += v[jrow]*x[2*idx[jrow]+1];
233       jrow++;
234      }
235     y[2*i]   += sum1;
236     y[2*i+1] += sum2;
237   }
238 
239   PetscLogFlops(4*a->nz - 2*m);
240   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
241   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
242   PetscFunctionReturn(0);
243 }
244 #undef __FUNCT__
245 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2"
246 int MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
247 {
248   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
249   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
250   PetscScalar   *x,*y,*v,alpha1,alpha2;
251   int           ierr,m = b->AIJ->m,n,i,*idx;
252 
253   PetscFunctionBegin;
254   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
255   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
256   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
257 
258   for (i=0; i<m; i++) {
259     idx   = a->j + a->i[i] ;
260     v     = a->a + a->i[i] ;
261     n     = a->i[i+1] - a->i[i];
262     alpha1 = x[2*i];
263     alpha2 = x[2*i+1];
264     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
265   }
266   PetscLogFlops(4*a->nz - 2*b->AIJ->n);
267   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
268   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
269   PetscFunctionReturn(0);
270 }
271 /* --------------------------------------------------------------------------------------*/
272 #undef __FUNCT__
273 #define __FUNCT__ "MatMult_SeqMAIJ_3"
274 int MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
275 {
276   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
277   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
278   PetscScalar   *x,*y,*v,sum1, sum2, sum3;
279   int           ierr,m = b->AIJ->m,*idx,*ii;
280   int           n,i,jrow,j;
281 
282   PetscFunctionBegin;
283   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
284   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
285   idx  = a->j;
286   v    = a->a;
287   ii   = a->i;
288 
289   for (i=0; i<m; i++) {
290     jrow = ii[i];
291     n    = ii[i+1] - jrow;
292     sum1  = 0.0;
293     sum2  = 0.0;
294     sum3  = 0.0;
295     for (j=0; j<n; j++) {
296       sum1 += v[jrow]*x[3*idx[jrow]];
297       sum2 += v[jrow]*x[3*idx[jrow]+1];
298       sum3 += v[jrow]*x[3*idx[jrow]+2];
299       jrow++;
300      }
301     y[3*i]   = sum1;
302     y[3*i+1] = sum2;
303     y[3*i+2] = sum3;
304   }
305 
306   PetscLogFlops(6*a->nz - 3*m);
307   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
308   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
309   PetscFunctionReturn(0);
310 }
311 
312 #undef __FUNCT__
313 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3"
314 int MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
315 {
316   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
317   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
318   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
319   int           ierr,m = b->AIJ->m,n,i,*idx;
320 
321   PetscFunctionBegin;
322   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
323   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
324   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
325 
326   for (i=0; i<m; i++) {
327     idx    = a->j + a->i[i];
328     v      = a->a + a->i[i];
329     n      = a->i[i+1] - a->i[i];
330     alpha1 = x[3*i];
331     alpha2 = x[3*i+1];
332     alpha3 = x[3*i+2];
333     while (n-->0) {
334       y[3*(*idx)]   += alpha1*(*v);
335       y[3*(*idx)+1] += alpha2*(*v);
336       y[3*(*idx)+2] += alpha3*(*v);
337       idx++; v++;
338     }
339   }
340   PetscLogFlops(6*a->nz - 3*b->AIJ->n);
341   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
342   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
343   PetscFunctionReturn(0);
344 }
345 
346 #undef __FUNCT__
347 #define __FUNCT__ "MatMultAdd_SeqMAIJ_3"
348 int MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
349 {
350   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
351   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
352   PetscScalar   *x,*y,*v,sum1, sum2, sum3;
353   int           ierr,m = b->AIJ->m,*idx,*ii;
354   int           n,i,jrow,j;
355 
356   PetscFunctionBegin;
357   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
358   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
359   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
360   idx  = a->j;
361   v    = a->a;
362   ii   = a->i;
363 
364   for (i=0; i<m; i++) {
365     jrow = ii[i];
366     n    = ii[i+1] - jrow;
367     sum1  = 0.0;
368     sum2  = 0.0;
369     sum3  = 0.0;
370     for (j=0; j<n; j++) {
371       sum1 += v[jrow]*x[3*idx[jrow]];
372       sum2 += v[jrow]*x[3*idx[jrow]+1];
373       sum3 += v[jrow]*x[3*idx[jrow]+2];
374       jrow++;
375      }
376     y[3*i]   += sum1;
377     y[3*i+1] += sum2;
378     y[3*i+2] += sum3;
379   }
380 
381   PetscLogFlops(6*a->nz);
382   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
383   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
384   PetscFunctionReturn(0);
385 }
386 #undef __FUNCT__
387 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3"
388 int MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
389 {
390   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
391   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
392   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3;
393   int           ierr,m = b->AIJ->m,n,i,*idx;
394 
395   PetscFunctionBegin;
396   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
397   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
398   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
399   for (i=0; i<m; i++) {
400     idx    = a->j + a->i[i] ;
401     v      = a->a + a->i[i] ;
402     n      = a->i[i+1] - a->i[i];
403     alpha1 = x[3*i];
404     alpha2 = x[3*i+1];
405     alpha3 = x[3*i+2];
406     while (n-->0) {
407       y[3*(*idx)]   += alpha1*(*v);
408       y[3*(*idx)+1] += alpha2*(*v);
409       y[3*(*idx)+2] += alpha3*(*v);
410       idx++; v++;
411     }
412   }
413   PetscLogFlops(6*a->nz);
414   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
415   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
416   PetscFunctionReturn(0);
417 }
418 
419 /* ------------------------------------------------------------------------------*/
420 #undef __FUNCT__
421 #define __FUNCT__ "MatMult_SeqMAIJ_4"
422 int MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
423 {
424   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
425   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
426   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4;
427   int           ierr,m = b->AIJ->m,*idx,*ii;
428   int           n,i,jrow,j;
429 
430   PetscFunctionBegin;
431   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
432   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
433   idx  = a->j;
434   v    = a->a;
435   ii   = a->i;
436 
437   for (i=0; i<m; i++) {
438     jrow = ii[i];
439     n    = ii[i+1] - jrow;
440     sum1  = 0.0;
441     sum2  = 0.0;
442     sum3  = 0.0;
443     sum4  = 0.0;
444     for (j=0; j<n; j++) {
445       sum1 += v[jrow]*x[4*idx[jrow]];
446       sum2 += v[jrow]*x[4*idx[jrow]+1];
447       sum3 += v[jrow]*x[4*idx[jrow]+2];
448       sum4 += v[jrow]*x[4*idx[jrow]+3];
449       jrow++;
450      }
451     y[4*i]   = sum1;
452     y[4*i+1] = sum2;
453     y[4*i+2] = sum3;
454     y[4*i+3] = sum4;
455   }
456 
457   PetscLogFlops(8*a->nz - 4*m);
458   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
459   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
460   PetscFunctionReturn(0);
461 }
462 
463 #undef __FUNCT__
464 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4"
465 int MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
466 {
467   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
468   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
469   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
470   int           ierr,m = b->AIJ->m,n,i,*idx;
471 
472   PetscFunctionBegin;
473   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
474   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
475   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
476   for (i=0; i<m; i++) {
477     idx    = a->j + a->i[i] ;
478     v      = a->a + a->i[i] ;
479     n      = a->i[i+1] - a->i[i];
480     alpha1 = x[4*i];
481     alpha2 = x[4*i+1];
482     alpha3 = x[4*i+2];
483     alpha4 = x[4*i+3];
484     while (n-->0) {
485       y[4*(*idx)]   += alpha1*(*v);
486       y[4*(*idx)+1] += alpha2*(*v);
487       y[4*(*idx)+2] += alpha3*(*v);
488       y[4*(*idx)+3] += alpha4*(*v);
489       idx++; v++;
490     }
491   }
492   PetscLogFlops(8*a->nz - 4*b->AIJ->n);
493   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
494   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
495   PetscFunctionReturn(0);
496 }
497 
498 #undef __FUNCT__
499 #define __FUNCT__ "MatMultAdd_SeqMAIJ_4"
500 int MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
501 {
502   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
503   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
504   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4;
505   int           ierr,m = b->AIJ->m,*idx,*ii;
506   int           n,i,jrow,j;
507 
508   PetscFunctionBegin;
509   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
510   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
511   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
512   idx  = a->j;
513   v    = a->a;
514   ii   = a->i;
515 
516   for (i=0; i<m; i++) {
517     jrow = ii[i];
518     n    = ii[i+1] - jrow;
519     sum1  = 0.0;
520     sum2  = 0.0;
521     sum3  = 0.0;
522     sum4  = 0.0;
523     for (j=0; j<n; j++) {
524       sum1 += v[jrow]*x[4*idx[jrow]];
525       sum2 += v[jrow]*x[4*idx[jrow]+1];
526       sum3 += v[jrow]*x[4*idx[jrow]+2];
527       sum4 += v[jrow]*x[4*idx[jrow]+3];
528       jrow++;
529      }
530     y[4*i]   += sum1;
531     y[4*i+1] += sum2;
532     y[4*i+2] += sum3;
533     y[4*i+3] += sum4;
534   }
535 
536   PetscLogFlops(8*a->nz - 4*m);
537   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
538   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
539   PetscFunctionReturn(0);
540 }
541 #undef __FUNCT__
542 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4"
543 int MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
544 {
545   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
546   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
547   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
548   int           ierr,m = b->AIJ->m,n,i,*idx;
549 
550   PetscFunctionBegin;
551   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
552   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
553   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
554 
555   for (i=0; i<m; i++) {
556     idx    = a->j + a->i[i] ;
557     v      = a->a + a->i[i] ;
558     n      = a->i[i+1] - a->i[i];
559     alpha1 = x[4*i];
560     alpha2 = x[4*i+1];
561     alpha3 = x[4*i+2];
562     alpha4 = x[4*i+3];
563     while (n-->0) {
564       y[4*(*idx)]   += alpha1*(*v);
565       y[4*(*idx)+1] += alpha2*(*v);
566       y[4*(*idx)+2] += alpha3*(*v);
567       y[4*(*idx)+3] += alpha4*(*v);
568       idx++; v++;
569     }
570   }
571   PetscLogFlops(8*a->nz - 4*b->AIJ->n);
572   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
573   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
574   PetscFunctionReturn(0);
575 }
576 /* ------------------------------------------------------------------------------*/
577 
578 #undef __FUNCT__
579 #define __FUNCT__ "MatMult_SeqMAIJ_5"
580 int MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
581 {
582   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
583   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
584   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
585   int           ierr,m = b->AIJ->m,*idx,*ii;
586   int           n,i,jrow,j;
587 
588   PetscFunctionBegin;
589   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
590   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
591   idx  = a->j;
592   v    = a->a;
593   ii   = a->i;
594 
595   for (i=0; i<m; i++) {
596     jrow = ii[i];
597     n    = ii[i+1] - jrow;
598     sum1  = 0.0;
599     sum2  = 0.0;
600     sum3  = 0.0;
601     sum4  = 0.0;
602     sum5  = 0.0;
603     for (j=0; j<n; j++) {
604       sum1 += v[jrow]*x[5*idx[jrow]];
605       sum2 += v[jrow]*x[5*idx[jrow]+1];
606       sum3 += v[jrow]*x[5*idx[jrow]+2];
607       sum4 += v[jrow]*x[5*idx[jrow]+3];
608       sum5 += v[jrow]*x[5*idx[jrow]+4];
609       jrow++;
610      }
611     y[5*i]   = sum1;
612     y[5*i+1] = sum2;
613     y[5*i+2] = sum3;
614     y[5*i+3] = sum4;
615     y[5*i+4] = sum5;
616   }
617 
618   PetscLogFlops(10*a->nz - 5*m);
619   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
620   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
621   PetscFunctionReturn(0);
622 }
623 
624 #undef __FUNCT__
625 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5"
626 int MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
627 {
628   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
629   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
630   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
631   int           ierr,m = b->AIJ->m,n,i,*idx;
632 
633   PetscFunctionBegin;
634   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
635   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
636   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
637 
638   for (i=0; i<m; i++) {
639     idx    = a->j + a->i[i] ;
640     v      = a->a + a->i[i] ;
641     n      = a->i[i+1] - a->i[i];
642     alpha1 = x[5*i];
643     alpha2 = x[5*i+1];
644     alpha3 = x[5*i+2];
645     alpha4 = x[5*i+3];
646     alpha5 = x[5*i+4];
647     while (n-->0) {
648       y[5*(*idx)]   += alpha1*(*v);
649       y[5*(*idx)+1] += alpha2*(*v);
650       y[5*(*idx)+2] += alpha3*(*v);
651       y[5*(*idx)+3] += alpha4*(*v);
652       y[5*(*idx)+4] += alpha5*(*v);
653       idx++; v++;
654     }
655   }
656   PetscLogFlops(10*a->nz - 5*b->AIJ->n);
657   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
658   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
659   PetscFunctionReturn(0);
660 }
661 
662 #undef __FUNCT__
663 #define __FUNCT__ "MatMultAdd_SeqMAIJ_5"
664 int MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
665 {
666   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
667   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
668   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
669   int           ierr,m = b->AIJ->m,*idx,*ii;
670   int           n,i,jrow,j;
671 
672   PetscFunctionBegin;
673   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
674   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
675   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
676   idx  = a->j;
677   v    = a->a;
678   ii   = a->i;
679 
680   for (i=0; i<m; i++) {
681     jrow = ii[i];
682     n    = ii[i+1] - jrow;
683     sum1  = 0.0;
684     sum2  = 0.0;
685     sum3  = 0.0;
686     sum4  = 0.0;
687     sum5  = 0.0;
688     for (j=0; j<n; j++) {
689       sum1 += v[jrow]*x[5*idx[jrow]];
690       sum2 += v[jrow]*x[5*idx[jrow]+1];
691       sum3 += v[jrow]*x[5*idx[jrow]+2];
692       sum4 += v[jrow]*x[5*idx[jrow]+3];
693       sum5 += v[jrow]*x[5*idx[jrow]+4];
694       jrow++;
695      }
696     y[5*i]   += sum1;
697     y[5*i+1] += sum2;
698     y[5*i+2] += sum3;
699     y[5*i+3] += sum4;
700     y[5*i+4] += sum5;
701   }
702 
703   PetscLogFlops(10*a->nz);
704   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
705   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
706   PetscFunctionReturn(0);
707 }
708 
709 #undef __FUNCT__
710 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5"
711 int MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
712 {
713   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
714   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
715   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
716   int           ierr,m = b->AIJ->m,n,i,*idx;
717 
718   PetscFunctionBegin;
719   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
720   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
721   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
722 
723   for (i=0; i<m; i++) {
724     idx    = a->j + a->i[i] ;
725     v      = a->a + a->i[i] ;
726     n      = a->i[i+1] - a->i[i];
727     alpha1 = x[5*i];
728     alpha2 = x[5*i+1];
729     alpha3 = x[5*i+2];
730     alpha4 = x[5*i+3];
731     alpha5 = x[5*i+4];
732     while (n-->0) {
733       y[5*(*idx)]   += alpha1*(*v);
734       y[5*(*idx)+1] += alpha2*(*v);
735       y[5*(*idx)+2] += alpha3*(*v);
736       y[5*(*idx)+3] += alpha4*(*v);
737       y[5*(*idx)+4] += alpha5*(*v);
738       idx++; v++;
739     }
740   }
741   PetscLogFlops(10*a->nz);
742   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
743   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
744   PetscFunctionReturn(0);
745 }
746 
747 /* ------------------------------------------------------------------------------*/
748 #undef __FUNCT__
749 #define __FUNCT__ "MatMult_SeqMAIJ_6"
750 int MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
751 {
752   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
753   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
754   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
755   int           ierr,m = b->AIJ->m,*idx,*ii;
756   int           n,i,jrow,j;
757 
758   PetscFunctionBegin;
759   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
760   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
761   idx  = a->j;
762   v    = a->a;
763   ii   = a->i;
764 
765   for (i=0; i<m; i++) {
766     jrow = ii[i];
767     n    = ii[i+1] - jrow;
768     sum1  = 0.0;
769     sum2  = 0.0;
770     sum3  = 0.0;
771     sum4  = 0.0;
772     sum5  = 0.0;
773     sum6  = 0.0;
774     for (j=0; j<n; j++) {
775       sum1 += v[jrow]*x[6*idx[jrow]];
776       sum2 += v[jrow]*x[6*idx[jrow]+1];
777       sum3 += v[jrow]*x[6*idx[jrow]+2];
778       sum4 += v[jrow]*x[6*idx[jrow]+3];
779       sum5 += v[jrow]*x[6*idx[jrow]+4];
780       sum6 += v[jrow]*x[6*idx[jrow]+5];
781       jrow++;
782      }
783     y[6*i]   = sum1;
784     y[6*i+1] = sum2;
785     y[6*i+2] = sum3;
786     y[6*i+3] = sum4;
787     y[6*i+4] = sum5;
788     y[6*i+5] = sum6;
789   }
790 
791   PetscLogFlops(12*a->nz - 6*m);
792   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
793   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
794   PetscFunctionReturn(0);
795 }
796 
797 #undef __FUNCT__
798 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6"
799 int MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
800 {
801   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
802   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
803   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0;
804   int           ierr,m = b->AIJ->m,n,i,*idx;
805 
806   PetscFunctionBegin;
807   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
808   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
809   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
810 
811   for (i=0; i<m; i++) {
812     idx    = a->j + a->i[i] ;
813     v      = a->a + a->i[i] ;
814     n      = a->i[i+1] - a->i[i];
815     alpha1 = x[6*i];
816     alpha2 = x[6*i+1];
817     alpha3 = x[6*i+2];
818     alpha4 = x[6*i+3];
819     alpha5 = x[6*i+4];
820     alpha6 = x[6*i+5];
821     while (n-->0) {
822       y[6*(*idx)]   += alpha1*(*v);
823       y[6*(*idx)+1] += alpha2*(*v);
824       y[6*(*idx)+2] += alpha3*(*v);
825       y[6*(*idx)+3] += alpha4*(*v);
826       y[6*(*idx)+4] += alpha5*(*v);
827       y[6*(*idx)+5] += alpha6*(*v);
828       idx++; v++;
829     }
830   }
831   PetscLogFlops(12*a->nz - 6*b->AIJ->n);
832   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
833   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
834   PetscFunctionReturn(0);
835 }
836 
837 #undef __FUNCT__
838 #define __FUNCT__ "MatMultAdd_SeqMAIJ_6"
839 int MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
840 {
841   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
842   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
843   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
844   int           ierr,m = b->AIJ->m,*idx,*ii;
845   int           n,i,jrow,j;
846 
847   PetscFunctionBegin;
848   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
849   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
850   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
851   idx  = a->j;
852   v    = a->a;
853   ii   = a->i;
854 
855   for (i=0; i<m; i++) {
856     jrow = ii[i];
857     n    = ii[i+1] - jrow;
858     sum1  = 0.0;
859     sum2  = 0.0;
860     sum3  = 0.0;
861     sum4  = 0.0;
862     sum5  = 0.0;
863     sum6  = 0.0;
864     for (j=0; j<n; j++) {
865       sum1 += v[jrow]*x[6*idx[jrow]];
866       sum2 += v[jrow]*x[6*idx[jrow]+1];
867       sum3 += v[jrow]*x[6*idx[jrow]+2];
868       sum4 += v[jrow]*x[6*idx[jrow]+3];
869       sum5 += v[jrow]*x[6*idx[jrow]+4];
870       sum6 += v[jrow]*x[6*idx[jrow]+5];
871       jrow++;
872      }
873     y[6*i]   += sum1;
874     y[6*i+1] += sum2;
875     y[6*i+2] += sum3;
876     y[6*i+3] += sum4;
877     y[6*i+4] += sum5;
878     y[6*i+5] += sum6;
879   }
880 
881   PetscLogFlops(12*a->nz);
882   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
883   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
884   PetscFunctionReturn(0);
885 }
886 
887 #undef __FUNCT__
888 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6"
889 int MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
890 {
891   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
892   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
893   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
894   int           ierr,m = b->AIJ->m,n,i,*idx;
895 
896   PetscFunctionBegin;
897   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
898   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
899   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
900 
901   for (i=0; i<m; i++) {
902     idx    = a->j + a->i[i] ;
903     v      = a->a + a->i[i] ;
904     n      = a->i[i+1] - a->i[i];
905     alpha1 = x[6*i];
906     alpha2 = x[6*i+1];
907     alpha3 = x[6*i+2];
908     alpha4 = x[6*i+3];
909     alpha5 = x[6*i+4];
910     alpha6 = x[6*i+5];
911     while (n-->0) {
912       y[6*(*idx)]   += alpha1*(*v);
913       y[6*(*idx)+1] += alpha2*(*v);
914       y[6*(*idx)+2] += alpha3*(*v);
915       y[6*(*idx)+3] += alpha4*(*v);
916       y[6*(*idx)+4] += alpha5*(*v);
917       y[6*(*idx)+5] += alpha6*(*v);
918       idx++; v++;
919     }
920   }
921   PetscLogFlops(12*a->nz);
922   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
923   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
924   PetscFunctionReturn(0);
925 }
926 
927 /* ------------------------------------------------------------------------------*/
928 #undef __FUNCT__
929 #define __FUNCT__ "MatMult_SeqMAIJ_8"
930 int MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
931 {
932   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
933   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
934   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
935   int           ierr,m = b->AIJ->m,*idx,*ii;
936   int           n,i,jrow,j;
937 
938   PetscFunctionBegin;
939   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
940   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
941   idx  = a->j;
942   v    = a->a;
943   ii   = a->i;
944 
945   for (i=0; i<m; i++) {
946     jrow = ii[i];
947     n    = ii[i+1] - jrow;
948     sum1  = 0.0;
949     sum2  = 0.0;
950     sum3  = 0.0;
951     sum4  = 0.0;
952     sum5  = 0.0;
953     sum6  = 0.0;
954     sum7  = 0.0;
955     sum8  = 0.0;
956     for (j=0; j<n; j++) {
957       sum1 += v[jrow]*x[8*idx[jrow]];
958       sum2 += v[jrow]*x[8*idx[jrow]+1];
959       sum3 += v[jrow]*x[8*idx[jrow]+2];
960       sum4 += v[jrow]*x[8*idx[jrow]+3];
961       sum5 += v[jrow]*x[8*idx[jrow]+4];
962       sum6 += v[jrow]*x[8*idx[jrow]+5];
963       sum7 += v[jrow]*x[8*idx[jrow]+6];
964       sum8 += v[jrow]*x[8*idx[jrow]+7];
965       jrow++;
966      }
967     y[8*i]   = sum1;
968     y[8*i+1] = sum2;
969     y[8*i+2] = sum3;
970     y[8*i+3] = sum4;
971     y[8*i+4] = sum5;
972     y[8*i+5] = sum6;
973     y[8*i+6] = sum7;
974     y[8*i+7] = sum8;
975   }
976 
977   PetscLogFlops(16*a->nz - 8*m);
978   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
979   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
980   PetscFunctionReturn(0);
981 }
982 
983 #undef __FUNCT__
984 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8"
985 int MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
986 {
987   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
988   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
989   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
990   int           ierr,m = b->AIJ->m,n,i,*idx;
991 
992   PetscFunctionBegin;
993   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
994   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
995   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
996 
997   for (i=0; i<m; i++) {
998     idx    = a->j + a->i[i] ;
999     v      = a->a + a->i[i] ;
1000     n      = a->i[i+1] - a->i[i];
1001     alpha1 = x[8*i];
1002     alpha2 = x[8*i+1];
1003     alpha3 = x[8*i+2];
1004     alpha4 = x[8*i+3];
1005     alpha5 = x[8*i+4];
1006     alpha6 = x[8*i+5];
1007     alpha7 = x[8*i+6];
1008     alpha8 = x[8*i+7];
1009     while (n-->0) {
1010       y[8*(*idx)]   += alpha1*(*v);
1011       y[8*(*idx)+1] += alpha2*(*v);
1012       y[8*(*idx)+2] += alpha3*(*v);
1013       y[8*(*idx)+3] += alpha4*(*v);
1014       y[8*(*idx)+4] += alpha5*(*v);
1015       y[8*(*idx)+5] += alpha6*(*v);
1016       y[8*(*idx)+6] += alpha7*(*v);
1017       y[8*(*idx)+7] += alpha8*(*v);
1018       idx++; v++;
1019     }
1020   }
1021   PetscLogFlops(16*a->nz - 8*b->AIJ->n);
1022   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1023   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1024   PetscFunctionReturn(0);
1025 }
1026 
1027 #undef __FUNCT__
1028 #define __FUNCT__ "MatMultAdd_SeqMAIJ_8"
1029 int MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1030 {
1031   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1032   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1033   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1034   int           ierr,m = b->AIJ->m,*idx,*ii;
1035   int           n,i,jrow,j;
1036 
1037   PetscFunctionBegin;
1038   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1039   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1040   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1041   idx  = a->j;
1042   v    = a->a;
1043   ii   = a->i;
1044 
1045   for (i=0; i<m; i++) {
1046     jrow = ii[i];
1047     n    = ii[i+1] - jrow;
1048     sum1  = 0.0;
1049     sum2  = 0.0;
1050     sum3  = 0.0;
1051     sum4  = 0.0;
1052     sum5  = 0.0;
1053     sum6  = 0.0;
1054     sum7  = 0.0;
1055     sum8  = 0.0;
1056     for (j=0; j<n; j++) {
1057       sum1 += v[jrow]*x[8*idx[jrow]];
1058       sum2 += v[jrow]*x[8*idx[jrow]+1];
1059       sum3 += v[jrow]*x[8*idx[jrow]+2];
1060       sum4 += v[jrow]*x[8*idx[jrow]+3];
1061       sum5 += v[jrow]*x[8*idx[jrow]+4];
1062       sum6 += v[jrow]*x[8*idx[jrow]+5];
1063       sum7 += v[jrow]*x[8*idx[jrow]+6];
1064       sum8 += v[jrow]*x[8*idx[jrow]+7];
1065       jrow++;
1066      }
1067     y[8*i]   += sum1;
1068     y[8*i+1] += sum2;
1069     y[8*i+2] += sum3;
1070     y[8*i+3] += sum4;
1071     y[8*i+4] += sum5;
1072     y[8*i+5] += sum6;
1073     y[8*i+6] += sum7;
1074     y[8*i+7] += sum8;
1075   }
1076 
1077   PetscLogFlops(16*a->nz);
1078   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1079   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1080   PetscFunctionReturn(0);
1081 }
1082 
1083 #undef __FUNCT__
1084 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8"
1085 int MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1086 {
1087   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1088   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1089   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1090   int           ierr,m = b->AIJ->m,n,i,*idx;
1091 
1092   PetscFunctionBegin;
1093   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1094   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1095   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1096   for (i=0; i<m; i++) {
1097     idx    = a->j + a->i[i] ;
1098     v      = a->a + a->i[i] ;
1099     n      = a->i[i+1] - a->i[i];
1100     alpha1 = x[8*i];
1101     alpha2 = x[8*i+1];
1102     alpha3 = x[8*i+2];
1103     alpha4 = x[8*i+3];
1104     alpha5 = x[8*i+4];
1105     alpha6 = x[8*i+5];
1106     alpha7 = x[8*i+6];
1107     alpha8 = x[8*i+7];
1108     while (n-->0) {
1109       y[8*(*idx)]   += alpha1*(*v);
1110       y[8*(*idx)+1] += alpha2*(*v);
1111       y[8*(*idx)+2] += alpha3*(*v);
1112       y[8*(*idx)+3] += alpha4*(*v);
1113       y[8*(*idx)+4] += alpha5*(*v);
1114       y[8*(*idx)+5] += alpha6*(*v);
1115       y[8*(*idx)+6] += alpha7*(*v);
1116       y[8*(*idx)+7] += alpha8*(*v);
1117       idx++; v++;
1118     }
1119   }
1120   PetscLogFlops(16*a->nz);
1121   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1122   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1123   PetscFunctionReturn(0);
1124 }
1125 
1126 /* ------------------------------------------------------------------------------*/
1127 #undef __FUNCT__
1128 #define __FUNCT__ "MatMult_SeqMAIJ_9"
1129 int MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1130 {
1131   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1132   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1133   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1134   int           ierr,m = b->AIJ->m,*idx,*ii;
1135   int           n,i,jrow,j;
1136 
1137   PetscFunctionBegin;
1138   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1139   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1140   idx  = a->j;
1141   v    = a->a;
1142   ii   = a->i;
1143 
1144   for (i=0; i<m; i++) {
1145     jrow = ii[i];
1146     n    = ii[i+1] - jrow;
1147     sum1  = 0.0;
1148     sum2  = 0.0;
1149     sum3  = 0.0;
1150     sum4  = 0.0;
1151     sum5  = 0.0;
1152     sum6  = 0.0;
1153     sum7  = 0.0;
1154     sum8  = 0.0;
1155     sum9  = 0.0;
1156     for (j=0; j<n; j++) {
1157       sum1 += v[jrow]*x[9*idx[jrow]];
1158       sum2 += v[jrow]*x[9*idx[jrow]+1];
1159       sum3 += v[jrow]*x[9*idx[jrow]+2];
1160       sum4 += v[jrow]*x[9*idx[jrow]+3];
1161       sum5 += v[jrow]*x[9*idx[jrow]+4];
1162       sum6 += v[jrow]*x[9*idx[jrow]+5];
1163       sum7 += v[jrow]*x[9*idx[jrow]+6];
1164       sum8 += v[jrow]*x[9*idx[jrow]+7];
1165       sum9 += v[jrow]*x[9*idx[jrow]+8];
1166       jrow++;
1167      }
1168     y[9*i]   = sum1;
1169     y[9*i+1] = sum2;
1170     y[9*i+2] = sum3;
1171     y[9*i+3] = sum4;
1172     y[9*i+4] = sum5;
1173     y[9*i+5] = sum6;
1174     y[9*i+6] = sum7;
1175     y[9*i+7] = sum8;
1176     y[9*i+8] = sum9;
1177   }
1178 
1179   PetscLogFlops(18*a->nz - 9*m);
1180   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1181   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1182   PetscFunctionReturn(0);
1183 }
1184 
1185 #undef __FUNCT__
1186 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9"
1187 int MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1188 {
1189   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1190   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1191   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0;
1192   int           ierr,m = b->AIJ->m,n,i,*idx;
1193 
1194   PetscFunctionBegin;
1195   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
1196   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1197   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1198 
1199   for (i=0; i<m; i++) {
1200     idx    = a->j + a->i[i] ;
1201     v      = a->a + a->i[i] ;
1202     n      = a->i[i+1] - a->i[i];
1203     alpha1 = x[9*i];
1204     alpha2 = x[9*i+1];
1205     alpha3 = x[9*i+2];
1206     alpha4 = x[9*i+3];
1207     alpha5 = x[9*i+4];
1208     alpha6 = x[9*i+5];
1209     alpha7 = x[9*i+6];
1210     alpha8 = x[9*i+7];
1211     alpha9 = x[9*i+8];
1212     while (n-->0) {
1213       y[9*(*idx)]   += alpha1*(*v);
1214       y[9*(*idx)+1] += alpha2*(*v);
1215       y[9*(*idx)+2] += alpha3*(*v);
1216       y[9*(*idx)+3] += alpha4*(*v);
1217       y[9*(*idx)+4] += alpha5*(*v);
1218       y[9*(*idx)+5] += alpha6*(*v);
1219       y[9*(*idx)+6] += alpha7*(*v);
1220       y[9*(*idx)+7] += alpha8*(*v);
1221       y[9*(*idx)+8] += alpha9*(*v);
1222       idx++; v++;
1223     }
1224   }
1225   PetscLogFlops(18*a->nz - 9*b->AIJ->n);
1226   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1227   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1228   PetscFunctionReturn(0);
1229 }
1230 
1231 #undef __FUNCT__
1232 #define __FUNCT__ "MatMultAdd_SeqMAIJ_9"
1233 int MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1234 {
1235   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1236   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1237   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1238   int           ierr,m = b->AIJ->m,*idx,*ii;
1239   int           n,i,jrow,j;
1240 
1241   PetscFunctionBegin;
1242   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1243   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1244   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1245   idx  = a->j;
1246   v    = a->a;
1247   ii   = a->i;
1248 
1249   for (i=0; i<m; i++) {
1250     jrow = ii[i];
1251     n    = ii[i+1] - jrow;
1252     sum1  = 0.0;
1253     sum2  = 0.0;
1254     sum3  = 0.0;
1255     sum4  = 0.0;
1256     sum5  = 0.0;
1257     sum6  = 0.0;
1258     sum7  = 0.0;
1259     sum8  = 0.0;
1260     sum9  = 0.0;
1261     for (j=0; j<n; j++) {
1262       sum1 += v[jrow]*x[9*idx[jrow]];
1263       sum2 += v[jrow]*x[9*idx[jrow]+1];
1264       sum3 += v[jrow]*x[9*idx[jrow]+2];
1265       sum4 += v[jrow]*x[9*idx[jrow]+3];
1266       sum5 += v[jrow]*x[9*idx[jrow]+4];
1267       sum6 += v[jrow]*x[9*idx[jrow]+5];
1268       sum7 += v[jrow]*x[9*idx[jrow]+6];
1269       sum8 += v[jrow]*x[9*idx[jrow]+7];
1270       sum9 += v[jrow]*x[9*idx[jrow]+8];
1271       jrow++;
1272      }
1273     y[9*i]   += sum1;
1274     y[9*i+1] += sum2;
1275     y[9*i+2] += sum3;
1276     y[9*i+3] += sum4;
1277     y[9*i+4] += sum5;
1278     y[9*i+5] += sum6;
1279     y[9*i+6] += sum7;
1280     y[9*i+7] += sum8;
1281     y[9*i+8] += sum9;
1282   }
1283 
1284   PetscLogFlops(18*a->nz);
1285   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1286   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1287   PetscFunctionReturn(0);
1288 }
1289 
1290 #undef __FUNCT__
1291 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9"
1292 int MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1293 {
1294   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1295   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1296   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1297   int           ierr,m = b->AIJ->m,n,i,*idx;
1298 
1299   PetscFunctionBegin;
1300   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1301   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1302   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1303   for (i=0; i<m; i++) {
1304     idx    = a->j + a->i[i] ;
1305     v      = a->a + a->i[i] ;
1306     n      = a->i[i+1] - a->i[i];
1307     alpha1 = x[9*i];
1308     alpha2 = x[9*i+1];
1309     alpha3 = x[9*i+2];
1310     alpha4 = x[9*i+3];
1311     alpha5 = x[9*i+4];
1312     alpha6 = x[9*i+5];
1313     alpha7 = x[9*i+6];
1314     alpha8 = x[9*i+7];
1315     alpha9 = x[9*i+8];
1316     while (n-->0) {
1317       y[9*(*idx)]   += alpha1*(*v);
1318       y[9*(*idx)+1] += alpha2*(*v);
1319       y[9*(*idx)+2] += alpha3*(*v);
1320       y[9*(*idx)+3] += alpha4*(*v);
1321       y[9*(*idx)+4] += alpha5*(*v);
1322       y[9*(*idx)+5] += alpha6*(*v);
1323       y[9*(*idx)+6] += alpha7*(*v);
1324       y[9*(*idx)+7] += alpha8*(*v);
1325       y[9*(*idx)+8] += alpha9*(*v);
1326       idx++; v++;
1327     }
1328   }
1329   PetscLogFlops(18*a->nz);
1330   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1331   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1332   PetscFunctionReturn(0);
1333 }
1334 
1335 /*--------------------------------------------------------------------------------------------*/
1336 #undef __FUNCT__
1337 #define __FUNCT__ "MatMult_SeqMAIJ_16"
1338 int MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1339 {
1340   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1341   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1342   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1343   PetscScalar   sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1344   int           ierr,m = b->AIJ->m,*idx,*ii;
1345   int           n,i,jrow,j;
1346 
1347   PetscFunctionBegin;
1348   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1349   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1350   idx  = a->j;
1351   v    = a->a;
1352   ii   = a->i;
1353 
1354   for (i=0; i<m; i++) {
1355     jrow = ii[i];
1356     n    = ii[i+1] - jrow;
1357     sum1  = 0.0;
1358     sum2  = 0.0;
1359     sum3  = 0.0;
1360     sum4  = 0.0;
1361     sum5  = 0.0;
1362     sum6  = 0.0;
1363     sum7  = 0.0;
1364     sum8  = 0.0;
1365     sum9  = 0.0;
1366     sum10 = 0.0;
1367     sum11 = 0.0;
1368     sum12 = 0.0;
1369     sum13 = 0.0;
1370     sum14 = 0.0;
1371     sum15 = 0.0;
1372     sum16 = 0.0;
1373     for (j=0; j<n; j++) {
1374       sum1  += v[jrow]*x[16*idx[jrow]];
1375       sum2  += v[jrow]*x[16*idx[jrow]+1];
1376       sum3  += v[jrow]*x[16*idx[jrow]+2];
1377       sum4  += v[jrow]*x[16*idx[jrow]+3];
1378       sum5  += v[jrow]*x[16*idx[jrow]+4];
1379       sum6  += v[jrow]*x[16*idx[jrow]+5];
1380       sum7  += v[jrow]*x[16*idx[jrow]+6];
1381       sum8  += v[jrow]*x[16*idx[jrow]+7];
1382       sum9  += v[jrow]*x[16*idx[jrow]+8];
1383       sum10 += v[jrow]*x[16*idx[jrow]+9];
1384       sum11 += v[jrow]*x[16*idx[jrow]+10];
1385       sum12 += v[jrow]*x[16*idx[jrow]+11];
1386       sum13 += v[jrow]*x[16*idx[jrow]+12];
1387       sum14 += v[jrow]*x[16*idx[jrow]+13];
1388       sum15 += v[jrow]*x[16*idx[jrow]+14];
1389       sum16 += v[jrow]*x[16*idx[jrow]+15];
1390       jrow++;
1391      }
1392     y[16*i]    = sum1;
1393     y[16*i+1]  = sum2;
1394     y[16*i+2]  = sum3;
1395     y[16*i+3]  = sum4;
1396     y[16*i+4]  = sum5;
1397     y[16*i+5]  = sum6;
1398     y[16*i+6]  = sum7;
1399     y[16*i+7]  = sum8;
1400     y[16*i+8]  = sum9;
1401     y[16*i+9]  = sum10;
1402     y[16*i+10] = sum11;
1403     y[16*i+11] = sum12;
1404     y[16*i+12] = sum13;
1405     y[16*i+13] = sum14;
1406     y[16*i+14] = sum15;
1407     y[16*i+15] = sum16;
1408   }
1409 
1410   PetscLogFlops(32*a->nz - 16*m);
1411   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1412   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1413   PetscFunctionReturn(0);
1414 }
1415 
1416 #undef __FUNCT__
1417 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16"
1418 int MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1419 {
1420   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1421   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1422   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1423   PetscScalar   alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1424   int           ierr,m = b->AIJ->m,n,i,*idx;
1425 
1426   PetscFunctionBegin;
1427   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
1428   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1429   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1430 
1431   for (i=0; i<m; i++) {
1432     idx    = a->j + a->i[i] ;
1433     v      = a->a + a->i[i] ;
1434     n      = a->i[i+1] - a->i[i];
1435     alpha1  = x[16*i];
1436     alpha2  = x[16*i+1];
1437     alpha3  = x[16*i+2];
1438     alpha4  = x[16*i+3];
1439     alpha5  = x[16*i+4];
1440     alpha6  = x[16*i+5];
1441     alpha7  = x[16*i+6];
1442     alpha8  = x[16*i+7];
1443     alpha9  = x[16*i+8];
1444     alpha10 = x[16*i+9];
1445     alpha11 = x[16*i+10];
1446     alpha12 = x[16*i+11];
1447     alpha13 = x[16*i+12];
1448     alpha14 = x[16*i+13];
1449     alpha15 = x[16*i+14];
1450     alpha16 = x[16*i+15];
1451     while (n-->0) {
1452       y[16*(*idx)]    += alpha1*(*v);
1453       y[16*(*idx)+1]  += alpha2*(*v);
1454       y[16*(*idx)+2]  += alpha3*(*v);
1455       y[16*(*idx)+3]  += alpha4*(*v);
1456       y[16*(*idx)+4]  += alpha5*(*v);
1457       y[16*(*idx)+5]  += alpha6*(*v);
1458       y[16*(*idx)+6]  += alpha7*(*v);
1459       y[16*(*idx)+7]  += alpha8*(*v);
1460       y[16*(*idx)+8]  += alpha9*(*v);
1461       y[16*(*idx)+9]  += alpha10*(*v);
1462       y[16*(*idx)+10] += alpha11*(*v);
1463       y[16*(*idx)+11] += alpha12*(*v);
1464       y[16*(*idx)+12] += alpha13*(*v);
1465       y[16*(*idx)+13] += alpha14*(*v);
1466       y[16*(*idx)+14] += alpha15*(*v);
1467       y[16*(*idx)+15] += alpha16*(*v);
1468       idx++; v++;
1469     }
1470   }
1471   PetscLogFlops(32*a->nz - 16*b->AIJ->n);
1472   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1473   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1474   PetscFunctionReturn(0);
1475 }
1476 
1477 #undef __FUNCT__
1478 #define __FUNCT__ "MatMultAdd_SeqMAIJ_16"
1479 int MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
1480 {
1481   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1482   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1483   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1484   PetscScalar   sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1485   int           ierr,m = b->AIJ->m,*idx,*ii;
1486   int           n,i,jrow,j;
1487 
1488   PetscFunctionBegin;
1489   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1490   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1491   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1492   idx  = a->j;
1493   v    = a->a;
1494   ii   = a->i;
1495 
1496   for (i=0; i<m; i++) {
1497     jrow = ii[i];
1498     n    = ii[i+1] - jrow;
1499     sum1  = 0.0;
1500     sum2  = 0.0;
1501     sum3  = 0.0;
1502     sum4  = 0.0;
1503     sum5  = 0.0;
1504     sum6  = 0.0;
1505     sum7  = 0.0;
1506     sum8  = 0.0;
1507     sum9  = 0.0;
1508     sum10 = 0.0;
1509     sum11 = 0.0;
1510     sum12 = 0.0;
1511     sum13 = 0.0;
1512     sum14 = 0.0;
1513     sum15 = 0.0;
1514     sum16 = 0.0;
1515     for (j=0; j<n; j++) {
1516       sum1  += v[jrow]*x[16*idx[jrow]];
1517       sum2  += v[jrow]*x[16*idx[jrow]+1];
1518       sum3  += v[jrow]*x[16*idx[jrow]+2];
1519       sum4  += v[jrow]*x[16*idx[jrow]+3];
1520       sum5  += v[jrow]*x[16*idx[jrow]+4];
1521       sum6  += v[jrow]*x[16*idx[jrow]+5];
1522       sum7  += v[jrow]*x[16*idx[jrow]+6];
1523       sum8  += v[jrow]*x[16*idx[jrow]+7];
1524       sum9  += v[jrow]*x[16*idx[jrow]+8];
1525       sum10 += v[jrow]*x[16*idx[jrow]+9];
1526       sum11 += v[jrow]*x[16*idx[jrow]+10];
1527       sum12 += v[jrow]*x[16*idx[jrow]+11];
1528       sum13 += v[jrow]*x[16*idx[jrow]+12];
1529       sum14 += v[jrow]*x[16*idx[jrow]+13];
1530       sum15 += v[jrow]*x[16*idx[jrow]+14];
1531       sum16 += v[jrow]*x[16*idx[jrow]+15];
1532       jrow++;
1533      }
1534     y[16*i]    += sum1;
1535     y[16*i+1]  += sum2;
1536     y[16*i+2]  += sum3;
1537     y[16*i+3]  += sum4;
1538     y[16*i+4]  += sum5;
1539     y[16*i+5]  += sum6;
1540     y[16*i+6]  += sum7;
1541     y[16*i+7]  += sum8;
1542     y[16*i+8]  += sum9;
1543     y[16*i+9]  += sum10;
1544     y[16*i+10] += sum11;
1545     y[16*i+11] += sum12;
1546     y[16*i+12] += sum13;
1547     y[16*i+13] += sum14;
1548     y[16*i+14] += sum15;
1549     y[16*i+15] += sum16;
1550   }
1551 
1552   PetscLogFlops(32*a->nz);
1553   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1554   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1555   PetscFunctionReturn(0);
1556 }
1557 
1558 #undef __FUNCT__
1559 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16"
1560 int MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
1561 {
1562   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1563   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1564   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1565   PetscScalar   alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1566   int           ierr,m = b->AIJ->m,n,i,*idx;
1567 
1568   PetscFunctionBegin;
1569   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1570   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1571   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1572   for (i=0; i<m; i++) {
1573     idx    = a->j + a->i[i] ;
1574     v      = a->a + a->i[i] ;
1575     n      = a->i[i+1] - a->i[i];
1576     alpha1 = x[16*i];
1577     alpha2 = x[16*i+1];
1578     alpha3 = x[16*i+2];
1579     alpha4 = x[16*i+3];
1580     alpha5 = x[16*i+4];
1581     alpha6 = x[16*i+5];
1582     alpha7 = x[16*i+6];
1583     alpha8 = x[16*i+7];
1584     alpha9  = x[16*i+8];
1585     alpha10 = x[16*i+9];
1586     alpha11 = x[16*i+10];
1587     alpha12 = x[16*i+11];
1588     alpha13 = x[16*i+12];
1589     alpha14 = x[16*i+13];
1590     alpha15 = x[16*i+14];
1591     alpha16 = x[16*i+15];
1592     while (n-->0) {
1593       y[16*(*idx)]   += alpha1*(*v);
1594       y[16*(*idx)+1] += alpha2*(*v);
1595       y[16*(*idx)+2] += alpha3*(*v);
1596       y[16*(*idx)+3] += alpha4*(*v);
1597       y[16*(*idx)+4] += alpha5*(*v);
1598       y[16*(*idx)+5] += alpha6*(*v);
1599       y[16*(*idx)+6] += alpha7*(*v);
1600       y[16*(*idx)+7] += alpha8*(*v);
1601       y[16*(*idx)+8]  += alpha9*(*v);
1602       y[16*(*idx)+9]  += alpha10*(*v);
1603       y[16*(*idx)+10] += alpha11*(*v);
1604       y[16*(*idx)+11] += alpha12*(*v);
1605       y[16*(*idx)+12] += alpha13*(*v);
1606       y[16*(*idx)+13] += alpha14*(*v);
1607       y[16*(*idx)+14] += alpha15*(*v);
1608       y[16*(*idx)+15] += alpha16*(*v);
1609       idx++; v++;
1610     }
1611   }
1612   PetscLogFlops(32*a->nz);
1613   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1614   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1615   PetscFunctionReturn(0);
1616 }
1617 
1618 /*===================================================================================*/
1619 #undef __FUNCT__
1620 #define __FUNCT__ "MatMult_MPIMAIJ_dof"
1621 int MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
1622 {
1623   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1624   int         ierr;
1625   PetscFunctionBegin;
1626 
1627   /* start the scatter */
1628   ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
1629   ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr);
1630   ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
1631   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr);
1632   PetscFunctionReturn(0);
1633 }
1634 
1635 #undef __FUNCT__
1636 #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof"
1637 int MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
1638 {
1639   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1640   int         ierr;
1641   PetscFunctionBegin;
1642   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
1643   ierr = VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
1644   ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr);
1645   ierr = VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
1646   PetscFunctionReturn(0);
1647 }
1648 
1649 #undef __FUNCT__
1650 #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof"
1651 int MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
1652 {
1653   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1654   int         ierr;
1655   PetscFunctionBegin;
1656 
1657   /* start the scatter */
1658   ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
1659   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
1660   ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
1661   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,zz);CHKERRQ(ierr);
1662   PetscFunctionReturn(0);
1663 }
1664 
1665 #undef __FUNCT__
1666 #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof"
1667 int MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
1668 {
1669   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1670   int         ierr;
1671   PetscFunctionBegin;
1672   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
1673   ierr = VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
1674   ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
1675   ierr = VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
1676   PetscFunctionReturn(0);
1677 }
1678 
1679 /* ---------------------------------------------------------------------------------- */
1680 /*MC
1681   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
1682   operations for multicomponent problems.  It interpolates each component the same
1683   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
1684   and MATMPIAIJ for distributed matrices.
1685 
1686   Operations provided:
1687 . MatMult
1688 . MatMultTranspose
1689 . MatMultAdd
1690 . MatMultTransposeAdd
1691 
1692   Level: advanced
1693 
1694 M*/
1695 #undef __FUNCT__
1696 #define __FUNCT__ "MatCreateMAIJ"
1697 int MatCreateMAIJ(Mat A,int dof,Mat *maij)
1698 {
1699   int         ierr,size,n;
1700   Mat_MPIMAIJ *b;
1701   Mat         B;
1702 
1703   PetscFunctionBegin;
1704   ierr   = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
1705 
1706   if (dof == 1) {
1707     *maij = A;
1708   } else {
1709     ierr = MatCreate(A->comm,dof*A->m,dof*A->n,dof*A->M,dof*A->N,&B);CHKERRQ(ierr);
1710     B->assembled    = PETSC_TRUE;
1711 
1712     ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1713     if (size == 1) {
1714       ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr);
1715       B->ops->destroy = MatDestroy_SeqMAIJ;
1716       b      = (Mat_MPIMAIJ*)B->data;
1717       b->dof = dof;
1718       b->AIJ = A;
1719       if (dof == 2) {
1720         B->ops->mult             = MatMult_SeqMAIJ_2;
1721         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
1722         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
1723         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
1724       } else if (dof == 3) {
1725         B->ops->mult             = MatMult_SeqMAIJ_3;
1726         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
1727         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
1728         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
1729       } else if (dof == 4) {
1730         B->ops->mult             = MatMult_SeqMAIJ_4;
1731         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
1732         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
1733         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
1734       } else if (dof == 5) {
1735         B->ops->mult             = MatMult_SeqMAIJ_5;
1736         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
1737         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
1738         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
1739       } else if (dof == 6) {
1740         B->ops->mult             = MatMult_SeqMAIJ_6;
1741         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
1742         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
1743         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
1744       } else if (dof == 8) {
1745         B->ops->mult             = MatMult_SeqMAIJ_8;
1746         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
1747         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
1748         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
1749       } else if (dof == 9) {
1750         B->ops->mult             = MatMult_SeqMAIJ_9;
1751         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
1752         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
1753         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
1754       } else if (dof == 16) {
1755         B->ops->mult             = MatMult_SeqMAIJ_16;
1756         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
1757         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
1758         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
1759       } else {
1760         SETERRQ1(1,"Cannot handle a dof of %d. Send request for code to petsc-maint@mcs.anl.gov\n",dof);
1761       }
1762     } else {
1763       Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
1764       IS         from,to;
1765       Vec        gvec;
1766       int        *garray,i;
1767 
1768       ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr);
1769       B->ops->destroy = MatDestroy_MPIMAIJ;
1770       b      = (Mat_MPIMAIJ*)B->data;
1771       b->dof = dof;
1772       b->A   = A;
1773       ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr);
1774       ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr);
1775 
1776       ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
1777       ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr);
1778 
1779       /* create two temporary Index sets for build scatter gather */
1780       ierr = PetscMalloc((n+1)*sizeof(int),&garray);CHKERRQ(ierr);
1781       for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
1782       ierr = ISCreateBlock(A->comm,dof,n,garray,&from);CHKERRQ(ierr);
1783       ierr = PetscFree(garray);CHKERRQ(ierr);
1784       ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);
1785 
1786       /* create temporary global vector to generate scatter context */
1787       ierr = VecCreateMPI(A->comm,dof*A->n,dof*A->N,&gvec);CHKERRQ(ierr);
1788 
1789       /* generate the scatter context */
1790       ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
1791 
1792       ierr = ISDestroy(from);CHKERRQ(ierr);
1793       ierr = ISDestroy(to);CHKERRQ(ierr);
1794       ierr = VecDestroy(gvec);CHKERRQ(ierr);
1795 
1796       B->ops->mult             = MatMult_MPIMAIJ_dof;
1797       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
1798       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
1799       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
1800     }
1801     *maij = B;
1802   }
1803   PetscFunctionReturn(0);
1804 }
1805 
1806 
1807 
1808 
1809 
1810 
1811 
1812 
1813 
1814 
1815 
1816 
1817