xref: /petsc/src/mat/impls/maij/maij.c (revision 4482741e5b2e2bbc854fb1f8dba65221386520f2)
1 /*$Id: maij.c,v 1.19 2001/08/07 03:03:00 balay Exp $*/
2 /*
3     Defines the basic matrix operations for the MAIJ  matrix storage format.
4   This format is used for restriction and interpolation operations for
5   multicomponent problems. It interpolates each component the same way
6   independently.
7 
8      We provide:
9          MatMult()
10          MatMultTranspose()
11          MatMultTransposeAdd()
12          MatMultAdd()
13           and
14          MatCreateMAIJ(Mat,dof,Mat*)
15 
16      This single directory handles both the sequential and parallel codes
17 */
18 
19 #include "src/mat/impls/maij/maij.h"
20 #include "src/vec/vecimpl.h"
21 
22 #undef __FUNCT__
23 #define __FUNCT__ "MatMAIJGetAIJ"
24 int MatMAIJGetAIJ(Mat A,Mat *B)
25 {
26   int         ierr;
27   PetscTruth  ismpimaij,isseqmaij;
28 
29   PetscFunctionBegin;
30   ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr);
31   ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr);
32   if (ismpimaij) {
33     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
34 
35     *B = b->A;
36   } else if (isseqmaij) {
37     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
38 
39     *B = b->AIJ;
40   } else {
41     *B = A;
42   }
43   PetscFunctionReturn(0);
44 }
45 
46 #undef __FUNCT__
47 #define __FUNCT__ "MatMAIJRedimension"
48 int MatMAIJRedimension(Mat A,int dof,Mat *B)
49 {
50   int ierr;
51   Mat Aij;
52 
53   PetscFunctionBegin;
54   ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr);
55   ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr);
56   PetscFunctionReturn(0);
57 }
58 
59 #undef __FUNCT__
60 #define __FUNCT__ "MatDestroy_SeqMAIJ"
61 int MatDestroy_SeqMAIJ(Mat A)
62 {
63   int         ierr;
64   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
65 
66   PetscFunctionBegin;
67   if (b->AIJ) {
68     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
69   }
70   ierr = PetscFree(b);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 
74 #undef __FUNCT__
75 #define __FUNCT__ "MatDestroy_MPIMAIJ"
76 int MatDestroy_MPIMAIJ(Mat A)
77 {
78   int         ierr;
79   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
80 
81   PetscFunctionBegin;
82   if (b->AIJ) {
83     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
84   }
85   if (b->OAIJ) {
86     ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr);
87   }
88   if (b->A) {
89     ierr = MatDestroy(b->A);CHKERRQ(ierr);
90   }
91   if (b->ctx) {
92     ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr);
93   }
94   if (b->w) {
95     ierr = VecDestroy(b->w);CHKERRQ(ierr);
96   }
97   ierr = PetscFree(b);CHKERRQ(ierr);
98   PetscFunctionReturn(0);
99 }
100 
101 /*MC
102   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
103   multicomponent problems, interpolating or restricting each component the same way independently.
104   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
105 
106   Operations provided:
107 . MatMult
108 . MatMultTranspose
109 . MatMultAdd
110 . MatMultTransposeAdd
111 
112   Level: advanced
113 
114 .seealso: MatCreateSeqDense
115 M*/
116 
117 EXTERN_C_BEGIN
118 #undef __FUNCT__
119 #define __FUNCT__ "MatCreate_MAIJ"
120 int MatCreate_MAIJ(Mat A)
121 {
122   int         ierr;
123   Mat_MPIMAIJ *b;
124 
125   PetscFunctionBegin;
126   ierr     = PetscNew(Mat_MPIMAIJ,&b);CHKERRQ(ierr);
127   A->data  = (void*)b;
128   ierr = PetscMemzero(b,sizeof(Mat_MPIMAIJ));CHKERRQ(ierr);
129   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
130   A->factor           = 0;
131   A->mapping          = 0;
132 
133   b->AIJ  = 0;
134   b->dof  = 0;
135   b->OAIJ = 0;
136   b->ctx  = 0;
137   b->w    = 0;
138   PetscFunctionReturn(0);
139 }
140 EXTERN_C_END
141 
142 /* --------------------------------------------------------------------------------------*/
143 #undef __FUNCT__
144 #define __FUNCT__ "MatMult_SeqMAIJ_2"
145 int MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
146 {
147   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
148   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
149   PetscScalar   *x,*y,*v,sum1, sum2;
150   int           ierr,m = b->AIJ->m,*idx,*ii;
151   int           n,i,jrow,j;
152 
153   PetscFunctionBegin;
154   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
155   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
156   idx  = a->j;
157   v    = a->a;
158   ii   = a->i;
159 
160   for (i=0; i<m; i++) {
161     jrow = ii[i];
162     n    = ii[i+1] - jrow;
163     sum1  = 0.0;
164     sum2  = 0.0;
165     for (j=0; j<n; j++) {
166       sum1 += v[jrow]*x[2*idx[jrow]];
167       sum2 += v[jrow]*x[2*idx[jrow]+1];
168       jrow++;
169      }
170     y[2*i]   = sum1;
171     y[2*i+1] = sum2;
172   }
173 
174   PetscLogFlops(4*a->nz - 2*m);
175   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
176   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
177   PetscFunctionReturn(0);
178 }
179 
180 #undef __FUNCT__
181 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2"
182 int MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
183 {
184   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
185   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
186   PetscScalar   *x,*y,*v,alpha1,alpha2,zero = 0.0;
187   int           ierr,m = b->AIJ->m,n,i,*idx;
188 
189   PetscFunctionBegin;
190   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
191   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
192   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
193 
194   for (i=0; i<m; i++) {
195     idx    = a->j + a->i[i] ;
196     v      = a->a + a->i[i] ;
197     n      = a->i[i+1] - a->i[i];
198     alpha1 = x[2*i];
199     alpha2 = x[2*i+1];
200     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
201   }
202   PetscLogFlops(4*a->nz - 2*b->AIJ->n);
203   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
204   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
205   PetscFunctionReturn(0);
206 }
207 
208 #undef __FUNCT__
209 #define __FUNCT__ "MatMultAdd_SeqMAIJ_2"
210 int MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
211 {
212   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
213   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
214   PetscScalar   *x,*y,*v,sum1, sum2;
215   int           ierr,m = b->AIJ->m,*idx,*ii;
216   int           n,i,jrow,j;
217 
218   PetscFunctionBegin;
219   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
220   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
221   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
222   idx  = a->j;
223   v    = a->a;
224   ii   = a->i;
225 
226   for (i=0; i<m; i++) {
227     jrow = ii[i];
228     n    = ii[i+1] - jrow;
229     sum1  = 0.0;
230     sum2  = 0.0;
231     for (j=0; j<n; j++) {
232       sum1 += v[jrow]*x[2*idx[jrow]];
233       sum2 += v[jrow]*x[2*idx[jrow]+1];
234       jrow++;
235      }
236     y[2*i]   += sum1;
237     y[2*i+1] += sum2;
238   }
239 
240   PetscLogFlops(4*a->nz - 2*m);
241   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
242   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
243   PetscFunctionReturn(0);
244 }
245 #undef __FUNCT__
246 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2"
247 int MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
248 {
249   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
250   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
251   PetscScalar   *x,*y,*v,alpha1,alpha2;
252   int           ierr,m = b->AIJ->m,n,i,*idx;
253 
254   PetscFunctionBegin;
255   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
256   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
257   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
258 
259   for (i=0; i<m; i++) {
260     idx   = a->j + a->i[i] ;
261     v     = a->a + a->i[i] ;
262     n     = a->i[i+1] - a->i[i];
263     alpha1 = x[2*i];
264     alpha2 = x[2*i+1];
265     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
266   }
267   PetscLogFlops(4*a->nz - 2*b->AIJ->n);
268   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
269   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
270   PetscFunctionReturn(0);
271 }
272 /* --------------------------------------------------------------------------------------*/
273 #undef __FUNCT__
274 #define __FUNCT__ "MatMult_SeqMAIJ_3"
275 int MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
276 {
277   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
278   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
279   PetscScalar   *x,*y,*v,sum1, sum2, sum3;
280   int           ierr,m = b->AIJ->m,*idx,*ii;
281   int           n,i,jrow,j;
282 
283   PetscFunctionBegin;
284   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
285   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
286   idx  = a->j;
287   v    = a->a;
288   ii   = a->i;
289 
290   for (i=0; i<m; i++) {
291     jrow = ii[i];
292     n    = ii[i+1] - jrow;
293     sum1  = 0.0;
294     sum2  = 0.0;
295     sum3  = 0.0;
296     for (j=0; j<n; j++) {
297       sum1 += v[jrow]*x[3*idx[jrow]];
298       sum2 += v[jrow]*x[3*idx[jrow]+1];
299       sum3 += v[jrow]*x[3*idx[jrow]+2];
300       jrow++;
301      }
302     y[3*i]   = sum1;
303     y[3*i+1] = sum2;
304     y[3*i+2] = sum3;
305   }
306 
307   PetscLogFlops(6*a->nz - 3*m);
308   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
309   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
310   PetscFunctionReturn(0);
311 }
312 
313 #undef __FUNCT__
314 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3"
315 int MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
316 {
317   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
318   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
319   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
320   int           ierr,m = b->AIJ->m,n,i,*idx;
321 
322   PetscFunctionBegin;
323   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
324   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
325   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
326 
327   for (i=0; i<m; i++) {
328     idx    = a->j + a->i[i];
329     v      = a->a + a->i[i];
330     n      = a->i[i+1] - a->i[i];
331     alpha1 = x[3*i];
332     alpha2 = x[3*i+1];
333     alpha3 = x[3*i+2];
334     while (n-->0) {
335       y[3*(*idx)]   += alpha1*(*v);
336       y[3*(*idx)+1] += alpha2*(*v);
337       y[3*(*idx)+2] += alpha3*(*v);
338       idx++; v++;
339     }
340   }
341   PetscLogFlops(6*a->nz - 3*b->AIJ->n);
342   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
343   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
344   PetscFunctionReturn(0);
345 }
346 
347 #undef __FUNCT__
348 #define __FUNCT__ "MatMultAdd_SeqMAIJ_3"
349 int MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
350 {
351   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
352   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
353   PetscScalar   *x,*y,*v,sum1, sum2, sum3;
354   int           ierr,m = b->AIJ->m,*idx,*ii;
355   int           n,i,jrow,j;
356 
357   PetscFunctionBegin;
358   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
359   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
360   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
361   idx  = a->j;
362   v    = a->a;
363   ii   = a->i;
364 
365   for (i=0; i<m; i++) {
366     jrow = ii[i];
367     n    = ii[i+1] - jrow;
368     sum1  = 0.0;
369     sum2  = 0.0;
370     sum3  = 0.0;
371     for (j=0; j<n; j++) {
372       sum1 += v[jrow]*x[3*idx[jrow]];
373       sum2 += v[jrow]*x[3*idx[jrow]+1];
374       sum3 += v[jrow]*x[3*idx[jrow]+2];
375       jrow++;
376      }
377     y[3*i]   += sum1;
378     y[3*i+1] += sum2;
379     y[3*i+2] += sum3;
380   }
381 
382   PetscLogFlops(6*a->nz);
383   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
384   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
385   PetscFunctionReturn(0);
386 }
387 #undef __FUNCT__
388 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3"
389 int MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
390 {
391   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
392   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
393   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3;
394   int           ierr,m = b->AIJ->m,n,i,*idx;
395 
396   PetscFunctionBegin;
397   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
398   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
399   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
400   for (i=0; i<m; i++) {
401     idx    = a->j + a->i[i] ;
402     v      = a->a + a->i[i] ;
403     n      = a->i[i+1] - a->i[i];
404     alpha1 = x[3*i];
405     alpha2 = x[3*i+1];
406     alpha3 = x[3*i+2];
407     while (n-->0) {
408       y[3*(*idx)]   += alpha1*(*v);
409       y[3*(*idx)+1] += alpha2*(*v);
410       y[3*(*idx)+2] += alpha3*(*v);
411       idx++; v++;
412     }
413   }
414   PetscLogFlops(6*a->nz);
415   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
416   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
417   PetscFunctionReturn(0);
418 }
419 
420 /* ------------------------------------------------------------------------------*/
421 #undef __FUNCT__
422 #define __FUNCT__ "MatMult_SeqMAIJ_4"
423 int MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
424 {
425   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
426   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
427   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4;
428   int           ierr,m = b->AIJ->m,*idx,*ii;
429   int           n,i,jrow,j;
430 
431   PetscFunctionBegin;
432   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
433   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
434   idx  = a->j;
435   v    = a->a;
436   ii   = a->i;
437 
438   for (i=0; i<m; i++) {
439     jrow = ii[i];
440     n    = ii[i+1] - jrow;
441     sum1  = 0.0;
442     sum2  = 0.0;
443     sum3  = 0.0;
444     sum4  = 0.0;
445     for (j=0; j<n; j++) {
446       sum1 += v[jrow]*x[4*idx[jrow]];
447       sum2 += v[jrow]*x[4*idx[jrow]+1];
448       sum3 += v[jrow]*x[4*idx[jrow]+2];
449       sum4 += v[jrow]*x[4*idx[jrow]+3];
450       jrow++;
451      }
452     y[4*i]   = sum1;
453     y[4*i+1] = sum2;
454     y[4*i+2] = sum3;
455     y[4*i+3] = sum4;
456   }
457 
458   PetscLogFlops(8*a->nz - 4*m);
459   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
460   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
461   PetscFunctionReturn(0);
462 }
463 
464 #undef __FUNCT__
465 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4"
466 int MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
467 {
468   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
469   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
470   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
471   int           ierr,m = b->AIJ->m,n,i,*idx;
472 
473   PetscFunctionBegin;
474   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
475   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
476   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
477   for (i=0; i<m; i++) {
478     idx    = a->j + a->i[i] ;
479     v      = a->a + a->i[i] ;
480     n      = a->i[i+1] - a->i[i];
481     alpha1 = x[4*i];
482     alpha2 = x[4*i+1];
483     alpha3 = x[4*i+2];
484     alpha4 = x[4*i+3];
485     while (n-->0) {
486       y[4*(*idx)]   += alpha1*(*v);
487       y[4*(*idx)+1] += alpha2*(*v);
488       y[4*(*idx)+2] += alpha3*(*v);
489       y[4*(*idx)+3] += alpha4*(*v);
490       idx++; v++;
491     }
492   }
493   PetscLogFlops(8*a->nz - 4*b->AIJ->n);
494   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
495   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
496   PetscFunctionReturn(0);
497 }
498 
499 #undef __FUNCT__
500 #define __FUNCT__ "MatMultAdd_SeqMAIJ_4"
501 int MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
502 {
503   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
504   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
505   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4;
506   int           ierr,m = b->AIJ->m,*idx,*ii;
507   int           n,i,jrow,j;
508 
509   PetscFunctionBegin;
510   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
511   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
512   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
513   idx  = a->j;
514   v    = a->a;
515   ii   = a->i;
516 
517   for (i=0; i<m; i++) {
518     jrow = ii[i];
519     n    = ii[i+1] - jrow;
520     sum1  = 0.0;
521     sum2  = 0.0;
522     sum3  = 0.0;
523     sum4  = 0.0;
524     for (j=0; j<n; j++) {
525       sum1 += v[jrow]*x[4*idx[jrow]];
526       sum2 += v[jrow]*x[4*idx[jrow]+1];
527       sum3 += v[jrow]*x[4*idx[jrow]+2];
528       sum4 += v[jrow]*x[4*idx[jrow]+3];
529       jrow++;
530      }
531     y[4*i]   += sum1;
532     y[4*i+1] += sum2;
533     y[4*i+2] += sum3;
534     y[4*i+3] += sum4;
535   }
536 
537   PetscLogFlops(8*a->nz - 4*m);
538   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
539   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
540   PetscFunctionReturn(0);
541 }
542 #undef __FUNCT__
543 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4"
544 int MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
545 {
546   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
547   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
548   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
549   int           ierr,m = b->AIJ->m,n,i,*idx;
550 
551   PetscFunctionBegin;
552   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
553   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
554   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
555 
556   for (i=0; i<m; i++) {
557     idx    = a->j + a->i[i] ;
558     v      = a->a + a->i[i] ;
559     n      = a->i[i+1] - a->i[i];
560     alpha1 = x[4*i];
561     alpha2 = x[4*i+1];
562     alpha3 = x[4*i+2];
563     alpha4 = x[4*i+3];
564     while (n-->0) {
565       y[4*(*idx)]   += alpha1*(*v);
566       y[4*(*idx)+1] += alpha2*(*v);
567       y[4*(*idx)+2] += alpha3*(*v);
568       y[4*(*idx)+3] += alpha4*(*v);
569       idx++; v++;
570     }
571   }
572   PetscLogFlops(8*a->nz - 4*b->AIJ->n);
573   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
574   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
575   PetscFunctionReturn(0);
576 }
577 /* ------------------------------------------------------------------------------*/
578 
579 #undef __FUNCT__
580 #define __FUNCT__ "MatMult_SeqMAIJ_5"
581 int MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
582 {
583   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
584   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
585   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
586   int           ierr,m = b->AIJ->m,*idx,*ii;
587   int           n,i,jrow,j;
588 
589   PetscFunctionBegin;
590   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
591   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
592   idx  = a->j;
593   v    = a->a;
594   ii   = a->i;
595 
596   for (i=0; i<m; i++) {
597     jrow = ii[i];
598     n    = ii[i+1] - jrow;
599     sum1  = 0.0;
600     sum2  = 0.0;
601     sum3  = 0.0;
602     sum4  = 0.0;
603     sum5  = 0.0;
604     for (j=0; j<n; j++) {
605       sum1 += v[jrow]*x[5*idx[jrow]];
606       sum2 += v[jrow]*x[5*idx[jrow]+1];
607       sum3 += v[jrow]*x[5*idx[jrow]+2];
608       sum4 += v[jrow]*x[5*idx[jrow]+3];
609       sum5 += v[jrow]*x[5*idx[jrow]+4];
610       jrow++;
611      }
612     y[5*i]   = sum1;
613     y[5*i+1] = sum2;
614     y[5*i+2] = sum3;
615     y[5*i+3] = sum4;
616     y[5*i+4] = sum5;
617   }
618 
619   PetscLogFlops(10*a->nz - 5*m);
620   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
621   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
622   PetscFunctionReturn(0);
623 }
624 
625 #undef __FUNCT__
626 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5"
627 int MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
628 {
629   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
630   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
631   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
632   int           ierr,m = b->AIJ->m,n,i,*idx;
633 
634   PetscFunctionBegin;
635   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
636   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
637   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
638 
639   for (i=0; i<m; i++) {
640     idx    = a->j + a->i[i] ;
641     v      = a->a + a->i[i] ;
642     n      = a->i[i+1] - a->i[i];
643     alpha1 = x[5*i];
644     alpha2 = x[5*i+1];
645     alpha3 = x[5*i+2];
646     alpha4 = x[5*i+3];
647     alpha5 = x[5*i+4];
648     while (n-->0) {
649       y[5*(*idx)]   += alpha1*(*v);
650       y[5*(*idx)+1] += alpha2*(*v);
651       y[5*(*idx)+2] += alpha3*(*v);
652       y[5*(*idx)+3] += alpha4*(*v);
653       y[5*(*idx)+4] += alpha5*(*v);
654       idx++; v++;
655     }
656   }
657   PetscLogFlops(10*a->nz - 5*b->AIJ->n);
658   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
659   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
660   PetscFunctionReturn(0);
661 }
662 
663 #undef __FUNCT__
664 #define __FUNCT__ "MatMultAdd_SeqMAIJ_5"
665 int MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
666 {
667   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
668   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
669   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
670   int           ierr,m = b->AIJ->m,*idx,*ii;
671   int           n,i,jrow,j;
672 
673   PetscFunctionBegin;
674   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
675   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
676   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
677   idx  = a->j;
678   v    = a->a;
679   ii   = a->i;
680 
681   for (i=0; i<m; i++) {
682     jrow = ii[i];
683     n    = ii[i+1] - jrow;
684     sum1  = 0.0;
685     sum2  = 0.0;
686     sum3  = 0.0;
687     sum4  = 0.0;
688     sum5  = 0.0;
689     for (j=0; j<n; j++) {
690       sum1 += v[jrow]*x[5*idx[jrow]];
691       sum2 += v[jrow]*x[5*idx[jrow]+1];
692       sum3 += v[jrow]*x[5*idx[jrow]+2];
693       sum4 += v[jrow]*x[5*idx[jrow]+3];
694       sum5 += v[jrow]*x[5*idx[jrow]+4];
695       jrow++;
696      }
697     y[5*i]   += sum1;
698     y[5*i+1] += sum2;
699     y[5*i+2] += sum3;
700     y[5*i+3] += sum4;
701     y[5*i+4] += sum5;
702   }
703 
704   PetscLogFlops(10*a->nz);
705   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
706   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
707   PetscFunctionReturn(0);
708 }
709 
710 #undef __FUNCT__
711 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5"
712 int MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
713 {
714   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
715   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
716   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
717   int           ierr,m = b->AIJ->m,n,i,*idx;
718 
719   PetscFunctionBegin;
720   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
721   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
722   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
723 
724   for (i=0; i<m; i++) {
725     idx    = a->j + a->i[i] ;
726     v      = a->a + a->i[i] ;
727     n      = a->i[i+1] - a->i[i];
728     alpha1 = x[5*i];
729     alpha2 = x[5*i+1];
730     alpha3 = x[5*i+2];
731     alpha4 = x[5*i+3];
732     alpha5 = x[5*i+4];
733     while (n-->0) {
734       y[5*(*idx)]   += alpha1*(*v);
735       y[5*(*idx)+1] += alpha2*(*v);
736       y[5*(*idx)+2] += alpha3*(*v);
737       y[5*(*idx)+3] += alpha4*(*v);
738       y[5*(*idx)+4] += alpha5*(*v);
739       idx++; v++;
740     }
741   }
742   PetscLogFlops(10*a->nz);
743   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
744   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
745   PetscFunctionReturn(0);
746 }
747 
748 /* ------------------------------------------------------------------------------*/
749 #undef __FUNCT__
750 #define __FUNCT__ "MatMult_SeqMAIJ_6"
751 int MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
752 {
753   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
754   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
755   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
756   int           ierr,m = b->AIJ->m,*idx,*ii;
757   int           n,i,jrow,j;
758 
759   PetscFunctionBegin;
760   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
761   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
762   idx  = a->j;
763   v    = a->a;
764   ii   = a->i;
765 
766   for (i=0; i<m; i++) {
767     jrow = ii[i];
768     n    = ii[i+1] - jrow;
769     sum1  = 0.0;
770     sum2  = 0.0;
771     sum3  = 0.0;
772     sum4  = 0.0;
773     sum5  = 0.0;
774     sum6  = 0.0;
775     for (j=0; j<n; j++) {
776       sum1 += v[jrow]*x[6*idx[jrow]];
777       sum2 += v[jrow]*x[6*idx[jrow]+1];
778       sum3 += v[jrow]*x[6*idx[jrow]+2];
779       sum4 += v[jrow]*x[6*idx[jrow]+3];
780       sum5 += v[jrow]*x[6*idx[jrow]+4];
781       sum6 += v[jrow]*x[6*idx[jrow]+5];
782       jrow++;
783      }
784     y[6*i]   = sum1;
785     y[6*i+1] = sum2;
786     y[6*i+2] = sum3;
787     y[6*i+3] = sum4;
788     y[6*i+4] = sum5;
789     y[6*i+5] = sum6;
790   }
791 
792   PetscLogFlops(12*a->nz - 6*m);
793   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
794   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
795   PetscFunctionReturn(0);
796 }
797 
798 #undef __FUNCT__
799 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6"
800 int MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
801 {
802   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
803   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
804   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0;
805   int           ierr,m = b->AIJ->m,n,i,*idx;
806 
807   PetscFunctionBegin;
808   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
809   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
810   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
811 
812   for (i=0; i<m; i++) {
813     idx    = a->j + a->i[i] ;
814     v      = a->a + a->i[i] ;
815     n      = a->i[i+1] - a->i[i];
816     alpha1 = x[6*i];
817     alpha2 = x[6*i+1];
818     alpha3 = x[6*i+2];
819     alpha4 = x[6*i+3];
820     alpha5 = x[6*i+4];
821     alpha6 = x[6*i+5];
822     while (n-->0) {
823       y[6*(*idx)]   += alpha1*(*v);
824       y[6*(*idx)+1] += alpha2*(*v);
825       y[6*(*idx)+2] += alpha3*(*v);
826       y[6*(*idx)+3] += alpha4*(*v);
827       y[6*(*idx)+4] += alpha5*(*v);
828       y[6*(*idx)+5] += alpha6*(*v);
829       idx++; v++;
830     }
831   }
832   PetscLogFlops(12*a->nz - 6*b->AIJ->n);
833   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
834   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
835   PetscFunctionReturn(0);
836 }
837 
838 #undef __FUNCT__
839 #define __FUNCT__ "MatMultAdd_SeqMAIJ_6"
840 int MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
841 {
842   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
843   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
844   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
845   int           ierr,m = b->AIJ->m,*idx,*ii;
846   int           n,i,jrow,j;
847 
848   PetscFunctionBegin;
849   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
850   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
851   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
852   idx  = a->j;
853   v    = a->a;
854   ii   = a->i;
855 
856   for (i=0; i<m; i++) {
857     jrow = ii[i];
858     n    = ii[i+1] - jrow;
859     sum1  = 0.0;
860     sum2  = 0.0;
861     sum3  = 0.0;
862     sum4  = 0.0;
863     sum5  = 0.0;
864     sum6  = 0.0;
865     for (j=0; j<n; j++) {
866       sum1 += v[jrow]*x[6*idx[jrow]];
867       sum2 += v[jrow]*x[6*idx[jrow]+1];
868       sum3 += v[jrow]*x[6*idx[jrow]+2];
869       sum4 += v[jrow]*x[6*idx[jrow]+3];
870       sum5 += v[jrow]*x[6*idx[jrow]+4];
871       sum6 += v[jrow]*x[6*idx[jrow]+5];
872       jrow++;
873      }
874     y[6*i]   += sum1;
875     y[6*i+1] += sum2;
876     y[6*i+2] += sum3;
877     y[6*i+3] += sum4;
878     y[6*i+4] += sum5;
879     y[6*i+5] += sum6;
880   }
881 
882   PetscLogFlops(12*a->nz);
883   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
884   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
885   PetscFunctionReturn(0);
886 }
887 
888 #undef __FUNCT__
889 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6"
890 int MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
891 {
892   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
893   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
894   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
895   int           ierr,m = b->AIJ->m,n,i,*idx;
896 
897   PetscFunctionBegin;
898   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
899   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
900   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
901 
902   for (i=0; i<m; i++) {
903     idx    = a->j + a->i[i] ;
904     v      = a->a + a->i[i] ;
905     n      = a->i[i+1] - a->i[i];
906     alpha1 = x[6*i];
907     alpha2 = x[6*i+1];
908     alpha3 = x[6*i+2];
909     alpha4 = x[6*i+3];
910     alpha5 = x[6*i+4];
911     alpha6 = x[6*i+5];
912     while (n-->0) {
913       y[6*(*idx)]   += alpha1*(*v);
914       y[6*(*idx)+1] += alpha2*(*v);
915       y[6*(*idx)+2] += alpha3*(*v);
916       y[6*(*idx)+3] += alpha4*(*v);
917       y[6*(*idx)+4] += alpha5*(*v);
918       y[6*(*idx)+5] += alpha6*(*v);
919       idx++; v++;
920     }
921   }
922   PetscLogFlops(12*a->nz);
923   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
924   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
925   PetscFunctionReturn(0);
926 }
927 
928 /* ------------------------------------------------------------------------------*/
929 #undef __FUNCT__
930 #define __FUNCT__ "MatMult_SeqMAIJ_8"
931 int MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
932 {
933   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
934   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
935   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
936   int           ierr,m = b->AIJ->m,*idx,*ii;
937   int           n,i,jrow,j;
938 
939   PetscFunctionBegin;
940   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
941   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
942   idx  = a->j;
943   v    = a->a;
944   ii   = a->i;
945 
946   for (i=0; i<m; i++) {
947     jrow = ii[i];
948     n    = ii[i+1] - jrow;
949     sum1  = 0.0;
950     sum2  = 0.0;
951     sum3  = 0.0;
952     sum4  = 0.0;
953     sum5  = 0.0;
954     sum6  = 0.0;
955     sum7  = 0.0;
956     sum8  = 0.0;
957     for (j=0; j<n; j++) {
958       sum1 += v[jrow]*x[8*idx[jrow]];
959       sum2 += v[jrow]*x[8*idx[jrow]+1];
960       sum3 += v[jrow]*x[8*idx[jrow]+2];
961       sum4 += v[jrow]*x[8*idx[jrow]+3];
962       sum5 += v[jrow]*x[8*idx[jrow]+4];
963       sum6 += v[jrow]*x[8*idx[jrow]+5];
964       sum7 += v[jrow]*x[8*idx[jrow]+6];
965       sum8 += v[jrow]*x[8*idx[jrow]+7];
966       jrow++;
967      }
968     y[8*i]   = sum1;
969     y[8*i+1] = sum2;
970     y[8*i+2] = sum3;
971     y[8*i+3] = sum4;
972     y[8*i+4] = sum5;
973     y[8*i+5] = sum6;
974     y[8*i+6] = sum7;
975     y[8*i+7] = sum8;
976   }
977 
978   PetscLogFlops(16*a->nz - 8*m);
979   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
980   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
981   PetscFunctionReturn(0);
982 }
983 
984 #undef __FUNCT__
985 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8"
986 int MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
987 {
988   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
989   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
990   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
991   int           ierr,m = b->AIJ->m,n,i,*idx;
992 
993   PetscFunctionBegin;
994   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
995   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
996   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
997 
998   for (i=0; i<m; i++) {
999     idx    = a->j + a->i[i] ;
1000     v      = a->a + a->i[i] ;
1001     n      = a->i[i+1] - a->i[i];
1002     alpha1 = x[8*i];
1003     alpha2 = x[8*i+1];
1004     alpha3 = x[8*i+2];
1005     alpha4 = x[8*i+3];
1006     alpha5 = x[8*i+4];
1007     alpha6 = x[8*i+5];
1008     alpha7 = x[8*i+6];
1009     alpha8 = x[8*i+7];
1010     while (n-->0) {
1011       y[8*(*idx)]   += alpha1*(*v);
1012       y[8*(*idx)+1] += alpha2*(*v);
1013       y[8*(*idx)+2] += alpha3*(*v);
1014       y[8*(*idx)+3] += alpha4*(*v);
1015       y[8*(*idx)+4] += alpha5*(*v);
1016       y[8*(*idx)+5] += alpha6*(*v);
1017       y[8*(*idx)+6] += alpha7*(*v);
1018       y[8*(*idx)+7] += alpha8*(*v);
1019       idx++; v++;
1020     }
1021   }
1022   PetscLogFlops(16*a->nz - 8*b->AIJ->n);
1023   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1024   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1025   PetscFunctionReturn(0);
1026 }
1027 
1028 #undef __FUNCT__
1029 #define __FUNCT__ "MatMultAdd_SeqMAIJ_8"
1030 int MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1031 {
1032   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1033   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1034   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1035   int           ierr,m = b->AIJ->m,*idx,*ii;
1036   int           n,i,jrow,j;
1037 
1038   PetscFunctionBegin;
1039   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1040   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1041   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1042   idx  = a->j;
1043   v    = a->a;
1044   ii   = a->i;
1045 
1046   for (i=0; i<m; i++) {
1047     jrow = ii[i];
1048     n    = ii[i+1] - jrow;
1049     sum1  = 0.0;
1050     sum2  = 0.0;
1051     sum3  = 0.0;
1052     sum4  = 0.0;
1053     sum5  = 0.0;
1054     sum6  = 0.0;
1055     sum7  = 0.0;
1056     sum8  = 0.0;
1057     for (j=0; j<n; j++) {
1058       sum1 += v[jrow]*x[8*idx[jrow]];
1059       sum2 += v[jrow]*x[8*idx[jrow]+1];
1060       sum3 += v[jrow]*x[8*idx[jrow]+2];
1061       sum4 += v[jrow]*x[8*idx[jrow]+3];
1062       sum5 += v[jrow]*x[8*idx[jrow]+4];
1063       sum6 += v[jrow]*x[8*idx[jrow]+5];
1064       sum7 += v[jrow]*x[8*idx[jrow]+6];
1065       sum8 += v[jrow]*x[8*idx[jrow]+7];
1066       jrow++;
1067      }
1068     y[8*i]   += sum1;
1069     y[8*i+1] += sum2;
1070     y[8*i+2] += sum3;
1071     y[8*i+3] += sum4;
1072     y[8*i+4] += sum5;
1073     y[8*i+5] += sum6;
1074     y[8*i+6] += sum7;
1075     y[8*i+7] += sum8;
1076   }
1077 
1078   PetscLogFlops(16*a->nz);
1079   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1080   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1081   PetscFunctionReturn(0);
1082 }
1083 
1084 #undef __FUNCT__
1085 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8"
1086 int MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1087 {
1088   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1089   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1090   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1091   int           ierr,m = b->AIJ->m,n,i,*idx;
1092 
1093   PetscFunctionBegin;
1094   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1095   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1096   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1097   for (i=0; i<m; i++) {
1098     idx    = a->j + a->i[i] ;
1099     v      = a->a + a->i[i] ;
1100     n      = a->i[i+1] - a->i[i];
1101     alpha1 = x[8*i];
1102     alpha2 = x[8*i+1];
1103     alpha3 = x[8*i+2];
1104     alpha4 = x[8*i+3];
1105     alpha5 = x[8*i+4];
1106     alpha6 = x[8*i+5];
1107     alpha7 = x[8*i+6];
1108     alpha8 = x[8*i+7];
1109     while (n-->0) {
1110       y[8*(*idx)]   += alpha1*(*v);
1111       y[8*(*idx)+1] += alpha2*(*v);
1112       y[8*(*idx)+2] += alpha3*(*v);
1113       y[8*(*idx)+3] += alpha4*(*v);
1114       y[8*(*idx)+4] += alpha5*(*v);
1115       y[8*(*idx)+5] += alpha6*(*v);
1116       y[8*(*idx)+6] += alpha7*(*v);
1117       y[8*(*idx)+7] += alpha8*(*v);
1118       idx++; v++;
1119     }
1120   }
1121   PetscLogFlops(16*a->nz);
1122   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1123   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1124   PetscFunctionReturn(0);
1125 }
1126 
1127 /* ------------------------------------------------------------------------------*/
1128 #undef __FUNCT__
1129 #define __FUNCT__ "MatMult_SeqMAIJ_9"
1130 int MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1131 {
1132   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1133   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1134   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1135   int           ierr,m = b->AIJ->m,*idx,*ii;
1136   int           n,i,jrow,j;
1137 
1138   PetscFunctionBegin;
1139   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1140   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1141   idx  = a->j;
1142   v    = a->a;
1143   ii   = a->i;
1144 
1145   for (i=0; i<m; i++) {
1146     jrow = ii[i];
1147     n    = ii[i+1] - jrow;
1148     sum1  = 0.0;
1149     sum2  = 0.0;
1150     sum3  = 0.0;
1151     sum4  = 0.0;
1152     sum5  = 0.0;
1153     sum6  = 0.0;
1154     sum7  = 0.0;
1155     sum8  = 0.0;
1156     sum9  = 0.0;
1157     for (j=0; j<n; j++) {
1158       sum1 += v[jrow]*x[9*idx[jrow]];
1159       sum2 += v[jrow]*x[9*idx[jrow]+1];
1160       sum3 += v[jrow]*x[9*idx[jrow]+2];
1161       sum4 += v[jrow]*x[9*idx[jrow]+3];
1162       sum5 += v[jrow]*x[9*idx[jrow]+4];
1163       sum6 += v[jrow]*x[9*idx[jrow]+5];
1164       sum7 += v[jrow]*x[9*idx[jrow]+6];
1165       sum8 += v[jrow]*x[9*idx[jrow]+7];
1166       sum9 += v[jrow]*x[9*idx[jrow]+8];
1167       jrow++;
1168      }
1169     y[9*i]   = sum1;
1170     y[9*i+1] = sum2;
1171     y[9*i+2] = sum3;
1172     y[9*i+3] = sum4;
1173     y[9*i+4] = sum5;
1174     y[9*i+5] = sum6;
1175     y[9*i+6] = sum7;
1176     y[9*i+7] = sum8;
1177     y[9*i+8] = sum9;
1178   }
1179 
1180   PetscLogFlops(18*a->nz - 9*m);
1181   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1182   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1183   PetscFunctionReturn(0);
1184 }
1185 
1186 #undef __FUNCT__
1187 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9"
1188 int MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1189 {
1190   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1191   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1192   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0;
1193   int           ierr,m = b->AIJ->m,n,i,*idx;
1194 
1195   PetscFunctionBegin;
1196   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
1197   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1198   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1199 
1200   for (i=0; i<m; i++) {
1201     idx    = a->j + a->i[i] ;
1202     v      = a->a + a->i[i] ;
1203     n      = a->i[i+1] - a->i[i];
1204     alpha1 = x[9*i];
1205     alpha2 = x[9*i+1];
1206     alpha3 = x[9*i+2];
1207     alpha4 = x[9*i+3];
1208     alpha5 = x[9*i+4];
1209     alpha6 = x[9*i+5];
1210     alpha7 = x[9*i+6];
1211     alpha8 = x[9*i+7];
1212     alpha9 = x[9*i+8];
1213     while (n-->0) {
1214       y[9*(*idx)]   += alpha1*(*v);
1215       y[9*(*idx)+1] += alpha2*(*v);
1216       y[9*(*idx)+2] += alpha3*(*v);
1217       y[9*(*idx)+3] += alpha4*(*v);
1218       y[9*(*idx)+4] += alpha5*(*v);
1219       y[9*(*idx)+5] += alpha6*(*v);
1220       y[9*(*idx)+6] += alpha7*(*v);
1221       y[9*(*idx)+7] += alpha8*(*v);
1222       y[9*(*idx)+8] += alpha9*(*v);
1223       idx++; v++;
1224     }
1225   }
1226   PetscLogFlops(18*a->nz - 9*b->AIJ->n);
1227   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1228   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1229   PetscFunctionReturn(0);
1230 }
1231 
1232 #undef __FUNCT__
1233 #define __FUNCT__ "MatMultAdd_SeqMAIJ_9"
1234 int MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1235 {
1236   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1237   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1238   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1239   int           ierr,m = b->AIJ->m,*idx,*ii;
1240   int           n,i,jrow,j;
1241 
1242   PetscFunctionBegin;
1243   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1244   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1245   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1246   idx  = a->j;
1247   v    = a->a;
1248   ii   = a->i;
1249 
1250   for (i=0; i<m; i++) {
1251     jrow = ii[i];
1252     n    = ii[i+1] - jrow;
1253     sum1  = 0.0;
1254     sum2  = 0.0;
1255     sum3  = 0.0;
1256     sum4  = 0.0;
1257     sum5  = 0.0;
1258     sum6  = 0.0;
1259     sum7  = 0.0;
1260     sum8  = 0.0;
1261     sum9  = 0.0;
1262     for (j=0; j<n; j++) {
1263       sum1 += v[jrow]*x[9*idx[jrow]];
1264       sum2 += v[jrow]*x[9*idx[jrow]+1];
1265       sum3 += v[jrow]*x[9*idx[jrow]+2];
1266       sum4 += v[jrow]*x[9*idx[jrow]+3];
1267       sum5 += v[jrow]*x[9*idx[jrow]+4];
1268       sum6 += v[jrow]*x[9*idx[jrow]+5];
1269       sum7 += v[jrow]*x[9*idx[jrow]+6];
1270       sum8 += v[jrow]*x[9*idx[jrow]+7];
1271       sum9 += v[jrow]*x[9*idx[jrow]+8];
1272       jrow++;
1273      }
1274     y[9*i]   += sum1;
1275     y[9*i+1] += sum2;
1276     y[9*i+2] += sum3;
1277     y[9*i+3] += sum4;
1278     y[9*i+4] += sum5;
1279     y[9*i+5] += sum6;
1280     y[9*i+6] += sum7;
1281     y[9*i+7] += sum8;
1282     y[9*i+8] += sum9;
1283   }
1284 
1285   PetscLogFlops(18*a->nz);
1286   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1287   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1288   PetscFunctionReturn(0);
1289 }
1290 
1291 #undef __FUNCT__
1292 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9"
1293 int MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1294 {
1295   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1296   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1297   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1298   int           ierr,m = b->AIJ->m,n,i,*idx;
1299 
1300   PetscFunctionBegin;
1301   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1302   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1303   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1304   for (i=0; i<m; i++) {
1305     idx    = a->j + a->i[i] ;
1306     v      = a->a + a->i[i] ;
1307     n      = a->i[i+1] - a->i[i];
1308     alpha1 = x[9*i];
1309     alpha2 = x[9*i+1];
1310     alpha3 = x[9*i+2];
1311     alpha4 = x[9*i+3];
1312     alpha5 = x[9*i+4];
1313     alpha6 = x[9*i+5];
1314     alpha7 = x[9*i+6];
1315     alpha8 = x[9*i+7];
1316     alpha9 = x[9*i+8];
1317     while (n-->0) {
1318       y[9*(*idx)]   += alpha1*(*v);
1319       y[9*(*idx)+1] += alpha2*(*v);
1320       y[9*(*idx)+2] += alpha3*(*v);
1321       y[9*(*idx)+3] += alpha4*(*v);
1322       y[9*(*idx)+4] += alpha5*(*v);
1323       y[9*(*idx)+5] += alpha6*(*v);
1324       y[9*(*idx)+6] += alpha7*(*v);
1325       y[9*(*idx)+7] += alpha8*(*v);
1326       y[9*(*idx)+8] += alpha9*(*v);
1327       idx++; v++;
1328     }
1329   }
1330   PetscLogFlops(18*a->nz);
1331   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1332   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1333   PetscFunctionReturn(0);
1334 }
1335 
1336 /*--------------------------------------------------------------------------------------------*/
1337 #undef __FUNCT__
1338 #define __FUNCT__ "MatMult_SeqMAIJ_16"
1339 int MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1340 {
1341   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1342   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1343   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1344   PetscScalar   sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1345   int           ierr,m = b->AIJ->m,*idx,*ii;
1346   int           n,i,jrow,j;
1347 
1348   PetscFunctionBegin;
1349   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1350   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1351   idx  = a->j;
1352   v    = a->a;
1353   ii   = a->i;
1354 
1355   for (i=0; i<m; i++) {
1356     jrow = ii[i];
1357     n    = ii[i+1] - jrow;
1358     sum1  = 0.0;
1359     sum2  = 0.0;
1360     sum3  = 0.0;
1361     sum4  = 0.0;
1362     sum5  = 0.0;
1363     sum6  = 0.0;
1364     sum7  = 0.0;
1365     sum8  = 0.0;
1366     sum9  = 0.0;
1367     sum10 = 0.0;
1368     sum11 = 0.0;
1369     sum12 = 0.0;
1370     sum13 = 0.0;
1371     sum14 = 0.0;
1372     sum15 = 0.0;
1373     sum16 = 0.0;
1374     for (j=0; j<n; j++) {
1375       sum1  += v[jrow]*x[16*idx[jrow]];
1376       sum2  += v[jrow]*x[16*idx[jrow]+1];
1377       sum3  += v[jrow]*x[16*idx[jrow]+2];
1378       sum4  += v[jrow]*x[16*idx[jrow]+3];
1379       sum5  += v[jrow]*x[16*idx[jrow]+4];
1380       sum6  += v[jrow]*x[16*idx[jrow]+5];
1381       sum7  += v[jrow]*x[16*idx[jrow]+6];
1382       sum8  += v[jrow]*x[16*idx[jrow]+7];
1383       sum9  += v[jrow]*x[16*idx[jrow]+8];
1384       sum10 += v[jrow]*x[16*idx[jrow]+9];
1385       sum11 += v[jrow]*x[16*idx[jrow]+10];
1386       sum12 += v[jrow]*x[16*idx[jrow]+11];
1387       sum13 += v[jrow]*x[16*idx[jrow]+12];
1388       sum14 += v[jrow]*x[16*idx[jrow]+13];
1389       sum15 += v[jrow]*x[16*idx[jrow]+14];
1390       sum16 += v[jrow]*x[16*idx[jrow]+15];
1391       jrow++;
1392      }
1393     y[16*i]    = sum1;
1394     y[16*i+1]  = sum2;
1395     y[16*i+2]  = sum3;
1396     y[16*i+3]  = sum4;
1397     y[16*i+4]  = sum5;
1398     y[16*i+5]  = sum6;
1399     y[16*i+6]  = sum7;
1400     y[16*i+7]  = sum8;
1401     y[16*i+8]  = sum9;
1402     y[16*i+9]  = sum10;
1403     y[16*i+10] = sum11;
1404     y[16*i+11] = sum12;
1405     y[16*i+12] = sum13;
1406     y[16*i+13] = sum14;
1407     y[16*i+14] = sum15;
1408     y[16*i+15] = sum16;
1409   }
1410 
1411   PetscLogFlops(32*a->nz - 16*m);
1412   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1413   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1414   PetscFunctionReturn(0);
1415 }
1416 
1417 #undef __FUNCT__
1418 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16"
1419 int MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1420 {
1421   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1422   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1423   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1424   PetscScalar   alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1425   int           ierr,m = b->AIJ->m,n,i,*idx;
1426 
1427   PetscFunctionBegin;
1428   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
1429   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1430   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1431 
1432   for (i=0; i<m; i++) {
1433     idx    = a->j + a->i[i] ;
1434     v      = a->a + a->i[i] ;
1435     n      = a->i[i+1] - a->i[i];
1436     alpha1  = x[16*i];
1437     alpha2  = x[16*i+1];
1438     alpha3  = x[16*i+2];
1439     alpha4  = x[16*i+3];
1440     alpha5  = x[16*i+4];
1441     alpha6  = x[16*i+5];
1442     alpha7  = x[16*i+6];
1443     alpha8  = x[16*i+7];
1444     alpha9  = x[16*i+8];
1445     alpha10 = x[16*i+9];
1446     alpha11 = x[16*i+10];
1447     alpha12 = x[16*i+11];
1448     alpha13 = x[16*i+12];
1449     alpha14 = x[16*i+13];
1450     alpha15 = x[16*i+14];
1451     alpha16 = x[16*i+15];
1452     while (n-->0) {
1453       y[16*(*idx)]    += alpha1*(*v);
1454       y[16*(*idx)+1]  += alpha2*(*v);
1455       y[16*(*idx)+2]  += alpha3*(*v);
1456       y[16*(*idx)+3]  += alpha4*(*v);
1457       y[16*(*idx)+4]  += alpha5*(*v);
1458       y[16*(*idx)+5]  += alpha6*(*v);
1459       y[16*(*idx)+6]  += alpha7*(*v);
1460       y[16*(*idx)+7]  += alpha8*(*v);
1461       y[16*(*idx)+8]  += alpha9*(*v);
1462       y[16*(*idx)+9]  += alpha10*(*v);
1463       y[16*(*idx)+10] += alpha11*(*v);
1464       y[16*(*idx)+11] += alpha12*(*v);
1465       y[16*(*idx)+12] += alpha13*(*v);
1466       y[16*(*idx)+13] += alpha14*(*v);
1467       y[16*(*idx)+14] += alpha15*(*v);
1468       y[16*(*idx)+15] += alpha16*(*v);
1469       idx++; v++;
1470     }
1471   }
1472   PetscLogFlops(32*a->nz - 16*b->AIJ->n);
1473   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1474   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1475   PetscFunctionReturn(0);
1476 }
1477 
1478 #undef __FUNCT__
1479 #define __FUNCT__ "MatMultAdd_SeqMAIJ_16"
1480 int MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
1481 {
1482   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1483   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1484   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1485   PetscScalar   sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1486   int           ierr,m = b->AIJ->m,*idx,*ii;
1487   int           n,i,jrow,j;
1488 
1489   PetscFunctionBegin;
1490   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1491   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1492   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1493   idx  = a->j;
1494   v    = a->a;
1495   ii   = a->i;
1496 
1497   for (i=0; i<m; i++) {
1498     jrow = ii[i];
1499     n    = ii[i+1] - jrow;
1500     sum1  = 0.0;
1501     sum2  = 0.0;
1502     sum3  = 0.0;
1503     sum4  = 0.0;
1504     sum5  = 0.0;
1505     sum6  = 0.0;
1506     sum7  = 0.0;
1507     sum8  = 0.0;
1508     sum9  = 0.0;
1509     sum10 = 0.0;
1510     sum11 = 0.0;
1511     sum12 = 0.0;
1512     sum13 = 0.0;
1513     sum14 = 0.0;
1514     sum15 = 0.0;
1515     sum16 = 0.0;
1516     for (j=0; j<n; j++) {
1517       sum1  += v[jrow]*x[16*idx[jrow]];
1518       sum2  += v[jrow]*x[16*idx[jrow]+1];
1519       sum3  += v[jrow]*x[16*idx[jrow]+2];
1520       sum4  += v[jrow]*x[16*idx[jrow]+3];
1521       sum5  += v[jrow]*x[16*idx[jrow]+4];
1522       sum6  += v[jrow]*x[16*idx[jrow]+5];
1523       sum7  += v[jrow]*x[16*idx[jrow]+6];
1524       sum8  += v[jrow]*x[16*idx[jrow]+7];
1525       sum9  += v[jrow]*x[16*idx[jrow]+8];
1526       sum10 += v[jrow]*x[16*idx[jrow]+9];
1527       sum11 += v[jrow]*x[16*idx[jrow]+10];
1528       sum12 += v[jrow]*x[16*idx[jrow]+11];
1529       sum13 += v[jrow]*x[16*idx[jrow]+12];
1530       sum14 += v[jrow]*x[16*idx[jrow]+13];
1531       sum15 += v[jrow]*x[16*idx[jrow]+14];
1532       sum16 += v[jrow]*x[16*idx[jrow]+15];
1533       jrow++;
1534      }
1535     y[16*i]    += sum1;
1536     y[16*i+1]  += sum2;
1537     y[16*i+2]  += sum3;
1538     y[16*i+3]  += sum4;
1539     y[16*i+4]  += sum5;
1540     y[16*i+5]  += sum6;
1541     y[16*i+6]  += sum7;
1542     y[16*i+7]  += sum8;
1543     y[16*i+8]  += sum9;
1544     y[16*i+9]  += sum10;
1545     y[16*i+10] += sum11;
1546     y[16*i+11] += sum12;
1547     y[16*i+12] += sum13;
1548     y[16*i+13] += sum14;
1549     y[16*i+14] += sum15;
1550     y[16*i+15] += sum16;
1551   }
1552 
1553   PetscLogFlops(32*a->nz);
1554   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1555   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1556   PetscFunctionReturn(0);
1557 }
1558 
1559 #undef __FUNCT__
1560 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16"
1561 int MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
1562 {
1563   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1564   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1565   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1566   PetscScalar   alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1567   int           ierr,m = b->AIJ->m,n,i,*idx;
1568 
1569   PetscFunctionBegin;
1570   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1571   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1572   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1573   for (i=0; i<m; i++) {
1574     idx    = a->j + a->i[i] ;
1575     v      = a->a + a->i[i] ;
1576     n      = a->i[i+1] - a->i[i];
1577     alpha1 = x[16*i];
1578     alpha2 = x[16*i+1];
1579     alpha3 = x[16*i+2];
1580     alpha4 = x[16*i+3];
1581     alpha5 = x[16*i+4];
1582     alpha6 = x[16*i+5];
1583     alpha7 = x[16*i+6];
1584     alpha8 = x[16*i+7];
1585     alpha9  = x[16*i+8];
1586     alpha10 = x[16*i+9];
1587     alpha11 = x[16*i+10];
1588     alpha12 = x[16*i+11];
1589     alpha13 = x[16*i+12];
1590     alpha14 = x[16*i+13];
1591     alpha15 = x[16*i+14];
1592     alpha16 = x[16*i+15];
1593     while (n-->0) {
1594       y[16*(*idx)]   += alpha1*(*v);
1595       y[16*(*idx)+1] += alpha2*(*v);
1596       y[16*(*idx)+2] += alpha3*(*v);
1597       y[16*(*idx)+3] += alpha4*(*v);
1598       y[16*(*idx)+4] += alpha5*(*v);
1599       y[16*(*idx)+5] += alpha6*(*v);
1600       y[16*(*idx)+6] += alpha7*(*v);
1601       y[16*(*idx)+7] += alpha8*(*v);
1602       y[16*(*idx)+8]  += alpha9*(*v);
1603       y[16*(*idx)+9]  += alpha10*(*v);
1604       y[16*(*idx)+10] += alpha11*(*v);
1605       y[16*(*idx)+11] += alpha12*(*v);
1606       y[16*(*idx)+12] += alpha13*(*v);
1607       y[16*(*idx)+13] += alpha14*(*v);
1608       y[16*(*idx)+14] += alpha15*(*v);
1609       y[16*(*idx)+15] += alpha16*(*v);
1610       idx++; v++;
1611     }
1612   }
1613   PetscLogFlops(32*a->nz);
1614   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1615   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1616   PetscFunctionReturn(0);
1617 }
1618 
1619 /*===================================================================================*/
1620 #undef __FUNCT__
1621 #define __FUNCT__ "MatMult_MPIMAIJ_dof"
1622 int MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
1623 {
1624   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1625   int         ierr;
1626   PetscFunctionBegin;
1627 
1628   /* start the scatter */
1629   ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
1630   ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr);
1631   ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
1632   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr);
1633   PetscFunctionReturn(0);
1634 }
1635 
1636 #undef __FUNCT__
1637 #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof"
1638 int MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
1639 {
1640   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1641   int         ierr;
1642   PetscFunctionBegin;
1643   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
1644   ierr = VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
1645   ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr);
1646   ierr = VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
1647   PetscFunctionReturn(0);
1648 }
1649 
1650 #undef __FUNCT__
1651 #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof"
1652 int MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
1653 {
1654   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1655   int         ierr;
1656   PetscFunctionBegin;
1657 
1658   /* start the scatter */
1659   ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
1660   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
1661   ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
1662   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,zz);CHKERRQ(ierr);
1663   PetscFunctionReturn(0);
1664 }
1665 
1666 #undef __FUNCT__
1667 #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof"
1668 int MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
1669 {
1670   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1671   int         ierr;
1672   PetscFunctionBegin;
1673   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
1674   ierr = VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
1675   ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
1676   ierr = VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
1677   PetscFunctionReturn(0);
1678 }
1679 
1680 /* ---------------------------------------------------------------------------------- */
1681 /*MC
1682   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
1683   operations for multicomponent problems.  It interpolates each component the same
1684   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
1685   and MATMPIAIJ for distributed matrices.
1686 
1687   Operations provided:
1688 . MatMult
1689 . MatMultTranspose
1690 . MatMultAdd
1691 . MatMultTransposeAdd
1692 
1693   Level: advanced
1694 
1695 M*/
1696 #undef __FUNCT__
1697 #define __FUNCT__ "MatCreateMAIJ"
1698 int MatCreateMAIJ(Mat A,int dof,Mat *maij)
1699 {
1700   int         ierr,size,n;
1701   Mat_MPIMAIJ *b;
1702   Mat         B;
1703 
1704   PetscFunctionBegin;
1705   ierr   = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
1706 
1707   if (dof == 1) {
1708     *maij = A;
1709   } else {
1710     ierr = MatCreate(A->comm,dof*A->m,dof*A->n,dof*A->M,dof*A->N,&B);CHKERRQ(ierr);
1711     B->assembled    = PETSC_TRUE;
1712 
1713     ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1714     if (size == 1) {
1715       ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr);
1716       B->ops->destroy = MatDestroy_SeqMAIJ;
1717       b      = (Mat_MPIMAIJ*)B->data;
1718       b->dof = dof;
1719       b->AIJ = A;
1720       if (dof == 2) {
1721         B->ops->mult             = MatMult_SeqMAIJ_2;
1722         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
1723         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
1724         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
1725       } else if (dof == 3) {
1726         B->ops->mult             = MatMult_SeqMAIJ_3;
1727         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
1728         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
1729         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
1730       } else if (dof == 4) {
1731         B->ops->mult             = MatMult_SeqMAIJ_4;
1732         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
1733         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
1734         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
1735       } else if (dof == 5) {
1736         B->ops->mult             = MatMult_SeqMAIJ_5;
1737         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
1738         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
1739         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
1740       } else if (dof == 6) {
1741         B->ops->mult             = MatMult_SeqMAIJ_6;
1742         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
1743         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
1744         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
1745       } else if (dof == 8) {
1746         B->ops->mult             = MatMult_SeqMAIJ_8;
1747         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
1748         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
1749         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
1750       } else if (dof == 9) {
1751         B->ops->mult             = MatMult_SeqMAIJ_9;
1752         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
1753         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
1754         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
1755       } else if (dof == 16) {
1756         B->ops->mult             = MatMult_SeqMAIJ_16;
1757         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
1758         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
1759         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
1760       } else {
1761         SETERRQ1(1,"Cannot handle a dof of %d. Send request for code to petsc-maint@mcs.anl.gov\n",dof);
1762       }
1763     } else {
1764       Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
1765       IS         from,to;
1766       Vec        gvec;
1767       int        *garray,i;
1768 
1769       ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr);
1770       B->ops->destroy = MatDestroy_MPIMAIJ;
1771       b      = (Mat_MPIMAIJ*)B->data;
1772       b->dof = dof;
1773       b->A   = A;
1774       ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr);
1775       ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr);
1776 
1777       ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
1778       ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr);
1779 
1780       /* create two temporary Index sets for build scatter gather */
1781       ierr = PetscMalloc((n+1)*sizeof(int),&garray);CHKERRQ(ierr);
1782       for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
1783       ierr = ISCreateBlock(A->comm,dof,n,garray,&from);CHKERRQ(ierr);
1784       ierr = PetscFree(garray);CHKERRQ(ierr);
1785       ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);
1786 
1787       /* create temporary global vector to generate scatter context */
1788       ierr = VecCreateMPI(A->comm,dof*A->n,dof*A->N,&gvec);CHKERRQ(ierr);
1789 
1790       /* generate the scatter context */
1791       ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
1792 
1793       ierr = ISDestroy(from);CHKERRQ(ierr);
1794       ierr = ISDestroy(to);CHKERRQ(ierr);
1795       ierr = VecDestroy(gvec);CHKERRQ(ierr);
1796 
1797       B->ops->mult             = MatMult_MPIMAIJ_dof;
1798       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
1799       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
1800       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
1801     }
1802     *maij = B;
1803   }
1804   PetscFunctionReturn(0);
1805 }
1806 
1807 
1808 
1809 
1810 
1811 
1812 
1813 
1814 
1815 
1816 
1817 
1818