xref: /petsc/src/mat/impls/maij/maij.c (revision 2f6bd0c65edf992804de41acd0dfae495aaca8dc)
173f4d377SMatthew Knepley /*$Id: maij.c,v 1.19 2001/08/07 03:03:00 balay Exp $*/
282b1193eSBarry Smith /*
3cd3bbe55SBarry Smith     Defines the basic matrix operations for the MAIJ  matrix storage format.
4cd3bbe55SBarry Smith   This format is used for restriction and interpolation operations for
5cd3bbe55SBarry Smith   multicomponent problems. It interpolates each component the same way
6cd3bbe55SBarry Smith   independently.
7cd3bbe55SBarry Smith 
8cd3bbe55SBarry Smith      We provide:
9cd3bbe55SBarry Smith          MatMult()
10cd3bbe55SBarry Smith          MatMultTranspose()
11cd3bbe55SBarry Smith          MatMultTransposeAdd()
12cd3bbe55SBarry Smith          MatMultAdd()
13cd3bbe55SBarry Smith           and
14cd3bbe55SBarry Smith          MatCreateMAIJ(Mat,dof,Mat*)
15f4a53059SBarry Smith 
16f4a53059SBarry Smith      This single directory handles both the sequential and parallel codes
1782b1193eSBarry Smith */
1882b1193eSBarry Smith 
19be911618SKris Buschelman #include "src/mat/impls/maij/maij.h"
202f7816d4SBarry Smith #include "src/vec/vecimpl.h"
2182b1193eSBarry Smith 
224a2ae208SSatish Balay #undef __FUNCT__
234a2ae208SSatish Balay #define __FUNCT__ "MatMAIJGetAIJ"
24b9b97703SBarry Smith int MatMAIJGetAIJ(Mat A,Mat *B)
25b9b97703SBarry Smith {
26b9b97703SBarry Smith   int         ierr;
27b9b97703SBarry Smith   PetscTruth  ismpimaij,isseqmaij;
28b9b97703SBarry Smith 
29b9b97703SBarry Smith   PetscFunctionBegin;
30b9b97703SBarry Smith   ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr);
31b9b97703SBarry Smith   ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr);
32b9b97703SBarry Smith   if (ismpimaij) {
33b9b97703SBarry Smith     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
34b9b97703SBarry Smith 
35b9b97703SBarry Smith     *B = b->A;
36b9b97703SBarry Smith   } else if (isseqmaij) {
37b9b97703SBarry Smith     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
38b9b97703SBarry Smith 
39b9b97703SBarry Smith     *B = b->AIJ;
40b9b97703SBarry Smith   } else {
41b9b97703SBarry Smith     *B = A;
42b9b97703SBarry Smith   }
43b9b97703SBarry Smith   PetscFunctionReturn(0);
44b9b97703SBarry Smith }
45b9b97703SBarry Smith 
464a2ae208SSatish Balay #undef __FUNCT__
474a2ae208SSatish Balay #define __FUNCT__ "MatMAIJRedimension"
48b9b97703SBarry Smith int MatMAIJRedimension(Mat A,int dof,Mat *B)
49b9b97703SBarry Smith {
50b9b97703SBarry Smith   int ierr;
51b9b97703SBarry Smith   Mat Aij;
52b9b97703SBarry Smith 
53b9b97703SBarry Smith   PetscFunctionBegin;
54b9b97703SBarry Smith   ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr);
55b9b97703SBarry Smith   ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr);
56b9b97703SBarry Smith   PetscFunctionReturn(0);
57b9b97703SBarry Smith }
58b9b97703SBarry Smith 
594a2ae208SSatish Balay #undef __FUNCT__
604a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_SeqMAIJ"
614cb9d9b8SBarry Smith int MatDestroy_SeqMAIJ(Mat A)
6282b1193eSBarry Smith {
6382b1193eSBarry Smith   int         ierr;
644cb9d9b8SBarry Smith   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
6582b1193eSBarry Smith 
6682b1193eSBarry Smith   PetscFunctionBegin;
67cd3bbe55SBarry Smith   if (b->AIJ) {
68cd3bbe55SBarry Smith     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
6982b1193eSBarry Smith   }
704cb9d9b8SBarry Smith   ierr = PetscFree(b);CHKERRQ(ierr);
714cb9d9b8SBarry Smith   PetscFunctionReturn(0);
724cb9d9b8SBarry Smith }
734cb9d9b8SBarry Smith 
744a2ae208SSatish Balay #undef __FUNCT__
754a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_MPIMAIJ"
764cb9d9b8SBarry Smith int MatDestroy_MPIMAIJ(Mat A)
774cb9d9b8SBarry Smith {
784cb9d9b8SBarry Smith   int         ierr;
794cb9d9b8SBarry Smith   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
804cb9d9b8SBarry Smith 
814cb9d9b8SBarry Smith   PetscFunctionBegin;
824cb9d9b8SBarry Smith   if (b->AIJ) {
834cb9d9b8SBarry Smith     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
844cb9d9b8SBarry Smith   }
85f4a53059SBarry Smith   if (b->OAIJ) {
86f4a53059SBarry Smith     ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr);
87f4a53059SBarry Smith   }
88b9b97703SBarry Smith   if (b->A) {
89b9b97703SBarry Smith     ierr = MatDestroy(b->A);CHKERRQ(ierr);
90b9b97703SBarry Smith   }
91f4a53059SBarry Smith   if (b->ctx) {
92f4a53059SBarry Smith     ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr);
93f4a53059SBarry Smith   }
94f4a53059SBarry Smith   if (b->w) {
95f4a53059SBarry Smith     ierr = VecDestroy(b->w);CHKERRQ(ierr);
96f4a53059SBarry Smith   }
97cd3bbe55SBarry Smith   ierr = PetscFree(b);CHKERRQ(ierr);
98b9b97703SBarry Smith   PetscFunctionReturn(0);
99b9b97703SBarry Smith }
100b9b97703SBarry Smith 
1010bad9183SKris Buschelman /*MC
102fafad747SKris Buschelman   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
1030bad9183SKris Buschelman   multicomponent problems, interpolating or restricting each component the same way independently.
1040bad9183SKris Buschelman   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
1050bad9183SKris Buschelman 
1060bad9183SKris Buschelman   Operations provided:
1070bad9183SKris Buschelman . MatMult
1080bad9183SKris Buschelman . MatMultTranspose
1090bad9183SKris Buschelman . MatMultAdd
1100bad9183SKris Buschelman . MatMultTransposeAdd
1110bad9183SKris Buschelman 
1120bad9183SKris Buschelman   Level: advanced
1130bad9183SKris Buschelman 
1140bad9183SKris Buschelman .seealso: MatCreateSeqDense
1150bad9183SKris Buschelman M*/
1160bad9183SKris Buschelman 
11782b1193eSBarry Smith EXTERN_C_BEGIN
1184a2ae208SSatish Balay #undef __FUNCT__
1194a2ae208SSatish Balay #define __FUNCT__ "MatCreate_MAIJ"
120f4a53059SBarry Smith int MatCreate_MAIJ(Mat A)
12182b1193eSBarry Smith {
122cd3bbe55SBarry Smith   int         ierr;
1234cb9d9b8SBarry Smith   Mat_MPIMAIJ *b;
12482b1193eSBarry Smith 
12582b1193eSBarry Smith   PetscFunctionBegin;
126b0a32e0cSBarry Smith   ierr     = PetscNew(Mat_MPIMAIJ,&b);CHKERRQ(ierr);
127b0a32e0cSBarry Smith   A->data  = (void*)b;
1284cb9d9b8SBarry Smith   ierr = PetscMemzero(b,sizeof(Mat_MPIMAIJ));CHKERRQ(ierr);
129cd3bbe55SBarry Smith   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
130cd3bbe55SBarry Smith   A->factor           = 0;
131cd3bbe55SBarry Smith   A->mapping          = 0;
132f4a53059SBarry Smith 
133cd3bbe55SBarry Smith   b->AIJ  = 0;
134cd3bbe55SBarry Smith   b->dof  = 0;
135f4a53059SBarry Smith   b->OAIJ = 0;
136f4a53059SBarry Smith   b->ctx  = 0;
137f4a53059SBarry Smith   b->w    = 0;
13882b1193eSBarry Smith   PetscFunctionReturn(0);
13982b1193eSBarry Smith }
14082b1193eSBarry Smith EXTERN_C_END
14182b1193eSBarry Smith 
142cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/
1434a2ae208SSatish Balay #undef __FUNCT__
144b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_2"
145cd3bbe55SBarry Smith int MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
14682b1193eSBarry Smith {
1474cb9d9b8SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
148bcc973b7SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
14987828ca2SBarry Smith   PetscScalar   *x,*y,*v,sum1, sum2;
150bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,*idx,*ii;
151bcc973b7SBarry Smith   int           n,i,jrow,j;
15282b1193eSBarry Smith 
153bcc973b7SBarry Smith   PetscFunctionBegin;
1542f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
1552f7816d4SBarry Smith   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
156bcc973b7SBarry Smith   idx  = a->j;
157bcc973b7SBarry Smith   v    = a->a;
158bcc973b7SBarry Smith   ii   = a->i;
159bcc973b7SBarry Smith 
160bcc973b7SBarry Smith   for (i=0; i<m; i++) {
161bcc973b7SBarry Smith     jrow = ii[i];
162bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
163bcc973b7SBarry Smith     sum1  = 0.0;
164bcc973b7SBarry Smith     sum2  = 0.0;
165bcc973b7SBarry Smith     for (j=0; j<n; j++) {
166bcc973b7SBarry Smith       sum1 += v[jrow]*x[2*idx[jrow]];
167bcc973b7SBarry Smith       sum2 += v[jrow]*x[2*idx[jrow]+1];
168bcc973b7SBarry Smith       jrow++;
169bcc973b7SBarry Smith      }
170bcc973b7SBarry Smith     y[2*i]   = sum1;
171bcc973b7SBarry Smith     y[2*i+1] = sum2;
172bcc973b7SBarry Smith   }
173bcc973b7SBarry Smith 
174b0a32e0cSBarry Smith   PetscLogFlops(4*a->nz - 2*m);
1752f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
1762f7816d4SBarry Smith   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
17782b1193eSBarry Smith   PetscFunctionReturn(0);
17882b1193eSBarry Smith }
179bcc973b7SBarry Smith 
1804a2ae208SSatish Balay #undef __FUNCT__
181b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2"
182cd3bbe55SBarry Smith int MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
18382b1193eSBarry Smith {
1844cb9d9b8SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
185bcc973b7SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
18687828ca2SBarry Smith   PetscScalar   *x,*y,*v,alpha1,alpha2,zero = 0.0;
187bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,n,i,*idx;
18882b1193eSBarry Smith 
189bcc973b7SBarry Smith   PetscFunctionBegin;
190bcc973b7SBarry Smith   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
1912f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
1922f7816d4SBarry Smith   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
193bfec09a0SHong Zhang 
194bcc973b7SBarry Smith   for (i=0; i<m; i++) {
195bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
196bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
197bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
198bcc973b7SBarry Smith     alpha1 = x[2*i];
199bcc973b7SBarry Smith     alpha2 = x[2*i+1];
200bcc973b7SBarry Smith     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
201bcc973b7SBarry Smith   }
202b0a32e0cSBarry Smith   PetscLogFlops(4*a->nz - 2*b->AIJ->n);
2032f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
2042f7816d4SBarry Smith   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
20582b1193eSBarry Smith   PetscFunctionReturn(0);
20682b1193eSBarry Smith }
207bcc973b7SBarry Smith 
2084a2ae208SSatish Balay #undef __FUNCT__
209b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_2"
210cd3bbe55SBarry Smith int MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
21182b1193eSBarry Smith {
2124cb9d9b8SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
213bcc973b7SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
21487828ca2SBarry Smith   PetscScalar   *x,*y,*v,sum1, sum2;
215bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,*idx,*ii;
216bcc973b7SBarry Smith   int           n,i,jrow,j;
21782b1193eSBarry Smith 
218bcc973b7SBarry Smith   PetscFunctionBegin;
219f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2202f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
2212f7816d4SBarry Smith   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
222bcc973b7SBarry Smith   idx  = a->j;
223bcc973b7SBarry Smith   v    = a->a;
224bcc973b7SBarry Smith   ii   = a->i;
225bcc973b7SBarry Smith 
226bcc973b7SBarry Smith   for (i=0; i<m; i++) {
227bcc973b7SBarry Smith     jrow = ii[i];
228bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
229bcc973b7SBarry Smith     sum1  = 0.0;
230bcc973b7SBarry Smith     sum2  = 0.0;
231bcc973b7SBarry Smith     for (j=0; j<n; j++) {
232bcc973b7SBarry Smith       sum1 += v[jrow]*x[2*idx[jrow]];
233bcc973b7SBarry Smith       sum2 += v[jrow]*x[2*idx[jrow]+1];
234bcc973b7SBarry Smith       jrow++;
235bcc973b7SBarry Smith      }
236bcc973b7SBarry Smith     y[2*i]   += sum1;
237bcc973b7SBarry Smith     y[2*i+1] += sum2;
238bcc973b7SBarry Smith   }
239bcc973b7SBarry Smith 
240b0a32e0cSBarry Smith   PetscLogFlops(4*a->nz - 2*m);
2412f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
2422f7816d4SBarry Smith   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
243bcc973b7SBarry Smith   PetscFunctionReturn(0);
24482b1193eSBarry Smith }
2454a2ae208SSatish Balay #undef __FUNCT__
246b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2"
247cd3bbe55SBarry Smith int MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
24882b1193eSBarry Smith {
2494cb9d9b8SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
250bcc973b7SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
25187828ca2SBarry Smith   PetscScalar   *x,*y,*v,alpha1,alpha2;
252bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,n,i,*idx;
25382b1193eSBarry Smith 
254bcc973b7SBarry Smith   PetscFunctionBegin;
255f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2562f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
2572f7816d4SBarry Smith   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
258bfec09a0SHong Zhang 
259bcc973b7SBarry Smith   for (i=0; i<m; i++) {
260bfec09a0SHong Zhang     idx   = a->j + a->i[i] ;
261bfec09a0SHong Zhang     v     = a->a + a->i[i] ;
262bcc973b7SBarry Smith     n     = a->i[i+1] - a->i[i];
263bcc973b7SBarry Smith     alpha1 = x[2*i];
264bcc973b7SBarry Smith     alpha2 = x[2*i+1];
265bcc973b7SBarry Smith     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
266bcc973b7SBarry Smith   }
267b0a32e0cSBarry Smith   PetscLogFlops(4*a->nz - 2*b->AIJ->n);
2682f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
2692f7816d4SBarry Smith   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
270bcc973b7SBarry Smith   PetscFunctionReturn(0);
27182b1193eSBarry Smith }
272cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/
2734a2ae208SSatish Balay #undef __FUNCT__
274b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_3"
275bcc973b7SBarry Smith int MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
276bcc973b7SBarry Smith {
2774cb9d9b8SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
278bcc973b7SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
27987828ca2SBarry Smith   PetscScalar   *x,*y,*v,sum1, sum2, sum3;
280bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,*idx,*ii;
281bcc973b7SBarry Smith   int           n,i,jrow,j;
28282b1193eSBarry Smith 
283bcc973b7SBarry Smith   PetscFunctionBegin;
2842f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
2852f7816d4SBarry Smith   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
286bcc973b7SBarry Smith   idx  = a->j;
287bcc973b7SBarry Smith   v    = a->a;
288bcc973b7SBarry Smith   ii   = a->i;
289bcc973b7SBarry Smith 
290bcc973b7SBarry Smith   for (i=0; i<m; i++) {
291bcc973b7SBarry Smith     jrow = ii[i];
292bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
293bcc973b7SBarry Smith     sum1  = 0.0;
294bcc973b7SBarry Smith     sum2  = 0.0;
295bcc973b7SBarry Smith     sum3  = 0.0;
296bcc973b7SBarry Smith     for (j=0; j<n; j++) {
297bcc973b7SBarry Smith       sum1 += v[jrow]*x[3*idx[jrow]];
298bcc973b7SBarry Smith       sum2 += v[jrow]*x[3*idx[jrow]+1];
299bcc973b7SBarry Smith       sum3 += v[jrow]*x[3*idx[jrow]+2];
300bcc973b7SBarry Smith       jrow++;
301bcc973b7SBarry Smith      }
302bcc973b7SBarry Smith     y[3*i]   = sum1;
303bcc973b7SBarry Smith     y[3*i+1] = sum2;
304bcc973b7SBarry Smith     y[3*i+2] = sum3;
305bcc973b7SBarry Smith   }
306bcc973b7SBarry Smith 
307b0a32e0cSBarry Smith   PetscLogFlops(6*a->nz - 3*m);
3082f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
3092f7816d4SBarry Smith   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
310bcc973b7SBarry Smith   PetscFunctionReturn(0);
311bcc973b7SBarry Smith }
312bcc973b7SBarry Smith 
3134a2ae208SSatish Balay #undef __FUNCT__
314b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3"
315bcc973b7SBarry Smith int MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
316bcc973b7SBarry Smith {
3174cb9d9b8SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
318bcc973b7SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
31987828ca2SBarry Smith   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
320bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,n,i,*idx;
321bcc973b7SBarry Smith 
322bcc973b7SBarry Smith   PetscFunctionBegin;
323bcc973b7SBarry Smith   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
3242f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
3252f7816d4SBarry Smith   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
326bfec09a0SHong Zhang 
327bcc973b7SBarry Smith   for (i=0; i<m; i++) {
328bfec09a0SHong Zhang     idx    = a->j + a->i[i];
329bfec09a0SHong Zhang     v      = a->a + a->i[i];
330bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
331bcc973b7SBarry Smith     alpha1 = x[3*i];
332bcc973b7SBarry Smith     alpha2 = x[3*i+1];
333bcc973b7SBarry Smith     alpha3 = x[3*i+2];
334bcc973b7SBarry Smith     while (n-->0) {
335bcc973b7SBarry Smith       y[3*(*idx)]   += alpha1*(*v);
336bcc973b7SBarry Smith       y[3*(*idx)+1] += alpha2*(*v);
337bcc973b7SBarry Smith       y[3*(*idx)+2] += alpha3*(*v);
338bcc973b7SBarry Smith       idx++; v++;
339bcc973b7SBarry Smith     }
340bcc973b7SBarry Smith   }
341b0a32e0cSBarry Smith   PetscLogFlops(6*a->nz - 3*b->AIJ->n);
3422f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
3432f7816d4SBarry Smith   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
344bcc973b7SBarry Smith   PetscFunctionReturn(0);
345bcc973b7SBarry Smith }
346bcc973b7SBarry Smith 
3474a2ae208SSatish Balay #undef __FUNCT__
348b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_3"
349bcc973b7SBarry Smith int MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
350bcc973b7SBarry Smith {
3514cb9d9b8SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
352bcc973b7SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
35387828ca2SBarry Smith   PetscScalar   *x,*y,*v,sum1, sum2, sum3;
354bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,*idx,*ii;
355bcc973b7SBarry Smith   int           n,i,jrow,j;
356bcc973b7SBarry Smith 
357bcc973b7SBarry Smith   PetscFunctionBegin;
358f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
3592f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
3602f7816d4SBarry Smith   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
361bcc973b7SBarry Smith   idx  = a->j;
362bcc973b7SBarry Smith   v    = a->a;
363bcc973b7SBarry Smith   ii   = a->i;
364bcc973b7SBarry Smith 
365bcc973b7SBarry Smith   for (i=0; i<m; i++) {
366bcc973b7SBarry Smith     jrow = ii[i];
367bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
368bcc973b7SBarry Smith     sum1  = 0.0;
369bcc973b7SBarry Smith     sum2  = 0.0;
370bcc973b7SBarry Smith     sum3  = 0.0;
371bcc973b7SBarry Smith     for (j=0; j<n; j++) {
372bcc973b7SBarry Smith       sum1 += v[jrow]*x[3*idx[jrow]];
373bcc973b7SBarry Smith       sum2 += v[jrow]*x[3*idx[jrow]+1];
374bcc973b7SBarry Smith       sum3 += v[jrow]*x[3*idx[jrow]+2];
375bcc973b7SBarry Smith       jrow++;
376bcc973b7SBarry Smith      }
377bcc973b7SBarry Smith     y[3*i]   += sum1;
378bcc973b7SBarry Smith     y[3*i+1] += sum2;
379bcc973b7SBarry Smith     y[3*i+2] += sum3;
380bcc973b7SBarry Smith   }
381bcc973b7SBarry Smith 
382b0a32e0cSBarry Smith   PetscLogFlops(6*a->nz);
3832f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
3842f7816d4SBarry Smith   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
385bcc973b7SBarry Smith   PetscFunctionReturn(0);
386bcc973b7SBarry Smith }
3874a2ae208SSatish Balay #undef __FUNCT__
388b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3"
389bcc973b7SBarry Smith int MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
390bcc973b7SBarry Smith {
3914cb9d9b8SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
392bcc973b7SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
39387828ca2SBarry Smith   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3;
394bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,n,i,*idx;
395bcc973b7SBarry Smith 
396bcc973b7SBarry Smith   PetscFunctionBegin;
397f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
3982f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
3992f7816d4SBarry Smith   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
400bcc973b7SBarry Smith   for (i=0; i<m; i++) {
401bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
402bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
403bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
404bcc973b7SBarry Smith     alpha1 = x[3*i];
405bcc973b7SBarry Smith     alpha2 = x[3*i+1];
406bcc973b7SBarry Smith     alpha3 = x[3*i+2];
407bcc973b7SBarry Smith     while (n-->0) {
408bcc973b7SBarry Smith       y[3*(*idx)]   += alpha1*(*v);
409bcc973b7SBarry Smith       y[3*(*idx)+1] += alpha2*(*v);
410bcc973b7SBarry Smith       y[3*(*idx)+2] += alpha3*(*v);
411bcc973b7SBarry Smith       idx++; v++;
412bcc973b7SBarry Smith     }
413bcc973b7SBarry Smith   }
414b0a32e0cSBarry Smith   PetscLogFlops(6*a->nz);
4152f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
4162f7816d4SBarry Smith   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
417bcc973b7SBarry Smith   PetscFunctionReturn(0);
418bcc973b7SBarry Smith }
419bcc973b7SBarry Smith 
420bcc973b7SBarry Smith /* ------------------------------------------------------------------------------*/
4214a2ae208SSatish Balay #undef __FUNCT__
422b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_4"
423bcc973b7SBarry Smith int MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
424bcc973b7SBarry Smith {
4254cb9d9b8SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
426bcc973b7SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
42787828ca2SBarry Smith   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4;
428bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,*idx,*ii;
429bcc973b7SBarry Smith   int           n,i,jrow,j;
430bcc973b7SBarry Smith 
431bcc973b7SBarry Smith   PetscFunctionBegin;
4322f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
4332f7816d4SBarry Smith   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
434bcc973b7SBarry Smith   idx  = a->j;
435bcc973b7SBarry Smith   v    = a->a;
436bcc973b7SBarry Smith   ii   = a->i;
437bcc973b7SBarry Smith 
438bcc973b7SBarry Smith   for (i=0; i<m; i++) {
439bcc973b7SBarry Smith     jrow = ii[i];
440bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
441bcc973b7SBarry Smith     sum1  = 0.0;
442bcc973b7SBarry Smith     sum2  = 0.0;
443bcc973b7SBarry Smith     sum3  = 0.0;
444bcc973b7SBarry Smith     sum4  = 0.0;
445bcc973b7SBarry Smith     for (j=0; j<n; j++) {
446bcc973b7SBarry Smith       sum1 += v[jrow]*x[4*idx[jrow]];
447bcc973b7SBarry Smith       sum2 += v[jrow]*x[4*idx[jrow]+1];
448bcc973b7SBarry Smith       sum3 += v[jrow]*x[4*idx[jrow]+2];
449bcc973b7SBarry Smith       sum4 += v[jrow]*x[4*idx[jrow]+3];
450bcc973b7SBarry Smith       jrow++;
451bcc973b7SBarry Smith      }
452bcc973b7SBarry Smith     y[4*i]   = sum1;
453bcc973b7SBarry Smith     y[4*i+1] = sum2;
454bcc973b7SBarry Smith     y[4*i+2] = sum3;
455bcc973b7SBarry Smith     y[4*i+3] = sum4;
456bcc973b7SBarry Smith   }
457bcc973b7SBarry Smith 
458b0a32e0cSBarry Smith   PetscLogFlops(8*a->nz - 4*m);
4592f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
4602f7816d4SBarry Smith   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
461bcc973b7SBarry Smith   PetscFunctionReturn(0);
462bcc973b7SBarry Smith }
463bcc973b7SBarry Smith 
4644a2ae208SSatish Balay #undef __FUNCT__
465b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4"
466bcc973b7SBarry Smith int MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
467bcc973b7SBarry Smith {
4684cb9d9b8SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
469bcc973b7SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
47087828ca2SBarry Smith   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
471bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,n,i,*idx;
472bcc973b7SBarry Smith 
473bcc973b7SBarry Smith   PetscFunctionBegin;
474bcc973b7SBarry Smith   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
4752f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
4762f7816d4SBarry Smith   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
477bcc973b7SBarry Smith   for (i=0; i<m; i++) {
478bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
479bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
480bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
481bcc973b7SBarry Smith     alpha1 = x[4*i];
482bcc973b7SBarry Smith     alpha2 = x[4*i+1];
483bcc973b7SBarry Smith     alpha3 = x[4*i+2];
484bcc973b7SBarry Smith     alpha4 = x[4*i+3];
485bcc973b7SBarry Smith     while (n-->0) {
486bcc973b7SBarry Smith       y[4*(*idx)]   += alpha1*(*v);
487bcc973b7SBarry Smith       y[4*(*idx)+1] += alpha2*(*v);
488bcc973b7SBarry Smith       y[4*(*idx)+2] += alpha3*(*v);
489bcc973b7SBarry Smith       y[4*(*idx)+3] += alpha4*(*v);
490bcc973b7SBarry Smith       idx++; v++;
491bcc973b7SBarry Smith     }
492bcc973b7SBarry Smith   }
493b0a32e0cSBarry Smith   PetscLogFlops(8*a->nz - 4*b->AIJ->n);
4942f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
4952f7816d4SBarry Smith   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
496bcc973b7SBarry Smith   PetscFunctionReturn(0);
497bcc973b7SBarry Smith }
498bcc973b7SBarry Smith 
4994a2ae208SSatish Balay #undef __FUNCT__
500b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_4"
501bcc973b7SBarry Smith int MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
502bcc973b7SBarry Smith {
5034cb9d9b8SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
504f9fae5adSBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
50587828ca2SBarry Smith   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4;
506bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,*idx,*ii;
507f9fae5adSBarry Smith   int           n,i,jrow,j;
508f9fae5adSBarry Smith 
509f9fae5adSBarry Smith   PetscFunctionBegin;
510f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
5112f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
5122f7816d4SBarry Smith   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
513f9fae5adSBarry Smith   idx  = a->j;
514f9fae5adSBarry Smith   v    = a->a;
515f9fae5adSBarry Smith   ii   = a->i;
516f9fae5adSBarry Smith 
517f9fae5adSBarry Smith   for (i=0; i<m; i++) {
518f9fae5adSBarry Smith     jrow = ii[i];
519f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
520f9fae5adSBarry Smith     sum1  = 0.0;
521f9fae5adSBarry Smith     sum2  = 0.0;
522f9fae5adSBarry Smith     sum3  = 0.0;
523f9fae5adSBarry Smith     sum4  = 0.0;
524f9fae5adSBarry Smith     for (j=0; j<n; j++) {
525f9fae5adSBarry Smith       sum1 += v[jrow]*x[4*idx[jrow]];
526f9fae5adSBarry Smith       sum2 += v[jrow]*x[4*idx[jrow]+1];
527f9fae5adSBarry Smith       sum3 += v[jrow]*x[4*idx[jrow]+2];
528f9fae5adSBarry Smith       sum4 += v[jrow]*x[4*idx[jrow]+3];
529f9fae5adSBarry Smith       jrow++;
530f9fae5adSBarry Smith      }
531f9fae5adSBarry Smith     y[4*i]   += sum1;
532f9fae5adSBarry Smith     y[4*i+1] += sum2;
533f9fae5adSBarry Smith     y[4*i+2] += sum3;
534f9fae5adSBarry Smith     y[4*i+3] += sum4;
535f9fae5adSBarry Smith   }
536f9fae5adSBarry Smith 
537b0a32e0cSBarry Smith   PetscLogFlops(8*a->nz - 4*m);
5382f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
5392f7816d4SBarry Smith   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
540f9fae5adSBarry Smith   PetscFunctionReturn(0);
541bcc973b7SBarry Smith }
5424a2ae208SSatish Balay #undef __FUNCT__
543b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4"
544bcc973b7SBarry Smith int MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
545bcc973b7SBarry Smith {
5464cb9d9b8SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
547f9fae5adSBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
54887828ca2SBarry Smith   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
549bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,n,i,*idx;
550f9fae5adSBarry Smith 
551f9fae5adSBarry Smith   PetscFunctionBegin;
552f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
5532f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
5542f7816d4SBarry Smith   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
555bfec09a0SHong Zhang 
556f9fae5adSBarry Smith   for (i=0; i<m; i++) {
557bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
558bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
559f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
560f9fae5adSBarry Smith     alpha1 = x[4*i];
561f9fae5adSBarry Smith     alpha2 = x[4*i+1];
562f9fae5adSBarry Smith     alpha3 = x[4*i+2];
563f9fae5adSBarry Smith     alpha4 = x[4*i+3];
564f9fae5adSBarry Smith     while (n-->0) {
565f9fae5adSBarry Smith       y[4*(*idx)]   += alpha1*(*v);
566f9fae5adSBarry Smith       y[4*(*idx)+1] += alpha2*(*v);
567f9fae5adSBarry Smith       y[4*(*idx)+2] += alpha3*(*v);
568f9fae5adSBarry Smith       y[4*(*idx)+3] += alpha4*(*v);
569f9fae5adSBarry Smith       idx++; v++;
570f9fae5adSBarry Smith     }
571f9fae5adSBarry Smith   }
572b0a32e0cSBarry Smith   PetscLogFlops(8*a->nz - 4*b->AIJ->n);
5732f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
5742f7816d4SBarry Smith   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
575f9fae5adSBarry Smith   PetscFunctionReturn(0);
576f9fae5adSBarry Smith }
577f9fae5adSBarry Smith /* ------------------------------------------------------------------------------*/
5786cd98798SBarry Smith 
5794a2ae208SSatish Balay #undef __FUNCT__
580b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_5"
581f9fae5adSBarry Smith int MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
582f9fae5adSBarry Smith {
5834cb9d9b8SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
584f9fae5adSBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
58587828ca2SBarry Smith   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
586bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,*idx,*ii;
587f9fae5adSBarry Smith   int           n,i,jrow,j;
588f9fae5adSBarry Smith 
589f9fae5adSBarry Smith   PetscFunctionBegin;
5902f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
5912f7816d4SBarry Smith   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
592f9fae5adSBarry Smith   idx  = a->j;
593f9fae5adSBarry Smith   v    = a->a;
594f9fae5adSBarry Smith   ii   = a->i;
595f9fae5adSBarry Smith 
596f9fae5adSBarry Smith   for (i=0; i<m; i++) {
597f9fae5adSBarry Smith     jrow = ii[i];
598f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
599f9fae5adSBarry Smith     sum1  = 0.0;
600f9fae5adSBarry Smith     sum2  = 0.0;
601f9fae5adSBarry Smith     sum3  = 0.0;
602f9fae5adSBarry Smith     sum4  = 0.0;
603f9fae5adSBarry Smith     sum5  = 0.0;
604f9fae5adSBarry Smith     for (j=0; j<n; j++) {
605f9fae5adSBarry Smith       sum1 += v[jrow]*x[5*idx[jrow]];
606f9fae5adSBarry Smith       sum2 += v[jrow]*x[5*idx[jrow]+1];
607f9fae5adSBarry Smith       sum3 += v[jrow]*x[5*idx[jrow]+2];
608f9fae5adSBarry Smith       sum4 += v[jrow]*x[5*idx[jrow]+3];
609f9fae5adSBarry Smith       sum5 += v[jrow]*x[5*idx[jrow]+4];
610f9fae5adSBarry Smith       jrow++;
611f9fae5adSBarry Smith      }
612f9fae5adSBarry Smith     y[5*i]   = sum1;
613f9fae5adSBarry Smith     y[5*i+1] = sum2;
614f9fae5adSBarry Smith     y[5*i+2] = sum3;
615f9fae5adSBarry Smith     y[5*i+3] = sum4;
616f9fae5adSBarry Smith     y[5*i+4] = sum5;
617f9fae5adSBarry Smith   }
618f9fae5adSBarry Smith 
619b0a32e0cSBarry Smith   PetscLogFlops(10*a->nz - 5*m);
6202f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
6212f7816d4SBarry Smith   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
622f9fae5adSBarry Smith   PetscFunctionReturn(0);
623f9fae5adSBarry Smith }
624f9fae5adSBarry Smith 
6254a2ae208SSatish Balay #undef __FUNCT__
626b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5"
627f9fae5adSBarry Smith int MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
628f9fae5adSBarry Smith {
6294cb9d9b8SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
630f9fae5adSBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
63187828ca2SBarry Smith   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
632bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,n,i,*idx;
633f9fae5adSBarry Smith 
634f9fae5adSBarry Smith   PetscFunctionBegin;
635f9fae5adSBarry Smith   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
6362f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
6372f7816d4SBarry Smith   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
638bfec09a0SHong Zhang 
639f9fae5adSBarry Smith   for (i=0; i<m; i++) {
640bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
641bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
642f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
643f9fae5adSBarry Smith     alpha1 = x[5*i];
644f9fae5adSBarry Smith     alpha2 = x[5*i+1];
645f9fae5adSBarry Smith     alpha3 = x[5*i+2];
646f9fae5adSBarry Smith     alpha4 = x[5*i+3];
647f9fae5adSBarry Smith     alpha5 = x[5*i+4];
648f9fae5adSBarry Smith     while (n-->0) {
649f9fae5adSBarry Smith       y[5*(*idx)]   += alpha1*(*v);
650f9fae5adSBarry Smith       y[5*(*idx)+1] += alpha2*(*v);
651f9fae5adSBarry Smith       y[5*(*idx)+2] += alpha3*(*v);
652f9fae5adSBarry Smith       y[5*(*idx)+3] += alpha4*(*v);
653f9fae5adSBarry Smith       y[5*(*idx)+4] += alpha5*(*v);
654f9fae5adSBarry Smith       idx++; v++;
655f9fae5adSBarry Smith     }
656f9fae5adSBarry Smith   }
657b0a32e0cSBarry Smith   PetscLogFlops(10*a->nz - 5*b->AIJ->n);
6582f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
6592f7816d4SBarry Smith   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
660f9fae5adSBarry Smith   PetscFunctionReturn(0);
661f9fae5adSBarry Smith }
662f9fae5adSBarry Smith 
6634a2ae208SSatish Balay #undef __FUNCT__
664b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_5"
665f9fae5adSBarry Smith int MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
666f9fae5adSBarry Smith {
6674cb9d9b8SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
668f9fae5adSBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
66987828ca2SBarry Smith   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
670bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,*idx,*ii;
671f9fae5adSBarry Smith   int           n,i,jrow,j;
672f9fae5adSBarry Smith 
673f9fae5adSBarry Smith   PetscFunctionBegin;
674f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
6752f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
6762f7816d4SBarry Smith   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
677f9fae5adSBarry Smith   idx  = a->j;
678f9fae5adSBarry Smith   v    = a->a;
679f9fae5adSBarry Smith   ii   = a->i;
680f9fae5adSBarry Smith 
681f9fae5adSBarry Smith   for (i=0; i<m; i++) {
682f9fae5adSBarry Smith     jrow = ii[i];
683f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
684f9fae5adSBarry Smith     sum1  = 0.0;
685f9fae5adSBarry Smith     sum2  = 0.0;
686f9fae5adSBarry Smith     sum3  = 0.0;
687f9fae5adSBarry Smith     sum4  = 0.0;
688f9fae5adSBarry Smith     sum5  = 0.0;
689f9fae5adSBarry Smith     for (j=0; j<n; j++) {
690f9fae5adSBarry Smith       sum1 += v[jrow]*x[5*idx[jrow]];
691f9fae5adSBarry Smith       sum2 += v[jrow]*x[5*idx[jrow]+1];
692f9fae5adSBarry Smith       sum3 += v[jrow]*x[5*idx[jrow]+2];
693f9fae5adSBarry Smith       sum4 += v[jrow]*x[5*idx[jrow]+3];
694f9fae5adSBarry Smith       sum5 += v[jrow]*x[5*idx[jrow]+4];
695f9fae5adSBarry Smith       jrow++;
696f9fae5adSBarry Smith      }
697f9fae5adSBarry Smith     y[5*i]   += sum1;
698f9fae5adSBarry Smith     y[5*i+1] += sum2;
699f9fae5adSBarry Smith     y[5*i+2] += sum3;
700f9fae5adSBarry Smith     y[5*i+3] += sum4;
701f9fae5adSBarry Smith     y[5*i+4] += sum5;
702f9fae5adSBarry Smith   }
703f9fae5adSBarry Smith 
704b0a32e0cSBarry Smith   PetscLogFlops(10*a->nz);
7052f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
7062f7816d4SBarry Smith   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
707f9fae5adSBarry Smith   PetscFunctionReturn(0);
708f9fae5adSBarry Smith }
709f9fae5adSBarry Smith 
7104a2ae208SSatish Balay #undef __FUNCT__
711b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5"
712f9fae5adSBarry Smith int MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
713f9fae5adSBarry Smith {
7144cb9d9b8SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
715f9fae5adSBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
71687828ca2SBarry Smith   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
717bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,n,i,*idx;
718f9fae5adSBarry Smith 
719f9fae5adSBarry Smith   PetscFunctionBegin;
720f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
7212f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
7222f7816d4SBarry Smith   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
723bfec09a0SHong Zhang 
724f9fae5adSBarry Smith   for (i=0; i<m; i++) {
725bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
726bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
727f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
728f9fae5adSBarry Smith     alpha1 = x[5*i];
729f9fae5adSBarry Smith     alpha2 = x[5*i+1];
730f9fae5adSBarry Smith     alpha3 = x[5*i+2];
731f9fae5adSBarry Smith     alpha4 = x[5*i+3];
732f9fae5adSBarry Smith     alpha5 = x[5*i+4];
733f9fae5adSBarry Smith     while (n-->0) {
734f9fae5adSBarry Smith       y[5*(*idx)]   += alpha1*(*v);
735f9fae5adSBarry Smith       y[5*(*idx)+1] += alpha2*(*v);
736f9fae5adSBarry Smith       y[5*(*idx)+2] += alpha3*(*v);
737f9fae5adSBarry Smith       y[5*(*idx)+3] += alpha4*(*v);
738f9fae5adSBarry Smith       y[5*(*idx)+4] += alpha5*(*v);
739f9fae5adSBarry Smith       idx++; v++;
740f9fae5adSBarry Smith     }
741f9fae5adSBarry Smith   }
742b0a32e0cSBarry Smith   PetscLogFlops(10*a->nz);
7432f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
7442f7816d4SBarry Smith   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
745f9fae5adSBarry Smith   PetscFunctionReturn(0);
746bcc973b7SBarry Smith }
747bcc973b7SBarry Smith 
7486cd98798SBarry Smith /* ------------------------------------------------------------------------------*/
7494a2ae208SSatish Balay #undef __FUNCT__
750b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_6"
7516cd98798SBarry Smith int MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
7526cd98798SBarry Smith {
7536cd98798SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
7546cd98798SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
75587828ca2SBarry Smith   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
756bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,*idx,*ii;
7576cd98798SBarry Smith   int           n,i,jrow,j;
7586cd98798SBarry Smith 
7596cd98798SBarry Smith   PetscFunctionBegin;
7602f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
7612f7816d4SBarry Smith   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
7626cd98798SBarry Smith   idx  = a->j;
7636cd98798SBarry Smith   v    = a->a;
7646cd98798SBarry Smith   ii   = a->i;
7656cd98798SBarry Smith 
7666cd98798SBarry Smith   for (i=0; i<m; i++) {
7676cd98798SBarry Smith     jrow = ii[i];
7686cd98798SBarry Smith     n    = ii[i+1] - jrow;
7696cd98798SBarry Smith     sum1  = 0.0;
7706cd98798SBarry Smith     sum2  = 0.0;
7716cd98798SBarry Smith     sum3  = 0.0;
7726cd98798SBarry Smith     sum4  = 0.0;
7736cd98798SBarry Smith     sum5  = 0.0;
7746cd98798SBarry Smith     sum6  = 0.0;
7756cd98798SBarry Smith     for (j=0; j<n; j++) {
7766cd98798SBarry Smith       sum1 += v[jrow]*x[6*idx[jrow]];
7776cd98798SBarry Smith       sum2 += v[jrow]*x[6*idx[jrow]+1];
7786cd98798SBarry Smith       sum3 += v[jrow]*x[6*idx[jrow]+2];
7796cd98798SBarry Smith       sum4 += v[jrow]*x[6*idx[jrow]+3];
7806cd98798SBarry Smith       sum5 += v[jrow]*x[6*idx[jrow]+4];
7816cd98798SBarry Smith       sum6 += v[jrow]*x[6*idx[jrow]+5];
7826cd98798SBarry Smith       jrow++;
7836cd98798SBarry Smith      }
7846cd98798SBarry Smith     y[6*i]   = sum1;
7856cd98798SBarry Smith     y[6*i+1] = sum2;
7866cd98798SBarry Smith     y[6*i+2] = sum3;
7876cd98798SBarry Smith     y[6*i+3] = sum4;
7886cd98798SBarry Smith     y[6*i+4] = sum5;
7896cd98798SBarry Smith     y[6*i+5] = sum6;
7906cd98798SBarry Smith   }
7916cd98798SBarry Smith 
792b0a32e0cSBarry Smith   PetscLogFlops(12*a->nz - 6*m);
7932f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
7942f7816d4SBarry Smith   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
7956cd98798SBarry Smith   PetscFunctionReturn(0);
7966cd98798SBarry Smith }
7976cd98798SBarry Smith 
7984a2ae208SSatish Balay #undef __FUNCT__
799b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6"
8006cd98798SBarry Smith int MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
8016cd98798SBarry Smith {
8026cd98798SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
8036cd98798SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
80487828ca2SBarry Smith   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0;
805bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,n,i,*idx;
8066cd98798SBarry Smith 
8076cd98798SBarry Smith   PetscFunctionBegin;
8086cd98798SBarry Smith   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
8092f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
8102f7816d4SBarry Smith   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
811bfec09a0SHong Zhang 
8126cd98798SBarry Smith   for (i=0; i<m; i++) {
813bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
814bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
8156cd98798SBarry Smith     n      = a->i[i+1] - a->i[i];
8166cd98798SBarry Smith     alpha1 = x[6*i];
8176cd98798SBarry Smith     alpha2 = x[6*i+1];
8186cd98798SBarry Smith     alpha3 = x[6*i+2];
8196cd98798SBarry Smith     alpha4 = x[6*i+3];
8206cd98798SBarry Smith     alpha5 = x[6*i+4];
8216cd98798SBarry Smith     alpha6 = x[6*i+5];
8226cd98798SBarry Smith     while (n-->0) {
8236cd98798SBarry Smith       y[6*(*idx)]   += alpha1*(*v);
8246cd98798SBarry Smith       y[6*(*idx)+1] += alpha2*(*v);
8256cd98798SBarry Smith       y[6*(*idx)+2] += alpha3*(*v);
8266cd98798SBarry Smith       y[6*(*idx)+3] += alpha4*(*v);
8276cd98798SBarry Smith       y[6*(*idx)+4] += alpha5*(*v);
8286cd98798SBarry Smith       y[6*(*idx)+5] += alpha6*(*v);
8296cd98798SBarry Smith       idx++; v++;
8306cd98798SBarry Smith     }
8316cd98798SBarry Smith   }
832b0a32e0cSBarry Smith   PetscLogFlops(12*a->nz - 6*b->AIJ->n);
8332f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
8342f7816d4SBarry Smith   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
8356cd98798SBarry Smith   PetscFunctionReturn(0);
8366cd98798SBarry Smith }
8376cd98798SBarry Smith 
8384a2ae208SSatish Balay #undef __FUNCT__
839b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_6"
8406cd98798SBarry Smith int MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
8416cd98798SBarry Smith {
8426cd98798SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
8436cd98798SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
84487828ca2SBarry Smith   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
845bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,*idx,*ii;
8466cd98798SBarry Smith   int           n,i,jrow,j;
8476cd98798SBarry Smith 
8486cd98798SBarry Smith   PetscFunctionBegin;
8496cd98798SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
8502f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
8512f7816d4SBarry Smith   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
8526cd98798SBarry Smith   idx  = a->j;
8536cd98798SBarry Smith   v    = a->a;
8546cd98798SBarry Smith   ii   = a->i;
8556cd98798SBarry Smith 
8566cd98798SBarry Smith   for (i=0; i<m; i++) {
8576cd98798SBarry Smith     jrow = ii[i];
8586cd98798SBarry Smith     n    = ii[i+1] - jrow;
8596cd98798SBarry Smith     sum1  = 0.0;
8606cd98798SBarry Smith     sum2  = 0.0;
8616cd98798SBarry Smith     sum3  = 0.0;
8626cd98798SBarry Smith     sum4  = 0.0;
8636cd98798SBarry Smith     sum5  = 0.0;
8646cd98798SBarry Smith     sum6  = 0.0;
8656cd98798SBarry Smith     for (j=0; j<n; j++) {
8666cd98798SBarry Smith       sum1 += v[jrow]*x[6*idx[jrow]];
8676cd98798SBarry Smith       sum2 += v[jrow]*x[6*idx[jrow]+1];
8686cd98798SBarry Smith       sum3 += v[jrow]*x[6*idx[jrow]+2];
8696cd98798SBarry Smith       sum4 += v[jrow]*x[6*idx[jrow]+3];
8706cd98798SBarry Smith       sum5 += v[jrow]*x[6*idx[jrow]+4];
8716cd98798SBarry Smith       sum6 += v[jrow]*x[6*idx[jrow]+5];
8726cd98798SBarry Smith       jrow++;
8736cd98798SBarry Smith      }
8746cd98798SBarry Smith     y[6*i]   += sum1;
8756cd98798SBarry Smith     y[6*i+1] += sum2;
8766cd98798SBarry Smith     y[6*i+2] += sum3;
8776cd98798SBarry Smith     y[6*i+3] += sum4;
8786cd98798SBarry Smith     y[6*i+4] += sum5;
8796cd98798SBarry Smith     y[6*i+5] += sum6;
8806cd98798SBarry Smith   }
8816cd98798SBarry Smith 
882b0a32e0cSBarry Smith   PetscLogFlops(12*a->nz);
8832f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
8842f7816d4SBarry Smith   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
8856cd98798SBarry Smith   PetscFunctionReturn(0);
8866cd98798SBarry Smith }
8876cd98798SBarry Smith 
8884a2ae208SSatish Balay #undef __FUNCT__
889b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6"
8906cd98798SBarry Smith int MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
8916cd98798SBarry Smith {
8926cd98798SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
8936cd98798SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
89487828ca2SBarry Smith   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
895bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,n,i,*idx;
8966cd98798SBarry Smith 
8976cd98798SBarry Smith   PetscFunctionBegin;
8986cd98798SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
8992f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
9002f7816d4SBarry Smith   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
901bfec09a0SHong Zhang 
9026cd98798SBarry Smith   for (i=0; i<m; i++) {
903bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
904bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
9056cd98798SBarry Smith     n      = a->i[i+1] - a->i[i];
9066cd98798SBarry Smith     alpha1 = x[6*i];
9076cd98798SBarry Smith     alpha2 = x[6*i+1];
9086cd98798SBarry Smith     alpha3 = x[6*i+2];
9096cd98798SBarry Smith     alpha4 = x[6*i+3];
9106cd98798SBarry Smith     alpha5 = x[6*i+4];
9116cd98798SBarry Smith     alpha6 = x[6*i+5];
9126cd98798SBarry Smith     while (n-->0) {
9136cd98798SBarry Smith       y[6*(*idx)]   += alpha1*(*v);
9146cd98798SBarry Smith       y[6*(*idx)+1] += alpha2*(*v);
9156cd98798SBarry Smith       y[6*(*idx)+2] += alpha3*(*v);
9166cd98798SBarry Smith       y[6*(*idx)+3] += alpha4*(*v);
9176cd98798SBarry Smith       y[6*(*idx)+4] += alpha5*(*v);
9186cd98798SBarry Smith       y[6*(*idx)+5] += alpha6*(*v);
9196cd98798SBarry Smith       idx++; v++;
9206cd98798SBarry Smith     }
9216cd98798SBarry Smith   }
922b0a32e0cSBarry Smith   PetscLogFlops(12*a->nz);
9232f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
9242f7816d4SBarry Smith   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
9256cd98798SBarry Smith   PetscFunctionReturn(0);
9266cd98798SBarry Smith }
9276cd98798SBarry Smith 
92866ed3db0SBarry Smith /* ------------------------------------------------------------------------------*/
92966ed3db0SBarry Smith #undef __FUNCT__
93066ed3db0SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_8"
93166ed3db0SBarry Smith int MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
93266ed3db0SBarry Smith {
93366ed3db0SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
93466ed3db0SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
93587828ca2SBarry Smith   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
936bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,*idx,*ii;
93766ed3db0SBarry Smith   int           n,i,jrow,j;
93866ed3db0SBarry Smith 
93966ed3db0SBarry Smith   PetscFunctionBegin;
9402f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
9412f7816d4SBarry Smith   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
94266ed3db0SBarry Smith   idx  = a->j;
94366ed3db0SBarry Smith   v    = a->a;
94466ed3db0SBarry Smith   ii   = a->i;
94566ed3db0SBarry Smith 
94666ed3db0SBarry Smith   for (i=0; i<m; i++) {
94766ed3db0SBarry Smith     jrow = ii[i];
94866ed3db0SBarry Smith     n    = ii[i+1] - jrow;
94966ed3db0SBarry Smith     sum1  = 0.0;
95066ed3db0SBarry Smith     sum2  = 0.0;
95166ed3db0SBarry Smith     sum3  = 0.0;
95266ed3db0SBarry Smith     sum4  = 0.0;
95366ed3db0SBarry Smith     sum5  = 0.0;
95466ed3db0SBarry Smith     sum6  = 0.0;
95566ed3db0SBarry Smith     sum7  = 0.0;
95666ed3db0SBarry Smith     sum8  = 0.0;
95766ed3db0SBarry Smith     for (j=0; j<n; j++) {
95866ed3db0SBarry Smith       sum1 += v[jrow]*x[8*idx[jrow]];
95966ed3db0SBarry Smith       sum2 += v[jrow]*x[8*idx[jrow]+1];
96066ed3db0SBarry Smith       sum3 += v[jrow]*x[8*idx[jrow]+2];
96166ed3db0SBarry Smith       sum4 += v[jrow]*x[8*idx[jrow]+3];
96266ed3db0SBarry Smith       sum5 += v[jrow]*x[8*idx[jrow]+4];
96366ed3db0SBarry Smith       sum6 += v[jrow]*x[8*idx[jrow]+5];
96466ed3db0SBarry Smith       sum7 += v[jrow]*x[8*idx[jrow]+6];
96566ed3db0SBarry Smith       sum8 += v[jrow]*x[8*idx[jrow]+7];
96666ed3db0SBarry Smith       jrow++;
96766ed3db0SBarry Smith      }
96866ed3db0SBarry Smith     y[8*i]   = sum1;
96966ed3db0SBarry Smith     y[8*i+1] = sum2;
97066ed3db0SBarry Smith     y[8*i+2] = sum3;
97166ed3db0SBarry Smith     y[8*i+3] = sum4;
97266ed3db0SBarry Smith     y[8*i+4] = sum5;
97366ed3db0SBarry Smith     y[8*i+5] = sum6;
97466ed3db0SBarry Smith     y[8*i+6] = sum7;
97566ed3db0SBarry Smith     y[8*i+7] = sum8;
97666ed3db0SBarry Smith   }
97766ed3db0SBarry Smith 
97866ed3db0SBarry Smith   PetscLogFlops(16*a->nz - 8*m);
9792f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
9802f7816d4SBarry Smith   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
98166ed3db0SBarry Smith   PetscFunctionReturn(0);
98266ed3db0SBarry Smith }
98366ed3db0SBarry Smith 
98466ed3db0SBarry Smith #undef __FUNCT__
98566ed3db0SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8"
98666ed3db0SBarry Smith int MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
98766ed3db0SBarry Smith {
98866ed3db0SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
98966ed3db0SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
99087828ca2SBarry Smith   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
991bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,n,i,*idx;
99266ed3db0SBarry Smith 
99366ed3db0SBarry Smith   PetscFunctionBegin;
99466ed3db0SBarry Smith   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
9952f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
9962f7816d4SBarry Smith   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
997bfec09a0SHong Zhang 
99866ed3db0SBarry Smith   for (i=0; i<m; i++) {
999bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
1000bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
100166ed3db0SBarry Smith     n      = a->i[i+1] - a->i[i];
100266ed3db0SBarry Smith     alpha1 = x[8*i];
100366ed3db0SBarry Smith     alpha2 = x[8*i+1];
100466ed3db0SBarry Smith     alpha3 = x[8*i+2];
100566ed3db0SBarry Smith     alpha4 = x[8*i+3];
100666ed3db0SBarry Smith     alpha5 = x[8*i+4];
100766ed3db0SBarry Smith     alpha6 = x[8*i+5];
100866ed3db0SBarry Smith     alpha7 = x[8*i+6];
100966ed3db0SBarry Smith     alpha8 = x[8*i+7];
101066ed3db0SBarry Smith     while (n-->0) {
101166ed3db0SBarry Smith       y[8*(*idx)]   += alpha1*(*v);
101266ed3db0SBarry Smith       y[8*(*idx)+1] += alpha2*(*v);
101366ed3db0SBarry Smith       y[8*(*idx)+2] += alpha3*(*v);
101466ed3db0SBarry Smith       y[8*(*idx)+3] += alpha4*(*v);
101566ed3db0SBarry Smith       y[8*(*idx)+4] += alpha5*(*v);
101666ed3db0SBarry Smith       y[8*(*idx)+5] += alpha6*(*v);
101766ed3db0SBarry Smith       y[8*(*idx)+6] += alpha7*(*v);
101866ed3db0SBarry Smith       y[8*(*idx)+7] += alpha8*(*v);
101966ed3db0SBarry Smith       idx++; v++;
102066ed3db0SBarry Smith     }
102166ed3db0SBarry Smith   }
102266ed3db0SBarry Smith   PetscLogFlops(16*a->nz - 8*b->AIJ->n);
10232f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
10242f7816d4SBarry Smith   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
102566ed3db0SBarry Smith   PetscFunctionReturn(0);
102666ed3db0SBarry Smith }
102766ed3db0SBarry Smith 
102866ed3db0SBarry Smith #undef __FUNCT__
102966ed3db0SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_8"
103066ed3db0SBarry Smith int MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
103166ed3db0SBarry Smith {
103266ed3db0SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
103366ed3db0SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
103487828ca2SBarry Smith   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1035bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,*idx,*ii;
103666ed3db0SBarry Smith   int           n,i,jrow,j;
103766ed3db0SBarry Smith 
103866ed3db0SBarry Smith   PetscFunctionBegin;
103966ed3db0SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
10402f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
10412f7816d4SBarry Smith   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
104266ed3db0SBarry Smith   idx  = a->j;
104366ed3db0SBarry Smith   v    = a->a;
104466ed3db0SBarry Smith   ii   = a->i;
104566ed3db0SBarry Smith 
104666ed3db0SBarry Smith   for (i=0; i<m; i++) {
104766ed3db0SBarry Smith     jrow = ii[i];
104866ed3db0SBarry Smith     n    = ii[i+1] - jrow;
104966ed3db0SBarry Smith     sum1  = 0.0;
105066ed3db0SBarry Smith     sum2  = 0.0;
105166ed3db0SBarry Smith     sum3  = 0.0;
105266ed3db0SBarry Smith     sum4  = 0.0;
105366ed3db0SBarry Smith     sum5  = 0.0;
105466ed3db0SBarry Smith     sum6  = 0.0;
105566ed3db0SBarry Smith     sum7  = 0.0;
105666ed3db0SBarry Smith     sum8  = 0.0;
105766ed3db0SBarry Smith     for (j=0; j<n; j++) {
105866ed3db0SBarry Smith       sum1 += v[jrow]*x[8*idx[jrow]];
105966ed3db0SBarry Smith       sum2 += v[jrow]*x[8*idx[jrow]+1];
106066ed3db0SBarry Smith       sum3 += v[jrow]*x[8*idx[jrow]+2];
106166ed3db0SBarry Smith       sum4 += v[jrow]*x[8*idx[jrow]+3];
106266ed3db0SBarry Smith       sum5 += v[jrow]*x[8*idx[jrow]+4];
106366ed3db0SBarry Smith       sum6 += v[jrow]*x[8*idx[jrow]+5];
106466ed3db0SBarry Smith       sum7 += v[jrow]*x[8*idx[jrow]+6];
106566ed3db0SBarry Smith       sum8 += v[jrow]*x[8*idx[jrow]+7];
106666ed3db0SBarry Smith       jrow++;
106766ed3db0SBarry Smith      }
106866ed3db0SBarry Smith     y[8*i]   += sum1;
106966ed3db0SBarry Smith     y[8*i+1] += sum2;
107066ed3db0SBarry Smith     y[8*i+2] += sum3;
107166ed3db0SBarry Smith     y[8*i+3] += sum4;
107266ed3db0SBarry Smith     y[8*i+4] += sum5;
107366ed3db0SBarry Smith     y[8*i+5] += sum6;
107466ed3db0SBarry Smith     y[8*i+6] += sum7;
107566ed3db0SBarry Smith     y[8*i+7] += sum8;
107666ed3db0SBarry Smith   }
107766ed3db0SBarry Smith 
107866ed3db0SBarry Smith   PetscLogFlops(16*a->nz);
10792f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
10802f7816d4SBarry Smith   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
108166ed3db0SBarry Smith   PetscFunctionReturn(0);
108266ed3db0SBarry Smith }
108366ed3db0SBarry Smith 
108466ed3db0SBarry Smith #undef __FUNCT__
108566ed3db0SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8"
108666ed3db0SBarry Smith int MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
108766ed3db0SBarry Smith {
108866ed3db0SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
108966ed3db0SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
109087828ca2SBarry Smith   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1091bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,n,i,*idx;
109266ed3db0SBarry Smith 
109366ed3db0SBarry Smith   PetscFunctionBegin;
109466ed3db0SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
10952f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
10962f7816d4SBarry Smith   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
109766ed3db0SBarry Smith   for (i=0; i<m; i++) {
1098bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
1099bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
110066ed3db0SBarry Smith     n      = a->i[i+1] - a->i[i];
110166ed3db0SBarry Smith     alpha1 = x[8*i];
110266ed3db0SBarry Smith     alpha2 = x[8*i+1];
110366ed3db0SBarry Smith     alpha3 = x[8*i+2];
110466ed3db0SBarry Smith     alpha4 = x[8*i+3];
110566ed3db0SBarry Smith     alpha5 = x[8*i+4];
110666ed3db0SBarry Smith     alpha6 = x[8*i+5];
110766ed3db0SBarry Smith     alpha7 = x[8*i+6];
110866ed3db0SBarry Smith     alpha8 = x[8*i+7];
110966ed3db0SBarry Smith     while (n-->0) {
111066ed3db0SBarry Smith       y[8*(*idx)]   += alpha1*(*v);
111166ed3db0SBarry Smith       y[8*(*idx)+1] += alpha2*(*v);
111266ed3db0SBarry Smith       y[8*(*idx)+2] += alpha3*(*v);
111366ed3db0SBarry Smith       y[8*(*idx)+3] += alpha4*(*v);
111466ed3db0SBarry Smith       y[8*(*idx)+4] += alpha5*(*v);
111566ed3db0SBarry Smith       y[8*(*idx)+5] += alpha6*(*v);
111666ed3db0SBarry Smith       y[8*(*idx)+6] += alpha7*(*v);
111766ed3db0SBarry Smith       y[8*(*idx)+7] += alpha8*(*v);
111866ed3db0SBarry Smith       idx++; v++;
111966ed3db0SBarry Smith     }
112066ed3db0SBarry Smith   }
112166ed3db0SBarry Smith   PetscLogFlops(16*a->nz);
11222f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
11232f7816d4SBarry Smith   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
11242f7816d4SBarry Smith   PetscFunctionReturn(0);
11252f7816d4SBarry Smith }
11262f7816d4SBarry Smith 
11272b692628SMatthew Knepley /* ------------------------------------------------------------------------------*/
11282b692628SMatthew Knepley #undef __FUNCT__
11292b692628SMatthew Knepley #define __FUNCT__ "MatMult_SeqMAIJ_9"
11302b692628SMatthew Knepley int MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
11312b692628SMatthew Knepley {
11322b692628SMatthew Knepley   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
11332b692628SMatthew Knepley   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
11342b692628SMatthew Knepley   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
11352b692628SMatthew Knepley   int           ierr,m = b->AIJ->m,*idx,*ii;
11362b692628SMatthew Knepley   int           n,i,jrow,j;
11372b692628SMatthew Knepley 
11382b692628SMatthew Knepley   PetscFunctionBegin;
11392b692628SMatthew Knepley   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
11402b692628SMatthew Knepley   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
11412b692628SMatthew Knepley   idx  = a->j;
11422b692628SMatthew Knepley   v    = a->a;
11432b692628SMatthew Knepley   ii   = a->i;
11442b692628SMatthew Knepley 
11452b692628SMatthew Knepley   for (i=0; i<m; i++) {
11462b692628SMatthew Knepley     jrow = ii[i];
11472b692628SMatthew Knepley     n    = ii[i+1] - jrow;
11482b692628SMatthew Knepley     sum1  = 0.0;
11492b692628SMatthew Knepley     sum2  = 0.0;
11502b692628SMatthew Knepley     sum3  = 0.0;
11512b692628SMatthew Knepley     sum4  = 0.0;
11522b692628SMatthew Knepley     sum5  = 0.0;
11532b692628SMatthew Knepley     sum6  = 0.0;
11542b692628SMatthew Knepley     sum7  = 0.0;
11552b692628SMatthew Knepley     sum8  = 0.0;
11562b692628SMatthew Knepley     sum9  = 0.0;
11572b692628SMatthew Knepley     for (j=0; j<n; j++) {
11582b692628SMatthew Knepley       sum1 += v[jrow]*x[9*idx[jrow]];
11592b692628SMatthew Knepley       sum2 += v[jrow]*x[9*idx[jrow]+1];
11602b692628SMatthew Knepley       sum3 += v[jrow]*x[9*idx[jrow]+2];
11612b692628SMatthew Knepley       sum4 += v[jrow]*x[9*idx[jrow]+3];
11622b692628SMatthew Knepley       sum5 += v[jrow]*x[9*idx[jrow]+4];
11632b692628SMatthew Knepley       sum6 += v[jrow]*x[9*idx[jrow]+5];
11642b692628SMatthew Knepley       sum7 += v[jrow]*x[9*idx[jrow]+6];
11652b692628SMatthew Knepley       sum8 += v[jrow]*x[9*idx[jrow]+7];
11662b692628SMatthew Knepley       sum9 += v[jrow]*x[9*idx[jrow]+8];
11672b692628SMatthew Knepley       jrow++;
11682b692628SMatthew Knepley      }
11692b692628SMatthew Knepley     y[9*i]   = sum1;
11702b692628SMatthew Knepley     y[9*i+1] = sum2;
11712b692628SMatthew Knepley     y[9*i+2] = sum3;
11722b692628SMatthew Knepley     y[9*i+3] = sum4;
11732b692628SMatthew Knepley     y[9*i+4] = sum5;
11742b692628SMatthew Knepley     y[9*i+5] = sum6;
11752b692628SMatthew Knepley     y[9*i+6] = sum7;
11762b692628SMatthew Knepley     y[9*i+7] = sum8;
11772b692628SMatthew Knepley     y[9*i+8] = sum9;
11782b692628SMatthew Knepley   }
11792b692628SMatthew Knepley 
11802b692628SMatthew Knepley   PetscLogFlops(18*a->nz - 9*m);
11812b692628SMatthew Knepley   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
11822b692628SMatthew Knepley   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
11832b692628SMatthew Knepley   PetscFunctionReturn(0);
11842b692628SMatthew Knepley }
11852b692628SMatthew Knepley 
11862b692628SMatthew Knepley #undef __FUNCT__
11872b692628SMatthew Knepley #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9"
11882b692628SMatthew Knepley int MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
11892b692628SMatthew Knepley {
11902b692628SMatthew Knepley   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
11912b692628SMatthew Knepley   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
11922b692628SMatthew Knepley   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0;
11932b692628SMatthew Knepley   int           ierr,m = b->AIJ->m,n,i,*idx;
11942b692628SMatthew Knepley 
11952b692628SMatthew Knepley   PetscFunctionBegin;
11962b692628SMatthew Knepley   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
11972b692628SMatthew Knepley   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
11982b692628SMatthew Knepley   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
11992b692628SMatthew Knepley 
12002b692628SMatthew Knepley   for (i=0; i<m; i++) {
12012b692628SMatthew Knepley     idx    = a->j + a->i[i] ;
12022b692628SMatthew Knepley     v      = a->a + a->i[i] ;
12032b692628SMatthew Knepley     n      = a->i[i+1] - a->i[i];
12042b692628SMatthew Knepley     alpha1 = x[9*i];
12052b692628SMatthew Knepley     alpha2 = x[9*i+1];
12062b692628SMatthew Knepley     alpha3 = x[9*i+2];
12072b692628SMatthew Knepley     alpha4 = x[9*i+3];
12082b692628SMatthew Knepley     alpha5 = x[9*i+4];
12092b692628SMatthew Knepley     alpha6 = x[9*i+5];
12102b692628SMatthew Knepley     alpha7 = x[9*i+6];
12112b692628SMatthew Knepley     alpha8 = x[9*i+7];
1212*2f6bd0c6SMatthew Knepley     alpha9 = x[9*i+8];
12132b692628SMatthew Knepley     while (n-->0) {
12142b692628SMatthew Knepley       y[9*(*idx)]   += alpha1*(*v);
12152b692628SMatthew Knepley       y[9*(*idx)+1] += alpha2*(*v);
12162b692628SMatthew Knepley       y[9*(*idx)+2] += alpha3*(*v);
12172b692628SMatthew Knepley       y[9*(*idx)+3] += alpha4*(*v);
12182b692628SMatthew Knepley       y[9*(*idx)+4] += alpha5*(*v);
12192b692628SMatthew Knepley       y[9*(*idx)+5] += alpha6*(*v);
12202b692628SMatthew Knepley       y[9*(*idx)+6] += alpha7*(*v);
12212b692628SMatthew Knepley       y[9*(*idx)+7] += alpha8*(*v);
12222b692628SMatthew Knepley       y[9*(*idx)+8] += alpha9*(*v);
12232b692628SMatthew Knepley       idx++; v++;
12242b692628SMatthew Knepley     }
12252b692628SMatthew Knepley   }
12262b692628SMatthew Knepley   PetscLogFlops(18*a->nz - 9*b->AIJ->n);
12272b692628SMatthew Knepley   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
12282b692628SMatthew Knepley   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
12292b692628SMatthew Knepley   PetscFunctionReturn(0);
12302b692628SMatthew Knepley }
12312b692628SMatthew Knepley 
12322b692628SMatthew Knepley #undef __FUNCT__
12332b692628SMatthew Knepley #define __FUNCT__ "MatMultAdd_SeqMAIJ_9"
12342b692628SMatthew Knepley int MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
12352b692628SMatthew Knepley {
12362b692628SMatthew Knepley   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
12372b692628SMatthew Knepley   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
12382b692628SMatthew Knepley   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
12392b692628SMatthew Knepley   int           ierr,m = b->AIJ->m,*idx,*ii;
12402b692628SMatthew Knepley   int           n,i,jrow,j;
12412b692628SMatthew Knepley 
12422b692628SMatthew Knepley   PetscFunctionBegin;
12432b692628SMatthew Knepley   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
12442b692628SMatthew Knepley   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
12452b692628SMatthew Knepley   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
12462b692628SMatthew Knepley   idx  = a->j;
12472b692628SMatthew Knepley   v    = a->a;
12482b692628SMatthew Knepley   ii   = a->i;
12492b692628SMatthew Knepley 
12502b692628SMatthew Knepley   for (i=0; i<m; i++) {
12512b692628SMatthew Knepley     jrow = ii[i];
12522b692628SMatthew Knepley     n    = ii[i+1] - jrow;
12532b692628SMatthew Knepley     sum1  = 0.0;
12542b692628SMatthew Knepley     sum2  = 0.0;
12552b692628SMatthew Knepley     sum3  = 0.0;
12562b692628SMatthew Knepley     sum4  = 0.0;
12572b692628SMatthew Knepley     sum5  = 0.0;
12582b692628SMatthew Knepley     sum6  = 0.0;
12592b692628SMatthew Knepley     sum7  = 0.0;
12602b692628SMatthew Knepley     sum8  = 0.0;
12612b692628SMatthew Knepley     sum9  = 0.0;
12622b692628SMatthew Knepley     for (j=0; j<n; j++) {
12632b692628SMatthew Knepley       sum1 += v[jrow]*x[9*idx[jrow]];
12642b692628SMatthew Knepley       sum2 += v[jrow]*x[9*idx[jrow]+1];
12652b692628SMatthew Knepley       sum3 += v[jrow]*x[9*idx[jrow]+2];
12662b692628SMatthew Knepley       sum4 += v[jrow]*x[9*idx[jrow]+3];
12672b692628SMatthew Knepley       sum5 += v[jrow]*x[9*idx[jrow]+4];
12682b692628SMatthew Knepley       sum6 += v[jrow]*x[9*idx[jrow]+5];
12692b692628SMatthew Knepley       sum7 += v[jrow]*x[9*idx[jrow]+6];
12702b692628SMatthew Knepley       sum8 += v[jrow]*x[9*idx[jrow]+7];
12712b692628SMatthew Knepley       sum9 += v[jrow]*x[9*idx[jrow]+8];
12722b692628SMatthew Knepley       jrow++;
12732b692628SMatthew Knepley      }
12742b692628SMatthew Knepley     y[9*i]   += sum1;
12752b692628SMatthew Knepley     y[9*i+1] += sum2;
12762b692628SMatthew Knepley     y[9*i+2] += sum3;
12772b692628SMatthew Knepley     y[9*i+3] += sum4;
12782b692628SMatthew Knepley     y[9*i+4] += sum5;
12792b692628SMatthew Knepley     y[9*i+5] += sum6;
12802b692628SMatthew Knepley     y[9*i+6] += sum7;
12812b692628SMatthew Knepley     y[9*i+7] += sum8;
12822b692628SMatthew Knepley     y[9*i+8] += sum9;
12832b692628SMatthew Knepley   }
12842b692628SMatthew Knepley 
12852b692628SMatthew Knepley   PetscLogFlops(18*a->nz);
12862b692628SMatthew Knepley   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
12872b692628SMatthew Knepley   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
12882b692628SMatthew Knepley   PetscFunctionReturn(0);
12892b692628SMatthew Knepley }
12902b692628SMatthew Knepley 
12912b692628SMatthew Knepley #undef __FUNCT__
12922b692628SMatthew Knepley #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9"
12932b692628SMatthew Knepley int MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
12942b692628SMatthew Knepley {
12952b692628SMatthew Knepley   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
12962b692628SMatthew Knepley   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
12972b692628SMatthew Knepley   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
12982b692628SMatthew Knepley   int           ierr,m = b->AIJ->m,n,i,*idx;
12992b692628SMatthew Knepley 
13002b692628SMatthew Knepley   PetscFunctionBegin;
13012b692628SMatthew Knepley   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
13022b692628SMatthew Knepley   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
13032b692628SMatthew Knepley   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
13042b692628SMatthew Knepley   for (i=0; i<m; i++) {
13052b692628SMatthew Knepley     idx    = a->j + a->i[i] ;
13062b692628SMatthew Knepley     v      = a->a + a->i[i] ;
13072b692628SMatthew Knepley     n      = a->i[i+1] - a->i[i];
13082b692628SMatthew Knepley     alpha1 = x[9*i];
13092b692628SMatthew Knepley     alpha2 = x[9*i+1];
13102b692628SMatthew Knepley     alpha3 = x[9*i+2];
13112b692628SMatthew Knepley     alpha4 = x[9*i+3];
13122b692628SMatthew Knepley     alpha5 = x[9*i+4];
13132b692628SMatthew Knepley     alpha6 = x[9*i+5];
13142b692628SMatthew Knepley     alpha7 = x[9*i+6];
13152b692628SMatthew Knepley     alpha8 = x[9*i+7];
13162b692628SMatthew Knepley     alpha9 = x[9*i+8];
13172b692628SMatthew Knepley     while (n-->0) {
13182b692628SMatthew Knepley       y[9*(*idx)]   += alpha1*(*v);
13192b692628SMatthew Knepley       y[9*(*idx)+1] += alpha2*(*v);
13202b692628SMatthew Knepley       y[9*(*idx)+2] += alpha3*(*v);
13212b692628SMatthew Knepley       y[9*(*idx)+3] += alpha4*(*v);
13222b692628SMatthew Knepley       y[9*(*idx)+4] += alpha5*(*v);
13232b692628SMatthew Knepley       y[9*(*idx)+5] += alpha6*(*v);
13242b692628SMatthew Knepley       y[9*(*idx)+6] += alpha7*(*v);
13252b692628SMatthew Knepley       y[9*(*idx)+7] += alpha8*(*v);
13262b692628SMatthew Knepley       y[9*(*idx)+8] += alpha9*(*v);
13272b692628SMatthew Knepley       idx++; v++;
13282b692628SMatthew Knepley     }
13292b692628SMatthew Knepley   }
13302b692628SMatthew Knepley   PetscLogFlops(18*a->nz);
13312b692628SMatthew Knepley   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
13322b692628SMatthew Knepley   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
13332b692628SMatthew Knepley   PetscFunctionReturn(0);
13342b692628SMatthew Knepley }
13352b692628SMatthew Knepley 
13362f7816d4SBarry Smith /*--------------------------------------------------------------------------------------------*/
13372f7816d4SBarry Smith #undef __FUNCT__
13382f7816d4SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_16"
13392f7816d4SBarry Smith int MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
13402f7816d4SBarry Smith {
13412f7816d4SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
13422f7816d4SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
13432f7816d4SBarry Smith   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
13442f7816d4SBarry Smith   PetscScalar   sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1345bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,*idx,*ii;
13462f7816d4SBarry Smith   int           n,i,jrow,j;
13472f7816d4SBarry Smith 
13482f7816d4SBarry Smith   PetscFunctionBegin;
13492f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
13502f7816d4SBarry Smith   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
13512f7816d4SBarry Smith   idx  = a->j;
13522f7816d4SBarry Smith   v    = a->a;
13532f7816d4SBarry Smith   ii   = a->i;
13542f7816d4SBarry Smith 
13552f7816d4SBarry Smith   for (i=0; i<m; i++) {
13562f7816d4SBarry Smith     jrow = ii[i];
13572f7816d4SBarry Smith     n    = ii[i+1] - jrow;
13582f7816d4SBarry Smith     sum1  = 0.0;
13592f7816d4SBarry Smith     sum2  = 0.0;
13602f7816d4SBarry Smith     sum3  = 0.0;
13612f7816d4SBarry Smith     sum4  = 0.0;
13622f7816d4SBarry Smith     sum5  = 0.0;
13632f7816d4SBarry Smith     sum6  = 0.0;
13642f7816d4SBarry Smith     sum7  = 0.0;
13652f7816d4SBarry Smith     sum8  = 0.0;
13662f7816d4SBarry Smith     sum9  = 0.0;
13672f7816d4SBarry Smith     sum10 = 0.0;
13682f7816d4SBarry Smith     sum11 = 0.0;
13692f7816d4SBarry Smith     sum12 = 0.0;
13702f7816d4SBarry Smith     sum13 = 0.0;
13712f7816d4SBarry Smith     sum14 = 0.0;
13722f7816d4SBarry Smith     sum15 = 0.0;
13732f7816d4SBarry Smith     sum16 = 0.0;
13742f7816d4SBarry Smith     for (j=0; j<n; j++) {
13752f7816d4SBarry Smith       sum1  += v[jrow]*x[16*idx[jrow]];
13762f7816d4SBarry Smith       sum2  += v[jrow]*x[16*idx[jrow]+1];
13772f7816d4SBarry Smith       sum3  += v[jrow]*x[16*idx[jrow]+2];
13782f7816d4SBarry Smith       sum4  += v[jrow]*x[16*idx[jrow]+3];
13792f7816d4SBarry Smith       sum5  += v[jrow]*x[16*idx[jrow]+4];
13802f7816d4SBarry Smith       sum6  += v[jrow]*x[16*idx[jrow]+5];
13812f7816d4SBarry Smith       sum7  += v[jrow]*x[16*idx[jrow]+6];
13822f7816d4SBarry Smith       sum8  += v[jrow]*x[16*idx[jrow]+7];
13832f7816d4SBarry Smith       sum9  += v[jrow]*x[16*idx[jrow]+8];
13842f7816d4SBarry Smith       sum10 += v[jrow]*x[16*idx[jrow]+9];
13852f7816d4SBarry Smith       sum11 += v[jrow]*x[16*idx[jrow]+10];
13862f7816d4SBarry Smith       sum12 += v[jrow]*x[16*idx[jrow]+11];
13872f7816d4SBarry Smith       sum13 += v[jrow]*x[16*idx[jrow]+12];
13882f7816d4SBarry Smith       sum14 += v[jrow]*x[16*idx[jrow]+13];
13892f7816d4SBarry Smith       sum15 += v[jrow]*x[16*idx[jrow]+14];
13902f7816d4SBarry Smith       sum16 += v[jrow]*x[16*idx[jrow]+15];
13912f7816d4SBarry Smith       jrow++;
13922f7816d4SBarry Smith      }
13932f7816d4SBarry Smith     y[16*i]    = sum1;
13942f7816d4SBarry Smith     y[16*i+1]  = sum2;
13952f7816d4SBarry Smith     y[16*i+2]  = sum3;
13962f7816d4SBarry Smith     y[16*i+3]  = sum4;
13972f7816d4SBarry Smith     y[16*i+4]  = sum5;
13982f7816d4SBarry Smith     y[16*i+5]  = sum6;
13992f7816d4SBarry Smith     y[16*i+6]  = sum7;
14002f7816d4SBarry Smith     y[16*i+7]  = sum8;
14012f7816d4SBarry Smith     y[16*i+8]  = sum9;
14022f7816d4SBarry Smith     y[16*i+9]  = sum10;
14032f7816d4SBarry Smith     y[16*i+10] = sum11;
14042f7816d4SBarry Smith     y[16*i+11] = sum12;
14052f7816d4SBarry Smith     y[16*i+12] = sum13;
14062f7816d4SBarry Smith     y[16*i+13] = sum14;
14072f7816d4SBarry Smith     y[16*i+14] = sum15;
14082f7816d4SBarry Smith     y[16*i+15] = sum16;
14092f7816d4SBarry Smith   }
14102f7816d4SBarry Smith 
14112f7816d4SBarry Smith   PetscLogFlops(32*a->nz - 16*m);
14122f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
14132f7816d4SBarry Smith   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
14142f7816d4SBarry Smith   PetscFunctionReturn(0);
14152f7816d4SBarry Smith }
14162f7816d4SBarry Smith 
14172f7816d4SBarry Smith #undef __FUNCT__
14182f7816d4SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16"
14192f7816d4SBarry Smith int MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
14202f7816d4SBarry Smith {
14212f7816d4SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
14222f7816d4SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
14232f7816d4SBarry Smith   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
14242f7816d4SBarry Smith   PetscScalar   alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1425bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,n,i,*idx;
14262f7816d4SBarry Smith 
14272f7816d4SBarry Smith   PetscFunctionBegin;
14282f7816d4SBarry Smith   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
14292f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
14302f7816d4SBarry Smith   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
1431bfec09a0SHong Zhang 
14322f7816d4SBarry Smith   for (i=0; i<m; i++) {
1433bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
1434bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
14352f7816d4SBarry Smith     n      = a->i[i+1] - a->i[i];
14362f7816d4SBarry Smith     alpha1  = x[16*i];
14372f7816d4SBarry Smith     alpha2  = x[16*i+1];
14382f7816d4SBarry Smith     alpha3  = x[16*i+2];
14392f7816d4SBarry Smith     alpha4  = x[16*i+3];
14402f7816d4SBarry Smith     alpha5  = x[16*i+4];
14412f7816d4SBarry Smith     alpha6  = x[16*i+5];
14422f7816d4SBarry Smith     alpha7  = x[16*i+6];
14432f7816d4SBarry Smith     alpha8  = x[16*i+7];
14442f7816d4SBarry Smith     alpha9  = x[16*i+8];
14452f7816d4SBarry Smith     alpha10 = x[16*i+9];
14462f7816d4SBarry Smith     alpha11 = x[16*i+10];
14472f7816d4SBarry Smith     alpha12 = x[16*i+11];
14482f7816d4SBarry Smith     alpha13 = x[16*i+12];
14492f7816d4SBarry Smith     alpha14 = x[16*i+13];
14502f7816d4SBarry Smith     alpha15 = x[16*i+14];
14512f7816d4SBarry Smith     alpha16 = x[16*i+15];
14522f7816d4SBarry Smith     while (n-->0) {
14532f7816d4SBarry Smith       y[16*(*idx)]    += alpha1*(*v);
14542f7816d4SBarry Smith       y[16*(*idx)+1]  += alpha2*(*v);
14552f7816d4SBarry Smith       y[16*(*idx)+2]  += alpha3*(*v);
14562f7816d4SBarry Smith       y[16*(*idx)+3]  += alpha4*(*v);
14572f7816d4SBarry Smith       y[16*(*idx)+4]  += alpha5*(*v);
14582f7816d4SBarry Smith       y[16*(*idx)+5]  += alpha6*(*v);
14592f7816d4SBarry Smith       y[16*(*idx)+6]  += alpha7*(*v);
14602f7816d4SBarry Smith       y[16*(*idx)+7]  += alpha8*(*v);
14612f7816d4SBarry Smith       y[16*(*idx)+8]  += alpha9*(*v);
14622f7816d4SBarry Smith       y[16*(*idx)+9]  += alpha10*(*v);
14632f7816d4SBarry Smith       y[16*(*idx)+10] += alpha11*(*v);
14642f7816d4SBarry Smith       y[16*(*idx)+11] += alpha12*(*v);
14652f7816d4SBarry Smith       y[16*(*idx)+12] += alpha13*(*v);
14662f7816d4SBarry Smith       y[16*(*idx)+13] += alpha14*(*v);
14672f7816d4SBarry Smith       y[16*(*idx)+14] += alpha15*(*v);
14682f7816d4SBarry Smith       y[16*(*idx)+15] += alpha16*(*v);
14692f7816d4SBarry Smith       idx++; v++;
14702f7816d4SBarry Smith     }
14712f7816d4SBarry Smith   }
14722f7816d4SBarry Smith   PetscLogFlops(32*a->nz - 16*b->AIJ->n);
14732f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
14742f7816d4SBarry Smith   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
14752f7816d4SBarry Smith   PetscFunctionReturn(0);
14762f7816d4SBarry Smith }
14772f7816d4SBarry Smith 
14782f7816d4SBarry Smith #undef __FUNCT__
14792f7816d4SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_16"
14802f7816d4SBarry Smith int MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
14812f7816d4SBarry Smith {
14822f7816d4SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
14832f7816d4SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
14842f7816d4SBarry Smith   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
14852f7816d4SBarry Smith   PetscScalar   sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1486bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,*idx,*ii;
14872f7816d4SBarry Smith   int           n,i,jrow,j;
14882f7816d4SBarry Smith 
14892f7816d4SBarry Smith   PetscFunctionBegin;
14902f7816d4SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
14912f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
14922f7816d4SBarry Smith   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
14932f7816d4SBarry Smith   idx  = a->j;
14942f7816d4SBarry Smith   v    = a->a;
14952f7816d4SBarry Smith   ii   = a->i;
14962f7816d4SBarry Smith 
14972f7816d4SBarry Smith   for (i=0; i<m; i++) {
14982f7816d4SBarry Smith     jrow = ii[i];
14992f7816d4SBarry Smith     n    = ii[i+1] - jrow;
15002f7816d4SBarry Smith     sum1  = 0.0;
15012f7816d4SBarry Smith     sum2  = 0.0;
15022f7816d4SBarry Smith     sum3  = 0.0;
15032f7816d4SBarry Smith     sum4  = 0.0;
15042f7816d4SBarry Smith     sum5  = 0.0;
15052f7816d4SBarry Smith     sum6  = 0.0;
15062f7816d4SBarry Smith     sum7  = 0.0;
15072f7816d4SBarry Smith     sum8  = 0.0;
15082f7816d4SBarry Smith     sum9  = 0.0;
15092f7816d4SBarry Smith     sum10 = 0.0;
15102f7816d4SBarry Smith     sum11 = 0.0;
15112f7816d4SBarry Smith     sum12 = 0.0;
15122f7816d4SBarry Smith     sum13 = 0.0;
15132f7816d4SBarry Smith     sum14 = 0.0;
15142f7816d4SBarry Smith     sum15 = 0.0;
15152f7816d4SBarry Smith     sum16 = 0.0;
15162f7816d4SBarry Smith     for (j=0; j<n; j++) {
15172f7816d4SBarry Smith       sum1  += v[jrow]*x[16*idx[jrow]];
15182f7816d4SBarry Smith       sum2  += v[jrow]*x[16*idx[jrow]+1];
15192f7816d4SBarry Smith       sum3  += v[jrow]*x[16*idx[jrow]+2];
15202f7816d4SBarry Smith       sum4  += v[jrow]*x[16*idx[jrow]+3];
15212f7816d4SBarry Smith       sum5  += v[jrow]*x[16*idx[jrow]+4];
15222f7816d4SBarry Smith       sum6  += v[jrow]*x[16*idx[jrow]+5];
15232f7816d4SBarry Smith       sum7  += v[jrow]*x[16*idx[jrow]+6];
15242f7816d4SBarry Smith       sum8  += v[jrow]*x[16*idx[jrow]+7];
15252f7816d4SBarry Smith       sum9  += v[jrow]*x[16*idx[jrow]+8];
15262f7816d4SBarry Smith       sum10 += v[jrow]*x[16*idx[jrow]+9];
15272f7816d4SBarry Smith       sum11 += v[jrow]*x[16*idx[jrow]+10];
15282f7816d4SBarry Smith       sum12 += v[jrow]*x[16*idx[jrow]+11];
15292f7816d4SBarry Smith       sum13 += v[jrow]*x[16*idx[jrow]+12];
15302f7816d4SBarry Smith       sum14 += v[jrow]*x[16*idx[jrow]+13];
15312f7816d4SBarry Smith       sum15 += v[jrow]*x[16*idx[jrow]+14];
15322f7816d4SBarry Smith       sum16 += v[jrow]*x[16*idx[jrow]+15];
15332f7816d4SBarry Smith       jrow++;
15342f7816d4SBarry Smith      }
15352f7816d4SBarry Smith     y[16*i]    += sum1;
15362f7816d4SBarry Smith     y[16*i+1]  += sum2;
15372f7816d4SBarry Smith     y[16*i+2]  += sum3;
15382f7816d4SBarry Smith     y[16*i+3]  += sum4;
15392f7816d4SBarry Smith     y[16*i+4]  += sum5;
15402f7816d4SBarry Smith     y[16*i+5]  += sum6;
15412f7816d4SBarry Smith     y[16*i+6]  += sum7;
15422f7816d4SBarry Smith     y[16*i+7]  += sum8;
15432f7816d4SBarry Smith     y[16*i+8]  += sum9;
15442f7816d4SBarry Smith     y[16*i+9]  += sum10;
15452f7816d4SBarry Smith     y[16*i+10] += sum11;
15462f7816d4SBarry Smith     y[16*i+11] += sum12;
15472f7816d4SBarry Smith     y[16*i+12] += sum13;
15482f7816d4SBarry Smith     y[16*i+13] += sum14;
15492f7816d4SBarry Smith     y[16*i+14] += sum15;
15502f7816d4SBarry Smith     y[16*i+15] += sum16;
15512f7816d4SBarry Smith   }
15522f7816d4SBarry Smith 
15532f7816d4SBarry Smith   PetscLogFlops(32*a->nz);
15542f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
15552f7816d4SBarry Smith   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
15562f7816d4SBarry Smith   PetscFunctionReturn(0);
15572f7816d4SBarry Smith }
15582f7816d4SBarry Smith 
15592f7816d4SBarry Smith #undef __FUNCT__
15602f7816d4SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16"
15612f7816d4SBarry Smith int MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
15622f7816d4SBarry Smith {
15632f7816d4SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
15642f7816d4SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
15652f7816d4SBarry Smith   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
15662f7816d4SBarry Smith   PetscScalar   alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1567bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,n,i,*idx;
15682f7816d4SBarry Smith 
15692f7816d4SBarry Smith   PetscFunctionBegin;
15702f7816d4SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
15712f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
15722f7816d4SBarry Smith   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
15732f7816d4SBarry Smith   for (i=0; i<m; i++) {
1574bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
1575bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
15762f7816d4SBarry Smith     n      = a->i[i+1] - a->i[i];
15772f7816d4SBarry Smith     alpha1 = x[16*i];
15782f7816d4SBarry Smith     alpha2 = x[16*i+1];
15792f7816d4SBarry Smith     alpha3 = x[16*i+2];
15802f7816d4SBarry Smith     alpha4 = x[16*i+3];
15812f7816d4SBarry Smith     alpha5 = x[16*i+4];
15822f7816d4SBarry Smith     alpha6 = x[16*i+5];
15832f7816d4SBarry Smith     alpha7 = x[16*i+6];
15842f7816d4SBarry Smith     alpha8 = x[16*i+7];
15852f7816d4SBarry Smith     alpha9  = x[16*i+8];
15862f7816d4SBarry Smith     alpha10 = x[16*i+9];
15872f7816d4SBarry Smith     alpha11 = x[16*i+10];
15882f7816d4SBarry Smith     alpha12 = x[16*i+11];
15892f7816d4SBarry Smith     alpha13 = x[16*i+12];
15902f7816d4SBarry Smith     alpha14 = x[16*i+13];
15912f7816d4SBarry Smith     alpha15 = x[16*i+14];
15922f7816d4SBarry Smith     alpha16 = x[16*i+15];
15932f7816d4SBarry Smith     while (n-->0) {
15942f7816d4SBarry Smith       y[16*(*idx)]   += alpha1*(*v);
15952f7816d4SBarry Smith       y[16*(*idx)+1] += alpha2*(*v);
15962f7816d4SBarry Smith       y[16*(*idx)+2] += alpha3*(*v);
15972f7816d4SBarry Smith       y[16*(*idx)+3] += alpha4*(*v);
15982f7816d4SBarry Smith       y[16*(*idx)+4] += alpha5*(*v);
15992f7816d4SBarry Smith       y[16*(*idx)+5] += alpha6*(*v);
16002f7816d4SBarry Smith       y[16*(*idx)+6] += alpha7*(*v);
16012f7816d4SBarry Smith       y[16*(*idx)+7] += alpha8*(*v);
16022f7816d4SBarry Smith       y[16*(*idx)+8]  += alpha9*(*v);
16032f7816d4SBarry Smith       y[16*(*idx)+9]  += alpha10*(*v);
16042f7816d4SBarry Smith       y[16*(*idx)+10] += alpha11*(*v);
16052f7816d4SBarry Smith       y[16*(*idx)+11] += alpha12*(*v);
16062f7816d4SBarry Smith       y[16*(*idx)+12] += alpha13*(*v);
16072f7816d4SBarry Smith       y[16*(*idx)+13] += alpha14*(*v);
16082f7816d4SBarry Smith       y[16*(*idx)+14] += alpha15*(*v);
16092f7816d4SBarry Smith       y[16*(*idx)+15] += alpha16*(*v);
16102f7816d4SBarry Smith       idx++; v++;
16112f7816d4SBarry Smith     }
16122f7816d4SBarry Smith   }
16132f7816d4SBarry Smith   PetscLogFlops(32*a->nz);
16142f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
16152f7816d4SBarry Smith   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
161666ed3db0SBarry Smith   PetscFunctionReturn(0);
161766ed3db0SBarry Smith }
161866ed3db0SBarry Smith 
1619f4a53059SBarry Smith /*===================================================================================*/
16204a2ae208SSatish Balay #undef __FUNCT__
16214a2ae208SSatish Balay #define __FUNCT__ "MatMult_MPIMAIJ_dof"
1622f4a53059SBarry Smith int MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
1623f4a53059SBarry Smith {
16244cb9d9b8SBarry Smith   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1625f4a53059SBarry Smith   int         ierr;
1626f4a53059SBarry Smith   PetscFunctionBegin;
1627f4a53059SBarry Smith 
1628f4a53059SBarry Smith   /* start the scatter */
1629f4a53059SBarry Smith   ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
16304cb9d9b8SBarry Smith   ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr);
1631f4a53059SBarry Smith   ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
16324cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr);
1633f4a53059SBarry Smith   PetscFunctionReturn(0);
1634f4a53059SBarry Smith }
1635f4a53059SBarry Smith 
16364a2ae208SSatish Balay #undef __FUNCT__
16374a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof"
16384cb9d9b8SBarry Smith int MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
16394cb9d9b8SBarry Smith {
16404cb9d9b8SBarry Smith   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
16414cb9d9b8SBarry Smith   int         ierr;
16424cb9d9b8SBarry Smith   PetscFunctionBegin;
16434cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
16444cb9d9b8SBarry Smith   ierr = VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
16454cb9d9b8SBarry Smith   ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr);
16464cb9d9b8SBarry Smith   ierr = VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
16474cb9d9b8SBarry Smith   PetscFunctionReturn(0);
16484cb9d9b8SBarry Smith }
16494cb9d9b8SBarry Smith 
16504a2ae208SSatish Balay #undef __FUNCT__
16514a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof"
1652d72c5c08SBarry Smith int MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
16534cb9d9b8SBarry Smith {
16544cb9d9b8SBarry Smith   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
16554cb9d9b8SBarry Smith   int         ierr;
16564cb9d9b8SBarry Smith   PetscFunctionBegin;
16574cb9d9b8SBarry Smith 
16584cb9d9b8SBarry Smith   /* start the scatter */
16594cb9d9b8SBarry Smith   ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
1660d72c5c08SBarry Smith   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
16614cb9d9b8SBarry Smith   ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
1662d72c5c08SBarry Smith   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,zz);CHKERRQ(ierr);
16634cb9d9b8SBarry Smith   PetscFunctionReturn(0);
16644cb9d9b8SBarry Smith }
16654cb9d9b8SBarry Smith 
16664a2ae208SSatish Balay #undef __FUNCT__
16674a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof"
1668d72c5c08SBarry Smith int MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
16694cb9d9b8SBarry Smith {
16704cb9d9b8SBarry Smith   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
16714cb9d9b8SBarry Smith   int         ierr;
16724cb9d9b8SBarry Smith   PetscFunctionBegin;
16734cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
1674d72c5c08SBarry Smith   ierr = VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
1675d72c5c08SBarry Smith   ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
1676d72c5c08SBarry Smith   ierr = VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
16774cb9d9b8SBarry Smith   PetscFunctionReturn(0);
16784cb9d9b8SBarry Smith }
16794cb9d9b8SBarry Smith 
1680bcc973b7SBarry Smith /* ---------------------------------------------------------------------------------- */
16815983afb6SSatish Balay /*MC
16820bad9183SKris Buschelman   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
16830bad9183SKris Buschelman   operations for multicomponent problems.  It interpolates each component the same
16840bad9183SKris Buschelman   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
16850bad9183SKris Buschelman   and MATMPIAIJ for distributed matrices.
16860bad9183SKris Buschelman 
16870bad9183SKris Buschelman   Operations provided:
16880bad9183SKris Buschelman . MatMult
16890bad9183SKris Buschelman . MatMultTranspose
16900bad9183SKris Buschelman . MatMultAdd
16910bad9183SKris Buschelman . MatMultTransposeAdd
16920bad9183SKris Buschelman 
16930bad9183SKris Buschelman   Level: advanced
16940bad9183SKris Buschelman 
16950bad9183SKris Buschelman M*/
16964a2ae208SSatish Balay #undef __FUNCT__
16974a2ae208SSatish Balay #define __FUNCT__ "MatCreateMAIJ"
1698cd3bbe55SBarry Smith int MatCreateMAIJ(Mat A,int dof,Mat *maij)
169982b1193eSBarry Smith {
1700f4a53059SBarry Smith   int         ierr,size,n;
17014cb9d9b8SBarry Smith   Mat_MPIMAIJ *b;
170282b1193eSBarry Smith   Mat         B;
170382b1193eSBarry Smith 
170482b1193eSBarry Smith   PetscFunctionBegin;
1705d72c5c08SBarry Smith   ierr   = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
1706d72c5c08SBarry Smith 
1707d72c5c08SBarry Smith   if (dof == 1) {
1708d72c5c08SBarry Smith     *maij = A;
1709d72c5c08SBarry Smith   } else {
171083903688SBarry Smith     ierr = MatCreate(A->comm,dof*A->m,dof*A->n,dof*A->M,dof*A->N,&B);CHKERRQ(ierr);
1711cd3bbe55SBarry Smith     B->assembled    = PETSC_TRUE;
1712d72c5c08SBarry Smith 
1713f4a53059SBarry Smith     ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1714f4a53059SBarry Smith     if (size == 1) {
1715b9b97703SBarry Smith       ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr);
17164cb9d9b8SBarry Smith       B->ops->destroy = MatDestroy_SeqMAIJ;
1717b9b97703SBarry Smith       b      = (Mat_MPIMAIJ*)B->data;
1718b9b97703SBarry Smith       b->dof = dof;
17194cb9d9b8SBarry Smith       b->AIJ = A;
1720d72c5c08SBarry Smith       if (dof == 2) {
1721cd3bbe55SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_2;
1722cd3bbe55SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
1723cd3bbe55SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
1724cd3bbe55SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
1725bcc973b7SBarry Smith       } else if (dof == 3) {
1726bcc973b7SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_3;
1727bcc973b7SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
1728bcc973b7SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
1729bcc973b7SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
1730bcc973b7SBarry Smith       } else if (dof == 4) {
1731bcc973b7SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_4;
1732bcc973b7SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
1733bcc973b7SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
1734bcc973b7SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
1735f9fae5adSBarry Smith       } else if (dof == 5) {
1736f9fae5adSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_5;
1737f9fae5adSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
1738f9fae5adSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
1739f9fae5adSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
17406cd98798SBarry Smith       } else if (dof == 6) {
17416cd98798SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_6;
17426cd98798SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
17436cd98798SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
17446cd98798SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
174566ed3db0SBarry Smith       } else if (dof == 8) {
174666ed3db0SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_8;
174766ed3db0SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
174866ed3db0SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
174966ed3db0SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
17502b692628SMatthew Knepley       } else if (dof == 9) {
17512b692628SMatthew Knepley         B->ops->mult             = MatMult_SeqMAIJ_9;
17522b692628SMatthew Knepley         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
17532b692628SMatthew Knepley         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
17542b692628SMatthew Knepley         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
17552f7816d4SBarry Smith       } else if (dof == 16) {
17562f7816d4SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_16;
17572f7816d4SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
17582f7816d4SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
17592f7816d4SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
176082b1193eSBarry Smith       } else {
176166ed3db0SBarry Smith         SETERRQ1(1,"Cannot handle a dof of %d. Send request for code to petsc-maint@mcs.anl.gov\n",dof);
176282b1193eSBarry Smith       }
1763f4a53059SBarry Smith     } else {
1764f4a53059SBarry Smith       Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
1765f4a53059SBarry Smith       IS         from,to;
1766f4a53059SBarry Smith       Vec        gvec;
1767f4a53059SBarry Smith       int        *garray,i;
1768f4a53059SBarry Smith 
1769b9b97703SBarry Smith       ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr);
1770d72c5c08SBarry Smith       B->ops->destroy = MatDestroy_MPIMAIJ;
1771b9b97703SBarry Smith       b      = (Mat_MPIMAIJ*)B->data;
1772b9b97703SBarry Smith       b->dof = dof;
1773b9b97703SBarry Smith       b->A   = A;
17744cb9d9b8SBarry Smith       ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr);
17754cb9d9b8SBarry Smith       ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr);
17764cb9d9b8SBarry Smith 
1777f4a53059SBarry Smith       ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
1778f4a53059SBarry Smith       ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr);
1779f4a53059SBarry Smith 
1780f4a53059SBarry Smith       /* create two temporary Index sets for build scatter gather */
1781b0a32e0cSBarry Smith       ierr = PetscMalloc((n+1)*sizeof(int),&garray);CHKERRQ(ierr);
1782f4a53059SBarry Smith       for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
1783f4a53059SBarry Smith       ierr = ISCreateBlock(A->comm,dof,n,garray,&from);CHKERRQ(ierr);
1784f4a53059SBarry Smith       ierr = PetscFree(garray);CHKERRQ(ierr);
1785f4a53059SBarry Smith       ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);
1786f4a53059SBarry Smith 
1787f4a53059SBarry Smith       /* create temporary global vector to generate scatter context */
1788f4a53059SBarry Smith       ierr = VecCreateMPI(A->comm,dof*A->n,dof*A->N,&gvec);CHKERRQ(ierr);
1789f4a53059SBarry Smith 
1790f4a53059SBarry Smith       /* generate the scatter context */
1791f4a53059SBarry Smith       ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
1792f4a53059SBarry Smith 
1793f4a53059SBarry Smith       ierr = ISDestroy(from);CHKERRQ(ierr);
1794f4a53059SBarry Smith       ierr = ISDestroy(to);CHKERRQ(ierr);
1795f4a53059SBarry Smith       ierr = VecDestroy(gvec);CHKERRQ(ierr);
1796f4a53059SBarry Smith 
1797f4a53059SBarry Smith       B->ops->mult             = MatMult_MPIMAIJ_dof;
17984cb9d9b8SBarry Smith       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
17994cb9d9b8SBarry Smith       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
18004cb9d9b8SBarry Smith       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
1801f4a53059SBarry Smith     }
1802cd3bbe55SBarry Smith     *maij = B;
1803d72c5c08SBarry Smith   }
180482b1193eSBarry Smith   PetscFunctionReturn(0);
180582b1193eSBarry Smith }
180682b1193eSBarry Smith 
180782b1193eSBarry Smith 
180882b1193eSBarry Smith 
180982b1193eSBarry Smith 
181082b1193eSBarry Smith 
181182b1193eSBarry Smith 
181282b1193eSBarry Smith 
181382b1193eSBarry Smith 
181482b1193eSBarry Smith 
181582b1193eSBarry Smith 
181682b1193eSBarry Smith 
181782b1193eSBarry Smith 
1818