xref: /petsc/src/mat/impls/maij/maij.c (revision bfec09a0818ad2e3acd405413d1adce61f4a48a2)
173f4d377SMatthew Knepley /*$Id: maij.c,v 1.19 2001/08/07 03:03:00 balay Exp $*/
282b1193eSBarry Smith /*
3cd3bbe55SBarry Smith     Defines the basic matrix operations for the MAIJ  matrix storage format.
4cd3bbe55SBarry Smith   This format is used for restriction and interpolation operations for
5cd3bbe55SBarry Smith   multicomponent problems. It interpolates each component the same way
6cd3bbe55SBarry Smith   independently.
7cd3bbe55SBarry Smith 
8cd3bbe55SBarry Smith      We provide:
9cd3bbe55SBarry Smith          MatMult()
10cd3bbe55SBarry Smith          MatMultTranspose()
11cd3bbe55SBarry Smith          MatMultTransposeAdd()
12cd3bbe55SBarry Smith          MatMultAdd()
13cd3bbe55SBarry Smith           and
14cd3bbe55SBarry Smith          MatCreateMAIJ(Mat,dof,Mat*)
15f4a53059SBarry Smith 
16f4a53059SBarry Smith      This single directory handles both the sequential and parallel codes
1782b1193eSBarry Smith */
1882b1193eSBarry Smith 
19be911618SKris Buschelman #include "src/mat/impls/maij/maij.h"
202f7816d4SBarry Smith #include "src/vec/vecimpl.h"
2182b1193eSBarry Smith 
224a2ae208SSatish Balay #undef __FUNCT__
234a2ae208SSatish Balay #define __FUNCT__ "MatMAIJGetAIJ"
24b9b97703SBarry Smith int MatMAIJGetAIJ(Mat A,Mat *B)
25b9b97703SBarry Smith {
26b9b97703SBarry Smith   int         ierr;
27b9b97703SBarry Smith   PetscTruth  ismpimaij,isseqmaij;
28b9b97703SBarry Smith 
29b9b97703SBarry Smith   PetscFunctionBegin;
30b9b97703SBarry Smith   ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr);
31b9b97703SBarry Smith   ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr);
32b9b97703SBarry Smith   if (ismpimaij) {
33b9b97703SBarry Smith     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
34b9b97703SBarry Smith 
35b9b97703SBarry Smith     *B = b->A;
36b9b97703SBarry Smith   } else if (isseqmaij) {
37b9b97703SBarry Smith     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
38b9b97703SBarry Smith 
39b9b97703SBarry Smith     *B = b->AIJ;
40b9b97703SBarry Smith   } else {
41b9b97703SBarry Smith     *B = A;
42b9b97703SBarry Smith   }
43b9b97703SBarry Smith   PetscFunctionReturn(0);
44b9b97703SBarry Smith }
45b9b97703SBarry Smith 
464a2ae208SSatish Balay #undef __FUNCT__
474a2ae208SSatish Balay #define __FUNCT__ "MatMAIJRedimension"
48b9b97703SBarry Smith int MatMAIJRedimension(Mat A,int dof,Mat *B)
49b9b97703SBarry Smith {
50b9b97703SBarry Smith   int ierr;
51b9b97703SBarry Smith   Mat Aij;
52b9b97703SBarry Smith 
53b9b97703SBarry Smith   PetscFunctionBegin;
54b9b97703SBarry Smith   ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr);
55b9b97703SBarry Smith   ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr);
56b9b97703SBarry Smith   PetscFunctionReturn(0);
57b9b97703SBarry Smith }
58b9b97703SBarry Smith 
594a2ae208SSatish Balay #undef __FUNCT__
604a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_SeqMAIJ"
614cb9d9b8SBarry Smith int MatDestroy_SeqMAIJ(Mat A)
6282b1193eSBarry Smith {
6382b1193eSBarry Smith   int         ierr;
644cb9d9b8SBarry Smith   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
6582b1193eSBarry Smith 
6682b1193eSBarry Smith   PetscFunctionBegin;
67cd3bbe55SBarry Smith   if (b->AIJ) {
68cd3bbe55SBarry Smith     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
6982b1193eSBarry Smith   }
704cb9d9b8SBarry Smith   ierr = PetscFree(b);CHKERRQ(ierr);
714cb9d9b8SBarry Smith   PetscFunctionReturn(0);
724cb9d9b8SBarry Smith }
734cb9d9b8SBarry Smith 
744a2ae208SSatish Balay #undef __FUNCT__
754a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_MPIMAIJ"
764cb9d9b8SBarry Smith int MatDestroy_MPIMAIJ(Mat A)
774cb9d9b8SBarry Smith {
784cb9d9b8SBarry Smith   int         ierr;
794cb9d9b8SBarry Smith   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
804cb9d9b8SBarry Smith 
814cb9d9b8SBarry Smith   PetscFunctionBegin;
824cb9d9b8SBarry Smith   if (b->AIJ) {
834cb9d9b8SBarry Smith     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
844cb9d9b8SBarry Smith   }
85f4a53059SBarry Smith   if (b->OAIJ) {
86f4a53059SBarry Smith     ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr);
87f4a53059SBarry Smith   }
88b9b97703SBarry Smith   if (b->A) {
89b9b97703SBarry Smith     ierr = MatDestroy(b->A);CHKERRQ(ierr);
90b9b97703SBarry Smith   }
91f4a53059SBarry Smith   if (b->ctx) {
92f4a53059SBarry Smith     ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr);
93f4a53059SBarry Smith   }
94f4a53059SBarry Smith   if (b->w) {
95f4a53059SBarry Smith     ierr = VecDestroy(b->w);CHKERRQ(ierr);
96f4a53059SBarry Smith   }
97cd3bbe55SBarry Smith   ierr = PetscFree(b);CHKERRQ(ierr);
98b9b97703SBarry Smith   PetscFunctionReturn(0);
99b9b97703SBarry Smith }
100b9b97703SBarry Smith 
10182b1193eSBarry Smith EXTERN_C_BEGIN
1024a2ae208SSatish Balay #undef __FUNCT__
1034a2ae208SSatish Balay #define __FUNCT__ "MatCreate_MAIJ"
104f4a53059SBarry Smith int MatCreate_MAIJ(Mat A)
10582b1193eSBarry Smith {
106cd3bbe55SBarry Smith   int         ierr;
1074cb9d9b8SBarry Smith   Mat_MPIMAIJ *b;
10882b1193eSBarry Smith 
10982b1193eSBarry Smith   PetscFunctionBegin;
110b0a32e0cSBarry Smith   ierr     = PetscNew(Mat_MPIMAIJ,&b);CHKERRQ(ierr);
111b0a32e0cSBarry Smith   A->data  = (void*)b;
1124cb9d9b8SBarry Smith   ierr = PetscMemzero(b,sizeof(Mat_MPIMAIJ));CHKERRQ(ierr);
113cd3bbe55SBarry Smith   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
114cd3bbe55SBarry Smith   A->factor           = 0;
115cd3bbe55SBarry Smith   A->mapping          = 0;
116f4a53059SBarry Smith 
117cd3bbe55SBarry Smith   b->AIJ  = 0;
118cd3bbe55SBarry Smith   b->dof  = 0;
119f4a53059SBarry Smith   b->OAIJ = 0;
120f4a53059SBarry Smith   b->ctx  = 0;
121f4a53059SBarry Smith   b->w    = 0;
12282b1193eSBarry Smith   PetscFunctionReturn(0);
12382b1193eSBarry Smith }
12482b1193eSBarry Smith EXTERN_C_END
12582b1193eSBarry Smith 
126cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/
1274a2ae208SSatish Balay #undef __FUNCT__
128b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_2"
129cd3bbe55SBarry Smith int MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
13082b1193eSBarry Smith {
1314cb9d9b8SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
132bcc973b7SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
13387828ca2SBarry Smith   PetscScalar   *x,*y,*v,sum1, sum2;
134*bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,*idx,*ii;
135bcc973b7SBarry Smith   int           n,i,jrow,j;
13682b1193eSBarry Smith 
137bcc973b7SBarry Smith   PetscFunctionBegin;
1382f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
1392f7816d4SBarry Smith   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
140bcc973b7SBarry Smith   idx  = a->j;
141bcc973b7SBarry Smith   v    = a->a;
142bcc973b7SBarry Smith   ii   = a->i;
143bcc973b7SBarry Smith 
144bcc973b7SBarry Smith   for (i=0; i<m; i++) {
145bcc973b7SBarry Smith     jrow = ii[i];
146bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
147bcc973b7SBarry Smith     sum1  = 0.0;
148bcc973b7SBarry Smith     sum2  = 0.0;
149bcc973b7SBarry Smith     for (j=0; j<n; j++) {
150bcc973b7SBarry Smith       sum1 += v[jrow]*x[2*idx[jrow]];
151bcc973b7SBarry Smith       sum2 += v[jrow]*x[2*idx[jrow]+1];
152bcc973b7SBarry Smith       jrow++;
153bcc973b7SBarry Smith      }
154bcc973b7SBarry Smith     y[2*i]   = sum1;
155bcc973b7SBarry Smith     y[2*i+1] = sum2;
156bcc973b7SBarry Smith   }
157bcc973b7SBarry Smith 
158b0a32e0cSBarry Smith   PetscLogFlops(4*a->nz - 2*m);
1592f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
1602f7816d4SBarry Smith   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
16182b1193eSBarry Smith   PetscFunctionReturn(0);
16282b1193eSBarry Smith }
163bcc973b7SBarry Smith 
1644a2ae208SSatish Balay #undef __FUNCT__
165b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2"
166cd3bbe55SBarry Smith int MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
16782b1193eSBarry Smith {
1684cb9d9b8SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
169bcc973b7SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
17087828ca2SBarry Smith   PetscScalar   *x,*y,*v,alpha1,alpha2,zero = 0.0;
171*bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,n,i,*idx;
17282b1193eSBarry Smith 
173bcc973b7SBarry Smith   PetscFunctionBegin;
174bcc973b7SBarry Smith   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
1752f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
1762f7816d4SBarry Smith   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
177*bfec09a0SHong Zhang 
178bcc973b7SBarry Smith   for (i=0; i<m; i++) {
179*bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
180*bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
181bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
182bcc973b7SBarry Smith     alpha1 = x[2*i];
183bcc973b7SBarry Smith     alpha2 = x[2*i+1];
184bcc973b7SBarry Smith     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
185bcc973b7SBarry Smith   }
186b0a32e0cSBarry Smith   PetscLogFlops(4*a->nz - 2*b->AIJ->n);
1872f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
1882f7816d4SBarry Smith   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
18982b1193eSBarry Smith   PetscFunctionReturn(0);
19082b1193eSBarry Smith }
191bcc973b7SBarry Smith 
1924a2ae208SSatish Balay #undef __FUNCT__
193b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_2"
194cd3bbe55SBarry Smith int MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
19582b1193eSBarry Smith {
1964cb9d9b8SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
197bcc973b7SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
19887828ca2SBarry Smith   PetscScalar   *x,*y,*v,sum1, sum2;
199*bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,*idx,*ii;
200bcc973b7SBarry Smith   int           n,i,jrow,j;
20182b1193eSBarry Smith 
202bcc973b7SBarry Smith   PetscFunctionBegin;
203f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2042f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
2052f7816d4SBarry Smith   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
206bcc973b7SBarry Smith   idx  = a->j;
207bcc973b7SBarry Smith   v    = a->a;
208bcc973b7SBarry Smith   ii   = a->i;
209bcc973b7SBarry Smith 
210bcc973b7SBarry Smith   for (i=0; i<m; i++) {
211bcc973b7SBarry Smith     jrow = ii[i];
212bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
213bcc973b7SBarry Smith     sum1  = 0.0;
214bcc973b7SBarry Smith     sum2  = 0.0;
215bcc973b7SBarry Smith     for (j=0; j<n; j++) {
216bcc973b7SBarry Smith       sum1 += v[jrow]*x[2*idx[jrow]];
217bcc973b7SBarry Smith       sum2 += v[jrow]*x[2*idx[jrow]+1];
218bcc973b7SBarry Smith       jrow++;
219bcc973b7SBarry Smith      }
220bcc973b7SBarry Smith     y[2*i]   += sum1;
221bcc973b7SBarry Smith     y[2*i+1] += sum2;
222bcc973b7SBarry Smith   }
223bcc973b7SBarry Smith 
224b0a32e0cSBarry Smith   PetscLogFlops(4*a->nz - 2*m);
2252f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
2262f7816d4SBarry Smith   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
227bcc973b7SBarry Smith   PetscFunctionReturn(0);
22882b1193eSBarry Smith }
2294a2ae208SSatish Balay #undef __FUNCT__
230b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2"
231cd3bbe55SBarry Smith int MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
23282b1193eSBarry Smith {
2334cb9d9b8SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
234bcc973b7SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
23587828ca2SBarry Smith   PetscScalar   *x,*y,*v,alpha1,alpha2;
236*bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,n,i,*idx;
23782b1193eSBarry Smith 
238bcc973b7SBarry Smith   PetscFunctionBegin;
239f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2402f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
2412f7816d4SBarry Smith   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
242*bfec09a0SHong Zhang 
243bcc973b7SBarry Smith   for (i=0; i<m; i++) {
244*bfec09a0SHong Zhang     idx   = a->j + a->i[i] ;
245*bfec09a0SHong Zhang     v     = a->a + a->i[i] ;
246bcc973b7SBarry Smith     n     = a->i[i+1] - a->i[i];
247bcc973b7SBarry Smith     alpha1 = x[2*i];
248bcc973b7SBarry Smith     alpha2 = x[2*i+1];
249bcc973b7SBarry Smith     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
250bcc973b7SBarry Smith   }
251b0a32e0cSBarry Smith   PetscLogFlops(4*a->nz - 2*b->AIJ->n);
2522f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
2532f7816d4SBarry Smith   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
254bcc973b7SBarry Smith   PetscFunctionReturn(0);
25582b1193eSBarry Smith }
256cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/
2574a2ae208SSatish Balay #undef __FUNCT__
258b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_3"
259bcc973b7SBarry Smith int MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
260bcc973b7SBarry Smith {
2614cb9d9b8SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
262bcc973b7SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
26387828ca2SBarry Smith   PetscScalar   *x,*y,*v,sum1, sum2, sum3;
264*bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,*idx,*ii;
265bcc973b7SBarry Smith   int           n,i,jrow,j;
26682b1193eSBarry Smith 
267bcc973b7SBarry Smith   PetscFunctionBegin;
2682f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
2692f7816d4SBarry Smith   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
270bcc973b7SBarry Smith   idx  = a->j;
271bcc973b7SBarry Smith   v    = a->a;
272bcc973b7SBarry Smith   ii   = a->i;
273bcc973b7SBarry Smith 
274bcc973b7SBarry Smith   for (i=0; i<m; i++) {
275bcc973b7SBarry Smith     jrow = ii[i];
276bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
277bcc973b7SBarry Smith     sum1  = 0.0;
278bcc973b7SBarry Smith     sum2  = 0.0;
279bcc973b7SBarry Smith     sum3  = 0.0;
280bcc973b7SBarry Smith     for (j=0; j<n; j++) {
281bcc973b7SBarry Smith       sum1 += v[jrow]*x[3*idx[jrow]];
282bcc973b7SBarry Smith       sum2 += v[jrow]*x[3*idx[jrow]+1];
283bcc973b7SBarry Smith       sum3 += v[jrow]*x[3*idx[jrow]+2];
284bcc973b7SBarry Smith       jrow++;
285bcc973b7SBarry Smith      }
286bcc973b7SBarry Smith     y[3*i]   = sum1;
287bcc973b7SBarry Smith     y[3*i+1] = sum2;
288bcc973b7SBarry Smith     y[3*i+2] = sum3;
289bcc973b7SBarry Smith   }
290bcc973b7SBarry Smith 
291b0a32e0cSBarry Smith   PetscLogFlops(6*a->nz - 3*m);
2922f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
2932f7816d4SBarry Smith   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
294bcc973b7SBarry Smith   PetscFunctionReturn(0);
295bcc973b7SBarry Smith }
296bcc973b7SBarry Smith 
2974a2ae208SSatish Balay #undef __FUNCT__
298b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3"
299bcc973b7SBarry Smith int MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
300bcc973b7SBarry Smith {
3014cb9d9b8SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
302bcc973b7SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
30387828ca2SBarry Smith   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
304*bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,n,i,*idx;
305bcc973b7SBarry Smith 
306bcc973b7SBarry Smith   PetscFunctionBegin;
307bcc973b7SBarry Smith   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
3082f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
3092f7816d4SBarry Smith   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
310*bfec09a0SHong Zhang 
311bcc973b7SBarry Smith   for (i=0; i<m; i++) {
312*bfec09a0SHong Zhang     idx    = a->j + a->i[i];
313*bfec09a0SHong Zhang     v      = a->a + a->i[i];
314bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
315bcc973b7SBarry Smith     alpha1 = x[3*i];
316bcc973b7SBarry Smith     alpha2 = x[3*i+1];
317bcc973b7SBarry Smith     alpha3 = x[3*i+2];
318bcc973b7SBarry Smith     while (n-->0) {
319bcc973b7SBarry Smith       y[3*(*idx)]   += alpha1*(*v);
320bcc973b7SBarry Smith       y[3*(*idx)+1] += alpha2*(*v);
321bcc973b7SBarry Smith       y[3*(*idx)+2] += alpha3*(*v);
322bcc973b7SBarry Smith       idx++; v++;
323bcc973b7SBarry Smith     }
324bcc973b7SBarry Smith   }
325b0a32e0cSBarry Smith   PetscLogFlops(6*a->nz - 3*b->AIJ->n);
3262f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
3272f7816d4SBarry Smith   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
328bcc973b7SBarry Smith   PetscFunctionReturn(0);
329bcc973b7SBarry Smith }
330bcc973b7SBarry Smith 
3314a2ae208SSatish Balay #undef __FUNCT__
332b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_3"
333bcc973b7SBarry Smith int MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
334bcc973b7SBarry Smith {
3354cb9d9b8SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
336bcc973b7SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
33787828ca2SBarry Smith   PetscScalar   *x,*y,*v,sum1, sum2, sum3;
338*bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,*idx,*ii;
339bcc973b7SBarry Smith   int           n,i,jrow,j;
340bcc973b7SBarry Smith 
341bcc973b7SBarry Smith   PetscFunctionBegin;
342f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
3432f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
3442f7816d4SBarry Smith   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
345bcc973b7SBarry Smith   idx  = a->j;
346bcc973b7SBarry Smith   v    = a->a;
347bcc973b7SBarry Smith   ii   = a->i;
348bcc973b7SBarry Smith 
349bcc973b7SBarry Smith   for (i=0; i<m; i++) {
350bcc973b7SBarry Smith     jrow = ii[i];
351bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
352bcc973b7SBarry Smith     sum1  = 0.0;
353bcc973b7SBarry Smith     sum2  = 0.0;
354bcc973b7SBarry Smith     sum3  = 0.0;
355bcc973b7SBarry Smith     for (j=0; j<n; j++) {
356bcc973b7SBarry Smith       sum1 += v[jrow]*x[3*idx[jrow]];
357bcc973b7SBarry Smith       sum2 += v[jrow]*x[3*idx[jrow]+1];
358bcc973b7SBarry Smith       sum3 += v[jrow]*x[3*idx[jrow]+2];
359bcc973b7SBarry Smith       jrow++;
360bcc973b7SBarry Smith      }
361bcc973b7SBarry Smith     y[3*i]   += sum1;
362bcc973b7SBarry Smith     y[3*i+1] += sum2;
363bcc973b7SBarry Smith     y[3*i+2] += sum3;
364bcc973b7SBarry Smith   }
365bcc973b7SBarry Smith 
366b0a32e0cSBarry Smith   PetscLogFlops(6*a->nz);
3672f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
3682f7816d4SBarry Smith   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
369bcc973b7SBarry Smith   PetscFunctionReturn(0);
370bcc973b7SBarry Smith }
3714a2ae208SSatish Balay #undef __FUNCT__
372b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3"
373bcc973b7SBarry Smith int MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
374bcc973b7SBarry Smith {
3754cb9d9b8SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
376bcc973b7SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
37787828ca2SBarry Smith   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3;
378*bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,n,i,*idx;
379bcc973b7SBarry Smith 
380bcc973b7SBarry Smith   PetscFunctionBegin;
381f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
3822f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
3832f7816d4SBarry Smith   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
384bcc973b7SBarry Smith   for (i=0; i<m; i++) {
385*bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
386*bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
387bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
388bcc973b7SBarry Smith     alpha1 = x[3*i];
389bcc973b7SBarry Smith     alpha2 = x[3*i+1];
390bcc973b7SBarry Smith     alpha3 = x[3*i+2];
391bcc973b7SBarry Smith     while (n-->0) {
392bcc973b7SBarry Smith       y[3*(*idx)]   += alpha1*(*v);
393bcc973b7SBarry Smith       y[3*(*idx)+1] += alpha2*(*v);
394bcc973b7SBarry Smith       y[3*(*idx)+2] += alpha3*(*v);
395bcc973b7SBarry Smith       idx++; v++;
396bcc973b7SBarry Smith     }
397bcc973b7SBarry Smith   }
398b0a32e0cSBarry Smith   PetscLogFlops(6*a->nz);
3992f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
4002f7816d4SBarry Smith   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
401bcc973b7SBarry Smith   PetscFunctionReturn(0);
402bcc973b7SBarry Smith }
403bcc973b7SBarry Smith 
404bcc973b7SBarry Smith /* ------------------------------------------------------------------------------*/
4054a2ae208SSatish Balay #undef __FUNCT__
406b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_4"
407bcc973b7SBarry Smith int MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
408bcc973b7SBarry Smith {
4094cb9d9b8SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
410bcc973b7SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
41187828ca2SBarry Smith   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4;
412*bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,*idx,*ii;
413bcc973b7SBarry Smith   int           n,i,jrow,j;
414bcc973b7SBarry Smith 
415bcc973b7SBarry Smith   PetscFunctionBegin;
4162f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
4172f7816d4SBarry Smith   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
418bcc973b7SBarry Smith   idx  = a->j;
419bcc973b7SBarry Smith   v    = a->a;
420bcc973b7SBarry Smith   ii   = a->i;
421bcc973b7SBarry Smith 
422bcc973b7SBarry Smith   for (i=0; i<m; i++) {
423bcc973b7SBarry Smith     jrow = ii[i];
424bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
425bcc973b7SBarry Smith     sum1  = 0.0;
426bcc973b7SBarry Smith     sum2  = 0.0;
427bcc973b7SBarry Smith     sum3  = 0.0;
428bcc973b7SBarry Smith     sum4  = 0.0;
429bcc973b7SBarry Smith     for (j=0; j<n; j++) {
430bcc973b7SBarry Smith       sum1 += v[jrow]*x[4*idx[jrow]];
431bcc973b7SBarry Smith       sum2 += v[jrow]*x[4*idx[jrow]+1];
432bcc973b7SBarry Smith       sum3 += v[jrow]*x[4*idx[jrow]+2];
433bcc973b7SBarry Smith       sum4 += v[jrow]*x[4*idx[jrow]+3];
434bcc973b7SBarry Smith       jrow++;
435bcc973b7SBarry Smith      }
436bcc973b7SBarry Smith     y[4*i]   = sum1;
437bcc973b7SBarry Smith     y[4*i+1] = sum2;
438bcc973b7SBarry Smith     y[4*i+2] = sum3;
439bcc973b7SBarry Smith     y[4*i+3] = sum4;
440bcc973b7SBarry Smith   }
441bcc973b7SBarry Smith 
442b0a32e0cSBarry Smith   PetscLogFlops(8*a->nz - 4*m);
4432f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
4442f7816d4SBarry Smith   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
445bcc973b7SBarry Smith   PetscFunctionReturn(0);
446bcc973b7SBarry Smith }
447bcc973b7SBarry Smith 
4484a2ae208SSatish Balay #undef __FUNCT__
449b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4"
450bcc973b7SBarry Smith int MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
451bcc973b7SBarry Smith {
4524cb9d9b8SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
453bcc973b7SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
45487828ca2SBarry Smith   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
455*bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,n,i,*idx;
456bcc973b7SBarry Smith 
457bcc973b7SBarry Smith   PetscFunctionBegin;
458bcc973b7SBarry Smith   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
4592f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
4602f7816d4SBarry Smith   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
461bcc973b7SBarry Smith   for (i=0; i<m; i++) {
462*bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
463*bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
464bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
465bcc973b7SBarry Smith     alpha1 = x[4*i];
466bcc973b7SBarry Smith     alpha2 = x[4*i+1];
467bcc973b7SBarry Smith     alpha3 = x[4*i+2];
468bcc973b7SBarry Smith     alpha4 = x[4*i+3];
469bcc973b7SBarry Smith     while (n-->0) {
470bcc973b7SBarry Smith       y[4*(*idx)]   += alpha1*(*v);
471bcc973b7SBarry Smith       y[4*(*idx)+1] += alpha2*(*v);
472bcc973b7SBarry Smith       y[4*(*idx)+2] += alpha3*(*v);
473bcc973b7SBarry Smith       y[4*(*idx)+3] += alpha4*(*v);
474bcc973b7SBarry Smith       idx++; v++;
475bcc973b7SBarry Smith     }
476bcc973b7SBarry Smith   }
477b0a32e0cSBarry Smith   PetscLogFlops(8*a->nz - 4*b->AIJ->n);
4782f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
4792f7816d4SBarry Smith   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
480bcc973b7SBarry Smith   PetscFunctionReturn(0);
481bcc973b7SBarry Smith }
482bcc973b7SBarry Smith 
4834a2ae208SSatish Balay #undef __FUNCT__
484b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_4"
485bcc973b7SBarry Smith int MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
486bcc973b7SBarry Smith {
4874cb9d9b8SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
488f9fae5adSBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
48987828ca2SBarry Smith   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4;
490*bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,*idx,*ii;
491f9fae5adSBarry Smith   int           n,i,jrow,j;
492f9fae5adSBarry Smith 
493f9fae5adSBarry Smith   PetscFunctionBegin;
494f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
4952f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
4962f7816d4SBarry Smith   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
497f9fae5adSBarry Smith   idx  = a->j;
498f9fae5adSBarry Smith   v    = a->a;
499f9fae5adSBarry Smith   ii   = a->i;
500f9fae5adSBarry Smith 
501f9fae5adSBarry Smith   for (i=0; i<m; i++) {
502f9fae5adSBarry Smith     jrow = ii[i];
503f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
504f9fae5adSBarry Smith     sum1  = 0.0;
505f9fae5adSBarry Smith     sum2  = 0.0;
506f9fae5adSBarry Smith     sum3  = 0.0;
507f9fae5adSBarry Smith     sum4  = 0.0;
508f9fae5adSBarry Smith     for (j=0; j<n; j++) {
509f9fae5adSBarry Smith       sum1 += v[jrow]*x[4*idx[jrow]];
510f9fae5adSBarry Smith       sum2 += v[jrow]*x[4*idx[jrow]+1];
511f9fae5adSBarry Smith       sum3 += v[jrow]*x[4*idx[jrow]+2];
512f9fae5adSBarry Smith       sum4 += v[jrow]*x[4*idx[jrow]+3];
513f9fae5adSBarry Smith       jrow++;
514f9fae5adSBarry Smith      }
515f9fae5adSBarry Smith     y[4*i]   += sum1;
516f9fae5adSBarry Smith     y[4*i+1] += sum2;
517f9fae5adSBarry Smith     y[4*i+2] += sum3;
518f9fae5adSBarry Smith     y[4*i+3] += sum4;
519f9fae5adSBarry Smith   }
520f9fae5adSBarry Smith 
521b0a32e0cSBarry Smith   PetscLogFlops(8*a->nz - 4*m);
5222f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
5232f7816d4SBarry Smith   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
524f9fae5adSBarry Smith   PetscFunctionReturn(0);
525bcc973b7SBarry Smith }
5264a2ae208SSatish Balay #undef __FUNCT__
527b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4"
528bcc973b7SBarry Smith int MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
529bcc973b7SBarry Smith {
5304cb9d9b8SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
531f9fae5adSBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
53287828ca2SBarry Smith   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
533*bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,n,i,*idx;
534f9fae5adSBarry Smith 
535f9fae5adSBarry Smith   PetscFunctionBegin;
536f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
5372f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
5382f7816d4SBarry Smith   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
539*bfec09a0SHong Zhang 
540f9fae5adSBarry Smith   for (i=0; i<m; i++) {
541*bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
542*bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
543f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
544f9fae5adSBarry Smith     alpha1 = x[4*i];
545f9fae5adSBarry Smith     alpha2 = x[4*i+1];
546f9fae5adSBarry Smith     alpha3 = x[4*i+2];
547f9fae5adSBarry Smith     alpha4 = x[4*i+3];
548f9fae5adSBarry Smith     while (n-->0) {
549f9fae5adSBarry Smith       y[4*(*idx)]   += alpha1*(*v);
550f9fae5adSBarry Smith       y[4*(*idx)+1] += alpha2*(*v);
551f9fae5adSBarry Smith       y[4*(*idx)+2] += alpha3*(*v);
552f9fae5adSBarry Smith       y[4*(*idx)+3] += alpha4*(*v);
553f9fae5adSBarry Smith       idx++; v++;
554f9fae5adSBarry Smith     }
555f9fae5adSBarry Smith   }
556b0a32e0cSBarry Smith   PetscLogFlops(8*a->nz - 4*b->AIJ->n);
5572f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
5582f7816d4SBarry Smith   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
559f9fae5adSBarry Smith   PetscFunctionReturn(0);
560f9fae5adSBarry Smith }
561f9fae5adSBarry Smith /* ------------------------------------------------------------------------------*/
5626cd98798SBarry Smith 
5634a2ae208SSatish Balay #undef __FUNCT__
564b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_5"
565f9fae5adSBarry Smith int MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
566f9fae5adSBarry Smith {
5674cb9d9b8SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
568f9fae5adSBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
56987828ca2SBarry Smith   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
570*bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,*idx,*ii;
571f9fae5adSBarry Smith   int           n,i,jrow,j;
572f9fae5adSBarry Smith 
573f9fae5adSBarry Smith   PetscFunctionBegin;
5742f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
5752f7816d4SBarry Smith   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
576f9fae5adSBarry Smith   idx  = a->j;
577f9fae5adSBarry Smith   v    = a->a;
578f9fae5adSBarry Smith   ii   = a->i;
579f9fae5adSBarry Smith 
580f9fae5adSBarry Smith   for (i=0; i<m; i++) {
581f9fae5adSBarry Smith     jrow = ii[i];
582f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
583f9fae5adSBarry Smith     sum1  = 0.0;
584f9fae5adSBarry Smith     sum2  = 0.0;
585f9fae5adSBarry Smith     sum3  = 0.0;
586f9fae5adSBarry Smith     sum4  = 0.0;
587f9fae5adSBarry Smith     sum5  = 0.0;
588f9fae5adSBarry Smith     for (j=0; j<n; j++) {
589f9fae5adSBarry Smith       sum1 += v[jrow]*x[5*idx[jrow]];
590f9fae5adSBarry Smith       sum2 += v[jrow]*x[5*idx[jrow]+1];
591f9fae5adSBarry Smith       sum3 += v[jrow]*x[5*idx[jrow]+2];
592f9fae5adSBarry Smith       sum4 += v[jrow]*x[5*idx[jrow]+3];
593f9fae5adSBarry Smith       sum5 += v[jrow]*x[5*idx[jrow]+4];
594f9fae5adSBarry Smith       jrow++;
595f9fae5adSBarry Smith      }
596f9fae5adSBarry Smith     y[5*i]   = sum1;
597f9fae5adSBarry Smith     y[5*i+1] = sum2;
598f9fae5adSBarry Smith     y[5*i+2] = sum3;
599f9fae5adSBarry Smith     y[5*i+3] = sum4;
600f9fae5adSBarry Smith     y[5*i+4] = sum5;
601f9fae5adSBarry Smith   }
602f9fae5adSBarry Smith 
603b0a32e0cSBarry Smith   PetscLogFlops(10*a->nz - 5*m);
6042f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
6052f7816d4SBarry Smith   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
606f9fae5adSBarry Smith   PetscFunctionReturn(0);
607f9fae5adSBarry Smith }
608f9fae5adSBarry Smith 
6094a2ae208SSatish Balay #undef __FUNCT__
610b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5"
611f9fae5adSBarry Smith int MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
612f9fae5adSBarry Smith {
6134cb9d9b8SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
614f9fae5adSBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
61587828ca2SBarry Smith   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
616*bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,n,i,*idx;
617f9fae5adSBarry Smith 
618f9fae5adSBarry Smith   PetscFunctionBegin;
619f9fae5adSBarry Smith   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
6202f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
6212f7816d4SBarry Smith   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
622*bfec09a0SHong Zhang 
623f9fae5adSBarry Smith   for (i=0; i<m; i++) {
624*bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
625*bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
626f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
627f9fae5adSBarry Smith     alpha1 = x[5*i];
628f9fae5adSBarry Smith     alpha2 = x[5*i+1];
629f9fae5adSBarry Smith     alpha3 = x[5*i+2];
630f9fae5adSBarry Smith     alpha4 = x[5*i+3];
631f9fae5adSBarry Smith     alpha5 = x[5*i+4];
632f9fae5adSBarry Smith     while (n-->0) {
633f9fae5adSBarry Smith       y[5*(*idx)]   += alpha1*(*v);
634f9fae5adSBarry Smith       y[5*(*idx)+1] += alpha2*(*v);
635f9fae5adSBarry Smith       y[5*(*idx)+2] += alpha3*(*v);
636f9fae5adSBarry Smith       y[5*(*idx)+3] += alpha4*(*v);
637f9fae5adSBarry Smith       y[5*(*idx)+4] += alpha5*(*v);
638f9fae5adSBarry Smith       idx++; v++;
639f9fae5adSBarry Smith     }
640f9fae5adSBarry Smith   }
641b0a32e0cSBarry Smith   PetscLogFlops(10*a->nz - 5*b->AIJ->n);
6422f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
6432f7816d4SBarry Smith   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
644f9fae5adSBarry Smith   PetscFunctionReturn(0);
645f9fae5adSBarry Smith }
646f9fae5adSBarry Smith 
6474a2ae208SSatish Balay #undef __FUNCT__
648b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_5"
649f9fae5adSBarry Smith int MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
650f9fae5adSBarry Smith {
6514cb9d9b8SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
652f9fae5adSBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
65387828ca2SBarry Smith   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
654*bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,*idx,*ii;
655f9fae5adSBarry Smith   int           n,i,jrow,j;
656f9fae5adSBarry Smith 
657f9fae5adSBarry Smith   PetscFunctionBegin;
658f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
6592f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
6602f7816d4SBarry Smith   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
661f9fae5adSBarry Smith   idx  = a->j;
662f9fae5adSBarry Smith   v    = a->a;
663f9fae5adSBarry Smith   ii   = a->i;
664f9fae5adSBarry Smith 
665f9fae5adSBarry Smith   for (i=0; i<m; i++) {
666f9fae5adSBarry Smith     jrow = ii[i];
667f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
668f9fae5adSBarry Smith     sum1  = 0.0;
669f9fae5adSBarry Smith     sum2  = 0.0;
670f9fae5adSBarry Smith     sum3  = 0.0;
671f9fae5adSBarry Smith     sum4  = 0.0;
672f9fae5adSBarry Smith     sum5  = 0.0;
673f9fae5adSBarry Smith     for (j=0; j<n; j++) {
674f9fae5adSBarry Smith       sum1 += v[jrow]*x[5*idx[jrow]];
675f9fae5adSBarry Smith       sum2 += v[jrow]*x[5*idx[jrow]+1];
676f9fae5adSBarry Smith       sum3 += v[jrow]*x[5*idx[jrow]+2];
677f9fae5adSBarry Smith       sum4 += v[jrow]*x[5*idx[jrow]+3];
678f9fae5adSBarry Smith       sum5 += v[jrow]*x[5*idx[jrow]+4];
679f9fae5adSBarry Smith       jrow++;
680f9fae5adSBarry Smith      }
681f9fae5adSBarry Smith     y[5*i]   += sum1;
682f9fae5adSBarry Smith     y[5*i+1] += sum2;
683f9fae5adSBarry Smith     y[5*i+2] += sum3;
684f9fae5adSBarry Smith     y[5*i+3] += sum4;
685f9fae5adSBarry Smith     y[5*i+4] += sum5;
686f9fae5adSBarry Smith   }
687f9fae5adSBarry Smith 
688b0a32e0cSBarry Smith   PetscLogFlops(10*a->nz);
6892f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
6902f7816d4SBarry Smith   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
691f9fae5adSBarry Smith   PetscFunctionReturn(0);
692f9fae5adSBarry Smith }
693f9fae5adSBarry Smith 
6944a2ae208SSatish Balay #undef __FUNCT__
695b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5"
696f9fae5adSBarry Smith int MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
697f9fae5adSBarry Smith {
6984cb9d9b8SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
699f9fae5adSBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
70087828ca2SBarry Smith   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
701*bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,n,i,*idx;
702f9fae5adSBarry Smith 
703f9fae5adSBarry Smith   PetscFunctionBegin;
704f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
7052f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
7062f7816d4SBarry Smith   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
707*bfec09a0SHong Zhang 
708f9fae5adSBarry Smith   for (i=0; i<m; i++) {
709*bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
710*bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
711f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
712f9fae5adSBarry Smith     alpha1 = x[5*i];
713f9fae5adSBarry Smith     alpha2 = x[5*i+1];
714f9fae5adSBarry Smith     alpha3 = x[5*i+2];
715f9fae5adSBarry Smith     alpha4 = x[5*i+3];
716f9fae5adSBarry Smith     alpha5 = x[5*i+4];
717f9fae5adSBarry Smith     while (n-->0) {
718f9fae5adSBarry Smith       y[5*(*idx)]   += alpha1*(*v);
719f9fae5adSBarry Smith       y[5*(*idx)+1] += alpha2*(*v);
720f9fae5adSBarry Smith       y[5*(*idx)+2] += alpha3*(*v);
721f9fae5adSBarry Smith       y[5*(*idx)+3] += alpha4*(*v);
722f9fae5adSBarry Smith       y[5*(*idx)+4] += alpha5*(*v);
723f9fae5adSBarry Smith       idx++; v++;
724f9fae5adSBarry Smith     }
725f9fae5adSBarry Smith   }
726b0a32e0cSBarry Smith   PetscLogFlops(10*a->nz);
7272f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
7282f7816d4SBarry Smith   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
729f9fae5adSBarry Smith   PetscFunctionReturn(0);
730bcc973b7SBarry Smith }
731bcc973b7SBarry Smith 
7326cd98798SBarry Smith /* ------------------------------------------------------------------------------*/
7334a2ae208SSatish Balay #undef __FUNCT__
734b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_6"
7356cd98798SBarry Smith int MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
7366cd98798SBarry Smith {
7376cd98798SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
7386cd98798SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
73987828ca2SBarry Smith   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
740*bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,*idx,*ii;
7416cd98798SBarry Smith   int           n,i,jrow,j;
7426cd98798SBarry Smith 
7436cd98798SBarry Smith   PetscFunctionBegin;
7442f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
7452f7816d4SBarry Smith   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
7466cd98798SBarry Smith   idx  = a->j;
7476cd98798SBarry Smith   v    = a->a;
7486cd98798SBarry Smith   ii   = a->i;
7496cd98798SBarry Smith 
7506cd98798SBarry Smith   for (i=0; i<m; i++) {
7516cd98798SBarry Smith     jrow = ii[i];
7526cd98798SBarry Smith     n    = ii[i+1] - jrow;
7536cd98798SBarry Smith     sum1  = 0.0;
7546cd98798SBarry Smith     sum2  = 0.0;
7556cd98798SBarry Smith     sum3  = 0.0;
7566cd98798SBarry Smith     sum4  = 0.0;
7576cd98798SBarry Smith     sum5  = 0.0;
7586cd98798SBarry Smith     sum6  = 0.0;
7596cd98798SBarry Smith     for (j=0; j<n; j++) {
7606cd98798SBarry Smith       sum1 += v[jrow]*x[6*idx[jrow]];
7616cd98798SBarry Smith       sum2 += v[jrow]*x[6*idx[jrow]+1];
7626cd98798SBarry Smith       sum3 += v[jrow]*x[6*idx[jrow]+2];
7636cd98798SBarry Smith       sum4 += v[jrow]*x[6*idx[jrow]+3];
7646cd98798SBarry Smith       sum5 += v[jrow]*x[6*idx[jrow]+4];
7656cd98798SBarry Smith       sum6 += v[jrow]*x[6*idx[jrow]+5];
7666cd98798SBarry Smith       jrow++;
7676cd98798SBarry Smith      }
7686cd98798SBarry Smith     y[6*i]   = sum1;
7696cd98798SBarry Smith     y[6*i+1] = sum2;
7706cd98798SBarry Smith     y[6*i+2] = sum3;
7716cd98798SBarry Smith     y[6*i+3] = sum4;
7726cd98798SBarry Smith     y[6*i+4] = sum5;
7736cd98798SBarry Smith     y[6*i+5] = sum6;
7746cd98798SBarry Smith   }
7756cd98798SBarry Smith 
776b0a32e0cSBarry Smith   PetscLogFlops(12*a->nz - 6*m);
7772f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
7782f7816d4SBarry Smith   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
7796cd98798SBarry Smith   PetscFunctionReturn(0);
7806cd98798SBarry Smith }
7816cd98798SBarry Smith 
7824a2ae208SSatish Balay #undef __FUNCT__
783b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6"
7846cd98798SBarry Smith int MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
7856cd98798SBarry Smith {
7866cd98798SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
7876cd98798SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
78887828ca2SBarry Smith   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0;
789*bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,n,i,*idx;
7906cd98798SBarry Smith 
7916cd98798SBarry Smith   PetscFunctionBegin;
7926cd98798SBarry Smith   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
7932f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
7942f7816d4SBarry Smith   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
795*bfec09a0SHong Zhang 
7966cd98798SBarry Smith   for (i=0; i<m; i++) {
797*bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
798*bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
7996cd98798SBarry Smith     n      = a->i[i+1] - a->i[i];
8006cd98798SBarry Smith     alpha1 = x[6*i];
8016cd98798SBarry Smith     alpha2 = x[6*i+1];
8026cd98798SBarry Smith     alpha3 = x[6*i+2];
8036cd98798SBarry Smith     alpha4 = x[6*i+3];
8046cd98798SBarry Smith     alpha5 = x[6*i+4];
8056cd98798SBarry Smith     alpha6 = x[6*i+5];
8066cd98798SBarry Smith     while (n-->0) {
8076cd98798SBarry Smith       y[6*(*idx)]   += alpha1*(*v);
8086cd98798SBarry Smith       y[6*(*idx)+1] += alpha2*(*v);
8096cd98798SBarry Smith       y[6*(*idx)+2] += alpha3*(*v);
8106cd98798SBarry Smith       y[6*(*idx)+3] += alpha4*(*v);
8116cd98798SBarry Smith       y[6*(*idx)+4] += alpha5*(*v);
8126cd98798SBarry Smith       y[6*(*idx)+5] += alpha6*(*v);
8136cd98798SBarry Smith       idx++; v++;
8146cd98798SBarry Smith     }
8156cd98798SBarry Smith   }
816b0a32e0cSBarry Smith   PetscLogFlops(12*a->nz - 6*b->AIJ->n);
8172f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
8182f7816d4SBarry Smith   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
8196cd98798SBarry Smith   PetscFunctionReturn(0);
8206cd98798SBarry Smith }
8216cd98798SBarry Smith 
8224a2ae208SSatish Balay #undef __FUNCT__
823b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_6"
8246cd98798SBarry Smith int MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
8256cd98798SBarry Smith {
8266cd98798SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
8276cd98798SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
82887828ca2SBarry Smith   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
829*bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,*idx,*ii;
8306cd98798SBarry Smith   int           n,i,jrow,j;
8316cd98798SBarry Smith 
8326cd98798SBarry Smith   PetscFunctionBegin;
8336cd98798SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
8342f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
8352f7816d4SBarry Smith   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
8366cd98798SBarry Smith   idx  = a->j;
8376cd98798SBarry Smith   v    = a->a;
8386cd98798SBarry Smith   ii   = a->i;
8396cd98798SBarry Smith 
8406cd98798SBarry Smith   for (i=0; i<m; i++) {
8416cd98798SBarry Smith     jrow = ii[i];
8426cd98798SBarry Smith     n    = ii[i+1] - jrow;
8436cd98798SBarry Smith     sum1  = 0.0;
8446cd98798SBarry Smith     sum2  = 0.0;
8456cd98798SBarry Smith     sum3  = 0.0;
8466cd98798SBarry Smith     sum4  = 0.0;
8476cd98798SBarry Smith     sum5  = 0.0;
8486cd98798SBarry Smith     sum6  = 0.0;
8496cd98798SBarry Smith     for (j=0; j<n; j++) {
8506cd98798SBarry Smith       sum1 += v[jrow]*x[6*idx[jrow]];
8516cd98798SBarry Smith       sum2 += v[jrow]*x[6*idx[jrow]+1];
8526cd98798SBarry Smith       sum3 += v[jrow]*x[6*idx[jrow]+2];
8536cd98798SBarry Smith       sum4 += v[jrow]*x[6*idx[jrow]+3];
8546cd98798SBarry Smith       sum5 += v[jrow]*x[6*idx[jrow]+4];
8556cd98798SBarry Smith       sum6 += v[jrow]*x[6*idx[jrow]+5];
8566cd98798SBarry Smith       jrow++;
8576cd98798SBarry Smith      }
8586cd98798SBarry Smith     y[6*i]   += sum1;
8596cd98798SBarry Smith     y[6*i+1] += sum2;
8606cd98798SBarry Smith     y[6*i+2] += sum3;
8616cd98798SBarry Smith     y[6*i+3] += sum4;
8626cd98798SBarry Smith     y[6*i+4] += sum5;
8636cd98798SBarry Smith     y[6*i+5] += sum6;
8646cd98798SBarry Smith   }
8656cd98798SBarry Smith 
866b0a32e0cSBarry Smith   PetscLogFlops(12*a->nz);
8672f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
8682f7816d4SBarry Smith   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
8696cd98798SBarry Smith   PetscFunctionReturn(0);
8706cd98798SBarry Smith }
8716cd98798SBarry Smith 
8724a2ae208SSatish Balay #undef __FUNCT__
873b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6"
8746cd98798SBarry Smith int MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
8756cd98798SBarry Smith {
8766cd98798SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
8776cd98798SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
87887828ca2SBarry Smith   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
879*bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,n,i,*idx;
8806cd98798SBarry Smith 
8816cd98798SBarry Smith   PetscFunctionBegin;
8826cd98798SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
8832f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
8842f7816d4SBarry Smith   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
885*bfec09a0SHong Zhang 
8866cd98798SBarry Smith   for (i=0; i<m; i++) {
887*bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
888*bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
8896cd98798SBarry Smith     n      = a->i[i+1] - a->i[i];
8906cd98798SBarry Smith     alpha1 = x[6*i];
8916cd98798SBarry Smith     alpha2 = x[6*i+1];
8926cd98798SBarry Smith     alpha3 = x[6*i+2];
8936cd98798SBarry Smith     alpha4 = x[6*i+3];
8946cd98798SBarry Smith     alpha5 = x[6*i+4];
8956cd98798SBarry Smith     alpha6 = x[6*i+5];
8966cd98798SBarry Smith     while (n-->0) {
8976cd98798SBarry Smith       y[6*(*idx)]   += alpha1*(*v);
8986cd98798SBarry Smith       y[6*(*idx)+1] += alpha2*(*v);
8996cd98798SBarry Smith       y[6*(*idx)+2] += alpha3*(*v);
9006cd98798SBarry Smith       y[6*(*idx)+3] += alpha4*(*v);
9016cd98798SBarry Smith       y[6*(*idx)+4] += alpha5*(*v);
9026cd98798SBarry Smith       y[6*(*idx)+5] += alpha6*(*v);
9036cd98798SBarry Smith       idx++; v++;
9046cd98798SBarry Smith     }
9056cd98798SBarry Smith   }
906b0a32e0cSBarry Smith   PetscLogFlops(12*a->nz);
9072f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
9082f7816d4SBarry Smith   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
9096cd98798SBarry Smith   PetscFunctionReturn(0);
9106cd98798SBarry Smith }
9116cd98798SBarry Smith 
91266ed3db0SBarry Smith /* ------------------------------------------------------------------------------*/
91366ed3db0SBarry Smith #undef __FUNCT__
91466ed3db0SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_8"
91566ed3db0SBarry Smith int MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
91666ed3db0SBarry Smith {
91766ed3db0SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
91866ed3db0SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
91987828ca2SBarry Smith   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
920*bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,*idx,*ii;
92166ed3db0SBarry Smith   int           n,i,jrow,j;
92266ed3db0SBarry Smith 
92366ed3db0SBarry Smith   PetscFunctionBegin;
9242f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
9252f7816d4SBarry Smith   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
92666ed3db0SBarry Smith   idx  = a->j;
92766ed3db0SBarry Smith   v    = a->a;
92866ed3db0SBarry Smith   ii   = a->i;
92966ed3db0SBarry Smith 
93066ed3db0SBarry Smith   for (i=0; i<m; i++) {
93166ed3db0SBarry Smith     jrow = ii[i];
93266ed3db0SBarry Smith     n    = ii[i+1] - jrow;
93366ed3db0SBarry Smith     sum1  = 0.0;
93466ed3db0SBarry Smith     sum2  = 0.0;
93566ed3db0SBarry Smith     sum3  = 0.0;
93666ed3db0SBarry Smith     sum4  = 0.0;
93766ed3db0SBarry Smith     sum5  = 0.0;
93866ed3db0SBarry Smith     sum6  = 0.0;
93966ed3db0SBarry Smith     sum7  = 0.0;
94066ed3db0SBarry Smith     sum8  = 0.0;
94166ed3db0SBarry Smith     for (j=0; j<n; j++) {
94266ed3db0SBarry Smith       sum1 += v[jrow]*x[8*idx[jrow]];
94366ed3db0SBarry Smith       sum2 += v[jrow]*x[8*idx[jrow]+1];
94466ed3db0SBarry Smith       sum3 += v[jrow]*x[8*idx[jrow]+2];
94566ed3db0SBarry Smith       sum4 += v[jrow]*x[8*idx[jrow]+3];
94666ed3db0SBarry Smith       sum5 += v[jrow]*x[8*idx[jrow]+4];
94766ed3db0SBarry Smith       sum6 += v[jrow]*x[8*idx[jrow]+5];
94866ed3db0SBarry Smith       sum7 += v[jrow]*x[8*idx[jrow]+6];
94966ed3db0SBarry Smith       sum8 += v[jrow]*x[8*idx[jrow]+7];
95066ed3db0SBarry Smith       jrow++;
95166ed3db0SBarry Smith      }
95266ed3db0SBarry Smith     y[8*i]   = sum1;
95366ed3db0SBarry Smith     y[8*i+1] = sum2;
95466ed3db0SBarry Smith     y[8*i+2] = sum3;
95566ed3db0SBarry Smith     y[8*i+3] = sum4;
95666ed3db0SBarry Smith     y[8*i+4] = sum5;
95766ed3db0SBarry Smith     y[8*i+5] = sum6;
95866ed3db0SBarry Smith     y[8*i+6] = sum7;
95966ed3db0SBarry Smith     y[8*i+7] = sum8;
96066ed3db0SBarry Smith   }
96166ed3db0SBarry Smith 
96266ed3db0SBarry Smith   PetscLogFlops(16*a->nz - 8*m);
9632f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
9642f7816d4SBarry Smith   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
96566ed3db0SBarry Smith   PetscFunctionReturn(0);
96666ed3db0SBarry Smith }
96766ed3db0SBarry Smith 
96866ed3db0SBarry Smith #undef __FUNCT__
96966ed3db0SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8"
97066ed3db0SBarry Smith int MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
97166ed3db0SBarry Smith {
97266ed3db0SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
97366ed3db0SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
97487828ca2SBarry Smith   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
975*bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,n,i,*idx;
97666ed3db0SBarry Smith 
97766ed3db0SBarry Smith   PetscFunctionBegin;
97866ed3db0SBarry Smith   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
9792f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
9802f7816d4SBarry Smith   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
981*bfec09a0SHong Zhang 
98266ed3db0SBarry Smith   for (i=0; i<m; i++) {
983*bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
984*bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
98566ed3db0SBarry Smith     n      = a->i[i+1] - a->i[i];
98666ed3db0SBarry Smith     alpha1 = x[8*i];
98766ed3db0SBarry Smith     alpha2 = x[8*i+1];
98866ed3db0SBarry Smith     alpha3 = x[8*i+2];
98966ed3db0SBarry Smith     alpha4 = x[8*i+3];
99066ed3db0SBarry Smith     alpha5 = x[8*i+4];
99166ed3db0SBarry Smith     alpha6 = x[8*i+5];
99266ed3db0SBarry Smith     alpha7 = x[8*i+6];
99366ed3db0SBarry Smith     alpha8 = x[8*i+7];
99466ed3db0SBarry Smith     while (n-->0) {
99566ed3db0SBarry Smith       y[8*(*idx)]   += alpha1*(*v);
99666ed3db0SBarry Smith       y[8*(*idx)+1] += alpha2*(*v);
99766ed3db0SBarry Smith       y[8*(*idx)+2] += alpha3*(*v);
99866ed3db0SBarry Smith       y[8*(*idx)+3] += alpha4*(*v);
99966ed3db0SBarry Smith       y[8*(*idx)+4] += alpha5*(*v);
100066ed3db0SBarry Smith       y[8*(*idx)+5] += alpha6*(*v);
100166ed3db0SBarry Smith       y[8*(*idx)+6] += alpha7*(*v);
100266ed3db0SBarry Smith       y[8*(*idx)+7] += alpha8*(*v);
100366ed3db0SBarry Smith       idx++; v++;
100466ed3db0SBarry Smith     }
100566ed3db0SBarry Smith   }
100666ed3db0SBarry Smith   PetscLogFlops(16*a->nz - 8*b->AIJ->n);
10072f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
10082f7816d4SBarry Smith   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
100966ed3db0SBarry Smith   PetscFunctionReturn(0);
101066ed3db0SBarry Smith }
101166ed3db0SBarry Smith 
101266ed3db0SBarry Smith #undef __FUNCT__
101366ed3db0SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_8"
101466ed3db0SBarry Smith int MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
101566ed3db0SBarry Smith {
101666ed3db0SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
101766ed3db0SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
101887828ca2SBarry Smith   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1019*bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,*idx,*ii;
102066ed3db0SBarry Smith   int           n,i,jrow,j;
102166ed3db0SBarry Smith 
102266ed3db0SBarry Smith   PetscFunctionBegin;
102366ed3db0SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
10242f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
10252f7816d4SBarry Smith   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
102666ed3db0SBarry Smith   idx  = a->j;
102766ed3db0SBarry Smith   v    = a->a;
102866ed3db0SBarry Smith   ii   = a->i;
102966ed3db0SBarry Smith 
103066ed3db0SBarry Smith   for (i=0; i<m; i++) {
103166ed3db0SBarry Smith     jrow = ii[i];
103266ed3db0SBarry Smith     n    = ii[i+1] - jrow;
103366ed3db0SBarry Smith     sum1  = 0.0;
103466ed3db0SBarry Smith     sum2  = 0.0;
103566ed3db0SBarry Smith     sum3  = 0.0;
103666ed3db0SBarry Smith     sum4  = 0.0;
103766ed3db0SBarry Smith     sum5  = 0.0;
103866ed3db0SBarry Smith     sum6  = 0.0;
103966ed3db0SBarry Smith     sum7  = 0.0;
104066ed3db0SBarry Smith     sum8  = 0.0;
104166ed3db0SBarry Smith     for (j=0; j<n; j++) {
104266ed3db0SBarry Smith       sum1 += v[jrow]*x[8*idx[jrow]];
104366ed3db0SBarry Smith       sum2 += v[jrow]*x[8*idx[jrow]+1];
104466ed3db0SBarry Smith       sum3 += v[jrow]*x[8*idx[jrow]+2];
104566ed3db0SBarry Smith       sum4 += v[jrow]*x[8*idx[jrow]+3];
104666ed3db0SBarry Smith       sum5 += v[jrow]*x[8*idx[jrow]+4];
104766ed3db0SBarry Smith       sum6 += v[jrow]*x[8*idx[jrow]+5];
104866ed3db0SBarry Smith       sum7 += v[jrow]*x[8*idx[jrow]+6];
104966ed3db0SBarry Smith       sum8 += v[jrow]*x[8*idx[jrow]+7];
105066ed3db0SBarry Smith       jrow++;
105166ed3db0SBarry Smith      }
105266ed3db0SBarry Smith     y[8*i]   += sum1;
105366ed3db0SBarry Smith     y[8*i+1] += sum2;
105466ed3db0SBarry Smith     y[8*i+2] += sum3;
105566ed3db0SBarry Smith     y[8*i+3] += sum4;
105666ed3db0SBarry Smith     y[8*i+4] += sum5;
105766ed3db0SBarry Smith     y[8*i+5] += sum6;
105866ed3db0SBarry Smith     y[8*i+6] += sum7;
105966ed3db0SBarry Smith     y[8*i+7] += sum8;
106066ed3db0SBarry Smith   }
106166ed3db0SBarry Smith 
106266ed3db0SBarry Smith   PetscLogFlops(16*a->nz);
10632f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
10642f7816d4SBarry Smith   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
106566ed3db0SBarry Smith   PetscFunctionReturn(0);
106666ed3db0SBarry Smith }
106766ed3db0SBarry Smith 
106866ed3db0SBarry Smith #undef __FUNCT__
106966ed3db0SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8"
107066ed3db0SBarry Smith int MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
107166ed3db0SBarry Smith {
107266ed3db0SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
107366ed3db0SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
107487828ca2SBarry Smith   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1075*bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,n,i,*idx;
107666ed3db0SBarry Smith 
107766ed3db0SBarry Smith   PetscFunctionBegin;
107866ed3db0SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
10792f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
10802f7816d4SBarry Smith   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
108166ed3db0SBarry Smith   for (i=0; i<m; i++) {
1082*bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
1083*bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
108466ed3db0SBarry Smith     n      = a->i[i+1] - a->i[i];
108566ed3db0SBarry Smith     alpha1 = x[8*i];
108666ed3db0SBarry Smith     alpha2 = x[8*i+1];
108766ed3db0SBarry Smith     alpha3 = x[8*i+2];
108866ed3db0SBarry Smith     alpha4 = x[8*i+3];
108966ed3db0SBarry Smith     alpha5 = x[8*i+4];
109066ed3db0SBarry Smith     alpha6 = x[8*i+5];
109166ed3db0SBarry Smith     alpha7 = x[8*i+6];
109266ed3db0SBarry Smith     alpha8 = x[8*i+7];
109366ed3db0SBarry Smith     while (n-->0) {
109466ed3db0SBarry Smith       y[8*(*idx)]   += alpha1*(*v);
109566ed3db0SBarry Smith       y[8*(*idx)+1] += alpha2*(*v);
109666ed3db0SBarry Smith       y[8*(*idx)+2] += alpha3*(*v);
109766ed3db0SBarry Smith       y[8*(*idx)+3] += alpha4*(*v);
109866ed3db0SBarry Smith       y[8*(*idx)+4] += alpha5*(*v);
109966ed3db0SBarry Smith       y[8*(*idx)+5] += alpha6*(*v);
110066ed3db0SBarry Smith       y[8*(*idx)+6] += alpha7*(*v);
110166ed3db0SBarry Smith       y[8*(*idx)+7] += alpha8*(*v);
110266ed3db0SBarry Smith       idx++; v++;
110366ed3db0SBarry Smith     }
110466ed3db0SBarry Smith   }
110566ed3db0SBarry Smith   PetscLogFlops(16*a->nz);
11062f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
11072f7816d4SBarry Smith   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
11082f7816d4SBarry Smith   PetscFunctionReturn(0);
11092f7816d4SBarry Smith }
11102f7816d4SBarry Smith 
11112f7816d4SBarry Smith /*--------------------------------------------------------------------------------------------*/
11122f7816d4SBarry Smith #undef __FUNCT__
11132f7816d4SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_16"
11142f7816d4SBarry Smith int MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
11152f7816d4SBarry Smith {
11162f7816d4SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
11172f7816d4SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
11182f7816d4SBarry Smith   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
11192f7816d4SBarry Smith   PetscScalar   sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1120*bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,*idx,*ii;
11212f7816d4SBarry Smith   int           n,i,jrow,j;
11222f7816d4SBarry Smith 
11232f7816d4SBarry Smith   PetscFunctionBegin;
11242f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
11252f7816d4SBarry Smith   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
11262f7816d4SBarry Smith   idx  = a->j;
11272f7816d4SBarry Smith   v    = a->a;
11282f7816d4SBarry Smith   ii   = a->i;
11292f7816d4SBarry Smith 
11302f7816d4SBarry Smith   for (i=0; i<m; i++) {
11312f7816d4SBarry Smith     jrow = ii[i];
11322f7816d4SBarry Smith     n    = ii[i+1] - jrow;
11332f7816d4SBarry Smith     sum1  = 0.0;
11342f7816d4SBarry Smith     sum2  = 0.0;
11352f7816d4SBarry Smith     sum3  = 0.0;
11362f7816d4SBarry Smith     sum4  = 0.0;
11372f7816d4SBarry Smith     sum5  = 0.0;
11382f7816d4SBarry Smith     sum6  = 0.0;
11392f7816d4SBarry Smith     sum7  = 0.0;
11402f7816d4SBarry Smith     sum8  = 0.0;
11412f7816d4SBarry Smith     sum9  = 0.0;
11422f7816d4SBarry Smith     sum10 = 0.0;
11432f7816d4SBarry Smith     sum11 = 0.0;
11442f7816d4SBarry Smith     sum12 = 0.0;
11452f7816d4SBarry Smith     sum13 = 0.0;
11462f7816d4SBarry Smith     sum14 = 0.0;
11472f7816d4SBarry Smith     sum15 = 0.0;
11482f7816d4SBarry Smith     sum16 = 0.0;
11492f7816d4SBarry Smith     for (j=0; j<n; j++) {
11502f7816d4SBarry Smith       sum1  += v[jrow]*x[16*idx[jrow]];
11512f7816d4SBarry Smith       sum2  += v[jrow]*x[16*idx[jrow]+1];
11522f7816d4SBarry Smith       sum3  += v[jrow]*x[16*idx[jrow]+2];
11532f7816d4SBarry Smith       sum4  += v[jrow]*x[16*idx[jrow]+3];
11542f7816d4SBarry Smith       sum5  += v[jrow]*x[16*idx[jrow]+4];
11552f7816d4SBarry Smith       sum6  += v[jrow]*x[16*idx[jrow]+5];
11562f7816d4SBarry Smith       sum7  += v[jrow]*x[16*idx[jrow]+6];
11572f7816d4SBarry Smith       sum8  += v[jrow]*x[16*idx[jrow]+7];
11582f7816d4SBarry Smith       sum9  += v[jrow]*x[16*idx[jrow]+8];
11592f7816d4SBarry Smith       sum10 += v[jrow]*x[16*idx[jrow]+9];
11602f7816d4SBarry Smith       sum11 += v[jrow]*x[16*idx[jrow]+10];
11612f7816d4SBarry Smith       sum12 += v[jrow]*x[16*idx[jrow]+11];
11622f7816d4SBarry Smith       sum13 += v[jrow]*x[16*idx[jrow]+12];
11632f7816d4SBarry Smith       sum14 += v[jrow]*x[16*idx[jrow]+13];
11642f7816d4SBarry Smith       sum15 += v[jrow]*x[16*idx[jrow]+14];
11652f7816d4SBarry Smith       sum16 += v[jrow]*x[16*idx[jrow]+15];
11662f7816d4SBarry Smith       jrow++;
11672f7816d4SBarry Smith      }
11682f7816d4SBarry Smith     y[16*i]    = sum1;
11692f7816d4SBarry Smith     y[16*i+1]  = sum2;
11702f7816d4SBarry Smith     y[16*i+2]  = sum3;
11712f7816d4SBarry Smith     y[16*i+3]  = sum4;
11722f7816d4SBarry Smith     y[16*i+4]  = sum5;
11732f7816d4SBarry Smith     y[16*i+5]  = sum6;
11742f7816d4SBarry Smith     y[16*i+6]  = sum7;
11752f7816d4SBarry Smith     y[16*i+7]  = sum8;
11762f7816d4SBarry Smith     y[16*i+8]  = sum9;
11772f7816d4SBarry Smith     y[16*i+9]  = sum10;
11782f7816d4SBarry Smith     y[16*i+10] = sum11;
11792f7816d4SBarry Smith     y[16*i+11] = sum12;
11802f7816d4SBarry Smith     y[16*i+12] = sum13;
11812f7816d4SBarry Smith     y[16*i+13] = sum14;
11822f7816d4SBarry Smith     y[16*i+14] = sum15;
11832f7816d4SBarry Smith     y[16*i+15] = sum16;
11842f7816d4SBarry Smith   }
11852f7816d4SBarry Smith 
11862f7816d4SBarry Smith   PetscLogFlops(32*a->nz - 16*m);
11872f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
11882f7816d4SBarry Smith   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
11892f7816d4SBarry Smith   PetscFunctionReturn(0);
11902f7816d4SBarry Smith }
11912f7816d4SBarry Smith 
11922f7816d4SBarry Smith #undef __FUNCT__
11932f7816d4SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16"
11942f7816d4SBarry Smith int MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
11952f7816d4SBarry Smith {
11962f7816d4SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
11972f7816d4SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
11982f7816d4SBarry Smith   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
11992f7816d4SBarry Smith   PetscScalar   alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1200*bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,n,i,*idx;
12012f7816d4SBarry Smith 
12022f7816d4SBarry Smith   PetscFunctionBegin;
12032f7816d4SBarry Smith   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
12042f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
12052f7816d4SBarry Smith   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
1206*bfec09a0SHong Zhang 
12072f7816d4SBarry Smith   for (i=0; i<m; i++) {
1208*bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
1209*bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
12102f7816d4SBarry Smith     n      = a->i[i+1] - a->i[i];
12112f7816d4SBarry Smith     alpha1  = x[16*i];
12122f7816d4SBarry Smith     alpha2  = x[16*i+1];
12132f7816d4SBarry Smith     alpha3  = x[16*i+2];
12142f7816d4SBarry Smith     alpha4  = x[16*i+3];
12152f7816d4SBarry Smith     alpha5  = x[16*i+4];
12162f7816d4SBarry Smith     alpha6  = x[16*i+5];
12172f7816d4SBarry Smith     alpha7  = x[16*i+6];
12182f7816d4SBarry Smith     alpha8  = x[16*i+7];
12192f7816d4SBarry Smith     alpha9  = x[16*i+8];
12202f7816d4SBarry Smith     alpha10 = x[16*i+9];
12212f7816d4SBarry Smith     alpha11 = x[16*i+10];
12222f7816d4SBarry Smith     alpha12 = x[16*i+11];
12232f7816d4SBarry Smith     alpha13 = x[16*i+12];
12242f7816d4SBarry Smith     alpha14 = x[16*i+13];
12252f7816d4SBarry Smith     alpha15 = x[16*i+14];
12262f7816d4SBarry Smith     alpha16 = x[16*i+15];
12272f7816d4SBarry Smith     while (n-->0) {
12282f7816d4SBarry Smith       y[16*(*idx)]    += alpha1*(*v);
12292f7816d4SBarry Smith       y[16*(*idx)+1]  += alpha2*(*v);
12302f7816d4SBarry Smith       y[16*(*idx)+2]  += alpha3*(*v);
12312f7816d4SBarry Smith       y[16*(*idx)+3]  += alpha4*(*v);
12322f7816d4SBarry Smith       y[16*(*idx)+4]  += alpha5*(*v);
12332f7816d4SBarry Smith       y[16*(*idx)+5]  += alpha6*(*v);
12342f7816d4SBarry Smith       y[16*(*idx)+6]  += alpha7*(*v);
12352f7816d4SBarry Smith       y[16*(*idx)+7]  += alpha8*(*v);
12362f7816d4SBarry Smith       y[16*(*idx)+8]  += alpha9*(*v);
12372f7816d4SBarry Smith       y[16*(*idx)+9]  += alpha10*(*v);
12382f7816d4SBarry Smith       y[16*(*idx)+10] += alpha11*(*v);
12392f7816d4SBarry Smith       y[16*(*idx)+11] += alpha12*(*v);
12402f7816d4SBarry Smith       y[16*(*idx)+12] += alpha13*(*v);
12412f7816d4SBarry Smith       y[16*(*idx)+13] += alpha14*(*v);
12422f7816d4SBarry Smith       y[16*(*idx)+14] += alpha15*(*v);
12432f7816d4SBarry Smith       y[16*(*idx)+15] += alpha16*(*v);
12442f7816d4SBarry Smith       idx++; v++;
12452f7816d4SBarry Smith     }
12462f7816d4SBarry Smith   }
12472f7816d4SBarry Smith   PetscLogFlops(32*a->nz - 16*b->AIJ->n);
12482f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
12492f7816d4SBarry Smith   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
12502f7816d4SBarry Smith   PetscFunctionReturn(0);
12512f7816d4SBarry Smith }
12522f7816d4SBarry Smith 
12532f7816d4SBarry Smith #undef __FUNCT__
12542f7816d4SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_16"
12552f7816d4SBarry Smith int MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
12562f7816d4SBarry Smith {
12572f7816d4SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
12582f7816d4SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
12592f7816d4SBarry Smith   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
12602f7816d4SBarry Smith   PetscScalar   sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1261*bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,*idx,*ii;
12622f7816d4SBarry Smith   int           n,i,jrow,j;
12632f7816d4SBarry Smith 
12642f7816d4SBarry Smith   PetscFunctionBegin;
12652f7816d4SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
12662f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
12672f7816d4SBarry Smith   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
12682f7816d4SBarry Smith   idx  = a->j;
12692f7816d4SBarry Smith   v    = a->a;
12702f7816d4SBarry Smith   ii   = a->i;
12712f7816d4SBarry Smith 
12722f7816d4SBarry Smith   for (i=0; i<m; i++) {
12732f7816d4SBarry Smith     jrow = ii[i];
12742f7816d4SBarry Smith     n    = ii[i+1] - jrow;
12752f7816d4SBarry Smith     sum1  = 0.0;
12762f7816d4SBarry Smith     sum2  = 0.0;
12772f7816d4SBarry Smith     sum3  = 0.0;
12782f7816d4SBarry Smith     sum4  = 0.0;
12792f7816d4SBarry Smith     sum5  = 0.0;
12802f7816d4SBarry Smith     sum6  = 0.0;
12812f7816d4SBarry Smith     sum7  = 0.0;
12822f7816d4SBarry Smith     sum8  = 0.0;
12832f7816d4SBarry Smith     sum9  = 0.0;
12842f7816d4SBarry Smith     sum10 = 0.0;
12852f7816d4SBarry Smith     sum11 = 0.0;
12862f7816d4SBarry Smith     sum12 = 0.0;
12872f7816d4SBarry Smith     sum13 = 0.0;
12882f7816d4SBarry Smith     sum14 = 0.0;
12892f7816d4SBarry Smith     sum15 = 0.0;
12902f7816d4SBarry Smith     sum16 = 0.0;
12912f7816d4SBarry Smith     for (j=0; j<n; j++) {
12922f7816d4SBarry Smith       sum1  += v[jrow]*x[16*idx[jrow]];
12932f7816d4SBarry Smith       sum2  += v[jrow]*x[16*idx[jrow]+1];
12942f7816d4SBarry Smith       sum3  += v[jrow]*x[16*idx[jrow]+2];
12952f7816d4SBarry Smith       sum4  += v[jrow]*x[16*idx[jrow]+3];
12962f7816d4SBarry Smith       sum5  += v[jrow]*x[16*idx[jrow]+4];
12972f7816d4SBarry Smith       sum6  += v[jrow]*x[16*idx[jrow]+5];
12982f7816d4SBarry Smith       sum7  += v[jrow]*x[16*idx[jrow]+6];
12992f7816d4SBarry Smith       sum8  += v[jrow]*x[16*idx[jrow]+7];
13002f7816d4SBarry Smith       sum9  += v[jrow]*x[16*idx[jrow]+8];
13012f7816d4SBarry Smith       sum10 += v[jrow]*x[16*idx[jrow]+9];
13022f7816d4SBarry Smith       sum11 += v[jrow]*x[16*idx[jrow]+10];
13032f7816d4SBarry Smith       sum12 += v[jrow]*x[16*idx[jrow]+11];
13042f7816d4SBarry Smith       sum13 += v[jrow]*x[16*idx[jrow]+12];
13052f7816d4SBarry Smith       sum14 += v[jrow]*x[16*idx[jrow]+13];
13062f7816d4SBarry Smith       sum15 += v[jrow]*x[16*idx[jrow]+14];
13072f7816d4SBarry Smith       sum16 += v[jrow]*x[16*idx[jrow]+15];
13082f7816d4SBarry Smith       jrow++;
13092f7816d4SBarry Smith      }
13102f7816d4SBarry Smith     y[16*i]    += sum1;
13112f7816d4SBarry Smith     y[16*i+1]  += sum2;
13122f7816d4SBarry Smith     y[16*i+2]  += sum3;
13132f7816d4SBarry Smith     y[16*i+3]  += sum4;
13142f7816d4SBarry Smith     y[16*i+4]  += sum5;
13152f7816d4SBarry Smith     y[16*i+5]  += sum6;
13162f7816d4SBarry Smith     y[16*i+6]  += sum7;
13172f7816d4SBarry Smith     y[16*i+7]  += sum8;
13182f7816d4SBarry Smith     y[16*i+8]  += sum9;
13192f7816d4SBarry Smith     y[16*i+9]  += sum10;
13202f7816d4SBarry Smith     y[16*i+10] += sum11;
13212f7816d4SBarry Smith     y[16*i+11] += sum12;
13222f7816d4SBarry Smith     y[16*i+12] += sum13;
13232f7816d4SBarry Smith     y[16*i+13] += sum14;
13242f7816d4SBarry Smith     y[16*i+14] += sum15;
13252f7816d4SBarry Smith     y[16*i+15] += sum16;
13262f7816d4SBarry Smith   }
13272f7816d4SBarry Smith 
13282f7816d4SBarry Smith   PetscLogFlops(32*a->nz);
13292f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
13302f7816d4SBarry Smith   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
13312f7816d4SBarry Smith   PetscFunctionReturn(0);
13322f7816d4SBarry Smith }
13332f7816d4SBarry Smith 
13342f7816d4SBarry Smith #undef __FUNCT__
13352f7816d4SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16"
13362f7816d4SBarry Smith int MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
13372f7816d4SBarry Smith {
13382f7816d4SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
13392f7816d4SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
13402f7816d4SBarry Smith   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
13412f7816d4SBarry Smith   PetscScalar   alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1342*bfec09a0SHong Zhang   int           ierr,m = b->AIJ->m,n,i,*idx;
13432f7816d4SBarry Smith 
13442f7816d4SBarry Smith   PetscFunctionBegin;
13452f7816d4SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
13462f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
13472f7816d4SBarry Smith   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
13482f7816d4SBarry Smith   for (i=0; i<m; i++) {
1349*bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
1350*bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
13512f7816d4SBarry Smith     n      = a->i[i+1] - a->i[i];
13522f7816d4SBarry Smith     alpha1 = x[16*i];
13532f7816d4SBarry Smith     alpha2 = x[16*i+1];
13542f7816d4SBarry Smith     alpha3 = x[16*i+2];
13552f7816d4SBarry Smith     alpha4 = x[16*i+3];
13562f7816d4SBarry Smith     alpha5 = x[16*i+4];
13572f7816d4SBarry Smith     alpha6 = x[16*i+5];
13582f7816d4SBarry Smith     alpha7 = x[16*i+6];
13592f7816d4SBarry Smith     alpha8 = x[16*i+7];
13602f7816d4SBarry Smith     alpha9  = x[16*i+8];
13612f7816d4SBarry Smith     alpha10 = x[16*i+9];
13622f7816d4SBarry Smith     alpha11 = x[16*i+10];
13632f7816d4SBarry Smith     alpha12 = x[16*i+11];
13642f7816d4SBarry Smith     alpha13 = x[16*i+12];
13652f7816d4SBarry Smith     alpha14 = x[16*i+13];
13662f7816d4SBarry Smith     alpha15 = x[16*i+14];
13672f7816d4SBarry Smith     alpha16 = x[16*i+15];
13682f7816d4SBarry Smith     while (n-->0) {
13692f7816d4SBarry Smith       y[16*(*idx)]   += alpha1*(*v);
13702f7816d4SBarry Smith       y[16*(*idx)+1] += alpha2*(*v);
13712f7816d4SBarry Smith       y[16*(*idx)+2] += alpha3*(*v);
13722f7816d4SBarry Smith       y[16*(*idx)+3] += alpha4*(*v);
13732f7816d4SBarry Smith       y[16*(*idx)+4] += alpha5*(*v);
13742f7816d4SBarry Smith       y[16*(*idx)+5] += alpha6*(*v);
13752f7816d4SBarry Smith       y[16*(*idx)+6] += alpha7*(*v);
13762f7816d4SBarry Smith       y[16*(*idx)+7] += alpha8*(*v);
13772f7816d4SBarry Smith       y[16*(*idx)+8]  += alpha9*(*v);
13782f7816d4SBarry Smith       y[16*(*idx)+9]  += alpha10*(*v);
13792f7816d4SBarry Smith       y[16*(*idx)+10] += alpha11*(*v);
13802f7816d4SBarry Smith       y[16*(*idx)+11] += alpha12*(*v);
13812f7816d4SBarry Smith       y[16*(*idx)+12] += alpha13*(*v);
13822f7816d4SBarry Smith       y[16*(*idx)+13] += alpha14*(*v);
13832f7816d4SBarry Smith       y[16*(*idx)+14] += alpha15*(*v);
13842f7816d4SBarry Smith       y[16*(*idx)+15] += alpha16*(*v);
13852f7816d4SBarry Smith       idx++; v++;
13862f7816d4SBarry Smith     }
13872f7816d4SBarry Smith   }
13882f7816d4SBarry Smith   PetscLogFlops(32*a->nz);
13892f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
13902f7816d4SBarry Smith   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
139166ed3db0SBarry Smith   PetscFunctionReturn(0);
139266ed3db0SBarry Smith }
139366ed3db0SBarry Smith 
1394f4a53059SBarry Smith /*===================================================================================*/
13954a2ae208SSatish Balay #undef __FUNCT__
13964a2ae208SSatish Balay #define __FUNCT__ "MatMult_MPIMAIJ_dof"
1397f4a53059SBarry Smith int MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
1398f4a53059SBarry Smith {
13994cb9d9b8SBarry Smith   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1400f4a53059SBarry Smith   int         ierr;
1401f4a53059SBarry Smith   PetscFunctionBegin;
1402f4a53059SBarry Smith 
1403f4a53059SBarry Smith   /* start the scatter */
1404f4a53059SBarry Smith   ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
14054cb9d9b8SBarry Smith   ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr);
1406f4a53059SBarry Smith   ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
14074cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr);
1408f4a53059SBarry Smith   PetscFunctionReturn(0);
1409f4a53059SBarry Smith }
1410f4a53059SBarry Smith 
14114a2ae208SSatish Balay #undef __FUNCT__
14124a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof"
14134cb9d9b8SBarry Smith int MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
14144cb9d9b8SBarry Smith {
14154cb9d9b8SBarry Smith   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
14164cb9d9b8SBarry Smith   int         ierr;
14174cb9d9b8SBarry Smith   PetscFunctionBegin;
14184cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
14194cb9d9b8SBarry Smith   ierr = VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
14204cb9d9b8SBarry Smith   ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr);
14214cb9d9b8SBarry Smith   ierr = VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
14224cb9d9b8SBarry Smith   PetscFunctionReturn(0);
14234cb9d9b8SBarry Smith }
14244cb9d9b8SBarry Smith 
14254a2ae208SSatish Balay #undef __FUNCT__
14264a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof"
1427d72c5c08SBarry Smith int MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
14284cb9d9b8SBarry Smith {
14294cb9d9b8SBarry Smith   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
14304cb9d9b8SBarry Smith   int         ierr;
14314cb9d9b8SBarry Smith   PetscFunctionBegin;
14324cb9d9b8SBarry Smith 
14334cb9d9b8SBarry Smith   /* start the scatter */
14344cb9d9b8SBarry Smith   ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
1435d72c5c08SBarry Smith   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
14364cb9d9b8SBarry Smith   ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
1437d72c5c08SBarry Smith   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,zz);CHKERRQ(ierr);
14384cb9d9b8SBarry Smith   PetscFunctionReturn(0);
14394cb9d9b8SBarry Smith }
14404cb9d9b8SBarry Smith 
14414a2ae208SSatish Balay #undef __FUNCT__
14424a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof"
1443d72c5c08SBarry Smith int MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
14444cb9d9b8SBarry Smith {
14454cb9d9b8SBarry Smith   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
14464cb9d9b8SBarry Smith   int         ierr;
14474cb9d9b8SBarry Smith   PetscFunctionBegin;
14484cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
1449d72c5c08SBarry Smith   ierr = VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
1450d72c5c08SBarry Smith   ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
1451d72c5c08SBarry Smith   ierr = VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
14524cb9d9b8SBarry Smith   PetscFunctionReturn(0);
14534cb9d9b8SBarry Smith }
14544cb9d9b8SBarry Smith 
1455bcc973b7SBarry Smith /* ---------------------------------------------------------------------------------- */
14564a2ae208SSatish Balay #undef __FUNCT__
14574a2ae208SSatish Balay #define __FUNCT__ "MatCreateMAIJ"
1458cd3bbe55SBarry Smith int MatCreateMAIJ(Mat A,int dof,Mat *maij)
145982b1193eSBarry Smith {
1460f4a53059SBarry Smith   int         ierr,size,n;
14614cb9d9b8SBarry Smith   Mat_MPIMAIJ *b;
146282b1193eSBarry Smith   Mat         B;
146382b1193eSBarry Smith 
146482b1193eSBarry Smith   PetscFunctionBegin;
1465d72c5c08SBarry Smith   ierr   = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
1466d72c5c08SBarry Smith 
1467d72c5c08SBarry Smith   if (dof == 1) {
1468d72c5c08SBarry Smith     *maij = A;
1469d72c5c08SBarry Smith   } else {
147083903688SBarry Smith     ierr = MatCreate(A->comm,dof*A->m,dof*A->n,dof*A->M,dof*A->N,&B);CHKERRQ(ierr);
1471cd3bbe55SBarry Smith     B->assembled    = PETSC_TRUE;
1472d72c5c08SBarry Smith 
1473f4a53059SBarry Smith     ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1474f4a53059SBarry Smith     if (size == 1) {
1475b9b97703SBarry Smith       ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr);
14764cb9d9b8SBarry Smith       B->ops->destroy = MatDestroy_SeqMAIJ;
1477b9b97703SBarry Smith       b      = (Mat_MPIMAIJ*)B->data;
1478b9b97703SBarry Smith       b->dof = dof;
14794cb9d9b8SBarry Smith       b->AIJ = A;
1480d72c5c08SBarry Smith       if (dof == 2) {
1481cd3bbe55SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_2;
1482cd3bbe55SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
1483cd3bbe55SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
1484cd3bbe55SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
1485bcc973b7SBarry Smith       } else if (dof == 3) {
1486bcc973b7SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_3;
1487bcc973b7SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
1488bcc973b7SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
1489bcc973b7SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
1490bcc973b7SBarry Smith       } else if (dof == 4) {
1491bcc973b7SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_4;
1492bcc973b7SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
1493bcc973b7SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
1494bcc973b7SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
1495f9fae5adSBarry Smith       } else if (dof == 5) {
1496f9fae5adSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_5;
1497f9fae5adSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
1498f9fae5adSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
1499f9fae5adSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
15006cd98798SBarry Smith       } else if (dof == 6) {
15016cd98798SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_6;
15026cd98798SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
15036cd98798SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
15046cd98798SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
150566ed3db0SBarry Smith       } else if (dof == 8) {
150666ed3db0SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_8;
150766ed3db0SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
150866ed3db0SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
150966ed3db0SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
15102f7816d4SBarry Smith       } else if (dof == 16) {
15112f7816d4SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_16;
15122f7816d4SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
15132f7816d4SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
15142f7816d4SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
151582b1193eSBarry Smith       } else {
151666ed3db0SBarry Smith         SETERRQ1(1,"Cannot handle a dof of %d. Send request for code to petsc-maint@mcs.anl.gov\n",dof);
151782b1193eSBarry Smith       }
1518f4a53059SBarry Smith     } else {
1519f4a53059SBarry Smith       Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
1520f4a53059SBarry Smith       IS         from,to;
1521f4a53059SBarry Smith       Vec        gvec;
1522f4a53059SBarry Smith       int        *garray,i;
1523f4a53059SBarry Smith 
1524b9b97703SBarry Smith       ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr);
1525d72c5c08SBarry Smith       B->ops->destroy = MatDestroy_MPIMAIJ;
1526b9b97703SBarry Smith       b      = (Mat_MPIMAIJ*)B->data;
1527b9b97703SBarry Smith       b->dof = dof;
1528b9b97703SBarry Smith       b->A   = A;
15294cb9d9b8SBarry Smith       ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr);
15304cb9d9b8SBarry Smith       ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr);
15314cb9d9b8SBarry Smith 
1532f4a53059SBarry Smith       ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
1533f4a53059SBarry Smith       ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr);
1534f4a53059SBarry Smith 
1535f4a53059SBarry Smith       /* create two temporary Index sets for build scatter gather */
1536b0a32e0cSBarry Smith       ierr = PetscMalloc((n+1)*sizeof(int),&garray);CHKERRQ(ierr);
1537f4a53059SBarry Smith       for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
1538f4a53059SBarry Smith       ierr = ISCreateBlock(A->comm,dof,n,garray,&from);CHKERRQ(ierr);
1539f4a53059SBarry Smith       ierr = PetscFree(garray);CHKERRQ(ierr);
1540f4a53059SBarry Smith       ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);
1541f4a53059SBarry Smith 
1542f4a53059SBarry Smith       /* create temporary global vector to generate scatter context */
1543f4a53059SBarry Smith       ierr = VecCreateMPI(A->comm,dof*A->n,dof*A->N,&gvec);CHKERRQ(ierr);
1544f4a53059SBarry Smith 
1545f4a53059SBarry Smith       /* generate the scatter context */
1546f4a53059SBarry Smith       ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
1547f4a53059SBarry Smith 
1548f4a53059SBarry Smith       ierr = ISDestroy(from);CHKERRQ(ierr);
1549f4a53059SBarry Smith       ierr = ISDestroy(to);CHKERRQ(ierr);
1550f4a53059SBarry Smith       ierr = VecDestroy(gvec);CHKERRQ(ierr);
1551f4a53059SBarry Smith 
1552f4a53059SBarry Smith       B->ops->mult             = MatMult_MPIMAIJ_dof;
15534cb9d9b8SBarry Smith       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
15544cb9d9b8SBarry Smith       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
15554cb9d9b8SBarry Smith       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
1556f4a53059SBarry Smith     }
1557cd3bbe55SBarry Smith     *maij = B;
1558d72c5c08SBarry Smith   }
155982b1193eSBarry Smith   PetscFunctionReturn(0);
156082b1193eSBarry Smith }
156182b1193eSBarry Smith 
156282b1193eSBarry Smith 
156382b1193eSBarry Smith 
156482b1193eSBarry Smith 
156582b1193eSBarry Smith 
156682b1193eSBarry Smith 
156782b1193eSBarry Smith 
156882b1193eSBarry Smith 
156982b1193eSBarry Smith 
157082b1193eSBarry Smith 
157182b1193eSBarry Smith 
157282b1193eSBarry Smith 
1573