xref: /petsc/src/mat/impls/maij/maij.c (revision 2f7816d4c89443b75c106afa2f3503e743191845)
173f4d377SMatthew Knepley /*$Id: maij.c,v 1.19 2001/08/07 03:03:00 balay Exp $*/
282b1193eSBarry Smith /*
3cd3bbe55SBarry Smith     Defines the basic matrix operations for the MAIJ  matrix storage format.
4cd3bbe55SBarry Smith   This format is used for restriction and interpolation operations for
5cd3bbe55SBarry Smith   multicomponent problems. It interpolates each component the same way
6cd3bbe55SBarry Smith   independently.
7cd3bbe55SBarry Smith 
8cd3bbe55SBarry Smith      We provide:
9cd3bbe55SBarry Smith          MatMult()
10cd3bbe55SBarry Smith          MatMultTranspose()
11cd3bbe55SBarry Smith          MatMultTransposeAdd()
12cd3bbe55SBarry Smith          MatMultAdd()
13cd3bbe55SBarry Smith           and
14cd3bbe55SBarry Smith          MatCreateMAIJ(Mat,dof,Mat*)
15f4a53059SBarry Smith 
16f4a53059SBarry Smith      This single directory handles both the sequential and parallel codes
1782b1193eSBarry Smith */
1882b1193eSBarry Smith 
19be911618SKris Buschelman #include "src/mat/impls/maij/maij.h"
20*2f7816d4SBarry Smith #include "src/vec/vecimpl.h"
2182b1193eSBarry Smith 
224a2ae208SSatish Balay #undef __FUNCT__
234a2ae208SSatish Balay #define __FUNCT__ "MatMAIJGetAIJ"
24b9b97703SBarry Smith int MatMAIJGetAIJ(Mat A,Mat *B)
25b9b97703SBarry Smith {
26b9b97703SBarry Smith   int         ierr;
27b9b97703SBarry Smith   PetscTruth  ismpimaij,isseqmaij;
28b9b97703SBarry Smith 
29b9b97703SBarry Smith   PetscFunctionBegin;
30b9b97703SBarry Smith   ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr);
31b9b97703SBarry Smith   ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr);
32b9b97703SBarry Smith   if (ismpimaij) {
33b9b97703SBarry Smith     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
34b9b97703SBarry Smith 
35b9b97703SBarry Smith     *B = b->A;
36b9b97703SBarry Smith   } else if (isseqmaij) {
37b9b97703SBarry Smith     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
38b9b97703SBarry Smith 
39b9b97703SBarry Smith     *B = b->AIJ;
40b9b97703SBarry Smith   } else {
41b9b97703SBarry Smith     *B = A;
42b9b97703SBarry Smith   }
43b9b97703SBarry Smith   PetscFunctionReturn(0);
44b9b97703SBarry Smith }
45b9b97703SBarry Smith 
464a2ae208SSatish Balay #undef __FUNCT__
474a2ae208SSatish Balay #define __FUNCT__ "MatMAIJRedimension"
48b9b97703SBarry Smith int MatMAIJRedimension(Mat A,int dof,Mat *B)
49b9b97703SBarry Smith {
50b9b97703SBarry Smith   int ierr;
51b9b97703SBarry Smith   Mat Aij;
52b9b97703SBarry Smith 
53b9b97703SBarry Smith   PetscFunctionBegin;
54b9b97703SBarry Smith   ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr);
55b9b97703SBarry Smith   ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr);
56b9b97703SBarry Smith   PetscFunctionReturn(0);
57b9b97703SBarry Smith }
58b9b97703SBarry Smith 
594a2ae208SSatish Balay #undef __FUNCT__
604a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_SeqMAIJ"
614cb9d9b8SBarry Smith int MatDestroy_SeqMAIJ(Mat A)
6282b1193eSBarry Smith {
6382b1193eSBarry Smith   int         ierr;
644cb9d9b8SBarry Smith   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
6582b1193eSBarry Smith 
6682b1193eSBarry Smith   PetscFunctionBegin;
67cd3bbe55SBarry Smith   if (b->AIJ) {
68cd3bbe55SBarry Smith     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
6982b1193eSBarry Smith   }
704cb9d9b8SBarry Smith   ierr = PetscFree(b);CHKERRQ(ierr);
714cb9d9b8SBarry Smith   PetscFunctionReturn(0);
724cb9d9b8SBarry Smith }
734cb9d9b8SBarry Smith 
744a2ae208SSatish Balay #undef __FUNCT__
754a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_MPIMAIJ"
764cb9d9b8SBarry Smith int MatDestroy_MPIMAIJ(Mat A)
774cb9d9b8SBarry Smith {
784cb9d9b8SBarry Smith   int         ierr;
794cb9d9b8SBarry Smith   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
804cb9d9b8SBarry Smith 
814cb9d9b8SBarry Smith   PetscFunctionBegin;
824cb9d9b8SBarry Smith   if (b->AIJ) {
834cb9d9b8SBarry Smith     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
844cb9d9b8SBarry Smith   }
85f4a53059SBarry Smith   if (b->OAIJ) {
86f4a53059SBarry Smith     ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr);
87f4a53059SBarry Smith   }
88b9b97703SBarry Smith   if (b->A) {
89b9b97703SBarry Smith     ierr = MatDestroy(b->A);CHKERRQ(ierr);
90b9b97703SBarry Smith   }
91f4a53059SBarry Smith   if (b->ctx) {
92f4a53059SBarry Smith     ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr);
93f4a53059SBarry Smith   }
94f4a53059SBarry Smith   if (b->w) {
95f4a53059SBarry Smith     ierr = VecDestroy(b->w);CHKERRQ(ierr);
96f4a53059SBarry Smith   }
97cd3bbe55SBarry Smith   ierr = PetscFree(b);CHKERRQ(ierr);
98b9b97703SBarry Smith   PetscFunctionReturn(0);
99b9b97703SBarry Smith }
100b9b97703SBarry Smith 
10182b1193eSBarry Smith EXTERN_C_BEGIN
1024a2ae208SSatish Balay #undef __FUNCT__
1034a2ae208SSatish Balay #define __FUNCT__ "MatCreate_MAIJ"
104f4a53059SBarry Smith int MatCreate_MAIJ(Mat A)
10582b1193eSBarry Smith {
106cd3bbe55SBarry Smith   int         ierr;
1074cb9d9b8SBarry Smith   Mat_MPIMAIJ *b;
10882b1193eSBarry Smith 
10982b1193eSBarry Smith   PetscFunctionBegin;
110b0a32e0cSBarry Smith   ierr     = PetscNew(Mat_MPIMAIJ,&b);CHKERRQ(ierr);
111b0a32e0cSBarry Smith   A->data  = (void*)b;
1124cb9d9b8SBarry Smith   ierr = PetscMemzero(b,sizeof(Mat_MPIMAIJ));CHKERRQ(ierr);
113cd3bbe55SBarry Smith   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
114cd3bbe55SBarry Smith   A->factor           = 0;
115cd3bbe55SBarry Smith   A->mapping          = 0;
116f4a53059SBarry Smith 
117cd3bbe55SBarry Smith   b->AIJ  = 0;
118cd3bbe55SBarry Smith   b->dof  = 0;
119f4a53059SBarry Smith   b->OAIJ = 0;
120f4a53059SBarry Smith   b->ctx  = 0;
121f4a53059SBarry Smith   b->w    = 0;
12282b1193eSBarry Smith   PetscFunctionReturn(0);
12382b1193eSBarry Smith }
12482b1193eSBarry Smith EXTERN_C_END
12582b1193eSBarry Smith 
126cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/
1274a2ae208SSatish Balay #undef __FUNCT__
128b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_2"
129cd3bbe55SBarry Smith int MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
13082b1193eSBarry Smith {
1314cb9d9b8SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
132bcc973b7SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
13387828ca2SBarry Smith   PetscScalar   *x,*y,*v,sum1, sum2;
134273d9f13SBarry Smith   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
135bcc973b7SBarry Smith   int           n,i,jrow,j;
13682b1193eSBarry Smith 
137bcc973b7SBarry Smith   PetscFunctionBegin;
138*2f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
139*2f7816d4SBarry Smith   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
140bcc973b7SBarry Smith   x    = x + shift;    /* shift for Fortran start by 1 indexing */
141bcc973b7SBarry Smith   idx  = a->j;
142bcc973b7SBarry Smith   v    = a->a;
143bcc973b7SBarry Smith   ii   = a->i;
144bcc973b7SBarry Smith 
145bcc973b7SBarry Smith   v    += shift; /* shift for Fortran start by 1 indexing */
146bcc973b7SBarry Smith   idx  += shift;
147bcc973b7SBarry Smith   for (i=0; i<m; i++) {
148bcc973b7SBarry Smith     jrow = ii[i];
149bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
150bcc973b7SBarry Smith     sum1  = 0.0;
151bcc973b7SBarry Smith     sum2  = 0.0;
152bcc973b7SBarry Smith     for (j=0; j<n; j++) {
153bcc973b7SBarry Smith       sum1 += v[jrow]*x[2*idx[jrow]];
154bcc973b7SBarry Smith       sum2 += v[jrow]*x[2*idx[jrow]+1];
155bcc973b7SBarry Smith       jrow++;
156bcc973b7SBarry Smith      }
157bcc973b7SBarry Smith     y[2*i]   = sum1;
158bcc973b7SBarry Smith     y[2*i+1] = sum2;
159bcc973b7SBarry Smith   }
160bcc973b7SBarry Smith 
161b0a32e0cSBarry Smith   PetscLogFlops(4*a->nz - 2*m);
162*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
163*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
16482b1193eSBarry Smith   PetscFunctionReturn(0);
16582b1193eSBarry Smith }
166bcc973b7SBarry Smith 
1674a2ae208SSatish Balay #undef __FUNCT__
168b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2"
169cd3bbe55SBarry Smith int MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
17082b1193eSBarry Smith {
1714cb9d9b8SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
172bcc973b7SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
17387828ca2SBarry Smith   PetscScalar   *x,*y,*v,alpha1,alpha2,zero = 0.0;
174273d9f13SBarry Smith   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
17582b1193eSBarry Smith 
176bcc973b7SBarry Smith   PetscFunctionBegin;
177bcc973b7SBarry Smith   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
178*2f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
179*2f7816d4SBarry Smith   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
180bcc973b7SBarry Smith   y = y + shift; /* shift for Fortran start by 1 indexing */
181bcc973b7SBarry Smith   for (i=0; i<m; i++) {
182bcc973b7SBarry Smith     idx    = a->j + a->i[i] + shift;
183bcc973b7SBarry Smith     v      = a->a + a->i[i] + shift;
184bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
185bcc973b7SBarry Smith     alpha1 = x[2*i];
186bcc973b7SBarry Smith     alpha2 = x[2*i+1];
187bcc973b7SBarry Smith     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
188bcc973b7SBarry Smith   }
189b0a32e0cSBarry Smith   PetscLogFlops(4*a->nz - 2*b->AIJ->n);
190*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
191*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
19282b1193eSBarry Smith   PetscFunctionReturn(0);
19382b1193eSBarry Smith }
194bcc973b7SBarry Smith 
1954a2ae208SSatish Balay #undef __FUNCT__
196b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_2"
197cd3bbe55SBarry Smith int MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
19882b1193eSBarry Smith {
1994cb9d9b8SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
200bcc973b7SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
20187828ca2SBarry Smith   PetscScalar   *x,*y,*v,sum1, sum2;
202273d9f13SBarry Smith   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
203bcc973b7SBarry Smith   int           n,i,jrow,j;
20482b1193eSBarry Smith 
205bcc973b7SBarry Smith   PetscFunctionBegin;
206f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
207*2f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
208*2f7816d4SBarry Smith   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
209bcc973b7SBarry Smith   x    = x + shift;    /* shift for Fortran start by 1 indexing */
210bcc973b7SBarry Smith   idx  = a->j;
211bcc973b7SBarry Smith   v    = a->a;
212bcc973b7SBarry Smith   ii   = a->i;
213bcc973b7SBarry Smith 
214bcc973b7SBarry Smith   v    += shift; /* shift for Fortran start by 1 indexing */
215bcc973b7SBarry Smith   idx  += shift;
216bcc973b7SBarry Smith   for (i=0; i<m; i++) {
217bcc973b7SBarry Smith     jrow = ii[i];
218bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
219bcc973b7SBarry Smith     sum1  = 0.0;
220bcc973b7SBarry Smith     sum2  = 0.0;
221bcc973b7SBarry Smith     for (j=0; j<n; j++) {
222bcc973b7SBarry Smith       sum1 += v[jrow]*x[2*idx[jrow]];
223bcc973b7SBarry Smith       sum2 += v[jrow]*x[2*idx[jrow]+1];
224bcc973b7SBarry Smith       jrow++;
225bcc973b7SBarry Smith      }
226bcc973b7SBarry Smith     y[2*i]   += sum1;
227bcc973b7SBarry Smith     y[2*i+1] += sum2;
228bcc973b7SBarry Smith   }
229bcc973b7SBarry Smith 
230b0a32e0cSBarry Smith   PetscLogFlops(4*a->nz - 2*m);
231*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
232*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
233bcc973b7SBarry Smith   PetscFunctionReturn(0);
23482b1193eSBarry Smith }
2354a2ae208SSatish Balay #undef __FUNCT__
236b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2"
237cd3bbe55SBarry Smith int MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
23882b1193eSBarry Smith {
2394cb9d9b8SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
240bcc973b7SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
24187828ca2SBarry Smith   PetscScalar   *x,*y,*v,alpha1,alpha2;
242273d9f13SBarry Smith   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
24382b1193eSBarry Smith 
244bcc973b7SBarry Smith   PetscFunctionBegin;
245f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
246*2f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
247*2f7816d4SBarry Smith   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
248bcc973b7SBarry Smith   y = y + shift; /* shift for Fortran start by 1 indexing */
249bcc973b7SBarry Smith   for (i=0; i<m; i++) {
250bcc973b7SBarry Smith     idx   = a->j + a->i[i] + shift;
251bcc973b7SBarry Smith     v     = a->a + a->i[i] + shift;
252bcc973b7SBarry Smith     n     = a->i[i+1] - a->i[i];
253bcc973b7SBarry Smith     alpha1 = x[2*i];
254bcc973b7SBarry Smith     alpha2 = x[2*i+1];
255bcc973b7SBarry Smith     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
256bcc973b7SBarry Smith   }
257b0a32e0cSBarry Smith   PetscLogFlops(4*a->nz - 2*b->AIJ->n);
258*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
259*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
260bcc973b7SBarry Smith   PetscFunctionReturn(0);
26182b1193eSBarry Smith }
262cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/
2634a2ae208SSatish Balay #undef __FUNCT__
264b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_3"
265bcc973b7SBarry Smith int MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
266bcc973b7SBarry Smith {
2674cb9d9b8SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
268bcc973b7SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
26987828ca2SBarry Smith   PetscScalar   *x,*y,*v,sum1, sum2, sum3;
270273d9f13SBarry Smith   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
271bcc973b7SBarry Smith   int           n,i,jrow,j;
27282b1193eSBarry Smith 
273bcc973b7SBarry Smith   PetscFunctionBegin;
274*2f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
275*2f7816d4SBarry Smith   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
276bcc973b7SBarry Smith   x    = x + shift;    /* shift for Fortran start by 1 indexing */
277bcc973b7SBarry Smith   idx  = a->j;
278bcc973b7SBarry Smith   v    = a->a;
279bcc973b7SBarry Smith   ii   = a->i;
280bcc973b7SBarry Smith 
281bcc973b7SBarry Smith   v    += shift; /* shift for Fortran start by 1 indexing */
282bcc973b7SBarry Smith   idx  += shift;
283bcc973b7SBarry Smith   for (i=0; i<m; i++) {
284bcc973b7SBarry Smith     jrow = ii[i];
285bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
286bcc973b7SBarry Smith     sum1  = 0.0;
287bcc973b7SBarry Smith     sum2  = 0.0;
288bcc973b7SBarry Smith     sum3  = 0.0;
289bcc973b7SBarry Smith     for (j=0; j<n; j++) {
290bcc973b7SBarry Smith       sum1 += v[jrow]*x[3*idx[jrow]];
291bcc973b7SBarry Smith       sum2 += v[jrow]*x[3*idx[jrow]+1];
292bcc973b7SBarry Smith       sum3 += v[jrow]*x[3*idx[jrow]+2];
293bcc973b7SBarry Smith       jrow++;
294bcc973b7SBarry Smith      }
295bcc973b7SBarry Smith     y[3*i]   = sum1;
296bcc973b7SBarry Smith     y[3*i+1] = sum2;
297bcc973b7SBarry Smith     y[3*i+2] = sum3;
298bcc973b7SBarry Smith   }
299bcc973b7SBarry Smith 
300b0a32e0cSBarry Smith   PetscLogFlops(6*a->nz - 3*m);
301*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
302*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
303bcc973b7SBarry Smith   PetscFunctionReturn(0);
304bcc973b7SBarry Smith }
305bcc973b7SBarry Smith 
3064a2ae208SSatish Balay #undef __FUNCT__
307b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3"
308bcc973b7SBarry Smith int MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
309bcc973b7SBarry Smith {
3104cb9d9b8SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
311bcc973b7SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
31287828ca2SBarry Smith   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
313273d9f13SBarry Smith   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
314bcc973b7SBarry Smith 
315bcc973b7SBarry Smith   PetscFunctionBegin;
316bcc973b7SBarry Smith   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
317*2f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
318*2f7816d4SBarry Smith   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
319bcc973b7SBarry Smith   y = y + shift; /* shift for Fortran start by 1 indexing */
320bcc973b7SBarry Smith   for (i=0; i<m; i++) {
321bcc973b7SBarry Smith     idx    = a->j + a->i[i] + shift;
322bcc973b7SBarry Smith     v      = a->a + a->i[i] + shift;
323bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
324bcc973b7SBarry Smith     alpha1 = x[3*i];
325bcc973b7SBarry Smith     alpha2 = x[3*i+1];
326bcc973b7SBarry Smith     alpha3 = x[3*i+2];
327bcc973b7SBarry Smith     while (n-->0) {
328bcc973b7SBarry Smith       y[3*(*idx)]   += alpha1*(*v);
329bcc973b7SBarry Smith       y[3*(*idx)+1] += alpha2*(*v);
330bcc973b7SBarry Smith       y[3*(*idx)+2] += alpha3*(*v);
331bcc973b7SBarry Smith       idx++; v++;
332bcc973b7SBarry Smith     }
333bcc973b7SBarry Smith   }
334b0a32e0cSBarry Smith   PetscLogFlops(6*a->nz - 3*b->AIJ->n);
335*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
336*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
337bcc973b7SBarry Smith   PetscFunctionReturn(0);
338bcc973b7SBarry Smith }
339bcc973b7SBarry Smith 
3404a2ae208SSatish Balay #undef __FUNCT__
341b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_3"
342bcc973b7SBarry Smith int MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
343bcc973b7SBarry Smith {
3444cb9d9b8SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
345bcc973b7SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
34687828ca2SBarry Smith   PetscScalar   *x,*y,*v,sum1, sum2, sum3;
347273d9f13SBarry Smith   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
348bcc973b7SBarry Smith   int           n,i,jrow,j;
349bcc973b7SBarry Smith 
350bcc973b7SBarry Smith   PetscFunctionBegin;
351f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
352*2f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
353*2f7816d4SBarry Smith   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
354bcc973b7SBarry Smith   x    = x + shift;    /* shift for Fortran start by 1 indexing */
355bcc973b7SBarry Smith   idx  = a->j;
356bcc973b7SBarry Smith   v    = a->a;
357bcc973b7SBarry Smith   ii   = a->i;
358bcc973b7SBarry Smith 
359bcc973b7SBarry Smith   v    += shift; /* shift for Fortran start by 1 indexing */
360bcc973b7SBarry Smith   idx  += shift;
361bcc973b7SBarry Smith   for (i=0; i<m; i++) {
362bcc973b7SBarry Smith     jrow = ii[i];
363bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
364bcc973b7SBarry Smith     sum1  = 0.0;
365bcc973b7SBarry Smith     sum2  = 0.0;
366bcc973b7SBarry Smith     sum3  = 0.0;
367bcc973b7SBarry Smith     for (j=0; j<n; j++) {
368bcc973b7SBarry Smith       sum1 += v[jrow]*x[3*idx[jrow]];
369bcc973b7SBarry Smith       sum2 += v[jrow]*x[3*idx[jrow]+1];
370bcc973b7SBarry Smith       sum3 += v[jrow]*x[3*idx[jrow]+2];
371bcc973b7SBarry Smith       jrow++;
372bcc973b7SBarry Smith      }
373bcc973b7SBarry Smith     y[3*i]   += sum1;
374bcc973b7SBarry Smith     y[3*i+1] += sum2;
375bcc973b7SBarry Smith     y[3*i+2] += sum3;
376bcc973b7SBarry Smith   }
377bcc973b7SBarry Smith 
378b0a32e0cSBarry Smith   PetscLogFlops(6*a->nz);
379*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
380*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
381bcc973b7SBarry Smith   PetscFunctionReturn(0);
382bcc973b7SBarry Smith }
3834a2ae208SSatish Balay #undef __FUNCT__
384b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3"
385bcc973b7SBarry Smith int MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
386bcc973b7SBarry Smith {
3874cb9d9b8SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
388bcc973b7SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
38987828ca2SBarry Smith   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3;
390273d9f13SBarry Smith   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
391bcc973b7SBarry Smith 
392bcc973b7SBarry Smith   PetscFunctionBegin;
393f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
394*2f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
395*2f7816d4SBarry Smith   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
396bcc973b7SBarry Smith   y = y + shift; /* shift for Fortran start by 1 indexing */
397bcc973b7SBarry Smith   for (i=0; i<m; i++) {
398bcc973b7SBarry Smith     idx    = a->j + a->i[i] + shift;
399bcc973b7SBarry Smith     v      = a->a + a->i[i] + shift;
400bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
401bcc973b7SBarry Smith     alpha1 = x[3*i];
402bcc973b7SBarry Smith     alpha2 = x[3*i+1];
403bcc973b7SBarry Smith     alpha3 = x[3*i+2];
404bcc973b7SBarry Smith     while (n-->0) {
405bcc973b7SBarry Smith       y[3*(*idx)]   += alpha1*(*v);
406bcc973b7SBarry Smith       y[3*(*idx)+1] += alpha2*(*v);
407bcc973b7SBarry Smith       y[3*(*idx)+2] += alpha3*(*v);
408bcc973b7SBarry Smith       idx++; v++;
409bcc973b7SBarry Smith     }
410bcc973b7SBarry Smith   }
411b0a32e0cSBarry Smith   PetscLogFlops(6*a->nz);
412*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
413*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
414bcc973b7SBarry Smith   PetscFunctionReturn(0);
415bcc973b7SBarry Smith }
416bcc973b7SBarry Smith 
417bcc973b7SBarry Smith /* ------------------------------------------------------------------------------*/
4184a2ae208SSatish Balay #undef __FUNCT__
419b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_4"
420bcc973b7SBarry Smith int MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
421bcc973b7SBarry Smith {
4224cb9d9b8SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
423bcc973b7SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
42487828ca2SBarry Smith   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4;
425273d9f13SBarry Smith   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
426bcc973b7SBarry Smith   int           n,i,jrow,j;
427bcc973b7SBarry Smith 
428bcc973b7SBarry Smith   PetscFunctionBegin;
429*2f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
430*2f7816d4SBarry Smith   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
431bcc973b7SBarry Smith   x    = x + shift;    /* shift for Fortran start by 1 indexing */
432bcc973b7SBarry Smith   idx  = a->j;
433bcc973b7SBarry Smith   v    = a->a;
434bcc973b7SBarry Smith   ii   = a->i;
435bcc973b7SBarry Smith 
436bcc973b7SBarry Smith   v    += shift; /* shift for Fortran start by 1 indexing */
437bcc973b7SBarry Smith   idx  += shift;
438bcc973b7SBarry Smith   for (i=0; i<m; i++) {
439bcc973b7SBarry Smith     jrow = ii[i];
440bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
441bcc973b7SBarry Smith     sum1  = 0.0;
442bcc973b7SBarry Smith     sum2  = 0.0;
443bcc973b7SBarry Smith     sum3  = 0.0;
444bcc973b7SBarry Smith     sum4  = 0.0;
445bcc973b7SBarry Smith     for (j=0; j<n; j++) {
446bcc973b7SBarry Smith       sum1 += v[jrow]*x[4*idx[jrow]];
447bcc973b7SBarry Smith       sum2 += v[jrow]*x[4*idx[jrow]+1];
448bcc973b7SBarry Smith       sum3 += v[jrow]*x[4*idx[jrow]+2];
449bcc973b7SBarry Smith       sum4 += v[jrow]*x[4*idx[jrow]+3];
450bcc973b7SBarry Smith       jrow++;
451bcc973b7SBarry Smith      }
452bcc973b7SBarry Smith     y[4*i]   = sum1;
453bcc973b7SBarry Smith     y[4*i+1] = sum2;
454bcc973b7SBarry Smith     y[4*i+2] = sum3;
455bcc973b7SBarry Smith     y[4*i+3] = sum4;
456bcc973b7SBarry Smith   }
457bcc973b7SBarry Smith 
458b0a32e0cSBarry Smith   PetscLogFlops(8*a->nz - 4*m);
459*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
460*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
461bcc973b7SBarry Smith   PetscFunctionReturn(0);
462bcc973b7SBarry Smith }
463bcc973b7SBarry Smith 
4644a2ae208SSatish Balay #undef __FUNCT__
465b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4"
466bcc973b7SBarry Smith int MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
467bcc973b7SBarry Smith {
4684cb9d9b8SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
469bcc973b7SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
47087828ca2SBarry Smith   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
471273d9f13SBarry Smith   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
472bcc973b7SBarry Smith 
473bcc973b7SBarry Smith   PetscFunctionBegin;
474bcc973b7SBarry Smith   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
475*2f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
476*2f7816d4SBarry Smith   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
477bcc973b7SBarry Smith   y = y + shift; /* shift for Fortran start by 1 indexing */
478bcc973b7SBarry Smith   for (i=0; i<m; i++) {
479bcc973b7SBarry Smith     idx    = a->j + a->i[i] + shift;
480bcc973b7SBarry Smith     v      = a->a + a->i[i] + shift;
481bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
482bcc973b7SBarry Smith     alpha1 = x[4*i];
483bcc973b7SBarry Smith     alpha2 = x[4*i+1];
484bcc973b7SBarry Smith     alpha3 = x[4*i+2];
485bcc973b7SBarry Smith     alpha4 = x[4*i+3];
486bcc973b7SBarry Smith     while (n-->0) {
487bcc973b7SBarry Smith       y[4*(*idx)]   += alpha1*(*v);
488bcc973b7SBarry Smith       y[4*(*idx)+1] += alpha2*(*v);
489bcc973b7SBarry Smith       y[4*(*idx)+2] += alpha3*(*v);
490bcc973b7SBarry Smith       y[4*(*idx)+3] += alpha4*(*v);
491bcc973b7SBarry Smith       idx++; v++;
492bcc973b7SBarry Smith     }
493bcc973b7SBarry Smith   }
494b0a32e0cSBarry Smith   PetscLogFlops(8*a->nz - 4*b->AIJ->n);
495*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
496*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
497bcc973b7SBarry Smith   PetscFunctionReturn(0);
498bcc973b7SBarry Smith }
499bcc973b7SBarry Smith 
5004a2ae208SSatish Balay #undef __FUNCT__
501b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_4"
502bcc973b7SBarry Smith int MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
503bcc973b7SBarry Smith {
5044cb9d9b8SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
505f9fae5adSBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
50687828ca2SBarry Smith   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4;
507273d9f13SBarry Smith   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
508f9fae5adSBarry Smith   int           n,i,jrow,j;
509f9fae5adSBarry Smith 
510f9fae5adSBarry Smith   PetscFunctionBegin;
511f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
512*2f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
513*2f7816d4SBarry Smith   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
514f9fae5adSBarry Smith   x    = x + shift;    /* shift for Fortran start by 1 indexing */
515f9fae5adSBarry Smith   idx  = a->j;
516f9fae5adSBarry Smith   v    = a->a;
517f9fae5adSBarry Smith   ii   = a->i;
518f9fae5adSBarry Smith 
519f9fae5adSBarry Smith   v    += shift; /* shift for Fortran start by 1 indexing */
520f9fae5adSBarry Smith   idx  += shift;
521f9fae5adSBarry Smith   for (i=0; i<m; i++) {
522f9fae5adSBarry Smith     jrow = ii[i];
523f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
524f9fae5adSBarry Smith     sum1  = 0.0;
525f9fae5adSBarry Smith     sum2  = 0.0;
526f9fae5adSBarry Smith     sum3  = 0.0;
527f9fae5adSBarry Smith     sum4  = 0.0;
528f9fae5adSBarry Smith     for (j=0; j<n; j++) {
529f9fae5adSBarry Smith       sum1 += v[jrow]*x[4*idx[jrow]];
530f9fae5adSBarry Smith       sum2 += v[jrow]*x[4*idx[jrow]+1];
531f9fae5adSBarry Smith       sum3 += v[jrow]*x[4*idx[jrow]+2];
532f9fae5adSBarry Smith       sum4 += v[jrow]*x[4*idx[jrow]+3];
533f9fae5adSBarry Smith       jrow++;
534f9fae5adSBarry Smith      }
535f9fae5adSBarry Smith     y[4*i]   += sum1;
536f9fae5adSBarry Smith     y[4*i+1] += sum2;
537f9fae5adSBarry Smith     y[4*i+2] += sum3;
538f9fae5adSBarry Smith     y[4*i+3] += sum4;
539f9fae5adSBarry Smith   }
540f9fae5adSBarry Smith 
541b0a32e0cSBarry Smith   PetscLogFlops(8*a->nz - 4*m);
542*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
543*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
544f9fae5adSBarry Smith   PetscFunctionReturn(0);
545bcc973b7SBarry Smith }
5464a2ae208SSatish Balay #undef __FUNCT__
547b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4"
548bcc973b7SBarry Smith int MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
549bcc973b7SBarry Smith {
5504cb9d9b8SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
551f9fae5adSBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
55287828ca2SBarry Smith   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
553273d9f13SBarry Smith   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
554f9fae5adSBarry Smith 
555f9fae5adSBarry Smith   PetscFunctionBegin;
556f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
557*2f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
558*2f7816d4SBarry Smith   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
559f9fae5adSBarry Smith   y = y + shift; /* shift for Fortran start by 1 indexing */
560f9fae5adSBarry Smith   for (i=0; i<m; i++) {
561f9fae5adSBarry Smith     idx    = a->j + a->i[i] + shift;
562f9fae5adSBarry Smith     v      = a->a + a->i[i] + shift;
563f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
564f9fae5adSBarry Smith     alpha1 = x[4*i];
565f9fae5adSBarry Smith     alpha2 = x[4*i+1];
566f9fae5adSBarry Smith     alpha3 = x[4*i+2];
567f9fae5adSBarry Smith     alpha4 = x[4*i+3];
568f9fae5adSBarry Smith     while (n-->0) {
569f9fae5adSBarry Smith       y[4*(*idx)]   += alpha1*(*v);
570f9fae5adSBarry Smith       y[4*(*idx)+1] += alpha2*(*v);
571f9fae5adSBarry Smith       y[4*(*idx)+2] += alpha3*(*v);
572f9fae5adSBarry Smith       y[4*(*idx)+3] += alpha4*(*v);
573f9fae5adSBarry Smith       idx++; v++;
574f9fae5adSBarry Smith     }
575f9fae5adSBarry Smith   }
576b0a32e0cSBarry Smith   PetscLogFlops(8*a->nz - 4*b->AIJ->n);
577*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
578*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
579f9fae5adSBarry Smith   PetscFunctionReturn(0);
580f9fae5adSBarry Smith }
581f9fae5adSBarry Smith /* ------------------------------------------------------------------------------*/
5826cd98798SBarry Smith 
5834a2ae208SSatish Balay #undef __FUNCT__
584b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_5"
585f9fae5adSBarry Smith int MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
586f9fae5adSBarry Smith {
5874cb9d9b8SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
588f9fae5adSBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
58987828ca2SBarry Smith   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
590273d9f13SBarry Smith   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
591f9fae5adSBarry Smith   int           n,i,jrow,j;
592f9fae5adSBarry Smith 
593f9fae5adSBarry Smith   PetscFunctionBegin;
594*2f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
595*2f7816d4SBarry Smith   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
596f9fae5adSBarry Smith   x    = x + shift;    /* shift for Fortran start by 1 indexing */
597f9fae5adSBarry Smith   idx  = a->j;
598f9fae5adSBarry Smith   v    = a->a;
599f9fae5adSBarry Smith   ii   = a->i;
600f9fae5adSBarry Smith 
601f9fae5adSBarry Smith   v    += shift; /* shift for Fortran start by 1 indexing */
602f9fae5adSBarry Smith   idx  += shift;
603f9fae5adSBarry Smith   for (i=0; i<m; i++) {
604f9fae5adSBarry Smith     jrow = ii[i];
605f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
606f9fae5adSBarry Smith     sum1  = 0.0;
607f9fae5adSBarry Smith     sum2  = 0.0;
608f9fae5adSBarry Smith     sum3  = 0.0;
609f9fae5adSBarry Smith     sum4  = 0.0;
610f9fae5adSBarry Smith     sum5  = 0.0;
611f9fae5adSBarry Smith     for (j=0; j<n; j++) {
612f9fae5adSBarry Smith       sum1 += v[jrow]*x[5*idx[jrow]];
613f9fae5adSBarry Smith       sum2 += v[jrow]*x[5*idx[jrow]+1];
614f9fae5adSBarry Smith       sum3 += v[jrow]*x[5*idx[jrow]+2];
615f9fae5adSBarry Smith       sum4 += v[jrow]*x[5*idx[jrow]+3];
616f9fae5adSBarry Smith       sum5 += v[jrow]*x[5*idx[jrow]+4];
617f9fae5adSBarry Smith       jrow++;
618f9fae5adSBarry Smith      }
619f9fae5adSBarry Smith     y[5*i]   = sum1;
620f9fae5adSBarry Smith     y[5*i+1] = sum2;
621f9fae5adSBarry Smith     y[5*i+2] = sum3;
622f9fae5adSBarry Smith     y[5*i+3] = sum4;
623f9fae5adSBarry Smith     y[5*i+4] = sum5;
624f9fae5adSBarry Smith   }
625f9fae5adSBarry Smith 
626b0a32e0cSBarry Smith   PetscLogFlops(10*a->nz - 5*m);
627*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
628*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
629f9fae5adSBarry Smith   PetscFunctionReturn(0);
630f9fae5adSBarry Smith }
631f9fae5adSBarry Smith 
6324a2ae208SSatish Balay #undef __FUNCT__
633b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5"
634f9fae5adSBarry Smith int MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
635f9fae5adSBarry Smith {
6364cb9d9b8SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
637f9fae5adSBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
63887828ca2SBarry Smith   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
639273d9f13SBarry Smith   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
640f9fae5adSBarry Smith 
641f9fae5adSBarry Smith   PetscFunctionBegin;
642f9fae5adSBarry Smith   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
643*2f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
644*2f7816d4SBarry Smith   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
645f9fae5adSBarry Smith   y = y + shift; /* shift for Fortran start by 1 indexing */
646f9fae5adSBarry Smith   for (i=0; i<m; i++) {
647f9fae5adSBarry Smith     idx    = a->j + a->i[i] + shift;
648f9fae5adSBarry Smith     v      = a->a + a->i[i] + shift;
649f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
650f9fae5adSBarry Smith     alpha1 = x[5*i];
651f9fae5adSBarry Smith     alpha2 = x[5*i+1];
652f9fae5adSBarry Smith     alpha3 = x[5*i+2];
653f9fae5adSBarry Smith     alpha4 = x[5*i+3];
654f9fae5adSBarry Smith     alpha5 = x[5*i+4];
655f9fae5adSBarry Smith     while (n-->0) {
656f9fae5adSBarry Smith       y[5*(*idx)]   += alpha1*(*v);
657f9fae5adSBarry Smith       y[5*(*idx)+1] += alpha2*(*v);
658f9fae5adSBarry Smith       y[5*(*idx)+2] += alpha3*(*v);
659f9fae5adSBarry Smith       y[5*(*idx)+3] += alpha4*(*v);
660f9fae5adSBarry Smith       y[5*(*idx)+4] += alpha5*(*v);
661f9fae5adSBarry Smith       idx++; v++;
662f9fae5adSBarry Smith     }
663f9fae5adSBarry Smith   }
664b0a32e0cSBarry Smith   PetscLogFlops(10*a->nz - 5*b->AIJ->n);
665*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
666*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
667f9fae5adSBarry Smith   PetscFunctionReturn(0);
668f9fae5adSBarry Smith }
669f9fae5adSBarry Smith 
6704a2ae208SSatish Balay #undef __FUNCT__
671b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_5"
672f9fae5adSBarry Smith int MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
673f9fae5adSBarry Smith {
6744cb9d9b8SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
675f9fae5adSBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
67687828ca2SBarry Smith   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
677273d9f13SBarry Smith   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
678f9fae5adSBarry Smith   int           n,i,jrow,j;
679f9fae5adSBarry Smith 
680f9fae5adSBarry Smith   PetscFunctionBegin;
681f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
682*2f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
683*2f7816d4SBarry Smith   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
684f9fae5adSBarry Smith   x    = x + shift;    /* shift for Fortran start by 1 indexing */
685f9fae5adSBarry Smith   idx  = a->j;
686f9fae5adSBarry Smith   v    = a->a;
687f9fae5adSBarry Smith   ii   = a->i;
688f9fae5adSBarry Smith 
689f9fae5adSBarry Smith   v    += shift; /* shift for Fortran start by 1 indexing */
690f9fae5adSBarry Smith   idx  += shift;
691f9fae5adSBarry Smith   for (i=0; i<m; i++) {
692f9fae5adSBarry Smith     jrow = ii[i];
693f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
694f9fae5adSBarry Smith     sum1  = 0.0;
695f9fae5adSBarry Smith     sum2  = 0.0;
696f9fae5adSBarry Smith     sum3  = 0.0;
697f9fae5adSBarry Smith     sum4  = 0.0;
698f9fae5adSBarry Smith     sum5  = 0.0;
699f9fae5adSBarry Smith     for (j=0; j<n; j++) {
700f9fae5adSBarry Smith       sum1 += v[jrow]*x[5*idx[jrow]];
701f9fae5adSBarry Smith       sum2 += v[jrow]*x[5*idx[jrow]+1];
702f9fae5adSBarry Smith       sum3 += v[jrow]*x[5*idx[jrow]+2];
703f9fae5adSBarry Smith       sum4 += v[jrow]*x[5*idx[jrow]+3];
704f9fae5adSBarry Smith       sum5 += v[jrow]*x[5*idx[jrow]+4];
705f9fae5adSBarry Smith       jrow++;
706f9fae5adSBarry Smith      }
707f9fae5adSBarry Smith     y[5*i]   += sum1;
708f9fae5adSBarry Smith     y[5*i+1] += sum2;
709f9fae5adSBarry Smith     y[5*i+2] += sum3;
710f9fae5adSBarry Smith     y[5*i+3] += sum4;
711f9fae5adSBarry Smith     y[5*i+4] += sum5;
712f9fae5adSBarry Smith   }
713f9fae5adSBarry Smith 
714b0a32e0cSBarry Smith   PetscLogFlops(10*a->nz);
715*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
716*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
717f9fae5adSBarry Smith   PetscFunctionReturn(0);
718f9fae5adSBarry Smith }
719f9fae5adSBarry Smith 
7204a2ae208SSatish Balay #undef __FUNCT__
721b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5"
722f9fae5adSBarry Smith int MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
723f9fae5adSBarry Smith {
7244cb9d9b8SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
725f9fae5adSBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
72687828ca2SBarry Smith   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
727273d9f13SBarry Smith   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
728f9fae5adSBarry Smith 
729f9fae5adSBarry Smith   PetscFunctionBegin;
730f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
731*2f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
732*2f7816d4SBarry Smith   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
733f9fae5adSBarry Smith   y = y + shift; /* shift for Fortran start by 1 indexing */
734f9fae5adSBarry Smith   for (i=0; i<m; i++) {
735f9fae5adSBarry Smith     idx    = a->j + a->i[i] + shift;
736f9fae5adSBarry Smith     v      = a->a + a->i[i] + shift;
737f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
738f9fae5adSBarry Smith     alpha1 = x[5*i];
739f9fae5adSBarry Smith     alpha2 = x[5*i+1];
740f9fae5adSBarry Smith     alpha3 = x[5*i+2];
741f9fae5adSBarry Smith     alpha4 = x[5*i+3];
742f9fae5adSBarry Smith     alpha5 = x[5*i+4];
743f9fae5adSBarry Smith     while (n-->0) {
744f9fae5adSBarry Smith       y[5*(*idx)]   += alpha1*(*v);
745f9fae5adSBarry Smith       y[5*(*idx)+1] += alpha2*(*v);
746f9fae5adSBarry Smith       y[5*(*idx)+2] += alpha3*(*v);
747f9fae5adSBarry Smith       y[5*(*idx)+3] += alpha4*(*v);
748f9fae5adSBarry Smith       y[5*(*idx)+4] += alpha5*(*v);
749f9fae5adSBarry Smith       idx++; v++;
750f9fae5adSBarry Smith     }
751f9fae5adSBarry Smith   }
752b0a32e0cSBarry Smith   PetscLogFlops(10*a->nz);
753*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
754*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
755f9fae5adSBarry Smith   PetscFunctionReturn(0);
756bcc973b7SBarry Smith }
757bcc973b7SBarry Smith 
7586cd98798SBarry Smith /* ------------------------------------------------------------------------------*/
7594a2ae208SSatish Balay #undef __FUNCT__
760b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_6"
7616cd98798SBarry Smith int MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
7626cd98798SBarry Smith {
7636cd98798SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
7646cd98798SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
76587828ca2SBarry Smith   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
7666cd98798SBarry Smith   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
7676cd98798SBarry Smith   int           n,i,jrow,j;
7686cd98798SBarry Smith 
7696cd98798SBarry Smith   PetscFunctionBegin;
770*2f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
771*2f7816d4SBarry Smith   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
7726cd98798SBarry Smith   x    = x + shift;    /* shift for Fortran start by 1 indexing */
7736cd98798SBarry Smith   idx  = a->j;
7746cd98798SBarry Smith   v    = a->a;
7756cd98798SBarry Smith   ii   = a->i;
7766cd98798SBarry Smith 
7776cd98798SBarry Smith   v    += shift; /* shift for Fortran start by 1 indexing */
7786cd98798SBarry Smith   idx  += shift;
7796cd98798SBarry Smith   for (i=0; i<m; i++) {
7806cd98798SBarry Smith     jrow = ii[i];
7816cd98798SBarry Smith     n    = ii[i+1] - jrow;
7826cd98798SBarry Smith     sum1  = 0.0;
7836cd98798SBarry Smith     sum2  = 0.0;
7846cd98798SBarry Smith     sum3  = 0.0;
7856cd98798SBarry Smith     sum4  = 0.0;
7866cd98798SBarry Smith     sum5  = 0.0;
7876cd98798SBarry Smith     sum6  = 0.0;
7886cd98798SBarry Smith     for (j=0; j<n; j++) {
7896cd98798SBarry Smith       sum1 += v[jrow]*x[6*idx[jrow]];
7906cd98798SBarry Smith       sum2 += v[jrow]*x[6*idx[jrow]+1];
7916cd98798SBarry Smith       sum3 += v[jrow]*x[6*idx[jrow]+2];
7926cd98798SBarry Smith       sum4 += v[jrow]*x[6*idx[jrow]+3];
7936cd98798SBarry Smith       sum5 += v[jrow]*x[6*idx[jrow]+4];
7946cd98798SBarry Smith       sum6 += v[jrow]*x[6*idx[jrow]+5];
7956cd98798SBarry Smith       jrow++;
7966cd98798SBarry Smith      }
7976cd98798SBarry Smith     y[6*i]   = sum1;
7986cd98798SBarry Smith     y[6*i+1] = sum2;
7996cd98798SBarry Smith     y[6*i+2] = sum3;
8006cd98798SBarry Smith     y[6*i+3] = sum4;
8016cd98798SBarry Smith     y[6*i+4] = sum5;
8026cd98798SBarry Smith     y[6*i+5] = sum6;
8036cd98798SBarry Smith   }
8046cd98798SBarry Smith 
805b0a32e0cSBarry Smith   PetscLogFlops(12*a->nz - 6*m);
806*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
807*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
8086cd98798SBarry Smith   PetscFunctionReturn(0);
8096cd98798SBarry Smith }
8106cd98798SBarry Smith 
8114a2ae208SSatish Balay #undef __FUNCT__
812b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6"
8136cd98798SBarry Smith int MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
8146cd98798SBarry Smith {
8156cd98798SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
8166cd98798SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
81787828ca2SBarry Smith   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0;
8186cd98798SBarry Smith   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
8196cd98798SBarry Smith 
8206cd98798SBarry Smith   PetscFunctionBegin;
8216cd98798SBarry Smith   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
822*2f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
823*2f7816d4SBarry Smith   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
8246cd98798SBarry Smith   y = y + shift; /* shift for Fortran start by 1 indexing */
8256cd98798SBarry Smith   for (i=0; i<m; i++) {
8266cd98798SBarry Smith     idx    = a->j + a->i[i] + shift;
8276cd98798SBarry Smith     v      = a->a + a->i[i] + shift;
8286cd98798SBarry Smith     n      = a->i[i+1] - a->i[i];
8296cd98798SBarry Smith     alpha1 = x[6*i];
8306cd98798SBarry Smith     alpha2 = x[6*i+1];
8316cd98798SBarry Smith     alpha3 = x[6*i+2];
8326cd98798SBarry Smith     alpha4 = x[6*i+3];
8336cd98798SBarry Smith     alpha5 = x[6*i+4];
8346cd98798SBarry Smith     alpha6 = x[6*i+5];
8356cd98798SBarry Smith     while (n-->0) {
8366cd98798SBarry Smith       y[6*(*idx)]   += alpha1*(*v);
8376cd98798SBarry Smith       y[6*(*idx)+1] += alpha2*(*v);
8386cd98798SBarry Smith       y[6*(*idx)+2] += alpha3*(*v);
8396cd98798SBarry Smith       y[6*(*idx)+3] += alpha4*(*v);
8406cd98798SBarry Smith       y[6*(*idx)+4] += alpha5*(*v);
8416cd98798SBarry Smith       y[6*(*idx)+5] += alpha6*(*v);
8426cd98798SBarry Smith       idx++; v++;
8436cd98798SBarry Smith     }
8446cd98798SBarry Smith   }
845b0a32e0cSBarry Smith   PetscLogFlops(12*a->nz - 6*b->AIJ->n);
846*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
847*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
8486cd98798SBarry Smith   PetscFunctionReturn(0);
8496cd98798SBarry Smith }
8506cd98798SBarry Smith 
8514a2ae208SSatish Balay #undef __FUNCT__
852b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_6"
8536cd98798SBarry Smith int MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
8546cd98798SBarry Smith {
8556cd98798SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
8566cd98798SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
85787828ca2SBarry Smith   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
8586cd98798SBarry Smith   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
8596cd98798SBarry Smith   int           n,i,jrow,j;
8606cd98798SBarry Smith 
8616cd98798SBarry Smith   PetscFunctionBegin;
8626cd98798SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
863*2f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
864*2f7816d4SBarry Smith   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
8656cd98798SBarry Smith   x    = x + shift;    /* shift for Fortran start by 1 indexing */
8666cd98798SBarry Smith   idx  = a->j;
8676cd98798SBarry Smith   v    = a->a;
8686cd98798SBarry Smith   ii   = a->i;
8696cd98798SBarry Smith 
8706cd98798SBarry Smith   v    += shift; /* shift for Fortran start by 1 indexing */
8716cd98798SBarry Smith   idx  += shift;
8726cd98798SBarry Smith   for (i=0; i<m; i++) {
8736cd98798SBarry Smith     jrow = ii[i];
8746cd98798SBarry Smith     n    = ii[i+1] - jrow;
8756cd98798SBarry Smith     sum1  = 0.0;
8766cd98798SBarry Smith     sum2  = 0.0;
8776cd98798SBarry Smith     sum3  = 0.0;
8786cd98798SBarry Smith     sum4  = 0.0;
8796cd98798SBarry Smith     sum5  = 0.0;
8806cd98798SBarry Smith     sum6  = 0.0;
8816cd98798SBarry Smith     for (j=0; j<n; j++) {
8826cd98798SBarry Smith       sum1 += v[jrow]*x[6*idx[jrow]];
8836cd98798SBarry Smith       sum2 += v[jrow]*x[6*idx[jrow]+1];
8846cd98798SBarry Smith       sum3 += v[jrow]*x[6*idx[jrow]+2];
8856cd98798SBarry Smith       sum4 += v[jrow]*x[6*idx[jrow]+3];
8866cd98798SBarry Smith       sum5 += v[jrow]*x[6*idx[jrow]+4];
8876cd98798SBarry Smith       sum6 += v[jrow]*x[6*idx[jrow]+5];
8886cd98798SBarry Smith       jrow++;
8896cd98798SBarry Smith      }
8906cd98798SBarry Smith     y[6*i]   += sum1;
8916cd98798SBarry Smith     y[6*i+1] += sum2;
8926cd98798SBarry Smith     y[6*i+2] += sum3;
8936cd98798SBarry Smith     y[6*i+3] += sum4;
8946cd98798SBarry Smith     y[6*i+4] += sum5;
8956cd98798SBarry Smith     y[6*i+5] += sum6;
8966cd98798SBarry Smith   }
8976cd98798SBarry Smith 
898b0a32e0cSBarry Smith   PetscLogFlops(12*a->nz);
899*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
900*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
9016cd98798SBarry Smith   PetscFunctionReturn(0);
9026cd98798SBarry Smith }
9036cd98798SBarry Smith 
9044a2ae208SSatish Balay #undef __FUNCT__
905b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6"
9066cd98798SBarry Smith int MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
9076cd98798SBarry Smith {
9086cd98798SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
9096cd98798SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
91087828ca2SBarry Smith   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
9116cd98798SBarry Smith   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
9126cd98798SBarry Smith 
9136cd98798SBarry Smith   PetscFunctionBegin;
9146cd98798SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
915*2f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
916*2f7816d4SBarry Smith   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
9176cd98798SBarry Smith   y = y + shift; /* shift for Fortran start by 1 indexing */
9186cd98798SBarry Smith   for (i=0; i<m; i++) {
9196cd98798SBarry Smith     idx    = a->j + a->i[i] + shift;
9206cd98798SBarry Smith     v      = a->a + a->i[i] + shift;
9216cd98798SBarry Smith     n      = a->i[i+1] - a->i[i];
9226cd98798SBarry Smith     alpha1 = x[6*i];
9236cd98798SBarry Smith     alpha2 = x[6*i+1];
9246cd98798SBarry Smith     alpha3 = x[6*i+2];
9256cd98798SBarry Smith     alpha4 = x[6*i+3];
9266cd98798SBarry Smith     alpha5 = x[6*i+4];
9276cd98798SBarry Smith     alpha6 = x[6*i+5];
9286cd98798SBarry Smith     while (n-->0) {
9296cd98798SBarry Smith       y[6*(*idx)]   += alpha1*(*v);
9306cd98798SBarry Smith       y[6*(*idx)+1] += alpha2*(*v);
9316cd98798SBarry Smith       y[6*(*idx)+2] += alpha3*(*v);
9326cd98798SBarry Smith       y[6*(*idx)+3] += alpha4*(*v);
9336cd98798SBarry Smith       y[6*(*idx)+4] += alpha5*(*v);
9346cd98798SBarry Smith       y[6*(*idx)+5] += alpha6*(*v);
9356cd98798SBarry Smith       idx++; v++;
9366cd98798SBarry Smith     }
9376cd98798SBarry Smith   }
938b0a32e0cSBarry Smith   PetscLogFlops(12*a->nz);
939*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
940*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
9416cd98798SBarry Smith   PetscFunctionReturn(0);
9426cd98798SBarry Smith }
9436cd98798SBarry Smith 
94466ed3db0SBarry Smith /* ------------------------------------------------------------------------------*/
94566ed3db0SBarry Smith #undef __FUNCT__
94666ed3db0SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_8"
94766ed3db0SBarry Smith int MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
94866ed3db0SBarry Smith {
94966ed3db0SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
95066ed3db0SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
95187828ca2SBarry Smith   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
95266ed3db0SBarry Smith   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
95366ed3db0SBarry Smith   int           n,i,jrow,j;
95466ed3db0SBarry Smith 
95566ed3db0SBarry Smith   PetscFunctionBegin;
956*2f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
957*2f7816d4SBarry Smith   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
95866ed3db0SBarry Smith   x    = x + shift;    /* shift for Fortran start by 1 indexing */
95966ed3db0SBarry Smith   idx  = a->j;
96066ed3db0SBarry Smith   v    = a->a;
96166ed3db0SBarry Smith   ii   = a->i;
96266ed3db0SBarry Smith 
96366ed3db0SBarry Smith   v    += shift; /* shift for Fortran start by 1 indexing */
96466ed3db0SBarry Smith   idx  += shift;
96566ed3db0SBarry Smith   for (i=0; i<m; i++) {
96666ed3db0SBarry Smith     jrow = ii[i];
96766ed3db0SBarry Smith     n    = ii[i+1] - jrow;
96866ed3db0SBarry Smith     sum1  = 0.0;
96966ed3db0SBarry Smith     sum2  = 0.0;
97066ed3db0SBarry Smith     sum3  = 0.0;
97166ed3db0SBarry Smith     sum4  = 0.0;
97266ed3db0SBarry Smith     sum5  = 0.0;
97366ed3db0SBarry Smith     sum6  = 0.0;
97466ed3db0SBarry Smith     sum7  = 0.0;
97566ed3db0SBarry Smith     sum8  = 0.0;
97666ed3db0SBarry Smith     for (j=0; j<n; j++) {
97766ed3db0SBarry Smith       sum1 += v[jrow]*x[8*idx[jrow]];
97866ed3db0SBarry Smith       sum2 += v[jrow]*x[8*idx[jrow]+1];
97966ed3db0SBarry Smith       sum3 += v[jrow]*x[8*idx[jrow]+2];
98066ed3db0SBarry Smith       sum4 += v[jrow]*x[8*idx[jrow]+3];
98166ed3db0SBarry Smith       sum5 += v[jrow]*x[8*idx[jrow]+4];
98266ed3db0SBarry Smith       sum6 += v[jrow]*x[8*idx[jrow]+5];
98366ed3db0SBarry Smith       sum7 += v[jrow]*x[8*idx[jrow]+6];
98466ed3db0SBarry Smith       sum8 += v[jrow]*x[8*idx[jrow]+7];
98566ed3db0SBarry Smith       jrow++;
98666ed3db0SBarry Smith      }
98766ed3db0SBarry Smith     y[8*i]   = sum1;
98866ed3db0SBarry Smith     y[8*i+1] = sum2;
98966ed3db0SBarry Smith     y[8*i+2] = sum3;
99066ed3db0SBarry Smith     y[8*i+3] = sum4;
99166ed3db0SBarry Smith     y[8*i+4] = sum5;
99266ed3db0SBarry Smith     y[8*i+5] = sum6;
99366ed3db0SBarry Smith     y[8*i+6] = sum7;
99466ed3db0SBarry Smith     y[8*i+7] = sum8;
99566ed3db0SBarry Smith   }
99666ed3db0SBarry Smith 
99766ed3db0SBarry Smith   PetscLogFlops(16*a->nz - 8*m);
998*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
999*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
100066ed3db0SBarry Smith   PetscFunctionReturn(0);
100166ed3db0SBarry Smith }
100266ed3db0SBarry Smith 
100366ed3db0SBarry Smith #undef __FUNCT__
100466ed3db0SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8"
100566ed3db0SBarry Smith int MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
100666ed3db0SBarry Smith {
100766ed3db0SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
100866ed3db0SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
100987828ca2SBarry Smith   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
101066ed3db0SBarry Smith   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
101166ed3db0SBarry Smith 
101266ed3db0SBarry Smith   PetscFunctionBegin;
101366ed3db0SBarry Smith   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
1014*2f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
1015*2f7816d4SBarry Smith   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
101666ed3db0SBarry Smith   y = y + shift; /* shift for Fortran start by 1 indexing */
101766ed3db0SBarry Smith   for (i=0; i<m; i++) {
101866ed3db0SBarry Smith     idx    = a->j + a->i[i] + shift;
101966ed3db0SBarry Smith     v      = a->a + a->i[i] + shift;
102066ed3db0SBarry Smith     n      = a->i[i+1] - a->i[i];
102166ed3db0SBarry Smith     alpha1 = x[8*i];
102266ed3db0SBarry Smith     alpha2 = x[8*i+1];
102366ed3db0SBarry Smith     alpha3 = x[8*i+2];
102466ed3db0SBarry Smith     alpha4 = x[8*i+3];
102566ed3db0SBarry Smith     alpha5 = x[8*i+4];
102666ed3db0SBarry Smith     alpha6 = x[8*i+5];
102766ed3db0SBarry Smith     alpha7 = x[8*i+6];
102866ed3db0SBarry Smith     alpha8 = x[8*i+7];
102966ed3db0SBarry Smith     while (n-->0) {
103066ed3db0SBarry Smith       y[8*(*idx)]   += alpha1*(*v);
103166ed3db0SBarry Smith       y[8*(*idx)+1] += alpha2*(*v);
103266ed3db0SBarry Smith       y[8*(*idx)+2] += alpha3*(*v);
103366ed3db0SBarry Smith       y[8*(*idx)+3] += alpha4*(*v);
103466ed3db0SBarry Smith       y[8*(*idx)+4] += alpha5*(*v);
103566ed3db0SBarry Smith       y[8*(*idx)+5] += alpha6*(*v);
103666ed3db0SBarry Smith       y[8*(*idx)+6] += alpha7*(*v);
103766ed3db0SBarry Smith       y[8*(*idx)+7] += alpha8*(*v);
103866ed3db0SBarry Smith       idx++; v++;
103966ed3db0SBarry Smith     }
104066ed3db0SBarry Smith   }
104166ed3db0SBarry Smith   PetscLogFlops(16*a->nz - 8*b->AIJ->n);
1042*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
1043*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
104466ed3db0SBarry Smith   PetscFunctionReturn(0);
104566ed3db0SBarry Smith }
104666ed3db0SBarry Smith 
104766ed3db0SBarry Smith #undef __FUNCT__
104866ed3db0SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_8"
104966ed3db0SBarry Smith int MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
105066ed3db0SBarry Smith {
105166ed3db0SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
105266ed3db0SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
105387828ca2SBarry Smith   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
105466ed3db0SBarry Smith   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
105566ed3db0SBarry Smith   int           n,i,jrow,j;
105666ed3db0SBarry Smith 
105766ed3db0SBarry Smith   PetscFunctionBegin;
105866ed3db0SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1059*2f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
1060*2f7816d4SBarry Smith   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
106166ed3db0SBarry Smith   x    = x + shift;    /* shift for Fortran start by 1 indexing */
106266ed3db0SBarry Smith   idx  = a->j;
106366ed3db0SBarry Smith   v    = a->a;
106466ed3db0SBarry Smith   ii   = a->i;
106566ed3db0SBarry Smith 
106666ed3db0SBarry Smith   v    += shift; /* shift for Fortran start by 1 indexing */
106766ed3db0SBarry Smith   idx  += shift;
106866ed3db0SBarry Smith   for (i=0; i<m; i++) {
106966ed3db0SBarry Smith     jrow = ii[i];
107066ed3db0SBarry Smith     n    = ii[i+1] - jrow;
107166ed3db0SBarry Smith     sum1  = 0.0;
107266ed3db0SBarry Smith     sum2  = 0.0;
107366ed3db0SBarry Smith     sum3  = 0.0;
107466ed3db0SBarry Smith     sum4  = 0.0;
107566ed3db0SBarry Smith     sum5  = 0.0;
107666ed3db0SBarry Smith     sum6  = 0.0;
107766ed3db0SBarry Smith     sum7  = 0.0;
107866ed3db0SBarry Smith     sum8  = 0.0;
107966ed3db0SBarry Smith     for (j=0; j<n; j++) {
108066ed3db0SBarry Smith       sum1 += v[jrow]*x[8*idx[jrow]];
108166ed3db0SBarry Smith       sum2 += v[jrow]*x[8*idx[jrow]+1];
108266ed3db0SBarry Smith       sum3 += v[jrow]*x[8*idx[jrow]+2];
108366ed3db0SBarry Smith       sum4 += v[jrow]*x[8*idx[jrow]+3];
108466ed3db0SBarry Smith       sum5 += v[jrow]*x[8*idx[jrow]+4];
108566ed3db0SBarry Smith       sum6 += v[jrow]*x[8*idx[jrow]+5];
108666ed3db0SBarry Smith       sum7 += v[jrow]*x[8*idx[jrow]+6];
108766ed3db0SBarry Smith       sum8 += v[jrow]*x[8*idx[jrow]+7];
108866ed3db0SBarry Smith       jrow++;
108966ed3db0SBarry Smith      }
109066ed3db0SBarry Smith     y[8*i]   += sum1;
109166ed3db0SBarry Smith     y[8*i+1] += sum2;
109266ed3db0SBarry Smith     y[8*i+2] += sum3;
109366ed3db0SBarry Smith     y[8*i+3] += sum4;
109466ed3db0SBarry Smith     y[8*i+4] += sum5;
109566ed3db0SBarry Smith     y[8*i+5] += sum6;
109666ed3db0SBarry Smith     y[8*i+6] += sum7;
109766ed3db0SBarry Smith     y[8*i+7] += sum8;
109866ed3db0SBarry Smith   }
109966ed3db0SBarry Smith 
110066ed3db0SBarry Smith   PetscLogFlops(16*a->nz);
1101*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
1102*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
110366ed3db0SBarry Smith   PetscFunctionReturn(0);
110466ed3db0SBarry Smith }
110566ed3db0SBarry Smith 
110666ed3db0SBarry Smith #undef __FUNCT__
110766ed3db0SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8"
110866ed3db0SBarry Smith int MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
110966ed3db0SBarry Smith {
111066ed3db0SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
111166ed3db0SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
111287828ca2SBarry Smith   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
111366ed3db0SBarry Smith   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
111466ed3db0SBarry Smith 
111566ed3db0SBarry Smith   PetscFunctionBegin;
111666ed3db0SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1117*2f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
1118*2f7816d4SBarry Smith   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
111966ed3db0SBarry Smith   y = y + shift; /* shift for Fortran start by 1 indexing */
112066ed3db0SBarry Smith   for (i=0; i<m; i++) {
112166ed3db0SBarry Smith     idx    = a->j + a->i[i] + shift;
112266ed3db0SBarry Smith     v      = a->a + a->i[i] + shift;
112366ed3db0SBarry Smith     n      = a->i[i+1] - a->i[i];
112466ed3db0SBarry Smith     alpha1 = x[8*i];
112566ed3db0SBarry Smith     alpha2 = x[8*i+1];
112666ed3db0SBarry Smith     alpha3 = x[8*i+2];
112766ed3db0SBarry Smith     alpha4 = x[8*i+3];
112866ed3db0SBarry Smith     alpha5 = x[8*i+4];
112966ed3db0SBarry Smith     alpha6 = x[8*i+5];
113066ed3db0SBarry Smith     alpha7 = x[8*i+6];
113166ed3db0SBarry Smith     alpha8 = x[8*i+7];
113266ed3db0SBarry Smith     while (n-->0) {
113366ed3db0SBarry Smith       y[8*(*idx)]   += alpha1*(*v);
113466ed3db0SBarry Smith       y[8*(*idx)+1] += alpha2*(*v);
113566ed3db0SBarry Smith       y[8*(*idx)+2] += alpha3*(*v);
113666ed3db0SBarry Smith       y[8*(*idx)+3] += alpha4*(*v);
113766ed3db0SBarry Smith       y[8*(*idx)+4] += alpha5*(*v);
113866ed3db0SBarry Smith       y[8*(*idx)+5] += alpha6*(*v);
113966ed3db0SBarry Smith       y[8*(*idx)+6] += alpha7*(*v);
114066ed3db0SBarry Smith       y[8*(*idx)+7] += alpha8*(*v);
114166ed3db0SBarry Smith       idx++; v++;
114266ed3db0SBarry Smith     }
114366ed3db0SBarry Smith   }
114466ed3db0SBarry Smith   PetscLogFlops(16*a->nz);
1145*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
1146*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
1147*2f7816d4SBarry Smith   PetscFunctionReturn(0);
1148*2f7816d4SBarry Smith }
1149*2f7816d4SBarry Smith 
1150*2f7816d4SBarry Smith /*--------------------------------------------------------------------------------------------*/
1151*2f7816d4SBarry Smith #undef __FUNCT__
1152*2f7816d4SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_16"
1153*2f7816d4SBarry Smith int MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1154*2f7816d4SBarry Smith {
1155*2f7816d4SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1156*2f7816d4SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1157*2f7816d4SBarry Smith   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1158*2f7816d4SBarry Smith   PetscScalar   sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1159*2f7816d4SBarry Smith   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
1160*2f7816d4SBarry Smith   int           n,i,jrow,j;
1161*2f7816d4SBarry Smith 
1162*2f7816d4SBarry Smith   PetscFunctionBegin;
1163*2f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
1164*2f7816d4SBarry Smith   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
1165*2f7816d4SBarry Smith   x    = x + shift;    /* shift for Fortran start by 1 indexing */
1166*2f7816d4SBarry Smith   idx  = a->j;
1167*2f7816d4SBarry Smith   v    = a->a;
1168*2f7816d4SBarry Smith   ii   = a->i;
1169*2f7816d4SBarry Smith 
1170*2f7816d4SBarry Smith   v    += shift; /* shift for Fortran start by 1 indexing */
1171*2f7816d4SBarry Smith   idx  += shift;
1172*2f7816d4SBarry Smith   for (i=0; i<m; i++) {
1173*2f7816d4SBarry Smith     jrow = ii[i];
1174*2f7816d4SBarry Smith     n    = ii[i+1] - jrow;
1175*2f7816d4SBarry Smith     sum1  = 0.0;
1176*2f7816d4SBarry Smith     sum2  = 0.0;
1177*2f7816d4SBarry Smith     sum3  = 0.0;
1178*2f7816d4SBarry Smith     sum4  = 0.0;
1179*2f7816d4SBarry Smith     sum5  = 0.0;
1180*2f7816d4SBarry Smith     sum6  = 0.0;
1181*2f7816d4SBarry Smith     sum7  = 0.0;
1182*2f7816d4SBarry Smith     sum8  = 0.0;
1183*2f7816d4SBarry Smith     sum9  = 0.0;
1184*2f7816d4SBarry Smith     sum10 = 0.0;
1185*2f7816d4SBarry Smith     sum11 = 0.0;
1186*2f7816d4SBarry Smith     sum12 = 0.0;
1187*2f7816d4SBarry Smith     sum13 = 0.0;
1188*2f7816d4SBarry Smith     sum14 = 0.0;
1189*2f7816d4SBarry Smith     sum15 = 0.0;
1190*2f7816d4SBarry Smith     sum16 = 0.0;
1191*2f7816d4SBarry Smith     for (j=0; j<n; j++) {
1192*2f7816d4SBarry Smith       sum1  += v[jrow]*x[16*idx[jrow]];
1193*2f7816d4SBarry Smith       sum2  += v[jrow]*x[16*idx[jrow]+1];
1194*2f7816d4SBarry Smith       sum3  += v[jrow]*x[16*idx[jrow]+2];
1195*2f7816d4SBarry Smith       sum4  += v[jrow]*x[16*idx[jrow]+3];
1196*2f7816d4SBarry Smith       sum5  += v[jrow]*x[16*idx[jrow]+4];
1197*2f7816d4SBarry Smith       sum6  += v[jrow]*x[16*idx[jrow]+5];
1198*2f7816d4SBarry Smith       sum7  += v[jrow]*x[16*idx[jrow]+6];
1199*2f7816d4SBarry Smith       sum8  += v[jrow]*x[16*idx[jrow]+7];
1200*2f7816d4SBarry Smith       sum9  += v[jrow]*x[16*idx[jrow]+8];
1201*2f7816d4SBarry Smith       sum10 += v[jrow]*x[16*idx[jrow]+9];
1202*2f7816d4SBarry Smith       sum11 += v[jrow]*x[16*idx[jrow]+10];
1203*2f7816d4SBarry Smith       sum12 += v[jrow]*x[16*idx[jrow]+11];
1204*2f7816d4SBarry Smith       sum13 += v[jrow]*x[16*idx[jrow]+12];
1205*2f7816d4SBarry Smith       sum14 += v[jrow]*x[16*idx[jrow]+13];
1206*2f7816d4SBarry Smith       sum15 += v[jrow]*x[16*idx[jrow]+14];
1207*2f7816d4SBarry Smith       sum16 += v[jrow]*x[16*idx[jrow]+15];
1208*2f7816d4SBarry Smith       jrow++;
1209*2f7816d4SBarry Smith      }
1210*2f7816d4SBarry Smith     y[16*i]    = sum1;
1211*2f7816d4SBarry Smith     y[16*i+1]  = sum2;
1212*2f7816d4SBarry Smith     y[16*i+2]  = sum3;
1213*2f7816d4SBarry Smith     y[16*i+3]  = sum4;
1214*2f7816d4SBarry Smith     y[16*i+4]  = sum5;
1215*2f7816d4SBarry Smith     y[16*i+5]  = sum6;
1216*2f7816d4SBarry Smith     y[16*i+6]  = sum7;
1217*2f7816d4SBarry Smith     y[16*i+7]  = sum8;
1218*2f7816d4SBarry Smith     y[16*i+8]  = sum9;
1219*2f7816d4SBarry Smith     y[16*i+9]  = sum10;
1220*2f7816d4SBarry Smith     y[16*i+10] = sum11;
1221*2f7816d4SBarry Smith     y[16*i+11] = sum12;
1222*2f7816d4SBarry Smith     y[16*i+12] = sum13;
1223*2f7816d4SBarry Smith     y[16*i+13] = sum14;
1224*2f7816d4SBarry Smith     y[16*i+14] = sum15;
1225*2f7816d4SBarry Smith     y[16*i+15] = sum16;
1226*2f7816d4SBarry Smith   }
1227*2f7816d4SBarry Smith 
1228*2f7816d4SBarry Smith   PetscLogFlops(32*a->nz - 16*m);
1229*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
1230*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
1231*2f7816d4SBarry Smith   PetscFunctionReturn(0);
1232*2f7816d4SBarry Smith }
1233*2f7816d4SBarry Smith 
1234*2f7816d4SBarry Smith #undef __FUNCT__
1235*2f7816d4SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16"
1236*2f7816d4SBarry Smith int MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1237*2f7816d4SBarry Smith {
1238*2f7816d4SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1239*2f7816d4SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1240*2f7816d4SBarry Smith   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1241*2f7816d4SBarry Smith   PetscScalar   alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1242*2f7816d4SBarry Smith   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
1243*2f7816d4SBarry Smith 
1244*2f7816d4SBarry Smith   PetscFunctionBegin;
1245*2f7816d4SBarry Smith   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
1246*2f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
1247*2f7816d4SBarry Smith   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
1248*2f7816d4SBarry Smith   y = y + shift; /* shift for Fortran start by 1 indexing */
1249*2f7816d4SBarry Smith   for (i=0; i<m; i++) {
1250*2f7816d4SBarry Smith     idx    = a->j + a->i[i] + shift;
1251*2f7816d4SBarry Smith     v      = a->a + a->i[i] + shift;
1252*2f7816d4SBarry Smith     n      = a->i[i+1] - a->i[i];
1253*2f7816d4SBarry Smith     alpha1  = x[16*i];
1254*2f7816d4SBarry Smith     alpha2  = x[16*i+1];
1255*2f7816d4SBarry Smith     alpha3  = x[16*i+2];
1256*2f7816d4SBarry Smith     alpha4  = x[16*i+3];
1257*2f7816d4SBarry Smith     alpha5  = x[16*i+4];
1258*2f7816d4SBarry Smith     alpha6  = x[16*i+5];
1259*2f7816d4SBarry Smith     alpha7  = x[16*i+6];
1260*2f7816d4SBarry Smith     alpha8  = x[16*i+7];
1261*2f7816d4SBarry Smith     alpha9  = x[16*i+8];
1262*2f7816d4SBarry Smith     alpha10 = x[16*i+9];
1263*2f7816d4SBarry Smith     alpha11 = x[16*i+10];
1264*2f7816d4SBarry Smith     alpha12 = x[16*i+11];
1265*2f7816d4SBarry Smith     alpha13 = x[16*i+12];
1266*2f7816d4SBarry Smith     alpha14 = x[16*i+13];
1267*2f7816d4SBarry Smith     alpha15 = x[16*i+14];
1268*2f7816d4SBarry Smith     alpha16 = x[16*i+15];
1269*2f7816d4SBarry Smith     while (n-->0) {
1270*2f7816d4SBarry Smith       y[16*(*idx)]    += alpha1*(*v);
1271*2f7816d4SBarry Smith       y[16*(*idx)+1]  += alpha2*(*v);
1272*2f7816d4SBarry Smith       y[16*(*idx)+2]  += alpha3*(*v);
1273*2f7816d4SBarry Smith       y[16*(*idx)+3]  += alpha4*(*v);
1274*2f7816d4SBarry Smith       y[16*(*idx)+4]  += alpha5*(*v);
1275*2f7816d4SBarry Smith       y[16*(*idx)+5]  += alpha6*(*v);
1276*2f7816d4SBarry Smith       y[16*(*idx)+6]  += alpha7*(*v);
1277*2f7816d4SBarry Smith       y[16*(*idx)+7]  += alpha8*(*v);
1278*2f7816d4SBarry Smith       y[16*(*idx)+8]  += alpha9*(*v);
1279*2f7816d4SBarry Smith       y[16*(*idx)+9]  += alpha10*(*v);
1280*2f7816d4SBarry Smith       y[16*(*idx)+10] += alpha11*(*v);
1281*2f7816d4SBarry Smith       y[16*(*idx)+11] += alpha12*(*v);
1282*2f7816d4SBarry Smith       y[16*(*idx)+12] += alpha13*(*v);
1283*2f7816d4SBarry Smith       y[16*(*idx)+13] += alpha14*(*v);
1284*2f7816d4SBarry Smith       y[16*(*idx)+14] += alpha15*(*v);
1285*2f7816d4SBarry Smith       y[16*(*idx)+15] += alpha16*(*v);
1286*2f7816d4SBarry Smith       idx++; v++;
1287*2f7816d4SBarry Smith     }
1288*2f7816d4SBarry Smith   }
1289*2f7816d4SBarry Smith   PetscLogFlops(32*a->nz - 16*b->AIJ->n);
1290*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
1291*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
1292*2f7816d4SBarry Smith   PetscFunctionReturn(0);
1293*2f7816d4SBarry Smith }
1294*2f7816d4SBarry Smith 
1295*2f7816d4SBarry Smith #undef __FUNCT__
1296*2f7816d4SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_16"
1297*2f7816d4SBarry Smith int MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
1298*2f7816d4SBarry Smith {
1299*2f7816d4SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1300*2f7816d4SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1301*2f7816d4SBarry Smith   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1302*2f7816d4SBarry Smith   PetscScalar   sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1303*2f7816d4SBarry Smith   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
1304*2f7816d4SBarry Smith   int           n,i,jrow,j;
1305*2f7816d4SBarry Smith 
1306*2f7816d4SBarry Smith   PetscFunctionBegin;
1307*2f7816d4SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1308*2f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
1309*2f7816d4SBarry Smith   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
1310*2f7816d4SBarry Smith   x    = x + shift;    /* shift for Fortran start by 1 indexing */
1311*2f7816d4SBarry Smith   idx  = a->j;
1312*2f7816d4SBarry Smith   v    = a->a;
1313*2f7816d4SBarry Smith   ii   = a->i;
1314*2f7816d4SBarry Smith 
1315*2f7816d4SBarry Smith   v    += shift; /* shift for Fortran start by 1 indexing */
1316*2f7816d4SBarry Smith   idx  += shift;
1317*2f7816d4SBarry Smith   for (i=0; i<m; i++) {
1318*2f7816d4SBarry Smith     jrow = ii[i];
1319*2f7816d4SBarry Smith     n    = ii[i+1] - jrow;
1320*2f7816d4SBarry Smith     sum1  = 0.0;
1321*2f7816d4SBarry Smith     sum2  = 0.0;
1322*2f7816d4SBarry Smith     sum3  = 0.0;
1323*2f7816d4SBarry Smith     sum4  = 0.0;
1324*2f7816d4SBarry Smith     sum5  = 0.0;
1325*2f7816d4SBarry Smith     sum6  = 0.0;
1326*2f7816d4SBarry Smith     sum7  = 0.0;
1327*2f7816d4SBarry Smith     sum8  = 0.0;
1328*2f7816d4SBarry Smith     sum9  = 0.0;
1329*2f7816d4SBarry Smith     sum10 = 0.0;
1330*2f7816d4SBarry Smith     sum11 = 0.0;
1331*2f7816d4SBarry Smith     sum12 = 0.0;
1332*2f7816d4SBarry Smith     sum13 = 0.0;
1333*2f7816d4SBarry Smith     sum14 = 0.0;
1334*2f7816d4SBarry Smith     sum15 = 0.0;
1335*2f7816d4SBarry Smith     sum16 = 0.0;
1336*2f7816d4SBarry Smith     for (j=0; j<n; j++) {
1337*2f7816d4SBarry Smith       sum1  += v[jrow]*x[16*idx[jrow]];
1338*2f7816d4SBarry Smith       sum2  += v[jrow]*x[16*idx[jrow]+1];
1339*2f7816d4SBarry Smith       sum3  += v[jrow]*x[16*idx[jrow]+2];
1340*2f7816d4SBarry Smith       sum4  += v[jrow]*x[16*idx[jrow]+3];
1341*2f7816d4SBarry Smith       sum5  += v[jrow]*x[16*idx[jrow]+4];
1342*2f7816d4SBarry Smith       sum6  += v[jrow]*x[16*idx[jrow]+5];
1343*2f7816d4SBarry Smith       sum7  += v[jrow]*x[16*idx[jrow]+6];
1344*2f7816d4SBarry Smith       sum8  += v[jrow]*x[16*idx[jrow]+7];
1345*2f7816d4SBarry Smith       sum9  += v[jrow]*x[16*idx[jrow]+8];
1346*2f7816d4SBarry Smith       sum10 += v[jrow]*x[16*idx[jrow]+9];
1347*2f7816d4SBarry Smith       sum11 += v[jrow]*x[16*idx[jrow]+10];
1348*2f7816d4SBarry Smith       sum12 += v[jrow]*x[16*idx[jrow]+11];
1349*2f7816d4SBarry Smith       sum13 += v[jrow]*x[16*idx[jrow]+12];
1350*2f7816d4SBarry Smith       sum14 += v[jrow]*x[16*idx[jrow]+13];
1351*2f7816d4SBarry Smith       sum15 += v[jrow]*x[16*idx[jrow]+14];
1352*2f7816d4SBarry Smith       sum16 += v[jrow]*x[16*idx[jrow]+15];
1353*2f7816d4SBarry Smith       jrow++;
1354*2f7816d4SBarry Smith      }
1355*2f7816d4SBarry Smith     y[16*i]    += sum1;
1356*2f7816d4SBarry Smith     y[16*i+1]  += sum2;
1357*2f7816d4SBarry Smith     y[16*i+2]  += sum3;
1358*2f7816d4SBarry Smith     y[16*i+3]  += sum4;
1359*2f7816d4SBarry Smith     y[16*i+4]  += sum5;
1360*2f7816d4SBarry Smith     y[16*i+5]  += sum6;
1361*2f7816d4SBarry Smith     y[16*i+6]  += sum7;
1362*2f7816d4SBarry Smith     y[16*i+7]  += sum8;
1363*2f7816d4SBarry Smith     y[16*i+8]  += sum9;
1364*2f7816d4SBarry Smith     y[16*i+9]  += sum10;
1365*2f7816d4SBarry Smith     y[16*i+10] += sum11;
1366*2f7816d4SBarry Smith     y[16*i+11] += sum12;
1367*2f7816d4SBarry Smith     y[16*i+12] += sum13;
1368*2f7816d4SBarry Smith     y[16*i+13] += sum14;
1369*2f7816d4SBarry Smith     y[16*i+14] += sum15;
1370*2f7816d4SBarry Smith     y[16*i+15] += sum16;
1371*2f7816d4SBarry Smith   }
1372*2f7816d4SBarry Smith 
1373*2f7816d4SBarry Smith   PetscLogFlops(32*a->nz);
1374*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
1375*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
1376*2f7816d4SBarry Smith   PetscFunctionReturn(0);
1377*2f7816d4SBarry Smith }
1378*2f7816d4SBarry Smith 
1379*2f7816d4SBarry Smith #undef __FUNCT__
1380*2f7816d4SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16"
1381*2f7816d4SBarry Smith int MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
1382*2f7816d4SBarry Smith {
1383*2f7816d4SBarry Smith   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1384*2f7816d4SBarry Smith   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1385*2f7816d4SBarry Smith   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1386*2f7816d4SBarry Smith   PetscScalar   alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1387*2f7816d4SBarry Smith   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
1388*2f7816d4SBarry Smith 
1389*2f7816d4SBarry Smith   PetscFunctionBegin;
1390*2f7816d4SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1391*2f7816d4SBarry Smith   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
1392*2f7816d4SBarry Smith   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
1393*2f7816d4SBarry Smith   y = y + shift; /* shift for Fortran start by 1 indexing */
1394*2f7816d4SBarry Smith   for (i=0; i<m; i++) {
1395*2f7816d4SBarry Smith     idx    = a->j + a->i[i] + shift;
1396*2f7816d4SBarry Smith     v      = a->a + a->i[i] + shift;
1397*2f7816d4SBarry Smith     n      = a->i[i+1] - a->i[i];
1398*2f7816d4SBarry Smith     alpha1 = x[16*i];
1399*2f7816d4SBarry Smith     alpha2 = x[16*i+1];
1400*2f7816d4SBarry Smith     alpha3 = x[16*i+2];
1401*2f7816d4SBarry Smith     alpha4 = x[16*i+3];
1402*2f7816d4SBarry Smith     alpha5 = x[16*i+4];
1403*2f7816d4SBarry Smith     alpha6 = x[16*i+5];
1404*2f7816d4SBarry Smith     alpha7 = x[16*i+6];
1405*2f7816d4SBarry Smith     alpha8 = x[16*i+7];
1406*2f7816d4SBarry Smith     alpha9  = x[16*i+8];
1407*2f7816d4SBarry Smith     alpha10 = x[16*i+9];
1408*2f7816d4SBarry Smith     alpha11 = x[16*i+10];
1409*2f7816d4SBarry Smith     alpha12 = x[16*i+11];
1410*2f7816d4SBarry Smith     alpha13 = x[16*i+12];
1411*2f7816d4SBarry Smith     alpha14 = x[16*i+13];
1412*2f7816d4SBarry Smith     alpha15 = x[16*i+14];
1413*2f7816d4SBarry Smith     alpha16 = x[16*i+15];
1414*2f7816d4SBarry Smith     while (n-->0) {
1415*2f7816d4SBarry Smith       y[16*(*idx)]   += alpha1*(*v);
1416*2f7816d4SBarry Smith       y[16*(*idx)+1] += alpha2*(*v);
1417*2f7816d4SBarry Smith       y[16*(*idx)+2] += alpha3*(*v);
1418*2f7816d4SBarry Smith       y[16*(*idx)+3] += alpha4*(*v);
1419*2f7816d4SBarry Smith       y[16*(*idx)+4] += alpha5*(*v);
1420*2f7816d4SBarry Smith       y[16*(*idx)+5] += alpha6*(*v);
1421*2f7816d4SBarry Smith       y[16*(*idx)+6] += alpha7*(*v);
1422*2f7816d4SBarry Smith       y[16*(*idx)+7] += alpha8*(*v);
1423*2f7816d4SBarry Smith       y[16*(*idx)+8]  += alpha9*(*v);
1424*2f7816d4SBarry Smith       y[16*(*idx)+9]  += alpha10*(*v);
1425*2f7816d4SBarry Smith       y[16*(*idx)+10] += alpha11*(*v);
1426*2f7816d4SBarry Smith       y[16*(*idx)+11] += alpha12*(*v);
1427*2f7816d4SBarry Smith       y[16*(*idx)+12] += alpha13*(*v);
1428*2f7816d4SBarry Smith       y[16*(*idx)+13] += alpha14*(*v);
1429*2f7816d4SBarry Smith       y[16*(*idx)+14] += alpha15*(*v);
1430*2f7816d4SBarry Smith       y[16*(*idx)+15] += alpha16*(*v);
1431*2f7816d4SBarry Smith       idx++; v++;
1432*2f7816d4SBarry Smith     }
1433*2f7816d4SBarry Smith   }
1434*2f7816d4SBarry Smith   PetscLogFlops(32*a->nz);
1435*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
1436*2f7816d4SBarry Smith   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
143766ed3db0SBarry Smith   PetscFunctionReturn(0);
143866ed3db0SBarry Smith }
143966ed3db0SBarry Smith 
1440f4a53059SBarry Smith /*===================================================================================*/
14414a2ae208SSatish Balay #undef __FUNCT__
14424a2ae208SSatish Balay #define __FUNCT__ "MatMult_MPIMAIJ_dof"
1443f4a53059SBarry Smith int MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
1444f4a53059SBarry Smith {
14454cb9d9b8SBarry Smith   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1446f4a53059SBarry Smith   int         ierr;
1447f4a53059SBarry Smith   PetscFunctionBegin;
1448f4a53059SBarry Smith 
1449f4a53059SBarry Smith   /* start the scatter */
1450f4a53059SBarry Smith   ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
14514cb9d9b8SBarry Smith   ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr);
1452f4a53059SBarry Smith   ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
14534cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr);
1454f4a53059SBarry Smith   PetscFunctionReturn(0);
1455f4a53059SBarry Smith }
1456f4a53059SBarry Smith 
14574a2ae208SSatish Balay #undef __FUNCT__
14584a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof"
14594cb9d9b8SBarry Smith int MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
14604cb9d9b8SBarry Smith {
14614cb9d9b8SBarry Smith   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
14624cb9d9b8SBarry Smith   int         ierr;
14634cb9d9b8SBarry Smith   PetscFunctionBegin;
14644cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
14654cb9d9b8SBarry Smith   ierr = VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
14664cb9d9b8SBarry Smith   ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr);
14674cb9d9b8SBarry Smith   ierr = VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
14684cb9d9b8SBarry Smith   PetscFunctionReturn(0);
14694cb9d9b8SBarry Smith }
14704cb9d9b8SBarry Smith 
14714a2ae208SSatish Balay #undef __FUNCT__
14724a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof"
1473d72c5c08SBarry Smith int MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
14744cb9d9b8SBarry Smith {
14754cb9d9b8SBarry Smith   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
14764cb9d9b8SBarry Smith   int         ierr;
14774cb9d9b8SBarry Smith   PetscFunctionBegin;
14784cb9d9b8SBarry Smith 
14794cb9d9b8SBarry Smith   /* start the scatter */
14804cb9d9b8SBarry Smith   ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
1481d72c5c08SBarry Smith   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
14824cb9d9b8SBarry Smith   ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
1483d72c5c08SBarry Smith   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,zz);CHKERRQ(ierr);
14844cb9d9b8SBarry Smith   PetscFunctionReturn(0);
14854cb9d9b8SBarry Smith }
14864cb9d9b8SBarry Smith 
14874a2ae208SSatish Balay #undef __FUNCT__
14884a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof"
1489d72c5c08SBarry Smith int MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
14904cb9d9b8SBarry Smith {
14914cb9d9b8SBarry Smith   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
14924cb9d9b8SBarry Smith   int         ierr;
14934cb9d9b8SBarry Smith   PetscFunctionBegin;
14944cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
1495d72c5c08SBarry Smith   ierr = VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
1496d72c5c08SBarry Smith   ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
1497d72c5c08SBarry Smith   ierr = VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
14984cb9d9b8SBarry Smith   PetscFunctionReturn(0);
14994cb9d9b8SBarry Smith }
15004cb9d9b8SBarry Smith 
1501bcc973b7SBarry Smith /* ---------------------------------------------------------------------------------- */
15024a2ae208SSatish Balay #undef __FUNCT__
15034a2ae208SSatish Balay #define __FUNCT__ "MatCreateMAIJ"
1504cd3bbe55SBarry Smith int MatCreateMAIJ(Mat A,int dof,Mat *maij)
150582b1193eSBarry Smith {
1506f4a53059SBarry Smith   int         ierr,size,n;
15074cb9d9b8SBarry Smith   Mat_MPIMAIJ *b;
150882b1193eSBarry Smith   Mat         B;
150982b1193eSBarry Smith 
151082b1193eSBarry Smith   PetscFunctionBegin;
1511d72c5c08SBarry Smith   ierr   = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
1512d72c5c08SBarry Smith 
1513d72c5c08SBarry Smith   if (dof == 1) {
1514d72c5c08SBarry Smith     *maij = A;
1515d72c5c08SBarry Smith   } else {
151683903688SBarry Smith     ierr = MatCreate(A->comm,dof*A->m,dof*A->n,dof*A->M,dof*A->N,&B);CHKERRQ(ierr);
1517cd3bbe55SBarry Smith     B->assembled    = PETSC_TRUE;
1518d72c5c08SBarry Smith 
1519f4a53059SBarry Smith     ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1520f4a53059SBarry Smith     if (size == 1) {
1521b9b97703SBarry Smith       ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr);
15224cb9d9b8SBarry Smith       B->ops->destroy = MatDestroy_SeqMAIJ;
1523b9b97703SBarry Smith       b      = (Mat_MPIMAIJ*)B->data;
1524b9b97703SBarry Smith       b->dof = dof;
15254cb9d9b8SBarry Smith       b->AIJ = A;
1526d72c5c08SBarry Smith       if (dof == 2) {
1527cd3bbe55SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_2;
1528cd3bbe55SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
1529cd3bbe55SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
1530cd3bbe55SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
1531bcc973b7SBarry Smith       } else if (dof == 3) {
1532bcc973b7SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_3;
1533bcc973b7SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
1534bcc973b7SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
1535bcc973b7SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
1536bcc973b7SBarry Smith       } else if (dof == 4) {
1537bcc973b7SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_4;
1538bcc973b7SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
1539bcc973b7SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
1540bcc973b7SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
1541f9fae5adSBarry Smith       } else if (dof == 5) {
1542f9fae5adSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_5;
1543f9fae5adSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
1544f9fae5adSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
1545f9fae5adSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
15466cd98798SBarry Smith       } else if (dof == 6) {
15476cd98798SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_6;
15486cd98798SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
15496cd98798SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
15506cd98798SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
155166ed3db0SBarry Smith       } else if (dof == 8) {
155266ed3db0SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_8;
155366ed3db0SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
155466ed3db0SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
155566ed3db0SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
1556*2f7816d4SBarry Smith       } else if (dof == 16) {
1557*2f7816d4SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_16;
1558*2f7816d4SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
1559*2f7816d4SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
1560*2f7816d4SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
156182b1193eSBarry Smith       } else {
156266ed3db0SBarry Smith         SETERRQ1(1,"Cannot handle a dof of %d. Send request for code to petsc-maint@mcs.anl.gov\n",dof);
156382b1193eSBarry Smith       }
1564f4a53059SBarry Smith     } else {
1565f4a53059SBarry Smith       Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
1566f4a53059SBarry Smith       IS         from,to;
1567f4a53059SBarry Smith       Vec        gvec;
1568f4a53059SBarry Smith       int        *garray,i;
1569f4a53059SBarry Smith 
1570b9b97703SBarry Smith       ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr);
1571d72c5c08SBarry Smith       B->ops->destroy = MatDestroy_MPIMAIJ;
1572b9b97703SBarry Smith       b      = (Mat_MPIMAIJ*)B->data;
1573b9b97703SBarry Smith       b->dof = dof;
1574b9b97703SBarry Smith       b->A   = A;
15754cb9d9b8SBarry Smith       ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr);
15764cb9d9b8SBarry Smith       ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr);
15774cb9d9b8SBarry Smith 
1578f4a53059SBarry Smith       ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
1579f4a53059SBarry Smith       ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr);
1580f4a53059SBarry Smith 
1581f4a53059SBarry Smith       /* create two temporary Index sets for build scatter gather */
1582b0a32e0cSBarry Smith       ierr = PetscMalloc((n+1)*sizeof(int),&garray);CHKERRQ(ierr);
1583f4a53059SBarry Smith       for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
1584f4a53059SBarry Smith       ierr = ISCreateBlock(A->comm,dof,n,garray,&from);CHKERRQ(ierr);
1585f4a53059SBarry Smith       ierr = PetscFree(garray);CHKERRQ(ierr);
1586f4a53059SBarry Smith       ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);
1587f4a53059SBarry Smith 
1588f4a53059SBarry Smith       /* create temporary global vector to generate scatter context */
1589f4a53059SBarry Smith       ierr = VecCreateMPI(A->comm,dof*A->n,dof*A->N,&gvec);CHKERRQ(ierr);
1590f4a53059SBarry Smith 
1591f4a53059SBarry Smith       /* generate the scatter context */
1592f4a53059SBarry Smith       ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
1593f4a53059SBarry Smith 
1594f4a53059SBarry Smith       ierr = ISDestroy(from);CHKERRQ(ierr);
1595f4a53059SBarry Smith       ierr = ISDestroy(to);CHKERRQ(ierr);
1596f4a53059SBarry Smith       ierr = VecDestroy(gvec);CHKERRQ(ierr);
1597f4a53059SBarry Smith 
1598f4a53059SBarry Smith       B->ops->mult             = MatMult_MPIMAIJ_dof;
15994cb9d9b8SBarry Smith       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
16004cb9d9b8SBarry Smith       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
16014cb9d9b8SBarry Smith       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
1602f4a53059SBarry Smith     }
1603cd3bbe55SBarry Smith     *maij = B;
1604d72c5c08SBarry Smith   }
160582b1193eSBarry Smith   PetscFunctionReturn(0);
160682b1193eSBarry Smith }
160782b1193eSBarry Smith 
160882b1193eSBarry Smith 
160982b1193eSBarry Smith 
161082b1193eSBarry Smith 
161182b1193eSBarry Smith 
161282b1193eSBarry Smith 
161382b1193eSBarry Smith 
161482b1193eSBarry Smith 
161582b1193eSBarry Smith 
161682b1193eSBarry Smith 
161782b1193eSBarry Smith 
161882b1193eSBarry Smith 
1619