xref: /petsc/src/mat/impls/maij/maij.c (revision e32f2f54e699d0aa6e733466c00da7e34666fe5e)
1be1d678aSKris Buschelman #define PETSCMAT_DLL
2be1d678aSKris Buschelman 
382b1193eSBarry Smith /*
4cd3bbe55SBarry Smith     Defines the basic matrix operations for the MAIJ  matrix storage format.
5cd3bbe55SBarry Smith   This format is used for restriction and interpolation operations for
6cd3bbe55SBarry Smith   multicomponent problems. It interpolates each component the same way
7cd3bbe55SBarry Smith   independently.
8cd3bbe55SBarry Smith 
9cd3bbe55SBarry Smith      We provide:
10cd3bbe55SBarry Smith          MatMult()
11cd3bbe55SBarry Smith          MatMultTranspose()
12cd3bbe55SBarry Smith          MatMultTransposeAdd()
13cd3bbe55SBarry Smith          MatMultAdd()
14cd3bbe55SBarry Smith           and
15cd3bbe55SBarry Smith          MatCreateMAIJ(Mat,dof,Mat*)
16f4a53059SBarry Smith 
17f4a53059SBarry Smith      This single directory handles both the sequential and parallel codes
1882b1193eSBarry Smith */
1982b1193eSBarry Smith 
20ff585edeSJed Brown #include "../src/mat/impls/maij/maij.h" /*I "petscmat.h" I*/
217c4f633dSBarry Smith #include "../src/mat/utils/freespace.h"
221d8d5f9aSSatish Balay #include "private/vecimpl.h"
2382b1193eSBarry Smith 
244a2ae208SSatish Balay #undef __FUNCT__
254a2ae208SSatish Balay #define __FUNCT__ "MatMAIJGetAIJ"
26ff585edeSJed Brown /*@C
27ff585edeSJed Brown    MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix
28ff585edeSJed Brown 
29ff585edeSJed Brown    Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel
30ff585edeSJed Brown 
31ff585edeSJed Brown    Input Parameter:
32ff585edeSJed Brown .  A - the MAIJ matrix
33ff585edeSJed Brown 
34ff585edeSJed Brown    Output Parameter:
35ff585edeSJed Brown .  B - the AIJ matrix
36ff585edeSJed Brown 
37ff585edeSJed Brown    Level: advanced
38ff585edeSJed Brown 
39ff585edeSJed Brown    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
40ff585edeSJed Brown 
41ff585edeSJed Brown .seealso: MatCreateMAIJ()
42ff585edeSJed Brown @*/
43be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJGetAIJ(Mat A,Mat *B)
44b9b97703SBarry Smith {
45dfbe8321SBarry Smith   PetscErrorCode ierr;
46b9b97703SBarry Smith   PetscTruth     ismpimaij,isseqmaij;
47b9b97703SBarry Smith 
48b9b97703SBarry Smith   PetscFunctionBegin;
49b9b97703SBarry Smith   ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr);
50b9b97703SBarry Smith   ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr);
51b9b97703SBarry Smith   if (ismpimaij) {
52b9b97703SBarry Smith     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
53b9b97703SBarry Smith 
54b9b97703SBarry Smith     *B = b->A;
55b9b97703SBarry Smith   } else if (isseqmaij) {
56b9b97703SBarry Smith     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
57b9b97703SBarry Smith 
58b9b97703SBarry Smith     *B = b->AIJ;
59b9b97703SBarry Smith   } else {
60b9b97703SBarry Smith     *B = A;
61b9b97703SBarry Smith   }
62b9b97703SBarry Smith   PetscFunctionReturn(0);
63b9b97703SBarry Smith }
64b9b97703SBarry Smith 
654a2ae208SSatish Balay #undef __FUNCT__
664a2ae208SSatish Balay #define __FUNCT__ "MatMAIJRedimension"
67ff585edeSJed Brown /*@C
68ff585edeSJed Brown    MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size
69ff585edeSJed Brown 
70ff585edeSJed Brown    Collective
71ff585edeSJed Brown 
72ff585edeSJed Brown    Input Parameter:
73ff585edeSJed Brown +  A - the MAIJ matrix
74ff585edeSJed Brown -  dof - the block size for the new matrix
75ff585edeSJed Brown 
76ff585edeSJed Brown    Output Parameter:
77ff585edeSJed Brown .  B - the new MAIJ matrix
78ff585edeSJed Brown 
79ff585edeSJed Brown    Level: advanced
80ff585edeSJed Brown 
81ff585edeSJed Brown .seealso: MatCreateMAIJ()
82ff585edeSJed Brown @*/
83be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
84b9b97703SBarry Smith {
85dfbe8321SBarry Smith   PetscErrorCode ierr;
863b98c0a2SBarry Smith   Mat            Aij = PETSC_NULL;
87b9b97703SBarry Smith 
88b9b97703SBarry Smith   PetscFunctionBegin;
89b9b97703SBarry Smith   ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr);
90b9b97703SBarry Smith   ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr);
91b9b97703SBarry Smith   PetscFunctionReturn(0);
92b9b97703SBarry Smith }
93b9b97703SBarry Smith 
944a2ae208SSatish Balay #undef __FUNCT__
954a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_SeqMAIJ"
96dfbe8321SBarry Smith PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
9782b1193eSBarry Smith {
98dfbe8321SBarry Smith   PetscErrorCode ierr;
994cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
10082b1193eSBarry Smith 
10182b1193eSBarry Smith   PetscFunctionBegin;
102cd3bbe55SBarry Smith   if (b->AIJ) {
103cd3bbe55SBarry Smith     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
10482b1193eSBarry Smith   }
1054cb9d9b8SBarry Smith   ierr = PetscFree(b);CHKERRQ(ierr);
1064cb9d9b8SBarry Smith   PetscFunctionReturn(0);
1074cb9d9b8SBarry Smith }
1084cb9d9b8SBarry Smith 
1094a2ae208SSatish Balay #undef __FUNCT__
1100fd73130SBarry Smith #define __FUNCT__ "MatView_SeqMAIJ"
1110fd73130SBarry Smith PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
1120fd73130SBarry Smith {
1130fd73130SBarry Smith   PetscErrorCode ierr;
1140fd73130SBarry Smith   Mat            B;
1150fd73130SBarry Smith 
1160fd73130SBarry Smith   PetscFunctionBegin;
117ceb03754SKris Buschelman   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
1180fd73130SBarry Smith   ierr = MatView(B,viewer);CHKERRQ(ierr);
1190fd73130SBarry Smith   ierr = MatDestroy(B);CHKERRQ(ierr);
1200fd73130SBarry Smith   PetscFunctionReturn(0);
1210fd73130SBarry Smith }
1220fd73130SBarry Smith 
1230fd73130SBarry Smith #undef __FUNCT__
1240fd73130SBarry Smith #define __FUNCT__ "MatView_MPIMAIJ"
1250fd73130SBarry Smith PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
1260fd73130SBarry Smith {
1270fd73130SBarry Smith   PetscErrorCode ierr;
1280fd73130SBarry Smith   Mat            B;
1290fd73130SBarry Smith 
1300fd73130SBarry Smith   PetscFunctionBegin;
131ceb03754SKris Buschelman   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
1320fd73130SBarry Smith   ierr = MatView(B,viewer);CHKERRQ(ierr);
1330fd73130SBarry Smith   ierr = MatDestroy(B);CHKERRQ(ierr);
1340fd73130SBarry Smith   PetscFunctionReturn(0);
1350fd73130SBarry Smith }
1360fd73130SBarry Smith 
1370fd73130SBarry Smith #undef __FUNCT__
1384a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_MPIMAIJ"
139dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
1404cb9d9b8SBarry Smith {
141dfbe8321SBarry Smith   PetscErrorCode ierr;
1424cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
1434cb9d9b8SBarry Smith 
1444cb9d9b8SBarry Smith   PetscFunctionBegin;
1454cb9d9b8SBarry Smith   if (b->AIJ) {
1464cb9d9b8SBarry Smith     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
1474cb9d9b8SBarry Smith   }
148f4a53059SBarry Smith   if (b->OAIJ) {
149f4a53059SBarry Smith     ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr);
150f4a53059SBarry Smith   }
151b9b97703SBarry Smith   if (b->A) {
152b9b97703SBarry Smith     ierr = MatDestroy(b->A);CHKERRQ(ierr);
153b9b97703SBarry Smith   }
154f4a53059SBarry Smith   if (b->ctx) {
155f4a53059SBarry Smith     ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr);
156f4a53059SBarry Smith   }
157f4a53059SBarry Smith   if (b->w) {
158f4a53059SBarry Smith     ierr = VecDestroy(b->w);CHKERRQ(ierr);
159f4a53059SBarry Smith   }
160cd3bbe55SBarry Smith   ierr = PetscFree(b);CHKERRQ(ierr);
161dbd8c25aSHong Zhang   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
162b9b97703SBarry Smith   PetscFunctionReturn(0);
163b9b97703SBarry Smith }
164b9b97703SBarry Smith 
1650bad9183SKris Buschelman /*MC
166fafad747SKris Buschelman   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
1670bad9183SKris Buschelman   multicomponent problems, interpolating or restricting each component the same way independently.
1680bad9183SKris Buschelman   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
1690bad9183SKris Buschelman 
1700bad9183SKris Buschelman   Operations provided:
1710bad9183SKris Buschelman . MatMult
1720bad9183SKris Buschelman . MatMultTranspose
1730bad9183SKris Buschelman . MatMultAdd
1740bad9183SKris Buschelman . MatMultTransposeAdd
1750bad9183SKris Buschelman 
1760bad9183SKris Buschelman   Level: advanced
1770bad9183SKris Buschelman 
1780bad9183SKris Buschelman .seealso: MatCreateSeqDense
1790bad9183SKris Buschelman M*/
1800bad9183SKris Buschelman 
18182b1193eSBarry Smith EXTERN_C_BEGIN
1824a2ae208SSatish Balay #undef __FUNCT__
1834a2ae208SSatish Balay #define __FUNCT__ "MatCreate_MAIJ"
184be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MAIJ(Mat A)
18582b1193eSBarry Smith {
186dfbe8321SBarry Smith   PetscErrorCode ierr;
1874cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b;
18864b52464SHong Zhang   PetscMPIInt    size;
18982b1193eSBarry Smith 
19082b1193eSBarry Smith   PetscFunctionBegin;
19138f2d2fdSLisandro Dalcin   ierr     = PetscNewLog(A,Mat_MPIMAIJ,&b);CHKERRQ(ierr);
192b0a32e0cSBarry Smith   A->data  = (void*)b;
193cd3bbe55SBarry Smith   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
194cd3bbe55SBarry Smith   A->mapping          = 0;
195f4a53059SBarry Smith 
196cd3bbe55SBarry Smith   b->AIJ  = 0;
197cd3bbe55SBarry Smith   b->dof  = 0;
198f4a53059SBarry Smith   b->OAIJ = 0;
199f4a53059SBarry Smith   b->ctx  = 0;
200f4a53059SBarry Smith   b->w    = 0;
2017adad957SLisandro Dalcin   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
20264b52464SHong Zhang   if (size == 1){
20364b52464SHong Zhang     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr);
20464b52464SHong Zhang   } else {
20564b52464SHong Zhang     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr);
20664b52464SHong Zhang   }
20782b1193eSBarry Smith   PetscFunctionReturn(0);
20882b1193eSBarry Smith }
20982b1193eSBarry Smith EXTERN_C_END
21082b1193eSBarry Smith 
211cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/
2124a2ae208SSatish Balay #undef __FUNCT__
213b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_2"
214dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
21582b1193eSBarry Smith {
2164cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
217bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
218d2840e09SBarry Smith   const PetscScalar *x,*v;
219d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2;
220dfbe8321SBarry Smith   PetscErrorCode    ierr;
221d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
222d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
22382b1193eSBarry Smith 
224bcc973b7SBarry Smith   PetscFunctionBegin;
225d2840e09SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
2261ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
227bcc973b7SBarry Smith   idx  = a->j;
228bcc973b7SBarry Smith   v    = a->a;
229bcc973b7SBarry Smith   ii   = a->i;
230bcc973b7SBarry Smith 
231bcc973b7SBarry Smith   for (i=0; i<m; i++) {
232bcc973b7SBarry Smith     jrow = ii[i];
233bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
234bcc973b7SBarry Smith     sum1  = 0.0;
235bcc973b7SBarry Smith     sum2  = 0.0;
236b3c51c66SMatthew Knepley     nonzerorow += (n>0);
237bcc973b7SBarry Smith     for (j=0; j<n; j++) {
238bcc973b7SBarry Smith       sum1 += v[jrow]*x[2*idx[jrow]];
239bcc973b7SBarry Smith       sum2 += v[jrow]*x[2*idx[jrow]+1];
240bcc973b7SBarry Smith       jrow++;
241bcc973b7SBarry Smith      }
242bcc973b7SBarry Smith     y[2*i]   = sum1;
243bcc973b7SBarry Smith     y[2*i+1] = sum2;
244bcc973b7SBarry Smith   }
245bcc973b7SBarry Smith 
246dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr);
247d2840e09SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
2481ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
24982b1193eSBarry Smith   PetscFunctionReturn(0);
25082b1193eSBarry Smith }
251bcc973b7SBarry Smith 
2524a2ae208SSatish Balay #undef __FUNCT__
253b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2"
254dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
25582b1193eSBarry Smith {
2564cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
257bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
258d2840e09SBarry Smith   const PetscScalar *x,*v;
259d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2;
260dfbe8321SBarry Smith   PetscErrorCode    ierr;
261d2840e09SBarry Smith   PetscInt          n,i;
262d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
26382b1193eSBarry Smith 
264bcc973b7SBarry Smith   PetscFunctionBegin;
265d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
266d2840e09SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
2671ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
268bfec09a0SHong Zhang 
269bcc973b7SBarry Smith   for (i=0; i<m; i++) {
270bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
271bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
272bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
273bcc973b7SBarry Smith     alpha1 = x[2*i];
274bcc973b7SBarry Smith     alpha2 = x[2*i+1];
275bcc973b7SBarry Smith     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
276bcc973b7SBarry Smith   }
277dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
278d2840e09SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
2791ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
28082b1193eSBarry Smith   PetscFunctionReturn(0);
28182b1193eSBarry Smith }
282bcc973b7SBarry Smith 
2834a2ae208SSatish Balay #undef __FUNCT__
284b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_2"
285dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
28682b1193eSBarry Smith {
2874cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
288bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
289d2840e09SBarry Smith   const PetscScalar *x,*v;
290d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2;
291dfbe8321SBarry Smith   PetscErrorCode    ierr;
292b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
293d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
29482b1193eSBarry Smith 
295bcc973b7SBarry Smith   PetscFunctionBegin;
296f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
297d2840e09SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
2981ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
299bcc973b7SBarry Smith   idx  = a->j;
300bcc973b7SBarry Smith   v    = a->a;
301bcc973b7SBarry Smith   ii   = a->i;
302bcc973b7SBarry Smith 
303bcc973b7SBarry Smith   for (i=0; i<m; i++) {
304bcc973b7SBarry Smith     jrow = ii[i];
305bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
306bcc973b7SBarry Smith     sum1  = 0.0;
307bcc973b7SBarry Smith     sum2  = 0.0;
308bcc973b7SBarry Smith     for (j=0; j<n; j++) {
309bcc973b7SBarry Smith       sum1 += v[jrow]*x[2*idx[jrow]];
310bcc973b7SBarry Smith       sum2 += v[jrow]*x[2*idx[jrow]+1];
311bcc973b7SBarry Smith       jrow++;
312bcc973b7SBarry Smith      }
313bcc973b7SBarry Smith     y[2*i]   += sum1;
314bcc973b7SBarry Smith     y[2*i+1] += sum2;
315bcc973b7SBarry Smith   }
316bcc973b7SBarry Smith 
317dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
318d2840e09SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
3191ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
320bcc973b7SBarry Smith   PetscFunctionReturn(0);
32182b1193eSBarry Smith }
3224a2ae208SSatish Balay #undef __FUNCT__
323b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2"
324dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
32582b1193eSBarry Smith {
3264cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
327bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
328d2840e09SBarry Smith   const PetscScalar *x,*v;
329d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2;
330dfbe8321SBarry Smith   PetscErrorCode    ierr;
331d2840e09SBarry Smith   PetscInt          n,i;
332d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
33382b1193eSBarry Smith 
334bcc973b7SBarry Smith   PetscFunctionBegin;
335f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
336d2840e09SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
3371ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
338bfec09a0SHong Zhang 
339bcc973b7SBarry Smith   for (i=0; i<m; i++) {
340bfec09a0SHong Zhang     idx   = a->j + a->i[i] ;
341bfec09a0SHong Zhang     v     = a->a + a->i[i] ;
342bcc973b7SBarry Smith     n     = a->i[i+1] - a->i[i];
343bcc973b7SBarry Smith     alpha1 = x[2*i];
344bcc973b7SBarry Smith     alpha2 = x[2*i+1];
345bcc973b7SBarry Smith     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
346bcc973b7SBarry Smith   }
347dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
348d2840e09SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
3491ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
350bcc973b7SBarry Smith   PetscFunctionReturn(0);
35182b1193eSBarry Smith }
352cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/
3534a2ae208SSatish Balay #undef __FUNCT__
354b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_3"
355dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
356bcc973b7SBarry Smith {
3574cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
358bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
359d2840e09SBarry Smith   const PetscScalar *x,*v;
360d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3;
361dfbe8321SBarry Smith   PetscErrorCode    ierr;
362d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
363d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
36482b1193eSBarry Smith 
365bcc973b7SBarry Smith   PetscFunctionBegin;
366d2840e09SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
3671ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
368bcc973b7SBarry Smith   idx  = a->j;
369bcc973b7SBarry Smith   v    = a->a;
370bcc973b7SBarry Smith   ii   = a->i;
371bcc973b7SBarry Smith 
372bcc973b7SBarry Smith   for (i=0; i<m; i++) {
373bcc973b7SBarry Smith     jrow = ii[i];
374bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
375bcc973b7SBarry Smith     sum1  = 0.0;
376bcc973b7SBarry Smith     sum2  = 0.0;
377bcc973b7SBarry Smith     sum3  = 0.0;
378b3c51c66SMatthew Knepley     nonzerorow += (n>0);
379bcc973b7SBarry Smith     for (j=0; j<n; j++) {
380bcc973b7SBarry Smith       sum1 += v[jrow]*x[3*idx[jrow]];
381bcc973b7SBarry Smith       sum2 += v[jrow]*x[3*idx[jrow]+1];
382bcc973b7SBarry Smith       sum3 += v[jrow]*x[3*idx[jrow]+2];
383bcc973b7SBarry Smith       jrow++;
384bcc973b7SBarry Smith      }
385bcc973b7SBarry Smith     y[3*i]   = sum1;
386bcc973b7SBarry Smith     y[3*i+1] = sum2;
387bcc973b7SBarry Smith     y[3*i+2] = sum3;
388bcc973b7SBarry Smith   }
389bcc973b7SBarry Smith 
390dc0b31edSSatish Balay   ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr);
391d2840e09SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
3921ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
393bcc973b7SBarry Smith   PetscFunctionReturn(0);
394bcc973b7SBarry Smith }
395bcc973b7SBarry Smith 
3964a2ae208SSatish Balay #undef __FUNCT__
397b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3"
398dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
399bcc973b7SBarry Smith {
4004cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
401bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
402d2840e09SBarry Smith   const PetscScalar *x,*v;
403d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3;
404dfbe8321SBarry Smith   PetscErrorCode    ierr;
405d2840e09SBarry Smith   PetscInt          n,i;
406d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
407bcc973b7SBarry Smith 
408bcc973b7SBarry Smith   PetscFunctionBegin;
409d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
410d2840e09SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
4111ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
412bfec09a0SHong Zhang 
413bcc973b7SBarry Smith   for (i=0; i<m; i++) {
414bfec09a0SHong Zhang     idx    = a->j + a->i[i];
415bfec09a0SHong Zhang     v      = a->a + a->i[i];
416bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
417bcc973b7SBarry Smith     alpha1 = x[3*i];
418bcc973b7SBarry Smith     alpha2 = x[3*i+1];
419bcc973b7SBarry Smith     alpha3 = x[3*i+2];
420bcc973b7SBarry Smith     while (n-->0) {
421bcc973b7SBarry Smith       y[3*(*idx)]   += alpha1*(*v);
422bcc973b7SBarry Smith       y[3*(*idx)+1] += alpha2*(*v);
423bcc973b7SBarry Smith       y[3*(*idx)+2] += alpha3*(*v);
424bcc973b7SBarry Smith       idx++; v++;
425bcc973b7SBarry Smith     }
426bcc973b7SBarry Smith   }
427dc0b31edSSatish Balay   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
428d2840e09SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
4291ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
430bcc973b7SBarry Smith   PetscFunctionReturn(0);
431bcc973b7SBarry Smith }
432bcc973b7SBarry Smith 
4334a2ae208SSatish Balay #undef __FUNCT__
434b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_3"
435dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
436bcc973b7SBarry Smith {
4374cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
438bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
439d2840e09SBarry Smith   const PetscScalar *x,*v;
440d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3;
441dfbe8321SBarry Smith   PetscErrorCode    ierr;
442b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
443d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
444bcc973b7SBarry Smith 
445bcc973b7SBarry Smith   PetscFunctionBegin;
446f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
447d2840e09SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
4481ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
449bcc973b7SBarry Smith   idx  = a->j;
450bcc973b7SBarry Smith   v    = a->a;
451bcc973b7SBarry Smith   ii   = a->i;
452bcc973b7SBarry Smith 
453bcc973b7SBarry Smith   for (i=0; i<m; i++) {
454bcc973b7SBarry Smith     jrow = ii[i];
455bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
456bcc973b7SBarry Smith     sum1  = 0.0;
457bcc973b7SBarry Smith     sum2  = 0.0;
458bcc973b7SBarry Smith     sum3  = 0.0;
459bcc973b7SBarry Smith     for (j=0; j<n; j++) {
460bcc973b7SBarry Smith       sum1 += v[jrow]*x[3*idx[jrow]];
461bcc973b7SBarry Smith       sum2 += v[jrow]*x[3*idx[jrow]+1];
462bcc973b7SBarry Smith       sum3 += v[jrow]*x[3*idx[jrow]+2];
463bcc973b7SBarry Smith       jrow++;
464bcc973b7SBarry Smith      }
465bcc973b7SBarry Smith     y[3*i]   += sum1;
466bcc973b7SBarry Smith     y[3*i+1] += sum2;
467bcc973b7SBarry Smith     y[3*i+2] += sum3;
468bcc973b7SBarry Smith   }
469bcc973b7SBarry Smith 
470dc0b31edSSatish Balay   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
471d2840e09SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
4721ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
473bcc973b7SBarry Smith   PetscFunctionReturn(0);
474bcc973b7SBarry Smith }
4754a2ae208SSatish Balay #undef __FUNCT__
476b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3"
477dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
478bcc973b7SBarry Smith {
4794cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
480bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
481d2840e09SBarry Smith   const PetscScalar *x,*v;
482d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3;
483dfbe8321SBarry Smith   PetscErrorCode    ierr;
484d2840e09SBarry Smith   PetscInt          n,i;
485d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
486bcc973b7SBarry Smith 
487bcc973b7SBarry Smith   PetscFunctionBegin;
488f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
489d2840e09SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
4901ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
491bcc973b7SBarry Smith   for (i=0; i<m; i++) {
492bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
493bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
494bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
495bcc973b7SBarry Smith     alpha1 = x[3*i];
496bcc973b7SBarry Smith     alpha2 = x[3*i+1];
497bcc973b7SBarry Smith     alpha3 = x[3*i+2];
498bcc973b7SBarry Smith     while (n-->0) {
499bcc973b7SBarry Smith       y[3*(*idx)]   += alpha1*(*v);
500bcc973b7SBarry Smith       y[3*(*idx)+1] += alpha2*(*v);
501bcc973b7SBarry Smith       y[3*(*idx)+2] += alpha3*(*v);
502bcc973b7SBarry Smith       idx++; v++;
503bcc973b7SBarry Smith     }
504bcc973b7SBarry Smith   }
505dc0b31edSSatish Balay   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
506d2840e09SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
5071ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
508bcc973b7SBarry Smith   PetscFunctionReturn(0);
509bcc973b7SBarry Smith }
510bcc973b7SBarry Smith 
511bcc973b7SBarry Smith /* ------------------------------------------------------------------------------*/
5124a2ae208SSatish Balay #undef __FUNCT__
513b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_4"
514dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
515bcc973b7SBarry Smith {
5164cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
517bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
518d2840e09SBarry Smith   const PetscScalar *x,*v;
519d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4;
520dfbe8321SBarry Smith   PetscErrorCode    ierr;
521d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
522d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
523bcc973b7SBarry Smith 
524bcc973b7SBarry Smith   PetscFunctionBegin;
525d2840e09SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
5261ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
527bcc973b7SBarry Smith   idx  = a->j;
528bcc973b7SBarry Smith   v    = a->a;
529bcc973b7SBarry Smith   ii   = a->i;
530bcc973b7SBarry Smith 
531bcc973b7SBarry Smith   for (i=0; i<m; i++) {
532bcc973b7SBarry Smith     jrow = ii[i];
533bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
534bcc973b7SBarry Smith     sum1  = 0.0;
535bcc973b7SBarry Smith     sum2  = 0.0;
536bcc973b7SBarry Smith     sum3  = 0.0;
537bcc973b7SBarry Smith     sum4  = 0.0;
538b3c51c66SMatthew Knepley     nonzerorow += (n>0);
539bcc973b7SBarry Smith     for (j=0; j<n; j++) {
540bcc973b7SBarry Smith       sum1 += v[jrow]*x[4*idx[jrow]];
541bcc973b7SBarry Smith       sum2 += v[jrow]*x[4*idx[jrow]+1];
542bcc973b7SBarry Smith       sum3 += v[jrow]*x[4*idx[jrow]+2];
543bcc973b7SBarry Smith       sum4 += v[jrow]*x[4*idx[jrow]+3];
544bcc973b7SBarry Smith       jrow++;
545bcc973b7SBarry Smith      }
546bcc973b7SBarry Smith     y[4*i]   = sum1;
547bcc973b7SBarry Smith     y[4*i+1] = sum2;
548bcc973b7SBarry Smith     y[4*i+2] = sum3;
549bcc973b7SBarry Smith     y[4*i+3] = sum4;
550bcc973b7SBarry Smith   }
551bcc973b7SBarry Smith 
552dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr);
553d2840e09SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
5541ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
555bcc973b7SBarry Smith   PetscFunctionReturn(0);
556bcc973b7SBarry Smith }
557bcc973b7SBarry Smith 
5584a2ae208SSatish Balay #undef __FUNCT__
559b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4"
560dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
561bcc973b7SBarry Smith {
5624cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
563bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
564d2840e09SBarry Smith   const PetscScalar *x,*v;
565d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
566dfbe8321SBarry Smith   PetscErrorCode    ierr;
567d2840e09SBarry Smith   PetscInt          n,i;
568d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
569bcc973b7SBarry Smith 
570bcc973b7SBarry Smith   PetscFunctionBegin;
571d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
572d2840e09SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
5731ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
574bcc973b7SBarry Smith   for (i=0; i<m; i++) {
575bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
576bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
577bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
578bcc973b7SBarry Smith     alpha1 = x[4*i];
579bcc973b7SBarry Smith     alpha2 = x[4*i+1];
580bcc973b7SBarry Smith     alpha3 = x[4*i+2];
581bcc973b7SBarry Smith     alpha4 = x[4*i+3];
582bcc973b7SBarry Smith     while (n-->0) {
583bcc973b7SBarry Smith       y[4*(*idx)]   += alpha1*(*v);
584bcc973b7SBarry Smith       y[4*(*idx)+1] += alpha2*(*v);
585bcc973b7SBarry Smith       y[4*(*idx)+2] += alpha3*(*v);
586bcc973b7SBarry Smith       y[4*(*idx)+3] += alpha4*(*v);
587bcc973b7SBarry Smith       idx++; v++;
588bcc973b7SBarry Smith     }
589bcc973b7SBarry Smith   }
590dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
591d2840e09SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
5921ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
593bcc973b7SBarry Smith   PetscFunctionReturn(0);
594bcc973b7SBarry Smith }
595bcc973b7SBarry Smith 
5964a2ae208SSatish Balay #undef __FUNCT__
597b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_4"
598dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
599bcc973b7SBarry Smith {
6004cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
601f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
602d2840e09SBarry Smith   const PetscScalar *x,*v;
603d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4;
604dfbe8321SBarry Smith   PetscErrorCode    ierr;
605b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
606d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
607f9fae5adSBarry Smith 
608f9fae5adSBarry Smith   PetscFunctionBegin;
609f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
610d2840e09SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
6111ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
612f9fae5adSBarry Smith   idx  = a->j;
613f9fae5adSBarry Smith   v    = a->a;
614f9fae5adSBarry Smith   ii   = a->i;
615f9fae5adSBarry Smith 
616f9fae5adSBarry Smith   for (i=0; i<m; i++) {
617f9fae5adSBarry Smith     jrow = ii[i];
618f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
619f9fae5adSBarry Smith     sum1  = 0.0;
620f9fae5adSBarry Smith     sum2  = 0.0;
621f9fae5adSBarry Smith     sum3  = 0.0;
622f9fae5adSBarry Smith     sum4  = 0.0;
623f9fae5adSBarry Smith     for (j=0; j<n; j++) {
624f9fae5adSBarry Smith       sum1 += v[jrow]*x[4*idx[jrow]];
625f9fae5adSBarry Smith       sum2 += v[jrow]*x[4*idx[jrow]+1];
626f9fae5adSBarry Smith       sum3 += v[jrow]*x[4*idx[jrow]+2];
627f9fae5adSBarry Smith       sum4 += v[jrow]*x[4*idx[jrow]+3];
628f9fae5adSBarry Smith       jrow++;
629f9fae5adSBarry Smith      }
630f9fae5adSBarry Smith     y[4*i]   += sum1;
631f9fae5adSBarry Smith     y[4*i+1] += sum2;
632f9fae5adSBarry Smith     y[4*i+2] += sum3;
633f9fae5adSBarry Smith     y[4*i+3] += sum4;
634f9fae5adSBarry Smith   }
635f9fae5adSBarry Smith 
636dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
637d2840e09SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
6381ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
639f9fae5adSBarry Smith   PetscFunctionReturn(0);
640bcc973b7SBarry Smith }
6414a2ae208SSatish Balay #undef __FUNCT__
642b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4"
643dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
644bcc973b7SBarry Smith {
6454cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
646f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
647d2840e09SBarry Smith   const PetscScalar *x,*v;
648d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
649dfbe8321SBarry Smith   PetscErrorCode    ierr;
650d2840e09SBarry Smith   PetscInt          n,i;
651d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
652f9fae5adSBarry Smith 
653f9fae5adSBarry Smith   PetscFunctionBegin;
654f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
655d2840e09SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
6561ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
657bfec09a0SHong Zhang 
658f9fae5adSBarry Smith   for (i=0; i<m; i++) {
659bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
660bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
661f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
662f9fae5adSBarry Smith     alpha1 = x[4*i];
663f9fae5adSBarry Smith     alpha2 = x[4*i+1];
664f9fae5adSBarry Smith     alpha3 = x[4*i+2];
665f9fae5adSBarry Smith     alpha4 = x[4*i+3];
666f9fae5adSBarry Smith     while (n-->0) {
667f9fae5adSBarry Smith       y[4*(*idx)]   += alpha1*(*v);
668f9fae5adSBarry Smith       y[4*(*idx)+1] += alpha2*(*v);
669f9fae5adSBarry Smith       y[4*(*idx)+2] += alpha3*(*v);
670f9fae5adSBarry Smith       y[4*(*idx)+3] += alpha4*(*v);
671f9fae5adSBarry Smith       idx++; v++;
672f9fae5adSBarry Smith     }
673f9fae5adSBarry Smith   }
674dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
675d2840e09SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
6761ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
677f9fae5adSBarry Smith   PetscFunctionReturn(0);
678f9fae5adSBarry Smith }
679f9fae5adSBarry Smith /* ------------------------------------------------------------------------------*/
6806cd98798SBarry Smith 
6814a2ae208SSatish Balay #undef __FUNCT__
682b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_5"
683dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
684f9fae5adSBarry Smith {
6854cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
686f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
687d2840e09SBarry Smith   const PetscScalar *x,*v;
688d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
689dfbe8321SBarry Smith   PetscErrorCode    ierr;
690d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
691d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
692f9fae5adSBarry Smith 
693f9fae5adSBarry Smith   PetscFunctionBegin;
694d2840e09SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
6951ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
696f9fae5adSBarry Smith   idx  = a->j;
697f9fae5adSBarry Smith   v    = a->a;
698f9fae5adSBarry Smith   ii   = a->i;
699f9fae5adSBarry Smith 
700f9fae5adSBarry Smith   for (i=0; i<m; i++) {
701f9fae5adSBarry Smith     jrow = ii[i];
702f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
703f9fae5adSBarry Smith     sum1  = 0.0;
704f9fae5adSBarry Smith     sum2  = 0.0;
705f9fae5adSBarry Smith     sum3  = 0.0;
706f9fae5adSBarry Smith     sum4  = 0.0;
707f9fae5adSBarry Smith     sum5  = 0.0;
708b3c51c66SMatthew Knepley     nonzerorow += (n>0);
709f9fae5adSBarry Smith     for (j=0; j<n; j++) {
710f9fae5adSBarry Smith       sum1 += v[jrow]*x[5*idx[jrow]];
711f9fae5adSBarry Smith       sum2 += v[jrow]*x[5*idx[jrow]+1];
712f9fae5adSBarry Smith       sum3 += v[jrow]*x[5*idx[jrow]+2];
713f9fae5adSBarry Smith       sum4 += v[jrow]*x[5*idx[jrow]+3];
714f9fae5adSBarry Smith       sum5 += v[jrow]*x[5*idx[jrow]+4];
715f9fae5adSBarry Smith       jrow++;
716f9fae5adSBarry Smith      }
717f9fae5adSBarry Smith     y[5*i]   = sum1;
718f9fae5adSBarry Smith     y[5*i+1] = sum2;
719f9fae5adSBarry Smith     y[5*i+2] = sum3;
720f9fae5adSBarry Smith     y[5*i+3] = sum4;
721f9fae5adSBarry Smith     y[5*i+4] = sum5;
722f9fae5adSBarry Smith   }
723f9fae5adSBarry Smith 
724dc0b31edSSatish Balay   ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr);
725d2840e09SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
7261ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
727f9fae5adSBarry Smith   PetscFunctionReturn(0);
728f9fae5adSBarry Smith }
729f9fae5adSBarry Smith 
7304a2ae208SSatish Balay #undef __FUNCT__
731b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5"
732dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
733f9fae5adSBarry Smith {
7344cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
735f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
736d2840e09SBarry Smith   const PetscScalar *x,*v;
737d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
738dfbe8321SBarry Smith   PetscErrorCode    ierr;
739d2840e09SBarry Smith   PetscInt          n,i;
740d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
741f9fae5adSBarry Smith 
742f9fae5adSBarry Smith   PetscFunctionBegin;
743d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
744d2840e09SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
7451ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
746bfec09a0SHong Zhang 
747f9fae5adSBarry Smith   for (i=0; i<m; i++) {
748bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
749bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
750f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
751f9fae5adSBarry Smith     alpha1 = x[5*i];
752f9fae5adSBarry Smith     alpha2 = x[5*i+1];
753f9fae5adSBarry Smith     alpha3 = x[5*i+2];
754f9fae5adSBarry Smith     alpha4 = x[5*i+3];
755f9fae5adSBarry Smith     alpha5 = x[5*i+4];
756f9fae5adSBarry Smith     while (n-->0) {
757f9fae5adSBarry Smith       y[5*(*idx)]   += alpha1*(*v);
758f9fae5adSBarry Smith       y[5*(*idx)+1] += alpha2*(*v);
759f9fae5adSBarry Smith       y[5*(*idx)+2] += alpha3*(*v);
760f9fae5adSBarry Smith       y[5*(*idx)+3] += alpha4*(*v);
761f9fae5adSBarry Smith       y[5*(*idx)+4] += alpha5*(*v);
762f9fae5adSBarry Smith       idx++; v++;
763f9fae5adSBarry Smith     }
764f9fae5adSBarry Smith   }
765dc0b31edSSatish Balay   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
766d2840e09SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
7671ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
768f9fae5adSBarry Smith   PetscFunctionReturn(0);
769f9fae5adSBarry Smith }
770f9fae5adSBarry Smith 
7714a2ae208SSatish Balay #undef __FUNCT__
772b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_5"
773dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
774f9fae5adSBarry Smith {
7754cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
776f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
777d2840e09SBarry Smith   const PetscScalar *x,*v;
778d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
779dfbe8321SBarry Smith   PetscErrorCode    ierr;
780b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
781d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
782f9fae5adSBarry Smith 
783f9fae5adSBarry Smith   PetscFunctionBegin;
784f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
785d2840e09SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
7861ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
787f9fae5adSBarry Smith   idx  = a->j;
788f9fae5adSBarry Smith   v    = a->a;
789f9fae5adSBarry Smith   ii   = a->i;
790f9fae5adSBarry Smith 
791f9fae5adSBarry Smith   for (i=0; i<m; i++) {
792f9fae5adSBarry Smith     jrow = ii[i];
793f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
794f9fae5adSBarry Smith     sum1  = 0.0;
795f9fae5adSBarry Smith     sum2  = 0.0;
796f9fae5adSBarry Smith     sum3  = 0.0;
797f9fae5adSBarry Smith     sum4  = 0.0;
798f9fae5adSBarry Smith     sum5  = 0.0;
799f9fae5adSBarry Smith     for (j=0; j<n; j++) {
800f9fae5adSBarry Smith       sum1 += v[jrow]*x[5*idx[jrow]];
801f9fae5adSBarry Smith       sum2 += v[jrow]*x[5*idx[jrow]+1];
802f9fae5adSBarry Smith       sum3 += v[jrow]*x[5*idx[jrow]+2];
803f9fae5adSBarry Smith       sum4 += v[jrow]*x[5*idx[jrow]+3];
804f9fae5adSBarry Smith       sum5 += v[jrow]*x[5*idx[jrow]+4];
805f9fae5adSBarry Smith       jrow++;
806f9fae5adSBarry Smith      }
807f9fae5adSBarry Smith     y[5*i]   += sum1;
808f9fae5adSBarry Smith     y[5*i+1] += sum2;
809f9fae5adSBarry Smith     y[5*i+2] += sum3;
810f9fae5adSBarry Smith     y[5*i+3] += sum4;
811f9fae5adSBarry Smith     y[5*i+4] += sum5;
812f9fae5adSBarry Smith   }
813f9fae5adSBarry Smith 
814dc0b31edSSatish Balay   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
815d2840e09SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
8161ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
817f9fae5adSBarry Smith   PetscFunctionReturn(0);
818f9fae5adSBarry Smith }
819f9fae5adSBarry Smith 
8204a2ae208SSatish Balay #undef __FUNCT__
821b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5"
822dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
823f9fae5adSBarry Smith {
8244cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
825f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
826d2840e09SBarry Smith   const PetscScalar *x,*v;
827d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
828dfbe8321SBarry Smith   PetscErrorCode    ierr;
829d2840e09SBarry Smith   PetscInt          n,i;
830d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
831f9fae5adSBarry Smith 
832f9fae5adSBarry Smith   PetscFunctionBegin;
833f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
834d2840e09SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
8351ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
836bfec09a0SHong Zhang 
837f9fae5adSBarry Smith   for (i=0; i<m; i++) {
838bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
839bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
840f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
841f9fae5adSBarry Smith     alpha1 = x[5*i];
842f9fae5adSBarry Smith     alpha2 = x[5*i+1];
843f9fae5adSBarry Smith     alpha3 = x[5*i+2];
844f9fae5adSBarry Smith     alpha4 = x[5*i+3];
845f9fae5adSBarry Smith     alpha5 = x[5*i+4];
846f9fae5adSBarry Smith     while (n-->0) {
847f9fae5adSBarry Smith       y[5*(*idx)]   += alpha1*(*v);
848f9fae5adSBarry Smith       y[5*(*idx)+1] += alpha2*(*v);
849f9fae5adSBarry Smith       y[5*(*idx)+2] += alpha3*(*v);
850f9fae5adSBarry Smith       y[5*(*idx)+3] += alpha4*(*v);
851f9fae5adSBarry Smith       y[5*(*idx)+4] += alpha5*(*v);
852f9fae5adSBarry Smith       idx++; v++;
853f9fae5adSBarry Smith     }
854f9fae5adSBarry Smith   }
855dc0b31edSSatish Balay   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
856d2840e09SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
8571ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
858f9fae5adSBarry Smith   PetscFunctionReturn(0);
859bcc973b7SBarry Smith }
860bcc973b7SBarry Smith 
8616cd98798SBarry Smith /* ------------------------------------------------------------------------------*/
8624a2ae208SSatish Balay #undef __FUNCT__
863b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_6"
864dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
8656cd98798SBarry Smith {
8666cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
8676cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
868d2840e09SBarry Smith   const PetscScalar *x,*v;
869d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
870dfbe8321SBarry Smith   PetscErrorCode    ierr;
871d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
872d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
8736cd98798SBarry Smith 
8746cd98798SBarry Smith   PetscFunctionBegin;
875d2840e09SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
8761ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
8776cd98798SBarry Smith   idx  = a->j;
8786cd98798SBarry Smith   v    = a->a;
8796cd98798SBarry Smith   ii   = a->i;
8806cd98798SBarry Smith 
8816cd98798SBarry Smith   for (i=0; i<m; i++) {
8826cd98798SBarry Smith     jrow = ii[i];
8836cd98798SBarry Smith     n    = ii[i+1] - jrow;
8846cd98798SBarry Smith     sum1  = 0.0;
8856cd98798SBarry Smith     sum2  = 0.0;
8866cd98798SBarry Smith     sum3  = 0.0;
8876cd98798SBarry Smith     sum4  = 0.0;
8886cd98798SBarry Smith     sum5  = 0.0;
8896cd98798SBarry Smith     sum6  = 0.0;
890b3c51c66SMatthew Knepley     nonzerorow += (n>0);
8916cd98798SBarry Smith     for (j=0; j<n; j++) {
8926cd98798SBarry Smith       sum1 += v[jrow]*x[6*idx[jrow]];
8936cd98798SBarry Smith       sum2 += v[jrow]*x[6*idx[jrow]+1];
8946cd98798SBarry Smith       sum3 += v[jrow]*x[6*idx[jrow]+2];
8956cd98798SBarry Smith       sum4 += v[jrow]*x[6*idx[jrow]+3];
8966cd98798SBarry Smith       sum5 += v[jrow]*x[6*idx[jrow]+4];
8976cd98798SBarry Smith       sum6 += v[jrow]*x[6*idx[jrow]+5];
8986cd98798SBarry Smith       jrow++;
8996cd98798SBarry Smith      }
9006cd98798SBarry Smith     y[6*i]   = sum1;
9016cd98798SBarry Smith     y[6*i+1] = sum2;
9026cd98798SBarry Smith     y[6*i+2] = sum3;
9036cd98798SBarry Smith     y[6*i+3] = sum4;
9046cd98798SBarry Smith     y[6*i+4] = sum5;
9056cd98798SBarry Smith     y[6*i+5] = sum6;
9066cd98798SBarry Smith   }
9076cd98798SBarry Smith 
908dc0b31edSSatish Balay   ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr);
909d2840e09SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
9101ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
9116cd98798SBarry Smith   PetscFunctionReturn(0);
9126cd98798SBarry Smith }
9136cd98798SBarry Smith 
9144a2ae208SSatish Balay #undef __FUNCT__
915b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6"
916dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
9176cd98798SBarry Smith {
9186cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
9196cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
920d2840e09SBarry Smith   const PetscScalar *x,*v;
921d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
922dfbe8321SBarry Smith   PetscErrorCode    ierr;
923d2840e09SBarry Smith   PetscInt          n,i;
924d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
9256cd98798SBarry Smith 
9266cd98798SBarry Smith   PetscFunctionBegin;
927d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
928d2840e09SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
9291ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
930bfec09a0SHong Zhang 
9316cd98798SBarry Smith   for (i=0; i<m; i++) {
932bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
933bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
9346cd98798SBarry Smith     n      = a->i[i+1] - a->i[i];
9356cd98798SBarry Smith     alpha1 = x[6*i];
9366cd98798SBarry Smith     alpha2 = x[6*i+1];
9376cd98798SBarry Smith     alpha3 = x[6*i+2];
9386cd98798SBarry Smith     alpha4 = x[6*i+3];
9396cd98798SBarry Smith     alpha5 = x[6*i+4];
9406cd98798SBarry Smith     alpha6 = x[6*i+5];
9416cd98798SBarry Smith     while (n-->0) {
9426cd98798SBarry Smith       y[6*(*idx)]   += alpha1*(*v);
9436cd98798SBarry Smith       y[6*(*idx)+1] += alpha2*(*v);
9446cd98798SBarry Smith       y[6*(*idx)+2] += alpha3*(*v);
9456cd98798SBarry Smith       y[6*(*idx)+3] += alpha4*(*v);
9466cd98798SBarry Smith       y[6*(*idx)+4] += alpha5*(*v);
9476cd98798SBarry Smith       y[6*(*idx)+5] += alpha6*(*v);
9486cd98798SBarry Smith       idx++; v++;
9496cd98798SBarry Smith     }
9506cd98798SBarry Smith   }
951dc0b31edSSatish Balay   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
952d2840e09SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
9531ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
9546cd98798SBarry Smith   PetscFunctionReturn(0);
9556cd98798SBarry Smith }
9566cd98798SBarry Smith 
9574a2ae208SSatish Balay #undef __FUNCT__
958b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_6"
959dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
9606cd98798SBarry Smith {
9616cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
9626cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
963d2840e09SBarry Smith   const PetscScalar *x,*v;
964d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
965dfbe8321SBarry Smith   PetscErrorCode    ierr;
966b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
967d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
9686cd98798SBarry Smith 
9696cd98798SBarry Smith   PetscFunctionBegin;
9706cd98798SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
971d2840e09SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
9721ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
9736cd98798SBarry Smith   idx  = a->j;
9746cd98798SBarry Smith   v    = a->a;
9756cd98798SBarry Smith   ii   = a->i;
9766cd98798SBarry Smith 
9776cd98798SBarry Smith   for (i=0; i<m; i++) {
9786cd98798SBarry Smith     jrow = ii[i];
9796cd98798SBarry Smith     n    = ii[i+1] - jrow;
9806cd98798SBarry Smith     sum1  = 0.0;
9816cd98798SBarry Smith     sum2  = 0.0;
9826cd98798SBarry Smith     sum3  = 0.0;
9836cd98798SBarry Smith     sum4  = 0.0;
9846cd98798SBarry Smith     sum5  = 0.0;
9856cd98798SBarry Smith     sum6  = 0.0;
9866cd98798SBarry Smith     for (j=0; j<n; j++) {
9876cd98798SBarry Smith       sum1 += v[jrow]*x[6*idx[jrow]];
9886cd98798SBarry Smith       sum2 += v[jrow]*x[6*idx[jrow]+1];
9896cd98798SBarry Smith       sum3 += v[jrow]*x[6*idx[jrow]+2];
9906cd98798SBarry Smith       sum4 += v[jrow]*x[6*idx[jrow]+3];
9916cd98798SBarry Smith       sum5 += v[jrow]*x[6*idx[jrow]+4];
9926cd98798SBarry Smith       sum6 += v[jrow]*x[6*idx[jrow]+5];
9936cd98798SBarry Smith       jrow++;
9946cd98798SBarry Smith      }
9956cd98798SBarry Smith     y[6*i]   += sum1;
9966cd98798SBarry Smith     y[6*i+1] += sum2;
9976cd98798SBarry Smith     y[6*i+2] += sum3;
9986cd98798SBarry Smith     y[6*i+3] += sum4;
9996cd98798SBarry Smith     y[6*i+4] += sum5;
10006cd98798SBarry Smith     y[6*i+5] += sum6;
10016cd98798SBarry Smith   }
10026cd98798SBarry Smith 
1003dc0b31edSSatish Balay   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
1004d2840e09SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
10051ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
10066cd98798SBarry Smith   PetscFunctionReturn(0);
10076cd98798SBarry Smith }
10086cd98798SBarry Smith 
10094a2ae208SSatish Balay #undef __FUNCT__
1010b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6"
1011dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
10126cd98798SBarry Smith {
10136cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
10146cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1015d2840e09SBarry Smith   const PetscScalar *x,*v;
1016d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
1017dfbe8321SBarry Smith   PetscErrorCode    ierr;
1018d2840e09SBarry Smith   PetscInt          n,i;
1019d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
10206cd98798SBarry Smith 
10216cd98798SBarry Smith   PetscFunctionBegin;
10226cd98798SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1023d2840e09SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
10241ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1025bfec09a0SHong Zhang 
10266cd98798SBarry Smith   for (i=0; i<m; i++) {
1027bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
1028bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
10296cd98798SBarry Smith     n      = a->i[i+1] - a->i[i];
10306cd98798SBarry Smith     alpha1 = x[6*i];
10316cd98798SBarry Smith     alpha2 = x[6*i+1];
10326cd98798SBarry Smith     alpha3 = x[6*i+2];
10336cd98798SBarry Smith     alpha4 = x[6*i+3];
10346cd98798SBarry Smith     alpha5 = x[6*i+4];
10356cd98798SBarry Smith     alpha6 = x[6*i+5];
10366cd98798SBarry Smith     while (n-->0) {
10376cd98798SBarry Smith       y[6*(*idx)]   += alpha1*(*v);
10386cd98798SBarry Smith       y[6*(*idx)+1] += alpha2*(*v);
10396cd98798SBarry Smith       y[6*(*idx)+2] += alpha3*(*v);
10406cd98798SBarry Smith       y[6*(*idx)+3] += alpha4*(*v);
10416cd98798SBarry Smith       y[6*(*idx)+4] += alpha5*(*v);
10426cd98798SBarry Smith       y[6*(*idx)+5] += alpha6*(*v);
10436cd98798SBarry Smith       idx++; v++;
10446cd98798SBarry Smith     }
10456cd98798SBarry Smith   }
1046dc0b31edSSatish Balay   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
1047d2840e09SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
10481ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
10496cd98798SBarry Smith   PetscFunctionReturn(0);
10506cd98798SBarry Smith }
10516cd98798SBarry Smith 
105266ed3db0SBarry Smith /* ------------------------------------------------------------------------------*/
105366ed3db0SBarry Smith #undef __FUNCT__
1054ed8eea03SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_7"
1055ed8eea03SBarry Smith PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1056ed8eea03SBarry Smith {
1057ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1058ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1059d2840e09SBarry Smith   const PetscScalar *x,*v;
1060d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1061ed8eea03SBarry Smith   PetscErrorCode    ierr;
1062d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1063d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
1064ed8eea03SBarry Smith 
1065ed8eea03SBarry Smith   PetscFunctionBegin;
1066d2840e09SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
1067ed8eea03SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1068ed8eea03SBarry Smith   idx  = a->j;
1069ed8eea03SBarry Smith   v    = a->a;
1070ed8eea03SBarry Smith   ii   = a->i;
1071ed8eea03SBarry Smith 
1072ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1073ed8eea03SBarry Smith     jrow = ii[i];
1074ed8eea03SBarry Smith     n    = ii[i+1] - jrow;
1075ed8eea03SBarry Smith     sum1  = 0.0;
1076ed8eea03SBarry Smith     sum2  = 0.0;
1077ed8eea03SBarry Smith     sum3  = 0.0;
1078ed8eea03SBarry Smith     sum4  = 0.0;
1079ed8eea03SBarry Smith     sum5  = 0.0;
1080ed8eea03SBarry Smith     sum6  = 0.0;
1081ed8eea03SBarry Smith     sum7  = 0.0;
1082b3c51c66SMatthew Knepley     nonzerorow += (n>0);
1083ed8eea03SBarry Smith     for (j=0; j<n; j++) {
1084ed8eea03SBarry Smith       sum1 += v[jrow]*x[7*idx[jrow]];
1085ed8eea03SBarry Smith       sum2 += v[jrow]*x[7*idx[jrow]+1];
1086ed8eea03SBarry Smith       sum3 += v[jrow]*x[7*idx[jrow]+2];
1087ed8eea03SBarry Smith       sum4 += v[jrow]*x[7*idx[jrow]+3];
1088ed8eea03SBarry Smith       sum5 += v[jrow]*x[7*idx[jrow]+4];
1089ed8eea03SBarry Smith       sum6 += v[jrow]*x[7*idx[jrow]+5];
1090ed8eea03SBarry Smith       sum7 += v[jrow]*x[7*idx[jrow]+6];
1091ed8eea03SBarry Smith       jrow++;
1092ed8eea03SBarry Smith      }
1093ed8eea03SBarry Smith     y[7*i]   = sum1;
1094ed8eea03SBarry Smith     y[7*i+1] = sum2;
1095ed8eea03SBarry Smith     y[7*i+2] = sum3;
1096ed8eea03SBarry Smith     y[7*i+3] = sum4;
1097ed8eea03SBarry Smith     y[7*i+4] = sum5;
1098ed8eea03SBarry Smith     y[7*i+5] = sum6;
1099ed8eea03SBarry Smith     y[7*i+6] = sum7;
1100ed8eea03SBarry Smith   }
1101ed8eea03SBarry Smith 
1102dc0b31edSSatish Balay   ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr);
1103d2840e09SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
1104ed8eea03SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1105ed8eea03SBarry Smith   PetscFunctionReturn(0);
1106ed8eea03SBarry Smith }
1107ed8eea03SBarry Smith 
1108ed8eea03SBarry Smith #undef __FUNCT__
1109ed8eea03SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_7"
1110ed8eea03SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1111ed8eea03SBarry Smith {
1112ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1113ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1114d2840e09SBarry Smith   const PetscScalar *x,*v;
1115d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1116ed8eea03SBarry Smith   PetscErrorCode    ierr;
1117d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1118d2840e09SBarry Smith   PetscInt          n,i;
1119ed8eea03SBarry Smith 
1120ed8eea03SBarry Smith   PetscFunctionBegin;
1121d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1122d2840e09SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
1123ed8eea03SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1124ed8eea03SBarry Smith 
1125ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1126ed8eea03SBarry Smith     idx    = a->j + a->i[i] ;
1127ed8eea03SBarry Smith     v      = a->a + a->i[i] ;
1128ed8eea03SBarry Smith     n      = a->i[i+1] - a->i[i];
1129ed8eea03SBarry Smith     alpha1 = x[7*i];
1130ed8eea03SBarry Smith     alpha2 = x[7*i+1];
1131ed8eea03SBarry Smith     alpha3 = x[7*i+2];
1132ed8eea03SBarry Smith     alpha4 = x[7*i+3];
1133ed8eea03SBarry Smith     alpha5 = x[7*i+4];
1134ed8eea03SBarry Smith     alpha6 = x[7*i+5];
1135ed8eea03SBarry Smith     alpha7 = x[7*i+6];
1136ed8eea03SBarry Smith     while (n-->0) {
1137ed8eea03SBarry Smith       y[7*(*idx)]   += alpha1*(*v);
1138ed8eea03SBarry Smith       y[7*(*idx)+1] += alpha2*(*v);
1139ed8eea03SBarry Smith       y[7*(*idx)+2] += alpha3*(*v);
1140ed8eea03SBarry Smith       y[7*(*idx)+3] += alpha4*(*v);
1141ed8eea03SBarry Smith       y[7*(*idx)+4] += alpha5*(*v);
1142ed8eea03SBarry Smith       y[7*(*idx)+5] += alpha6*(*v);
1143ed8eea03SBarry Smith       y[7*(*idx)+6] += alpha7*(*v);
1144ed8eea03SBarry Smith       idx++; v++;
1145ed8eea03SBarry Smith     }
1146ed8eea03SBarry Smith   }
1147dc0b31edSSatish Balay   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
1148d2840e09SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
1149ed8eea03SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1150ed8eea03SBarry Smith   PetscFunctionReturn(0);
1151ed8eea03SBarry Smith }
1152ed8eea03SBarry Smith 
1153ed8eea03SBarry Smith #undef __FUNCT__
1154ed8eea03SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_7"
1155ed8eea03SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1156ed8eea03SBarry Smith {
1157ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1158ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1159d2840e09SBarry Smith   const PetscScalar *x,*v;
1160d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1161ed8eea03SBarry Smith   PetscErrorCode    ierr;
1162d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1163ed8eea03SBarry Smith   PetscInt          n,i,jrow,j;
1164ed8eea03SBarry Smith 
1165ed8eea03SBarry Smith   PetscFunctionBegin;
1166ed8eea03SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1167d2840e09SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
1168ed8eea03SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1169ed8eea03SBarry Smith   idx  = a->j;
1170ed8eea03SBarry Smith   v    = a->a;
1171ed8eea03SBarry Smith   ii   = a->i;
1172ed8eea03SBarry Smith 
1173ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1174ed8eea03SBarry Smith     jrow = ii[i];
1175ed8eea03SBarry Smith     n    = ii[i+1] - jrow;
1176ed8eea03SBarry Smith     sum1  = 0.0;
1177ed8eea03SBarry Smith     sum2  = 0.0;
1178ed8eea03SBarry Smith     sum3  = 0.0;
1179ed8eea03SBarry Smith     sum4  = 0.0;
1180ed8eea03SBarry Smith     sum5  = 0.0;
1181ed8eea03SBarry Smith     sum6  = 0.0;
1182ed8eea03SBarry Smith     sum7  = 0.0;
1183ed8eea03SBarry Smith     for (j=0; j<n; j++) {
1184ed8eea03SBarry Smith       sum1 += v[jrow]*x[7*idx[jrow]];
1185ed8eea03SBarry Smith       sum2 += v[jrow]*x[7*idx[jrow]+1];
1186ed8eea03SBarry Smith       sum3 += v[jrow]*x[7*idx[jrow]+2];
1187ed8eea03SBarry Smith       sum4 += v[jrow]*x[7*idx[jrow]+3];
1188ed8eea03SBarry Smith       sum5 += v[jrow]*x[7*idx[jrow]+4];
1189ed8eea03SBarry Smith       sum6 += v[jrow]*x[7*idx[jrow]+5];
1190ed8eea03SBarry Smith       sum7 += v[jrow]*x[7*idx[jrow]+6];
1191ed8eea03SBarry Smith       jrow++;
1192ed8eea03SBarry Smith      }
1193ed8eea03SBarry Smith     y[7*i]   += sum1;
1194ed8eea03SBarry Smith     y[7*i+1] += sum2;
1195ed8eea03SBarry Smith     y[7*i+2] += sum3;
1196ed8eea03SBarry Smith     y[7*i+3] += sum4;
1197ed8eea03SBarry Smith     y[7*i+4] += sum5;
1198ed8eea03SBarry Smith     y[7*i+5] += sum6;
1199ed8eea03SBarry Smith     y[7*i+6] += sum7;
1200ed8eea03SBarry Smith   }
1201ed8eea03SBarry Smith 
1202dc0b31edSSatish Balay   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
1203d2840e09SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
1204ed8eea03SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1205ed8eea03SBarry Smith   PetscFunctionReturn(0);
1206ed8eea03SBarry Smith }
1207ed8eea03SBarry Smith 
1208ed8eea03SBarry Smith #undef __FUNCT__
1209ed8eea03SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_7"
1210ed8eea03SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1211ed8eea03SBarry Smith {
1212ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1213ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1214d2840e09SBarry Smith   const PetscScalar *x,*v;
1215d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1216ed8eea03SBarry Smith   PetscErrorCode    ierr;
1217d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1218d2840e09SBarry Smith   PetscInt          n,i;
1219ed8eea03SBarry Smith 
1220ed8eea03SBarry Smith   PetscFunctionBegin;
1221ed8eea03SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1222d2840e09SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
1223ed8eea03SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1224ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1225ed8eea03SBarry Smith     idx    = a->j + a->i[i] ;
1226ed8eea03SBarry Smith     v      = a->a + a->i[i] ;
1227ed8eea03SBarry Smith     n      = a->i[i+1] - a->i[i];
1228ed8eea03SBarry Smith     alpha1 = x[7*i];
1229ed8eea03SBarry Smith     alpha2 = x[7*i+1];
1230ed8eea03SBarry Smith     alpha3 = x[7*i+2];
1231ed8eea03SBarry Smith     alpha4 = x[7*i+3];
1232ed8eea03SBarry Smith     alpha5 = x[7*i+4];
1233ed8eea03SBarry Smith     alpha6 = x[7*i+5];
1234ed8eea03SBarry Smith     alpha7 = x[7*i+6];
1235ed8eea03SBarry Smith     while (n-->0) {
1236ed8eea03SBarry Smith       y[7*(*idx)]   += alpha1*(*v);
1237ed8eea03SBarry Smith       y[7*(*idx)+1] += alpha2*(*v);
1238ed8eea03SBarry Smith       y[7*(*idx)+2] += alpha3*(*v);
1239ed8eea03SBarry Smith       y[7*(*idx)+3] += alpha4*(*v);
1240ed8eea03SBarry Smith       y[7*(*idx)+4] += alpha5*(*v);
1241ed8eea03SBarry Smith       y[7*(*idx)+5] += alpha6*(*v);
1242ed8eea03SBarry Smith       y[7*(*idx)+6] += alpha7*(*v);
1243ed8eea03SBarry Smith       idx++; v++;
1244ed8eea03SBarry Smith     }
1245ed8eea03SBarry Smith   }
1246dc0b31edSSatish Balay   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
1247d2840e09SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
1248ed8eea03SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1249ed8eea03SBarry Smith   PetscFunctionReturn(0);
1250ed8eea03SBarry Smith }
1251ed8eea03SBarry Smith 
1252ed8eea03SBarry Smith #undef __FUNCT__
125366ed3db0SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_8"
1254dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
125566ed3db0SBarry Smith {
125666ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
125766ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1258d2840e09SBarry Smith   const PetscScalar *x,*v;
1259d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1260dfbe8321SBarry Smith   PetscErrorCode    ierr;
1261d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1262d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
126366ed3db0SBarry Smith 
126466ed3db0SBarry Smith   PetscFunctionBegin;
1265d2840e09SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
12661ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
126766ed3db0SBarry Smith   idx  = a->j;
126866ed3db0SBarry Smith   v    = a->a;
126966ed3db0SBarry Smith   ii   = a->i;
127066ed3db0SBarry Smith 
127166ed3db0SBarry Smith   for (i=0; i<m; i++) {
127266ed3db0SBarry Smith     jrow = ii[i];
127366ed3db0SBarry Smith     n    = ii[i+1] - jrow;
127466ed3db0SBarry Smith     sum1  = 0.0;
127566ed3db0SBarry Smith     sum2  = 0.0;
127666ed3db0SBarry Smith     sum3  = 0.0;
127766ed3db0SBarry Smith     sum4  = 0.0;
127866ed3db0SBarry Smith     sum5  = 0.0;
127966ed3db0SBarry Smith     sum6  = 0.0;
128066ed3db0SBarry Smith     sum7  = 0.0;
128166ed3db0SBarry Smith     sum8  = 0.0;
1282b3c51c66SMatthew Knepley     nonzerorow += (n>0);
128366ed3db0SBarry Smith     for (j=0; j<n; j++) {
128466ed3db0SBarry Smith       sum1 += v[jrow]*x[8*idx[jrow]];
128566ed3db0SBarry Smith       sum2 += v[jrow]*x[8*idx[jrow]+1];
128666ed3db0SBarry Smith       sum3 += v[jrow]*x[8*idx[jrow]+2];
128766ed3db0SBarry Smith       sum4 += v[jrow]*x[8*idx[jrow]+3];
128866ed3db0SBarry Smith       sum5 += v[jrow]*x[8*idx[jrow]+4];
128966ed3db0SBarry Smith       sum6 += v[jrow]*x[8*idx[jrow]+5];
129066ed3db0SBarry Smith       sum7 += v[jrow]*x[8*idx[jrow]+6];
129166ed3db0SBarry Smith       sum8 += v[jrow]*x[8*idx[jrow]+7];
129266ed3db0SBarry Smith       jrow++;
129366ed3db0SBarry Smith      }
129466ed3db0SBarry Smith     y[8*i]   = sum1;
129566ed3db0SBarry Smith     y[8*i+1] = sum2;
129666ed3db0SBarry Smith     y[8*i+2] = sum3;
129766ed3db0SBarry Smith     y[8*i+3] = sum4;
129866ed3db0SBarry Smith     y[8*i+4] = sum5;
129966ed3db0SBarry Smith     y[8*i+5] = sum6;
130066ed3db0SBarry Smith     y[8*i+6] = sum7;
130166ed3db0SBarry Smith     y[8*i+7] = sum8;
130266ed3db0SBarry Smith   }
130366ed3db0SBarry Smith 
1304dc0b31edSSatish Balay   ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr);
1305d2840e09SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
13061ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
130766ed3db0SBarry Smith   PetscFunctionReturn(0);
130866ed3db0SBarry Smith }
130966ed3db0SBarry Smith 
131066ed3db0SBarry Smith #undef __FUNCT__
131166ed3db0SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8"
1312dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
131366ed3db0SBarry Smith {
131466ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
131566ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1316d2840e09SBarry Smith   const PetscScalar *x,*v;
1317d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1318dfbe8321SBarry Smith   PetscErrorCode    ierr;
1319d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1320d2840e09SBarry Smith   PetscInt          n,i;
132166ed3db0SBarry Smith 
132266ed3db0SBarry Smith   PetscFunctionBegin;
1323d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1324d2840e09SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
13251ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1326bfec09a0SHong Zhang 
132766ed3db0SBarry Smith   for (i=0; i<m; i++) {
1328bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
1329bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
133066ed3db0SBarry Smith     n      = a->i[i+1] - a->i[i];
133166ed3db0SBarry Smith     alpha1 = x[8*i];
133266ed3db0SBarry Smith     alpha2 = x[8*i+1];
133366ed3db0SBarry Smith     alpha3 = x[8*i+2];
133466ed3db0SBarry Smith     alpha4 = x[8*i+3];
133566ed3db0SBarry Smith     alpha5 = x[8*i+4];
133666ed3db0SBarry Smith     alpha6 = x[8*i+5];
133766ed3db0SBarry Smith     alpha7 = x[8*i+6];
133866ed3db0SBarry Smith     alpha8 = x[8*i+7];
133966ed3db0SBarry Smith     while (n-->0) {
134066ed3db0SBarry Smith       y[8*(*idx)]   += alpha1*(*v);
134166ed3db0SBarry Smith       y[8*(*idx)+1] += alpha2*(*v);
134266ed3db0SBarry Smith       y[8*(*idx)+2] += alpha3*(*v);
134366ed3db0SBarry Smith       y[8*(*idx)+3] += alpha4*(*v);
134466ed3db0SBarry Smith       y[8*(*idx)+4] += alpha5*(*v);
134566ed3db0SBarry Smith       y[8*(*idx)+5] += alpha6*(*v);
134666ed3db0SBarry Smith       y[8*(*idx)+6] += alpha7*(*v);
134766ed3db0SBarry Smith       y[8*(*idx)+7] += alpha8*(*v);
134866ed3db0SBarry Smith       idx++; v++;
134966ed3db0SBarry Smith     }
135066ed3db0SBarry Smith   }
1351dc0b31edSSatish Balay   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
1352d2840e09SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
13531ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
135466ed3db0SBarry Smith   PetscFunctionReturn(0);
135566ed3db0SBarry Smith }
135666ed3db0SBarry Smith 
135766ed3db0SBarry Smith #undef __FUNCT__
135866ed3db0SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_8"
1359dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
136066ed3db0SBarry Smith {
136166ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
136266ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1363d2840e09SBarry Smith   const PetscScalar *x,*v;
1364d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1365dfbe8321SBarry Smith   PetscErrorCode    ierr;
1366d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1367b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
136866ed3db0SBarry Smith 
136966ed3db0SBarry Smith   PetscFunctionBegin;
137066ed3db0SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1371d2840e09SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
13721ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
137366ed3db0SBarry Smith   idx  = a->j;
137466ed3db0SBarry Smith   v    = a->a;
137566ed3db0SBarry Smith   ii   = a->i;
137666ed3db0SBarry Smith 
137766ed3db0SBarry Smith   for (i=0; i<m; i++) {
137866ed3db0SBarry Smith     jrow = ii[i];
137966ed3db0SBarry Smith     n    = ii[i+1] - jrow;
138066ed3db0SBarry Smith     sum1  = 0.0;
138166ed3db0SBarry Smith     sum2  = 0.0;
138266ed3db0SBarry Smith     sum3  = 0.0;
138366ed3db0SBarry Smith     sum4  = 0.0;
138466ed3db0SBarry Smith     sum5  = 0.0;
138566ed3db0SBarry Smith     sum6  = 0.0;
138666ed3db0SBarry Smith     sum7  = 0.0;
138766ed3db0SBarry Smith     sum8  = 0.0;
138866ed3db0SBarry Smith     for (j=0; j<n; j++) {
138966ed3db0SBarry Smith       sum1 += v[jrow]*x[8*idx[jrow]];
139066ed3db0SBarry Smith       sum2 += v[jrow]*x[8*idx[jrow]+1];
139166ed3db0SBarry Smith       sum3 += v[jrow]*x[8*idx[jrow]+2];
139266ed3db0SBarry Smith       sum4 += v[jrow]*x[8*idx[jrow]+3];
139366ed3db0SBarry Smith       sum5 += v[jrow]*x[8*idx[jrow]+4];
139466ed3db0SBarry Smith       sum6 += v[jrow]*x[8*idx[jrow]+5];
139566ed3db0SBarry Smith       sum7 += v[jrow]*x[8*idx[jrow]+6];
139666ed3db0SBarry Smith       sum8 += v[jrow]*x[8*idx[jrow]+7];
139766ed3db0SBarry Smith       jrow++;
139866ed3db0SBarry Smith      }
139966ed3db0SBarry Smith     y[8*i]   += sum1;
140066ed3db0SBarry Smith     y[8*i+1] += sum2;
140166ed3db0SBarry Smith     y[8*i+2] += sum3;
140266ed3db0SBarry Smith     y[8*i+3] += sum4;
140366ed3db0SBarry Smith     y[8*i+4] += sum5;
140466ed3db0SBarry Smith     y[8*i+5] += sum6;
140566ed3db0SBarry Smith     y[8*i+6] += sum7;
140666ed3db0SBarry Smith     y[8*i+7] += sum8;
140766ed3db0SBarry Smith   }
140866ed3db0SBarry Smith 
1409dc0b31edSSatish Balay   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
1410d2840e09SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
14111ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
141266ed3db0SBarry Smith   PetscFunctionReturn(0);
141366ed3db0SBarry Smith }
141466ed3db0SBarry Smith 
141566ed3db0SBarry Smith #undef __FUNCT__
141666ed3db0SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8"
1417dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
141866ed3db0SBarry Smith {
141966ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
142066ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1421d2840e09SBarry Smith   const PetscScalar *x,*v;
1422d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1423dfbe8321SBarry Smith   PetscErrorCode    ierr;
1424d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1425d2840e09SBarry Smith   PetscInt          n,i;
142666ed3db0SBarry Smith 
142766ed3db0SBarry Smith   PetscFunctionBegin;
142866ed3db0SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1429d2840e09SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
14301ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
143166ed3db0SBarry Smith   for (i=0; i<m; i++) {
1432bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
1433bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
143466ed3db0SBarry Smith     n      = a->i[i+1] - a->i[i];
143566ed3db0SBarry Smith     alpha1 = x[8*i];
143666ed3db0SBarry Smith     alpha2 = x[8*i+1];
143766ed3db0SBarry Smith     alpha3 = x[8*i+2];
143866ed3db0SBarry Smith     alpha4 = x[8*i+3];
143966ed3db0SBarry Smith     alpha5 = x[8*i+4];
144066ed3db0SBarry Smith     alpha6 = x[8*i+5];
144166ed3db0SBarry Smith     alpha7 = x[8*i+6];
144266ed3db0SBarry Smith     alpha8 = x[8*i+7];
144366ed3db0SBarry Smith     while (n-->0) {
144466ed3db0SBarry Smith       y[8*(*idx)]   += alpha1*(*v);
144566ed3db0SBarry Smith       y[8*(*idx)+1] += alpha2*(*v);
144666ed3db0SBarry Smith       y[8*(*idx)+2] += alpha3*(*v);
144766ed3db0SBarry Smith       y[8*(*idx)+3] += alpha4*(*v);
144866ed3db0SBarry Smith       y[8*(*idx)+4] += alpha5*(*v);
144966ed3db0SBarry Smith       y[8*(*idx)+5] += alpha6*(*v);
145066ed3db0SBarry Smith       y[8*(*idx)+6] += alpha7*(*v);
145166ed3db0SBarry Smith       y[8*(*idx)+7] += alpha8*(*v);
145266ed3db0SBarry Smith       idx++; v++;
145366ed3db0SBarry Smith     }
145466ed3db0SBarry Smith   }
1455dc0b31edSSatish Balay   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
1456d2840e09SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
14571ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
14582f7816d4SBarry Smith   PetscFunctionReturn(0);
14592f7816d4SBarry Smith }
14602f7816d4SBarry Smith 
14612b692628SMatthew Knepley /* ------------------------------------------------------------------------------*/
14622b692628SMatthew Knepley #undef __FUNCT__
14632b692628SMatthew Knepley #define __FUNCT__ "MatMult_SeqMAIJ_9"
1464dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
14652b692628SMatthew Knepley {
14662b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
14672b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1468d2840e09SBarry Smith   const PetscScalar *x,*v;
1469d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1470dfbe8321SBarry Smith   PetscErrorCode    ierr;
1471d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1472d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
14732b692628SMatthew Knepley 
14742b692628SMatthew Knepley   PetscFunctionBegin;
1475d2840e09SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
14761ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
14772b692628SMatthew Knepley   idx  = a->j;
14782b692628SMatthew Knepley   v    = a->a;
14792b692628SMatthew Knepley   ii   = a->i;
14802b692628SMatthew Knepley 
14812b692628SMatthew Knepley   for (i=0; i<m; i++) {
14822b692628SMatthew Knepley     jrow = ii[i];
14832b692628SMatthew Knepley     n    = ii[i+1] - jrow;
14842b692628SMatthew Knepley     sum1  = 0.0;
14852b692628SMatthew Knepley     sum2  = 0.0;
14862b692628SMatthew Knepley     sum3  = 0.0;
14872b692628SMatthew Knepley     sum4  = 0.0;
14882b692628SMatthew Knepley     sum5  = 0.0;
14892b692628SMatthew Knepley     sum6  = 0.0;
14902b692628SMatthew Knepley     sum7  = 0.0;
14912b692628SMatthew Knepley     sum8  = 0.0;
14922b692628SMatthew Knepley     sum9  = 0.0;
1493b3c51c66SMatthew Knepley     nonzerorow += (n>0);
14942b692628SMatthew Knepley     for (j=0; j<n; j++) {
14952b692628SMatthew Knepley       sum1 += v[jrow]*x[9*idx[jrow]];
14962b692628SMatthew Knepley       sum2 += v[jrow]*x[9*idx[jrow]+1];
14972b692628SMatthew Knepley       sum3 += v[jrow]*x[9*idx[jrow]+2];
14982b692628SMatthew Knepley       sum4 += v[jrow]*x[9*idx[jrow]+3];
14992b692628SMatthew Knepley       sum5 += v[jrow]*x[9*idx[jrow]+4];
15002b692628SMatthew Knepley       sum6 += v[jrow]*x[9*idx[jrow]+5];
15012b692628SMatthew Knepley       sum7 += v[jrow]*x[9*idx[jrow]+6];
15022b692628SMatthew Knepley       sum8 += v[jrow]*x[9*idx[jrow]+7];
15032b692628SMatthew Knepley       sum9 += v[jrow]*x[9*idx[jrow]+8];
15042b692628SMatthew Knepley       jrow++;
15052b692628SMatthew Knepley      }
15062b692628SMatthew Knepley     y[9*i]   = sum1;
15072b692628SMatthew Knepley     y[9*i+1] = sum2;
15082b692628SMatthew Knepley     y[9*i+2] = sum3;
15092b692628SMatthew Knepley     y[9*i+3] = sum4;
15102b692628SMatthew Knepley     y[9*i+4] = sum5;
15112b692628SMatthew Knepley     y[9*i+5] = sum6;
15122b692628SMatthew Knepley     y[9*i+6] = sum7;
15132b692628SMatthew Knepley     y[9*i+7] = sum8;
15142b692628SMatthew Knepley     y[9*i+8] = sum9;
15152b692628SMatthew Knepley   }
15162b692628SMatthew Knepley 
1517dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr);
1518d2840e09SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
15191ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
15202b692628SMatthew Knepley   PetscFunctionReturn(0);
15212b692628SMatthew Knepley }
15222b692628SMatthew Knepley 
1523b9cda22cSBarry Smith /* ------------------------------------------------------------------------------*/
1524b9cda22cSBarry Smith 
15252b692628SMatthew Knepley #undef __FUNCT__
15262b692628SMatthew Knepley #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9"
1527dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
15282b692628SMatthew Knepley {
15292b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
15302b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1531d2840e09SBarry Smith   const PetscScalar *x,*v;
1532d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1533dfbe8321SBarry Smith   PetscErrorCode    ierr;
1534d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1535d2840e09SBarry Smith   PetscInt          n,i;
15362b692628SMatthew Knepley 
15372b692628SMatthew Knepley   PetscFunctionBegin;
1538d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1539d2840e09SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
15401ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
15412b692628SMatthew Knepley 
15422b692628SMatthew Knepley   for (i=0; i<m; i++) {
15432b692628SMatthew Knepley     idx    = a->j + a->i[i] ;
15442b692628SMatthew Knepley     v      = a->a + a->i[i] ;
15452b692628SMatthew Knepley     n      = a->i[i+1] - a->i[i];
15462b692628SMatthew Knepley     alpha1 = x[9*i];
15472b692628SMatthew Knepley     alpha2 = x[9*i+1];
15482b692628SMatthew Knepley     alpha3 = x[9*i+2];
15492b692628SMatthew Knepley     alpha4 = x[9*i+3];
15502b692628SMatthew Knepley     alpha5 = x[9*i+4];
15512b692628SMatthew Knepley     alpha6 = x[9*i+5];
15522b692628SMatthew Knepley     alpha7 = x[9*i+6];
15532b692628SMatthew Knepley     alpha8 = x[9*i+7];
15542f6bd0c6SMatthew Knepley     alpha9 = x[9*i+8];
15552b692628SMatthew Knepley     while (n-->0) {
15562b692628SMatthew Knepley       y[9*(*idx)]   += alpha1*(*v);
15572b692628SMatthew Knepley       y[9*(*idx)+1] += alpha2*(*v);
15582b692628SMatthew Knepley       y[9*(*idx)+2] += alpha3*(*v);
15592b692628SMatthew Knepley       y[9*(*idx)+3] += alpha4*(*v);
15602b692628SMatthew Knepley       y[9*(*idx)+4] += alpha5*(*v);
15612b692628SMatthew Knepley       y[9*(*idx)+5] += alpha6*(*v);
15622b692628SMatthew Knepley       y[9*(*idx)+6] += alpha7*(*v);
15632b692628SMatthew Knepley       y[9*(*idx)+7] += alpha8*(*v);
15642b692628SMatthew Knepley       y[9*(*idx)+8] += alpha9*(*v);
15652b692628SMatthew Knepley       idx++; v++;
15662b692628SMatthew Knepley     }
15672b692628SMatthew Knepley   }
1568dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
1569d2840e09SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
15701ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
15712b692628SMatthew Knepley   PetscFunctionReturn(0);
15722b692628SMatthew Knepley }
15732b692628SMatthew Knepley 
15742b692628SMatthew Knepley #undef __FUNCT__
15752b692628SMatthew Knepley #define __FUNCT__ "MatMultAdd_SeqMAIJ_9"
1576dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
15772b692628SMatthew Knepley {
15782b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
15792b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1580d2840e09SBarry Smith   const PetscScalar *x,*v;
1581d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1582dfbe8321SBarry Smith   PetscErrorCode    ierr;
1583d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1584b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
15852b692628SMatthew Knepley 
15862b692628SMatthew Knepley   PetscFunctionBegin;
15872b692628SMatthew Knepley   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1588d2840e09SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
15891ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
15902b692628SMatthew Knepley   idx  = a->j;
15912b692628SMatthew Knepley   v    = a->a;
15922b692628SMatthew Knepley   ii   = a->i;
15932b692628SMatthew Knepley 
15942b692628SMatthew Knepley   for (i=0; i<m; i++) {
15952b692628SMatthew Knepley     jrow = ii[i];
15962b692628SMatthew Knepley     n    = ii[i+1] - jrow;
15972b692628SMatthew Knepley     sum1  = 0.0;
15982b692628SMatthew Knepley     sum2  = 0.0;
15992b692628SMatthew Knepley     sum3  = 0.0;
16002b692628SMatthew Knepley     sum4  = 0.0;
16012b692628SMatthew Knepley     sum5  = 0.0;
16022b692628SMatthew Knepley     sum6  = 0.0;
16032b692628SMatthew Knepley     sum7  = 0.0;
16042b692628SMatthew Knepley     sum8  = 0.0;
16052b692628SMatthew Knepley     sum9  = 0.0;
16062b692628SMatthew Knepley     for (j=0; j<n; j++) {
16072b692628SMatthew Knepley       sum1 += v[jrow]*x[9*idx[jrow]];
16082b692628SMatthew Knepley       sum2 += v[jrow]*x[9*idx[jrow]+1];
16092b692628SMatthew Knepley       sum3 += v[jrow]*x[9*idx[jrow]+2];
16102b692628SMatthew Knepley       sum4 += v[jrow]*x[9*idx[jrow]+3];
16112b692628SMatthew Knepley       sum5 += v[jrow]*x[9*idx[jrow]+4];
16122b692628SMatthew Knepley       sum6 += v[jrow]*x[9*idx[jrow]+5];
16132b692628SMatthew Knepley       sum7 += v[jrow]*x[9*idx[jrow]+6];
16142b692628SMatthew Knepley       sum8 += v[jrow]*x[9*idx[jrow]+7];
16152b692628SMatthew Knepley       sum9 += v[jrow]*x[9*idx[jrow]+8];
16162b692628SMatthew Knepley       jrow++;
16172b692628SMatthew Knepley      }
16182b692628SMatthew Knepley     y[9*i]   += sum1;
16192b692628SMatthew Knepley     y[9*i+1] += sum2;
16202b692628SMatthew Knepley     y[9*i+2] += sum3;
16212b692628SMatthew Knepley     y[9*i+3] += sum4;
16222b692628SMatthew Knepley     y[9*i+4] += sum5;
16232b692628SMatthew Knepley     y[9*i+5] += sum6;
16242b692628SMatthew Knepley     y[9*i+6] += sum7;
16252b692628SMatthew Knepley     y[9*i+7] += sum8;
16262b692628SMatthew Knepley     y[9*i+8] += sum9;
16272b692628SMatthew Knepley   }
16282b692628SMatthew Knepley 
1629efee365bSSatish Balay   ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr);
1630d2840e09SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
16311ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
16322b692628SMatthew Knepley   PetscFunctionReturn(0);
16332b692628SMatthew Knepley }
16342b692628SMatthew Knepley 
16352b692628SMatthew Knepley #undef __FUNCT__
16362b692628SMatthew Knepley #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9"
1637dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
16382b692628SMatthew Knepley {
16392b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
16402b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1641d2840e09SBarry Smith   const PetscScalar *x,*v;
1642d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1643dfbe8321SBarry Smith   PetscErrorCode    ierr;
1644d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1645d2840e09SBarry Smith   PetscInt          n,i;
16462b692628SMatthew Knepley 
16472b692628SMatthew Knepley   PetscFunctionBegin;
16482b692628SMatthew Knepley   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1649d2840e09SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
16501ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
16512b692628SMatthew Knepley   for (i=0; i<m; i++) {
16522b692628SMatthew Knepley     idx    = a->j + a->i[i] ;
16532b692628SMatthew Knepley     v      = a->a + a->i[i] ;
16542b692628SMatthew Knepley     n      = a->i[i+1] - a->i[i];
16552b692628SMatthew Knepley     alpha1 = x[9*i];
16562b692628SMatthew Knepley     alpha2 = x[9*i+1];
16572b692628SMatthew Knepley     alpha3 = x[9*i+2];
16582b692628SMatthew Knepley     alpha4 = x[9*i+3];
16592b692628SMatthew Knepley     alpha5 = x[9*i+4];
16602b692628SMatthew Knepley     alpha6 = x[9*i+5];
16612b692628SMatthew Knepley     alpha7 = x[9*i+6];
16622b692628SMatthew Knepley     alpha8 = x[9*i+7];
16632b692628SMatthew Knepley     alpha9 = x[9*i+8];
16642b692628SMatthew Knepley     while (n-->0) {
16652b692628SMatthew Knepley       y[9*(*idx)]   += alpha1*(*v);
16662b692628SMatthew Knepley       y[9*(*idx)+1] += alpha2*(*v);
16672b692628SMatthew Knepley       y[9*(*idx)+2] += alpha3*(*v);
16682b692628SMatthew Knepley       y[9*(*idx)+3] += alpha4*(*v);
16692b692628SMatthew Knepley       y[9*(*idx)+4] += alpha5*(*v);
16702b692628SMatthew Knepley       y[9*(*idx)+5] += alpha6*(*v);
16712b692628SMatthew Knepley       y[9*(*idx)+6] += alpha7*(*v);
16722b692628SMatthew Knepley       y[9*(*idx)+7] += alpha8*(*v);
16732b692628SMatthew Knepley       y[9*(*idx)+8] += alpha9*(*v);
16742b692628SMatthew Knepley       idx++; v++;
16752b692628SMatthew Knepley     }
16762b692628SMatthew Knepley   }
1677dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
1678d2840e09SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
16791ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
16802b692628SMatthew Knepley   PetscFunctionReturn(0);
16812b692628SMatthew Knepley }
1682b9cda22cSBarry Smith #undef __FUNCT__
1683b9cda22cSBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_10"
1684b9cda22cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1685b9cda22cSBarry Smith {
1686b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1687b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1688d2840e09SBarry Smith   const PetscScalar *x,*v;
1689d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1690b9cda22cSBarry Smith   PetscErrorCode    ierr;
1691d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1692d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
1693b9cda22cSBarry Smith 
1694b9cda22cSBarry Smith   PetscFunctionBegin;
1695d2840e09SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
1696b9cda22cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1697b9cda22cSBarry Smith   idx  = a->j;
1698b9cda22cSBarry Smith   v    = a->a;
1699b9cda22cSBarry Smith   ii   = a->i;
1700b9cda22cSBarry Smith 
1701b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1702b9cda22cSBarry Smith     jrow = ii[i];
1703b9cda22cSBarry Smith     n    = ii[i+1] - jrow;
1704b9cda22cSBarry Smith     sum1  = 0.0;
1705b9cda22cSBarry Smith     sum2  = 0.0;
1706b9cda22cSBarry Smith     sum3  = 0.0;
1707b9cda22cSBarry Smith     sum4  = 0.0;
1708b9cda22cSBarry Smith     sum5  = 0.0;
1709b9cda22cSBarry Smith     sum6  = 0.0;
1710b9cda22cSBarry Smith     sum7  = 0.0;
1711b9cda22cSBarry Smith     sum8  = 0.0;
1712b9cda22cSBarry Smith     sum9  = 0.0;
1713b9cda22cSBarry Smith     sum10 = 0.0;
1714b3c51c66SMatthew Knepley     nonzerorow += (n>0);
1715b9cda22cSBarry Smith     for (j=0; j<n; j++) {
1716b9cda22cSBarry Smith       sum1  += v[jrow]*x[10*idx[jrow]];
1717b9cda22cSBarry Smith       sum2  += v[jrow]*x[10*idx[jrow]+1];
1718b9cda22cSBarry Smith       sum3  += v[jrow]*x[10*idx[jrow]+2];
1719b9cda22cSBarry Smith       sum4  += v[jrow]*x[10*idx[jrow]+3];
1720b9cda22cSBarry Smith       sum5  += v[jrow]*x[10*idx[jrow]+4];
1721b9cda22cSBarry Smith       sum6  += v[jrow]*x[10*idx[jrow]+5];
1722b9cda22cSBarry Smith       sum7  += v[jrow]*x[10*idx[jrow]+6];
1723b9cda22cSBarry Smith       sum8  += v[jrow]*x[10*idx[jrow]+7];
1724b9cda22cSBarry Smith       sum9  += v[jrow]*x[10*idx[jrow]+8];
1725b9cda22cSBarry Smith       sum10 += v[jrow]*x[10*idx[jrow]+9];
1726b9cda22cSBarry Smith       jrow++;
1727b9cda22cSBarry Smith      }
1728b9cda22cSBarry Smith     y[10*i]   = sum1;
1729b9cda22cSBarry Smith     y[10*i+1] = sum2;
1730b9cda22cSBarry Smith     y[10*i+2] = sum3;
1731b9cda22cSBarry Smith     y[10*i+3] = sum4;
1732b9cda22cSBarry Smith     y[10*i+4] = sum5;
1733b9cda22cSBarry Smith     y[10*i+5] = sum6;
1734b9cda22cSBarry Smith     y[10*i+6] = sum7;
1735b9cda22cSBarry Smith     y[10*i+7] = sum8;
1736b9cda22cSBarry Smith     y[10*i+8] = sum9;
1737b9cda22cSBarry Smith     y[10*i+9] = sum10;
1738b9cda22cSBarry Smith   }
1739b9cda22cSBarry Smith 
1740dc0b31edSSatish Balay   ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr);
1741d2840e09SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
1742b9cda22cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1743b9cda22cSBarry Smith   PetscFunctionReturn(0);
1744b9cda22cSBarry Smith }
1745b9cda22cSBarry Smith 
1746b9cda22cSBarry Smith #undef __FUNCT__
1747dbdd7285SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_10"
1748b9cda22cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1749b9cda22cSBarry Smith {
1750b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1751b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1752d2840e09SBarry Smith   const PetscScalar *x,*v;
1753d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1754b9cda22cSBarry Smith   PetscErrorCode    ierr;
1755d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1756b9cda22cSBarry Smith   PetscInt          n,i,jrow,j;
1757b9cda22cSBarry Smith 
1758b9cda22cSBarry Smith   PetscFunctionBegin;
1759b9cda22cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1760d2840e09SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
1761b9cda22cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1762b9cda22cSBarry Smith   idx  = a->j;
1763b9cda22cSBarry Smith   v    = a->a;
1764b9cda22cSBarry Smith   ii   = a->i;
1765b9cda22cSBarry Smith 
1766b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1767b9cda22cSBarry Smith     jrow = ii[i];
1768b9cda22cSBarry Smith     n    = ii[i+1] - jrow;
1769b9cda22cSBarry Smith     sum1  = 0.0;
1770b9cda22cSBarry Smith     sum2  = 0.0;
1771b9cda22cSBarry Smith     sum3  = 0.0;
1772b9cda22cSBarry Smith     sum4  = 0.0;
1773b9cda22cSBarry Smith     sum5  = 0.0;
1774b9cda22cSBarry Smith     sum6  = 0.0;
1775b9cda22cSBarry Smith     sum7  = 0.0;
1776b9cda22cSBarry Smith     sum8  = 0.0;
1777b9cda22cSBarry Smith     sum9  = 0.0;
1778b9cda22cSBarry Smith     sum10 = 0.0;
1779b9cda22cSBarry Smith     for (j=0; j<n; j++) {
1780b9cda22cSBarry Smith       sum1  += v[jrow]*x[10*idx[jrow]];
1781b9cda22cSBarry Smith       sum2  += v[jrow]*x[10*idx[jrow]+1];
1782b9cda22cSBarry Smith       sum3  += v[jrow]*x[10*idx[jrow]+2];
1783b9cda22cSBarry Smith       sum4  += v[jrow]*x[10*idx[jrow]+3];
1784b9cda22cSBarry Smith       sum5  += v[jrow]*x[10*idx[jrow]+4];
1785b9cda22cSBarry Smith       sum6  += v[jrow]*x[10*idx[jrow]+5];
1786b9cda22cSBarry Smith       sum7  += v[jrow]*x[10*idx[jrow]+6];
1787b9cda22cSBarry Smith       sum8  += v[jrow]*x[10*idx[jrow]+7];
1788b9cda22cSBarry Smith       sum9  += v[jrow]*x[10*idx[jrow]+8];
1789b9cda22cSBarry Smith       sum10 += v[jrow]*x[10*idx[jrow]+9];
1790b9cda22cSBarry Smith       jrow++;
1791b9cda22cSBarry Smith      }
1792b9cda22cSBarry Smith     y[10*i]   += sum1;
1793b9cda22cSBarry Smith     y[10*i+1] += sum2;
1794b9cda22cSBarry Smith     y[10*i+2] += sum3;
1795b9cda22cSBarry Smith     y[10*i+3] += sum4;
1796b9cda22cSBarry Smith     y[10*i+4] += sum5;
1797b9cda22cSBarry Smith     y[10*i+5] += sum6;
1798b9cda22cSBarry Smith     y[10*i+6] += sum7;
1799b9cda22cSBarry Smith     y[10*i+7] += sum8;
1800b9cda22cSBarry Smith     y[10*i+8] += sum9;
1801b9cda22cSBarry Smith     y[10*i+9] += sum10;
1802b9cda22cSBarry Smith   }
1803b9cda22cSBarry Smith 
1804dc0b31edSSatish Balay   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
1805d2840e09SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
1806b9cda22cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1807b9cda22cSBarry Smith   PetscFunctionReturn(0);
1808b9cda22cSBarry Smith }
1809b9cda22cSBarry Smith 
1810b9cda22cSBarry Smith #undef __FUNCT__
1811b9cda22cSBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_10"
1812b9cda22cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1813b9cda22cSBarry Smith {
1814b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1815b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1816d2840e09SBarry Smith   const PetscScalar *x,*v;
1817d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1818b9cda22cSBarry Smith   PetscErrorCode    ierr;
1819d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1820d2840e09SBarry Smith   PetscInt          n,i;
1821b9cda22cSBarry Smith 
1822b9cda22cSBarry Smith   PetscFunctionBegin;
1823d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1824d2840e09SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
1825b9cda22cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1826b9cda22cSBarry Smith 
1827b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1828b9cda22cSBarry Smith     idx    = a->j + a->i[i] ;
1829b9cda22cSBarry Smith     v      = a->a + a->i[i] ;
1830b9cda22cSBarry Smith     n      = a->i[i+1] - a->i[i];
1831e29fdc22SBarry Smith     alpha1 = x[10*i];
1832e29fdc22SBarry Smith     alpha2 = x[10*i+1];
1833e29fdc22SBarry Smith     alpha3 = x[10*i+2];
1834e29fdc22SBarry Smith     alpha4 = x[10*i+3];
1835e29fdc22SBarry Smith     alpha5 = x[10*i+4];
1836e29fdc22SBarry Smith     alpha6 = x[10*i+5];
1837e29fdc22SBarry Smith     alpha7 = x[10*i+6];
1838e29fdc22SBarry Smith     alpha8 = x[10*i+7];
1839e29fdc22SBarry Smith     alpha9 = x[10*i+8];
1840b9cda22cSBarry Smith     alpha10 = x[10*i+9];
1841b9cda22cSBarry Smith     while (n-->0) {
1842e29fdc22SBarry Smith       y[10*(*idx)]   += alpha1*(*v);
1843e29fdc22SBarry Smith       y[10*(*idx)+1] += alpha2*(*v);
1844e29fdc22SBarry Smith       y[10*(*idx)+2] += alpha3*(*v);
1845e29fdc22SBarry Smith       y[10*(*idx)+3] += alpha4*(*v);
1846e29fdc22SBarry Smith       y[10*(*idx)+4] += alpha5*(*v);
1847e29fdc22SBarry Smith       y[10*(*idx)+5] += alpha6*(*v);
1848e29fdc22SBarry Smith       y[10*(*idx)+6] += alpha7*(*v);
1849e29fdc22SBarry Smith       y[10*(*idx)+7] += alpha8*(*v);
1850e29fdc22SBarry Smith       y[10*(*idx)+8] += alpha9*(*v);
1851b9cda22cSBarry Smith       y[10*(*idx)+9] += alpha10*(*v);
1852b9cda22cSBarry Smith       idx++; v++;
1853b9cda22cSBarry Smith     }
1854b9cda22cSBarry Smith   }
1855dc0b31edSSatish Balay   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
1856d2840e09SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
1857b9cda22cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1858b9cda22cSBarry Smith   PetscFunctionReturn(0);
1859b9cda22cSBarry Smith }
1860b9cda22cSBarry Smith 
1861b9cda22cSBarry Smith #undef __FUNCT__
1862b9cda22cSBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_10"
1863b9cda22cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1864b9cda22cSBarry Smith {
1865b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1866b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1867d2840e09SBarry Smith   const PetscScalar *x,*v;
1868d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1869b9cda22cSBarry Smith   PetscErrorCode    ierr;
1870d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1871d2840e09SBarry Smith   PetscInt          n,i;
1872b9cda22cSBarry Smith 
1873b9cda22cSBarry Smith   PetscFunctionBegin;
1874b9cda22cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1875d2840e09SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
1876b9cda22cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1877b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1878b9cda22cSBarry Smith     idx    = a->j + a->i[i] ;
1879b9cda22cSBarry Smith     v      = a->a + a->i[i] ;
1880b9cda22cSBarry Smith     n      = a->i[i+1] - a->i[i];
1881b9cda22cSBarry Smith     alpha1 = x[10*i];
1882b9cda22cSBarry Smith     alpha2 = x[10*i+1];
1883b9cda22cSBarry Smith     alpha3 = x[10*i+2];
1884b9cda22cSBarry Smith     alpha4 = x[10*i+3];
1885b9cda22cSBarry Smith     alpha5 = x[10*i+4];
1886b9cda22cSBarry Smith     alpha6 = x[10*i+5];
1887b9cda22cSBarry Smith     alpha7 = x[10*i+6];
1888b9cda22cSBarry Smith     alpha8 = x[10*i+7];
1889b9cda22cSBarry Smith     alpha9 = x[10*i+8];
1890b9cda22cSBarry Smith     alpha10 = x[10*i+9];
1891b9cda22cSBarry Smith     while (n-->0) {
1892b9cda22cSBarry Smith       y[10*(*idx)]   += alpha1*(*v);
1893b9cda22cSBarry Smith       y[10*(*idx)+1] += alpha2*(*v);
1894b9cda22cSBarry Smith       y[10*(*idx)+2] += alpha3*(*v);
1895b9cda22cSBarry Smith       y[10*(*idx)+3] += alpha4*(*v);
1896b9cda22cSBarry Smith       y[10*(*idx)+4] += alpha5*(*v);
1897b9cda22cSBarry Smith       y[10*(*idx)+5] += alpha6*(*v);
1898b9cda22cSBarry Smith       y[10*(*idx)+6] += alpha7*(*v);
1899b9cda22cSBarry Smith       y[10*(*idx)+7] += alpha8*(*v);
1900b9cda22cSBarry Smith       y[10*(*idx)+8] += alpha9*(*v);
1901b9cda22cSBarry Smith       y[10*(*idx)+9] += alpha10*(*v);
1902b9cda22cSBarry Smith       idx++; v++;
1903b9cda22cSBarry Smith     }
1904b9cda22cSBarry Smith   }
1905dc0b31edSSatish Balay   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
1906d2840e09SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
1907b9cda22cSBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1908b9cda22cSBarry Smith   PetscFunctionReturn(0);
1909b9cda22cSBarry Smith }
1910b9cda22cSBarry Smith 
19112b692628SMatthew Knepley 
19122f7816d4SBarry Smith /*--------------------------------------------------------------------------------------------*/
19132f7816d4SBarry Smith #undef __FUNCT__
1914dbdd7285SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_11"
1915dbdd7285SBarry Smith PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1916dbdd7285SBarry Smith {
1917dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1918dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1919d2840e09SBarry Smith   const PetscScalar *x,*v;
1920d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1921dbdd7285SBarry Smith   PetscErrorCode    ierr;
1922d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1923d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
1924dbdd7285SBarry Smith 
1925dbdd7285SBarry Smith   PetscFunctionBegin;
1926d2840e09SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
1927dbdd7285SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1928dbdd7285SBarry Smith   idx  = a->j;
1929dbdd7285SBarry Smith   v    = a->a;
1930dbdd7285SBarry Smith   ii   = a->i;
1931dbdd7285SBarry Smith 
1932dbdd7285SBarry Smith   for (i=0; i<m; i++) {
1933dbdd7285SBarry Smith     jrow = ii[i];
1934dbdd7285SBarry Smith     n    = ii[i+1] - jrow;
1935dbdd7285SBarry Smith     sum1  = 0.0;
1936dbdd7285SBarry Smith     sum2  = 0.0;
1937dbdd7285SBarry Smith     sum3  = 0.0;
1938dbdd7285SBarry Smith     sum4  = 0.0;
1939dbdd7285SBarry Smith     sum5  = 0.0;
1940dbdd7285SBarry Smith     sum6  = 0.0;
1941dbdd7285SBarry Smith     sum7  = 0.0;
1942dbdd7285SBarry Smith     sum8  = 0.0;
1943dbdd7285SBarry Smith     sum9  = 0.0;
1944dbdd7285SBarry Smith     sum10 = 0.0;
1945dbdd7285SBarry Smith     sum11 = 0.0;
1946dbdd7285SBarry Smith     nonzerorow += (n>0);
1947dbdd7285SBarry Smith     for (j=0; j<n; j++) {
1948dbdd7285SBarry Smith       sum1  += v[jrow]*x[11*idx[jrow]];
1949dbdd7285SBarry Smith       sum2  += v[jrow]*x[11*idx[jrow]+1];
1950dbdd7285SBarry Smith       sum3  += v[jrow]*x[11*idx[jrow]+2];
1951dbdd7285SBarry Smith       sum4  += v[jrow]*x[11*idx[jrow]+3];
1952dbdd7285SBarry Smith       sum5  += v[jrow]*x[11*idx[jrow]+4];
1953dbdd7285SBarry Smith       sum6  += v[jrow]*x[11*idx[jrow]+5];
1954dbdd7285SBarry Smith       sum7  += v[jrow]*x[11*idx[jrow]+6];
1955dbdd7285SBarry Smith       sum8  += v[jrow]*x[11*idx[jrow]+7];
1956dbdd7285SBarry Smith       sum9  += v[jrow]*x[11*idx[jrow]+8];
1957dbdd7285SBarry Smith       sum10 += v[jrow]*x[11*idx[jrow]+9];
1958dbdd7285SBarry Smith       sum11 += v[jrow]*x[11*idx[jrow]+10];
1959dbdd7285SBarry Smith       jrow++;
1960dbdd7285SBarry Smith      }
1961dbdd7285SBarry Smith     y[11*i]   = sum1;
1962dbdd7285SBarry Smith     y[11*i+1] = sum2;
1963dbdd7285SBarry Smith     y[11*i+2] = sum3;
1964dbdd7285SBarry Smith     y[11*i+3] = sum4;
1965dbdd7285SBarry Smith     y[11*i+4] = sum5;
1966dbdd7285SBarry Smith     y[11*i+5] = sum6;
1967dbdd7285SBarry Smith     y[11*i+6] = sum7;
1968dbdd7285SBarry Smith     y[11*i+7] = sum8;
1969dbdd7285SBarry Smith     y[11*i+8] = sum9;
1970dbdd7285SBarry Smith     y[11*i+9] = sum10;
1971dbdd7285SBarry Smith     y[11*i+10] = sum11;
1972dbdd7285SBarry Smith   }
1973dbdd7285SBarry Smith 
1974dbdd7285SBarry Smith   ierr = PetscLogFlops(22*a->nz - 11*nonzerorow);CHKERRQ(ierr);
1975d2840e09SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
1976dbdd7285SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1977dbdd7285SBarry Smith   PetscFunctionReturn(0);
1978dbdd7285SBarry Smith }
1979dbdd7285SBarry Smith 
1980dbdd7285SBarry Smith #undef __FUNCT__
1981dbdd7285SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_11"
1982dbdd7285SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1983dbdd7285SBarry Smith {
1984dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1985dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1986d2840e09SBarry Smith   const PetscScalar *x,*v;
1987d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1988dbdd7285SBarry Smith   PetscErrorCode    ierr;
1989d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1990dbdd7285SBarry Smith   PetscInt          n,i,jrow,j;
1991dbdd7285SBarry Smith 
1992dbdd7285SBarry Smith   PetscFunctionBegin;
1993dbdd7285SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1994d2840e09SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
1995dbdd7285SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1996dbdd7285SBarry Smith   idx  = a->j;
1997dbdd7285SBarry Smith   v    = a->a;
1998dbdd7285SBarry Smith   ii   = a->i;
1999dbdd7285SBarry Smith 
2000dbdd7285SBarry Smith   for (i=0; i<m; i++) {
2001dbdd7285SBarry Smith     jrow = ii[i];
2002dbdd7285SBarry Smith     n    = ii[i+1] - jrow;
2003dbdd7285SBarry Smith     sum1  = 0.0;
2004dbdd7285SBarry Smith     sum2  = 0.0;
2005dbdd7285SBarry Smith     sum3  = 0.0;
2006dbdd7285SBarry Smith     sum4  = 0.0;
2007dbdd7285SBarry Smith     sum5  = 0.0;
2008dbdd7285SBarry Smith     sum6  = 0.0;
2009dbdd7285SBarry Smith     sum7  = 0.0;
2010dbdd7285SBarry Smith     sum8  = 0.0;
2011dbdd7285SBarry Smith     sum9  = 0.0;
2012dbdd7285SBarry Smith     sum10 = 0.0;
2013dbdd7285SBarry Smith     sum11 = 0.0;
2014dbdd7285SBarry Smith     for (j=0; j<n; j++) {
2015dbdd7285SBarry Smith       sum1  += v[jrow]*x[11*idx[jrow]];
2016dbdd7285SBarry Smith       sum2  += v[jrow]*x[11*idx[jrow]+1];
2017dbdd7285SBarry Smith       sum3  += v[jrow]*x[11*idx[jrow]+2];
2018dbdd7285SBarry Smith       sum4  += v[jrow]*x[11*idx[jrow]+3];
2019dbdd7285SBarry Smith       sum5  += v[jrow]*x[11*idx[jrow]+4];
2020dbdd7285SBarry Smith       sum6  += v[jrow]*x[11*idx[jrow]+5];
2021dbdd7285SBarry Smith       sum7  += v[jrow]*x[11*idx[jrow]+6];
2022dbdd7285SBarry Smith       sum8  += v[jrow]*x[11*idx[jrow]+7];
2023dbdd7285SBarry Smith       sum9  += v[jrow]*x[11*idx[jrow]+8];
2024dbdd7285SBarry Smith       sum10 += v[jrow]*x[11*idx[jrow]+9];
2025dbdd7285SBarry Smith       sum11 += v[jrow]*x[11*idx[jrow]+10];
2026dbdd7285SBarry Smith       jrow++;
2027dbdd7285SBarry Smith      }
2028dbdd7285SBarry Smith     y[11*i]   += sum1;
2029dbdd7285SBarry Smith     y[11*i+1] += sum2;
2030dbdd7285SBarry Smith     y[11*i+2] += sum3;
2031dbdd7285SBarry Smith     y[11*i+3] += sum4;
2032dbdd7285SBarry Smith     y[11*i+4] += sum5;
2033dbdd7285SBarry Smith     y[11*i+5] += sum6;
2034dbdd7285SBarry Smith     y[11*i+6] += sum7;
2035dbdd7285SBarry Smith     y[11*i+7] += sum8;
2036dbdd7285SBarry Smith     y[11*i+8] += sum9;
2037dbdd7285SBarry Smith     y[11*i+9] += sum10;
2038dbdd7285SBarry Smith     y[11*i+10] += sum11;
2039dbdd7285SBarry Smith   }
2040dbdd7285SBarry Smith 
2041dbdd7285SBarry Smith   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
2042d2840e09SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
2043dbdd7285SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2044dbdd7285SBarry Smith   PetscFunctionReturn(0);
2045dbdd7285SBarry Smith }
2046dbdd7285SBarry Smith 
2047dbdd7285SBarry Smith #undef __FUNCT__
2048dbdd7285SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_11"
2049dbdd7285SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
2050dbdd7285SBarry Smith {
2051dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2052dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2053d2840e09SBarry Smith   const PetscScalar *x,*v;
2054d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2055dbdd7285SBarry Smith   PetscErrorCode    ierr;
2056d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2057d2840e09SBarry Smith   PetscInt          n,i;
2058dbdd7285SBarry Smith 
2059dbdd7285SBarry Smith   PetscFunctionBegin;
2060d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2061d2840e09SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
2062dbdd7285SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2063dbdd7285SBarry Smith 
2064dbdd7285SBarry Smith   for (i=0; i<m; i++) {
2065dbdd7285SBarry Smith     idx    = a->j + a->i[i] ;
2066dbdd7285SBarry Smith     v      = a->a + a->i[i] ;
2067dbdd7285SBarry Smith     n      = a->i[i+1] - a->i[i];
2068dbdd7285SBarry Smith     alpha1 = x[11*i];
2069dbdd7285SBarry Smith     alpha2 = x[11*i+1];
2070dbdd7285SBarry Smith     alpha3 = x[11*i+2];
2071dbdd7285SBarry Smith     alpha4 = x[11*i+3];
2072dbdd7285SBarry Smith     alpha5 = x[11*i+4];
2073dbdd7285SBarry Smith     alpha6 = x[11*i+5];
2074dbdd7285SBarry Smith     alpha7 = x[11*i+6];
2075dbdd7285SBarry Smith     alpha8 = x[11*i+7];
2076dbdd7285SBarry Smith     alpha9 = x[11*i+8];
2077dbdd7285SBarry Smith     alpha10 = x[11*i+9];
2078dbdd7285SBarry Smith     alpha11 = x[11*i+10];
2079dbdd7285SBarry Smith     while (n-->0) {
2080dbdd7285SBarry Smith       y[11*(*idx)]   += alpha1*(*v);
2081dbdd7285SBarry Smith       y[11*(*idx)+1] += alpha2*(*v);
2082dbdd7285SBarry Smith       y[11*(*idx)+2] += alpha3*(*v);
2083dbdd7285SBarry Smith       y[11*(*idx)+3] += alpha4*(*v);
2084dbdd7285SBarry Smith       y[11*(*idx)+4] += alpha5*(*v);
2085dbdd7285SBarry Smith       y[11*(*idx)+5] += alpha6*(*v);
2086dbdd7285SBarry Smith       y[11*(*idx)+6] += alpha7*(*v);
2087dbdd7285SBarry Smith       y[11*(*idx)+7] += alpha8*(*v);
2088dbdd7285SBarry Smith       y[11*(*idx)+8] += alpha9*(*v);
2089dbdd7285SBarry Smith       y[11*(*idx)+9] += alpha10*(*v);
2090dbdd7285SBarry Smith       y[11*(*idx)+10] += alpha11*(*v);
2091dbdd7285SBarry Smith       idx++; v++;
2092dbdd7285SBarry Smith     }
2093dbdd7285SBarry Smith   }
2094dbdd7285SBarry Smith   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
2095d2840e09SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
2096dbdd7285SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2097dbdd7285SBarry Smith   PetscFunctionReturn(0);
2098dbdd7285SBarry Smith }
2099dbdd7285SBarry Smith 
2100dbdd7285SBarry Smith #undef __FUNCT__
2101dbdd7285SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_11"
2102dbdd7285SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2103dbdd7285SBarry Smith {
2104dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2105dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2106d2840e09SBarry Smith   const PetscScalar *x,*v;
2107d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2108dbdd7285SBarry Smith   PetscErrorCode    ierr;
2109d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2110d2840e09SBarry Smith   PetscInt          n,i;
2111dbdd7285SBarry Smith 
2112dbdd7285SBarry Smith   PetscFunctionBegin;
2113dbdd7285SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2114d2840e09SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
2115dbdd7285SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2116dbdd7285SBarry Smith   for (i=0; i<m; i++) {
2117dbdd7285SBarry Smith     idx    = a->j + a->i[i] ;
2118dbdd7285SBarry Smith     v      = a->a + a->i[i] ;
2119dbdd7285SBarry Smith     n      = a->i[i+1] - a->i[i];
2120dbdd7285SBarry Smith     alpha1 = x[11*i];
2121dbdd7285SBarry Smith     alpha2 = x[11*i+1];
2122dbdd7285SBarry Smith     alpha3 = x[11*i+2];
2123dbdd7285SBarry Smith     alpha4 = x[11*i+3];
2124dbdd7285SBarry Smith     alpha5 = x[11*i+4];
2125dbdd7285SBarry Smith     alpha6 = x[11*i+5];
2126dbdd7285SBarry Smith     alpha7 = x[11*i+6];
2127dbdd7285SBarry Smith     alpha8 = x[11*i+7];
2128dbdd7285SBarry Smith     alpha9 = x[11*i+8];
2129dbdd7285SBarry Smith     alpha10 = x[11*i+9];
2130dbdd7285SBarry Smith     alpha11 = x[11*i+10];
2131dbdd7285SBarry Smith     while (n-->0) {
2132dbdd7285SBarry Smith       y[11*(*idx)]   += alpha1*(*v);
2133dbdd7285SBarry Smith       y[11*(*idx)+1] += alpha2*(*v);
2134dbdd7285SBarry Smith       y[11*(*idx)+2] += alpha3*(*v);
2135dbdd7285SBarry Smith       y[11*(*idx)+3] += alpha4*(*v);
2136dbdd7285SBarry Smith       y[11*(*idx)+4] += alpha5*(*v);
2137dbdd7285SBarry Smith       y[11*(*idx)+5] += alpha6*(*v);
2138dbdd7285SBarry Smith       y[11*(*idx)+6] += alpha7*(*v);
2139dbdd7285SBarry Smith       y[11*(*idx)+7] += alpha8*(*v);
2140dbdd7285SBarry Smith       y[11*(*idx)+8] += alpha9*(*v);
2141dbdd7285SBarry Smith       y[11*(*idx)+9] += alpha10*(*v);
2142dbdd7285SBarry Smith       y[11*(*idx)+10] += alpha11*(*v);
2143dbdd7285SBarry Smith       idx++; v++;
2144dbdd7285SBarry Smith     }
2145dbdd7285SBarry Smith   }
2146dbdd7285SBarry Smith   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
2147d2840e09SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
2148dbdd7285SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2149dbdd7285SBarry Smith   PetscFunctionReturn(0);
2150dbdd7285SBarry Smith }
2151dbdd7285SBarry Smith 
2152dbdd7285SBarry Smith 
2153dbdd7285SBarry Smith /*--------------------------------------------------------------------------------------------*/
2154dbdd7285SBarry Smith #undef __FUNCT__
21552f7816d4SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_16"
2156dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
21572f7816d4SBarry Smith {
21582f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
21592f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2160d2840e09SBarry Smith   const PetscScalar *x,*v;
2161d2840e09SBarry Smith   PetscScalar        *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
21622f7816d4SBarry Smith   PetscScalar        sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2163dfbe8321SBarry Smith   PetscErrorCode     ierr;
2164d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n,*idx,*ii;
2165d2840e09SBarry Smith   PetscInt           nonzerorow=0,n,i,jrow,j;
21662f7816d4SBarry Smith 
21672f7816d4SBarry Smith   PetscFunctionBegin;
2168d2840e09SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
21691ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
21702f7816d4SBarry Smith   idx  = a->j;
21712f7816d4SBarry Smith   v    = a->a;
21722f7816d4SBarry Smith   ii   = a->i;
21732f7816d4SBarry Smith 
21742f7816d4SBarry Smith   for (i=0; i<m; i++) {
21752f7816d4SBarry Smith     jrow = ii[i];
21762f7816d4SBarry Smith     n    = ii[i+1] - jrow;
21772f7816d4SBarry Smith     sum1  = 0.0;
21782f7816d4SBarry Smith     sum2  = 0.0;
21792f7816d4SBarry Smith     sum3  = 0.0;
21802f7816d4SBarry Smith     sum4  = 0.0;
21812f7816d4SBarry Smith     sum5  = 0.0;
21822f7816d4SBarry Smith     sum6  = 0.0;
21832f7816d4SBarry Smith     sum7  = 0.0;
21842f7816d4SBarry Smith     sum8  = 0.0;
21852f7816d4SBarry Smith     sum9  = 0.0;
21862f7816d4SBarry Smith     sum10 = 0.0;
21872f7816d4SBarry Smith     sum11 = 0.0;
21882f7816d4SBarry Smith     sum12 = 0.0;
21892f7816d4SBarry Smith     sum13 = 0.0;
21902f7816d4SBarry Smith     sum14 = 0.0;
21912f7816d4SBarry Smith     sum15 = 0.0;
21922f7816d4SBarry Smith     sum16 = 0.0;
2193b3c51c66SMatthew Knepley     nonzerorow += (n>0);
21942f7816d4SBarry Smith     for (j=0; j<n; j++) {
21952f7816d4SBarry Smith       sum1  += v[jrow]*x[16*idx[jrow]];
21962f7816d4SBarry Smith       sum2  += v[jrow]*x[16*idx[jrow]+1];
21972f7816d4SBarry Smith       sum3  += v[jrow]*x[16*idx[jrow]+2];
21982f7816d4SBarry Smith       sum4  += v[jrow]*x[16*idx[jrow]+3];
21992f7816d4SBarry Smith       sum5  += v[jrow]*x[16*idx[jrow]+4];
22002f7816d4SBarry Smith       sum6  += v[jrow]*x[16*idx[jrow]+5];
22012f7816d4SBarry Smith       sum7  += v[jrow]*x[16*idx[jrow]+6];
22022f7816d4SBarry Smith       sum8  += v[jrow]*x[16*idx[jrow]+7];
22032f7816d4SBarry Smith       sum9  += v[jrow]*x[16*idx[jrow]+8];
22042f7816d4SBarry Smith       sum10 += v[jrow]*x[16*idx[jrow]+9];
22052f7816d4SBarry Smith       sum11 += v[jrow]*x[16*idx[jrow]+10];
22062f7816d4SBarry Smith       sum12 += v[jrow]*x[16*idx[jrow]+11];
22072f7816d4SBarry Smith       sum13 += v[jrow]*x[16*idx[jrow]+12];
22082f7816d4SBarry Smith       sum14 += v[jrow]*x[16*idx[jrow]+13];
22092f7816d4SBarry Smith       sum15 += v[jrow]*x[16*idx[jrow]+14];
22102f7816d4SBarry Smith       sum16 += v[jrow]*x[16*idx[jrow]+15];
22112f7816d4SBarry Smith       jrow++;
22122f7816d4SBarry Smith      }
22132f7816d4SBarry Smith     y[16*i]    = sum1;
22142f7816d4SBarry Smith     y[16*i+1]  = sum2;
22152f7816d4SBarry Smith     y[16*i+2]  = sum3;
22162f7816d4SBarry Smith     y[16*i+3]  = sum4;
22172f7816d4SBarry Smith     y[16*i+4]  = sum5;
22182f7816d4SBarry Smith     y[16*i+5]  = sum6;
22192f7816d4SBarry Smith     y[16*i+6]  = sum7;
22202f7816d4SBarry Smith     y[16*i+7]  = sum8;
22212f7816d4SBarry Smith     y[16*i+8]  = sum9;
22222f7816d4SBarry Smith     y[16*i+9]  = sum10;
22232f7816d4SBarry Smith     y[16*i+10] = sum11;
22242f7816d4SBarry Smith     y[16*i+11] = sum12;
22252f7816d4SBarry Smith     y[16*i+12] = sum13;
22262f7816d4SBarry Smith     y[16*i+13] = sum14;
22272f7816d4SBarry Smith     y[16*i+14] = sum15;
22282f7816d4SBarry Smith     y[16*i+15] = sum16;
22292f7816d4SBarry Smith   }
22302f7816d4SBarry Smith 
2231dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr);
2232d2840e09SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
22331ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
22342f7816d4SBarry Smith   PetscFunctionReturn(0);
22352f7816d4SBarry Smith }
22362f7816d4SBarry Smith 
22372f7816d4SBarry Smith #undef __FUNCT__
22382f7816d4SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16"
2239dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
22402f7816d4SBarry Smith {
22412f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
22422f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2243d2840e09SBarry Smith   const PetscScalar *x,*v;
2244d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
22452f7816d4SBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2246dfbe8321SBarry Smith   PetscErrorCode    ierr;
2247d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2248d2840e09SBarry Smith   PetscInt          n,i;
22492f7816d4SBarry Smith 
22502f7816d4SBarry Smith   PetscFunctionBegin;
2251d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2252d2840e09SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
22531ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2254bfec09a0SHong Zhang 
22552f7816d4SBarry Smith   for (i=0; i<m; i++) {
2256bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
2257bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
22582f7816d4SBarry Smith     n      = a->i[i+1] - a->i[i];
22592f7816d4SBarry Smith     alpha1  = x[16*i];
22602f7816d4SBarry Smith     alpha2  = x[16*i+1];
22612f7816d4SBarry Smith     alpha3  = x[16*i+2];
22622f7816d4SBarry Smith     alpha4  = x[16*i+3];
22632f7816d4SBarry Smith     alpha5  = x[16*i+4];
22642f7816d4SBarry Smith     alpha6  = x[16*i+5];
22652f7816d4SBarry Smith     alpha7  = x[16*i+6];
22662f7816d4SBarry Smith     alpha8  = x[16*i+7];
22672f7816d4SBarry Smith     alpha9  = x[16*i+8];
22682f7816d4SBarry Smith     alpha10 = x[16*i+9];
22692f7816d4SBarry Smith     alpha11 = x[16*i+10];
22702f7816d4SBarry Smith     alpha12 = x[16*i+11];
22712f7816d4SBarry Smith     alpha13 = x[16*i+12];
22722f7816d4SBarry Smith     alpha14 = x[16*i+13];
22732f7816d4SBarry Smith     alpha15 = x[16*i+14];
22742f7816d4SBarry Smith     alpha16 = x[16*i+15];
22752f7816d4SBarry Smith     while (n-->0) {
22762f7816d4SBarry Smith       y[16*(*idx)]    += alpha1*(*v);
22772f7816d4SBarry Smith       y[16*(*idx)+1]  += alpha2*(*v);
22782f7816d4SBarry Smith       y[16*(*idx)+2]  += alpha3*(*v);
22792f7816d4SBarry Smith       y[16*(*idx)+3]  += alpha4*(*v);
22802f7816d4SBarry Smith       y[16*(*idx)+4]  += alpha5*(*v);
22812f7816d4SBarry Smith       y[16*(*idx)+5]  += alpha6*(*v);
22822f7816d4SBarry Smith       y[16*(*idx)+6]  += alpha7*(*v);
22832f7816d4SBarry Smith       y[16*(*idx)+7]  += alpha8*(*v);
22842f7816d4SBarry Smith       y[16*(*idx)+8]  += alpha9*(*v);
22852f7816d4SBarry Smith       y[16*(*idx)+9]  += alpha10*(*v);
22862f7816d4SBarry Smith       y[16*(*idx)+10] += alpha11*(*v);
22872f7816d4SBarry Smith       y[16*(*idx)+11] += alpha12*(*v);
22882f7816d4SBarry Smith       y[16*(*idx)+12] += alpha13*(*v);
22892f7816d4SBarry Smith       y[16*(*idx)+13] += alpha14*(*v);
22902f7816d4SBarry Smith       y[16*(*idx)+14] += alpha15*(*v);
22912f7816d4SBarry Smith       y[16*(*idx)+15] += alpha16*(*v);
22922f7816d4SBarry Smith       idx++; v++;
22932f7816d4SBarry Smith     }
22942f7816d4SBarry Smith   }
2295dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
2296d2840e09SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
22971ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
22982f7816d4SBarry Smith   PetscFunctionReturn(0);
22992f7816d4SBarry Smith }
23002f7816d4SBarry Smith 
23012f7816d4SBarry Smith #undef __FUNCT__
23022f7816d4SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_16"
2303dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
23042f7816d4SBarry Smith {
23052f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
23062f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2307d2840e09SBarry Smith   const PetscScalar *x,*v;
2308d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
23092f7816d4SBarry Smith   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2310dfbe8321SBarry Smith   PetscErrorCode    ierr;
2311d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2312b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
23132f7816d4SBarry Smith 
23142f7816d4SBarry Smith   PetscFunctionBegin;
23152f7816d4SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2316d2840e09SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
23171ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
23182f7816d4SBarry Smith   idx  = a->j;
23192f7816d4SBarry Smith   v    = a->a;
23202f7816d4SBarry Smith   ii   = a->i;
23212f7816d4SBarry Smith 
23222f7816d4SBarry Smith   for (i=0; i<m; i++) {
23232f7816d4SBarry Smith     jrow = ii[i];
23242f7816d4SBarry Smith     n    = ii[i+1] - jrow;
23252f7816d4SBarry Smith     sum1  = 0.0;
23262f7816d4SBarry Smith     sum2  = 0.0;
23272f7816d4SBarry Smith     sum3  = 0.0;
23282f7816d4SBarry Smith     sum4  = 0.0;
23292f7816d4SBarry Smith     sum5  = 0.0;
23302f7816d4SBarry Smith     sum6  = 0.0;
23312f7816d4SBarry Smith     sum7  = 0.0;
23322f7816d4SBarry Smith     sum8  = 0.0;
23332f7816d4SBarry Smith     sum9  = 0.0;
23342f7816d4SBarry Smith     sum10 = 0.0;
23352f7816d4SBarry Smith     sum11 = 0.0;
23362f7816d4SBarry Smith     sum12 = 0.0;
23372f7816d4SBarry Smith     sum13 = 0.0;
23382f7816d4SBarry Smith     sum14 = 0.0;
23392f7816d4SBarry Smith     sum15 = 0.0;
23402f7816d4SBarry Smith     sum16 = 0.0;
23412f7816d4SBarry Smith     for (j=0; j<n; j++) {
23422f7816d4SBarry Smith       sum1  += v[jrow]*x[16*idx[jrow]];
23432f7816d4SBarry Smith       sum2  += v[jrow]*x[16*idx[jrow]+1];
23442f7816d4SBarry Smith       sum3  += v[jrow]*x[16*idx[jrow]+2];
23452f7816d4SBarry Smith       sum4  += v[jrow]*x[16*idx[jrow]+3];
23462f7816d4SBarry Smith       sum5  += v[jrow]*x[16*idx[jrow]+4];
23472f7816d4SBarry Smith       sum6  += v[jrow]*x[16*idx[jrow]+5];
23482f7816d4SBarry Smith       sum7  += v[jrow]*x[16*idx[jrow]+6];
23492f7816d4SBarry Smith       sum8  += v[jrow]*x[16*idx[jrow]+7];
23502f7816d4SBarry Smith       sum9  += v[jrow]*x[16*idx[jrow]+8];
23512f7816d4SBarry Smith       sum10 += v[jrow]*x[16*idx[jrow]+9];
23522f7816d4SBarry Smith       sum11 += v[jrow]*x[16*idx[jrow]+10];
23532f7816d4SBarry Smith       sum12 += v[jrow]*x[16*idx[jrow]+11];
23542f7816d4SBarry Smith       sum13 += v[jrow]*x[16*idx[jrow]+12];
23552f7816d4SBarry Smith       sum14 += v[jrow]*x[16*idx[jrow]+13];
23562f7816d4SBarry Smith       sum15 += v[jrow]*x[16*idx[jrow]+14];
23572f7816d4SBarry Smith       sum16 += v[jrow]*x[16*idx[jrow]+15];
23582f7816d4SBarry Smith       jrow++;
23592f7816d4SBarry Smith      }
23602f7816d4SBarry Smith     y[16*i]    += sum1;
23612f7816d4SBarry Smith     y[16*i+1]  += sum2;
23622f7816d4SBarry Smith     y[16*i+2]  += sum3;
23632f7816d4SBarry Smith     y[16*i+3]  += sum4;
23642f7816d4SBarry Smith     y[16*i+4]  += sum5;
23652f7816d4SBarry Smith     y[16*i+5]  += sum6;
23662f7816d4SBarry Smith     y[16*i+6]  += sum7;
23672f7816d4SBarry Smith     y[16*i+7]  += sum8;
23682f7816d4SBarry Smith     y[16*i+8]  += sum9;
23692f7816d4SBarry Smith     y[16*i+9]  += sum10;
23702f7816d4SBarry Smith     y[16*i+10] += sum11;
23712f7816d4SBarry Smith     y[16*i+11] += sum12;
23722f7816d4SBarry Smith     y[16*i+12] += sum13;
23732f7816d4SBarry Smith     y[16*i+13] += sum14;
23742f7816d4SBarry Smith     y[16*i+14] += sum15;
23752f7816d4SBarry Smith     y[16*i+15] += sum16;
23762f7816d4SBarry Smith   }
23772f7816d4SBarry Smith 
2378dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
2379d2840e09SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
23801ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
23812f7816d4SBarry Smith   PetscFunctionReturn(0);
23822f7816d4SBarry Smith }
23832f7816d4SBarry Smith 
23842f7816d4SBarry Smith #undef __FUNCT__
23852f7816d4SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16"
2386dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
23872f7816d4SBarry Smith {
23882f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
23892f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2390d2840e09SBarry Smith   const PetscScalar *x,*v;
2391d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
23922f7816d4SBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2393dfbe8321SBarry Smith   PetscErrorCode    ierr;
2394d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2395d2840e09SBarry Smith   PetscInt          n,i;
23962f7816d4SBarry Smith 
23972f7816d4SBarry Smith   PetscFunctionBegin;
23982f7816d4SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2399d2840e09SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
24001ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
24012f7816d4SBarry Smith   for (i=0; i<m; i++) {
2402bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
2403bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
24042f7816d4SBarry Smith     n      = a->i[i+1] - a->i[i];
24052f7816d4SBarry Smith     alpha1 = x[16*i];
24062f7816d4SBarry Smith     alpha2 = x[16*i+1];
24072f7816d4SBarry Smith     alpha3 = x[16*i+2];
24082f7816d4SBarry Smith     alpha4 = x[16*i+3];
24092f7816d4SBarry Smith     alpha5 = x[16*i+4];
24102f7816d4SBarry Smith     alpha6 = x[16*i+5];
24112f7816d4SBarry Smith     alpha7 = x[16*i+6];
24122f7816d4SBarry Smith     alpha8 = x[16*i+7];
24132f7816d4SBarry Smith     alpha9  = x[16*i+8];
24142f7816d4SBarry Smith     alpha10 = x[16*i+9];
24152f7816d4SBarry Smith     alpha11 = x[16*i+10];
24162f7816d4SBarry Smith     alpha12 = x[16*i+11];
24172f7816d4SBarry Smith     alpha13 = x[16*i+12];
24182f7816d4SBarry Smith     alpha14 = x[16*i+13];
24192f7816d4SBarry Smith     alpha15 = x[16*i+14];
24202f7816d4SBarry Smith     alpha16 = x[16*i+15];
24212f7816d4SBarry Smith     while (n-->0) {
24222f7816d4SBarry Smith       y[16*(*idx)]   += alpha1*(*v);
24232f7816d4SBarry Smith       y[16*(*idx)+1] += alpha2*(*v);
24242f7816d4SBarry Smith       y[16*(*idx)+2] += alpha3*(*v);
24252f7816d4SBarry Smith       y[16*(*idx)+3] += alpha4*(*v);
24262f7816d4SBarry Smith       y[16*(*idx)+4] += alpha5*(*v);
24272f7816d4SBarry Smith       y[16*(*idx)+5] += alpha6*(*v);
24282f7816d4SBarry Smith       y[16*(*idx)+6] += alpha7*(*v);
24292f7816d4SBarry Smith       y[16*(*idx)+7] += alpha8*(*v);
24302f7816d4SBarry Smith       y[16*(*idx)+8]  += alpha9*(*v);
24312f7816d4SBarry Smith       y[16*(*idx)+9]  += alpha10*(*v);
24322f7816d4SBarry Smith       y[16*(*idx)+10] += alpha11*(*v);
24332f7816d4SBarry Smith       y[16*(*idx)+11] += alpha12*(*v);
24342f7816d4SBarry Smith       y[16*(*idx)+12] += alpha13*(*v);
24352f7816d4SBarry Smith       y[16*(*idx)+13] += alpha14*(*v);
24362f7816d4SBarry Smith       y[16*(*idx)+14] += alpha15*(*v);
24372f7816d4SBarry Smith       y[16*(*idx)+15] += alpha16*(*v);
24382f7816d4SBarry Smith       idx++; v++;
24392f7816d4SBarry Smith     }
24402f7816d4SBarry Smith   }
2441dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
2442d2840e09SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
24431ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
244466ed3db0SBarry Smith   PetscFunctionReturn(0);
244566ed3db0SBarry Smith }
244666ed3db0SBarry Smith 
2447ed1c418cSBarry Smith /*--------------------------------------------------------------------------------------------*/
2448ed1c418cSBarry Smith #undef __FUNCT__
2449ed1c418cSBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_18"
2450ed1c418cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2451ed1c418cSBarry Smith {
2452ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2453ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2454d2840e09SBarry Smith   const PetscScalar *x,*v;
2455d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2456ed1c418cSBarry Smith   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2457ed1c418cSBarry Smith   PetscErrorCode    ierr;
2458d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2459d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
2460ed1c418cSBarry Smith 
2461ed1c418cSBarry Smith   PetscFunctionBegin;
2462d2840e09SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
2463ed1c418cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2464ed1c418cSBarry Smith   idx  = a->j;
2465ed1c418cSBarry Smith   v    = a->a;
2466ed1c418cSBarry Smith   ii   = a->i;
2467ed1c418cSBarry Smith 
2468ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2469ed1c418cSBarry Smith     jrow = ii[i];
2470ed1c418cSBarry Smith     n    = ii[i+1] - jrow;
2471ed1c418cSBarry Smith     sum1  = 0.0;
2472ed1c418cSBarry Smith     sum2  = 0.0;
2473ed1c418cSBarry Smith     sum3  = 0.0;
2474ed1c418cSBarry Smith     sum4  = 0.0;
2475ed1c418cSBarry Smith     sum5  = 0.0;
2476ed1c418cSBarry Smith     sum6  = 0.0;
2477ed1c418cSBarry Smith     sum7  = 0.0;
2478ed1c418cSBarry Smith     sum8  = 0.0;
2479ed1c418cSBarry Smith     sum9  = 0.0;
2480ed1c418cSBarry Smith     sum10 = 0.0;
2481ed1c418cSBarry Smith     sum11 = 0.0;
2482ed1c418cSBarry Smith     sum12 = 0.0;
2483ed1c418cSBarry Smith     sum13 = 0.0;
2484ed1c418cSBarry Smith     sum14 = 0.0;
2485ed1c418cSBarry Smith     sum15 = 0.0;
2486ed1c418cSBarry Smith     sum16 = 0.0;
2487ed1c418cSBarry Smith     sum17 = 0.0;
2488ed1c418cSBarry Smith     sum18 = 0.0;
2489ed1c418cSBarry Smith     nonzerorow += (n>0);
2490ed1c418cSBarry Smith     for (j=0; j<n; j++) {
2491ed1c418cSBarry Smith       sum1  += v[jrow]*x[18*idx[jrow]];
2492ed1c418cSBarry Smith       sum2  += v[jrow]*x[18*idx[jrow]+1];
2493ed1c418cSBarry Smith       sum3  += v[jrow]*x[18*idx[jrow]+2];
2494ed1c418cSBarry Smith       sum4  += v[jrow]*x[18*idx[jrow]+3];
2495ed1c418cSBarry Smith       sum5  += v[jrow]*x[18*idx[jrow]+4];
2496ed1c418cSBarry Smith       sum6  += v[jrow]*x[18*idx[jrow]+5];
2497ed1c418cSBarry Smith       sum7  += v[jrow]*x[18*idx[jrow]+6];
2498ed1c418cSBarry Smith       sum8  += v[jrow]*x[18*idx[jrow]+7];
2499ed1c418cSBarry Smith       sum9  += v[jrow]*x[18*idx[jrow]+8];
2500ed1c418cSBarry Smith       sum10 += v[jrow]*x[18*idx[jrow]+9];
2501ed1c418cSBarry Smith       sum11 += v[jrow]*x[18*idx[jrow]+10];
2502ed1c418cSBarry Smith       sum12 += v[jrow]*x[18*idx[jrow]+11];
2503ed1c418cSBarry Smith       sum13 += v[jrow]*x[18*idx[jrow]+12];
2504ed1c418cSBarry Smith       sum14 += v[jrow]*x[18*idx[jrow]+13];
2505ed1c418cSBarry Smith       sum15 += v[jrow]*x[18*idx[jrow]+14];
2506ed1c418cSBarry Smith       sum16 += v[jrow]*x[18*idx[jrow]+15];
2507ed1c418cSBarry Smith       sum17 += v[jrow]*x[18*idx[jrow]+16];
2508ed1c418cSBarry Smith       sum18 += v[jrow]*x[18*idx[jrow]+17];
2509ed1c418cSBarry Smith       jrow++;
2510ed1c418cSBarry Smith      }
2511ed1c418cSBarry Smith     y[18*i]    = sum1;
2512ed1c418cSBarry Smith     y[18*i+1]  = sum2;
2513ed1c418cSBarry Smith     y[18*i+2]  = sum3;
2514ed1c418cSBarry Smith     y[18*i+3]  = sum4;
2515ed1c418cSBarry Smith     y[18*i+4]  = sum5;
2516ed1c418cSBarry Smith     y[18*i+5]  = sum6;
2517ed1c418cSBarry Smith     y[18*i+6]  = sum7;
2518ed1c418cSBarry Smith     y[18*i+7]  = sum8;
2519ed1c418cSBarry Smith     y[18*i+8]  = sum9;
2520ed1c418cSBarry Smith     y[18*i+9]  = sum10;
2521ed1c418cSBarry Smith     y[18*i+10] = sum11;
2522ed1c418cSBarry Smith     y[18*i+11] = sum12;
2523ed1c418cSBarry Smith     y[18*i+12] = sum13;
2524ed1c418cSBarry Smith     y[18*i+13] = sum14;
2525ed1c418cSBarry Smith     y[18*i+14] = sum15;
2526ed1c418cSBarry Smith     y[18*i+15] = sum16;
2527ed1c418cSBarry Smith     y[18*i+16] = sum17;
2528ed1c418cSBarry Smith     y[18*i+17] = sum18;
2529ed1c418cSBarry Smith   }
2530ed1c418cSBarry Smith 
2531dc0b31edSSatish Balay   ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr);
2532d2840e09SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
2533ed1c418cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2534ed1c418cSBarry Smith   PetscFunctionReturn(0);
2535ed1c418cSBarry Smith }
2536ed1c418cSBarry Smith 
2537ed1c418cSBarry Smith #undef __FUNCT__
2538ed1c418cSBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_18"
2539ed1c418cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2540ed1c418cSBarry Smith {
2541ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2542ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2543d2840e09SBarry Smith   const PetscScalar *x,*v;
2544d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2545ed1c418cSBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2546ed1c418cSBarry Smith   PetscErrorCode    ierr;
2547d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2548d2840e09SBarry Smith   PetscInt          n,i;
2549ed1c418cSBarry Smith 
2550ed1c418cSBarry Smith   PetscFunctionBegin;
2551d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2552d2840e09SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
2553ed1c418cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2554ed1c418cSBarry Smith 
2555ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2556ed1c418cSBarry Smith     idx    = a->j + a->i[i] ;
2557ed1c418cSBarry Smith     v      = a->a + a->i[i] ;
2558ed1c418cSBarry Smith     n      = a->i[i+1] - a->i[i];
2559ed1c418cSBarry Smith     alpha1  = x[18*i];
2560ed1c418cSBarry Smith     alpha2  = x[18*i+1];
2561ed1c418cSBarry Smith     alpha3  = x[18*i+2];
2562ed1c418cSBarry Smith     alpha4  = x[18*i+3];
2563ed1c418cSBarry Smith     alpha5  = x[18*i+4];
2564ed1c418cSBarry Smith     alpha6  = x[18*i+5];
2565ed1c418cSBarry Smith     alpha7  = x[18*i+6];
2566ed1c418cSBarry Smith     alpha8  = x[18*i+7];
2567ed1c418cSBarry Smith     alpha9  = x[18*i+8];
2568ed1c418cSBarry Smith     alpha10 = x[18*i+9];
2569ed1c418cSBarry Smith     alpha11 = x[18*i+10];
2570ed1c418cSBarry Smith     alpha12 = x[18*i+11];
2571ed1c418cSBarry Smith     alpha13 = x[18*i+12];
2572ed1c418cSBarry Smith     alpha14 = x[18*i+13];
2573ed1c418cSBarry Smith     alpha15 = x[18*i+14];
2574ed1c418cSBarry Smith     alpha16 = x[18*i+15];
2575ed1c418cSBarry Smith     alpha17 = x[18*i+16];
2576ed1c418cSBarry Smith     alpha18 = x[18*i+17];
2577ed1c418cSBarry Smith     while (n-->0) {
2578ed1c418cSBarry Smith       y[18*(*idx)]    += alpha1*(*v);
2579ed1c418cSBarry Smith       y[18*(*idx)+1]  += alpha2*(*v);
2580ed1c418cSBarry Smith       y[18*(*idx)+2]  += alpha3*(*v);
2581ed1c418cSBarry Smith       y[18*(*idx)+3]  += alpha4*(*v);
2582ed1c418cSBarry Smith       y[18*(*idx)+4]  += alpha5*(*v);
2583ed1c418cSBarry Smith       y[18*(*idx)+5]  += alpha6*(*v);
2584ed1c418cSBarry Smith       y[18*(*idx)+6]  += alpha7*(*v);
2585ed1c418cSBarry Smith       y[18*(*idx)+7]  += alpha8*(*v);
2586ed1c418cSBarry Smith       y[18*(*idx)+8]  += alpha9*(*v);
2587ed1c418cSBarry Smith       y[18*(*idx)+9]  += alpha10*(*v);
2588ed1c418cSBarry Smith       y[18*(*idx)+10] += alpha11*(*v);
2589ed1c418cSBarry Smith       y[18*(*idx)+11] += alpha12*(*v);
2590ed1c418cSBarry Smith       y[18*(*idx)+12] += alpha13*(*v);
2591ed1c418cSBarry Smith       y[18*(*idx)+13] += alpha14*(*v);
2592ed1c418cSBarry Smith       y[18*(*idx)+14] += alpha15*(*v);
2593ed1c418cSBarry Smith       y[18*(*idx)+15] += alpha16*(*v);
2594ed1c418cSBarry Smith       y[18*(*idx)+16] += alpha17*(*v);
2595ed1c418cSBarry Smith       y[18*(*idx)+17] += alpha18*(*v);
2596ed1c418cSBarry Smith       idx++; v++;
2597ed1c418cSBarry Smith     }
2598ed1c418cSBarry Smith   }
2599dc0b31edSSatish Balay   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
2600d2840e09SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
2601ed1c418cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2602ed1c418cSBarry Smith   PetscFunctionReturn(0);
2603ed1c418cSBarry Smith }
2604ed1c418cSBarry Smith 
2605ed1c418cSBarry Smith #undef __FUNCT__
2606ed1c418cSBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_18"
2607ed1c418cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2608ed1c418cSBarry Smith {
2609ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2610ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2611d2840e09SBarry Smith   const PetscScalar *x,*v;
2612d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2613ed1c418cSBarry Smith   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2614ed1c418cSBarry Smith   PetscErrorCode    ierr;
2615d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2616ed1c418cSBarry Smith   PetscInt          n,i,jrow,j;
2617ed1c418cSBarry Smith 
2618ed1c418cSBarry Smith   PetscFunctionBegin;
2619ed1c418cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2620d2840e09SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
2621ed1c418cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2622ed1c418cSBarry Smith   idx  = a->j;
2623ed1c418cSBarry Smith   v    = a->a;
2624ed1c418cSBarry Smith   ii   = a->i;
2625ed1c418cSBarry Smith 
2626ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2627ed1c418cSBarry Smith     jrow = ii[i];
2628ed1c418cSBarry Smith     n    = ii[i+1] - jrow;
2629ed1c418cSBarry Smith     sum1  = 0.0;
2630ed1c418cSBarry Smith     sum2  = 0.0;
2631ed1c418cSBarry Smith     sum3  = 0.0;
2632ed1c418cSBarry Smith     sum4  = 0.0;
2633ed1c418cSBarry Smith     sum5  = 0.0;
2634ed1c418cSBarry Smith     sum6  = 0.0;
2635ed1c418cSBarry Smith     sum7  = 0.0;
2636ed1c418cSBarry Smith     sum8  = 0.0;
2637ed1c418cSBarry Smith     sum9  = 0.0;
2638ed1c418cSBarry Smith     sum10 = 0.0;
2639ed1c418cSBarry Smith     sum11 = 0.0;
2640ed1c418cSBarry Smith     sum12 = 0.0;
2641ed1c418cSBarry Smith     sum13 = 0.0;
2642ed1c418cSBarry Smith     sum14 = 0.0;
2643ed1c418cSBarry Smith     sum15 = 0.0;
2644ed1c418cSBarry Smith     sum16 = 0.0;
2645ed1c418cSBarry Smith     sum17 = 0.0;
2646ed1c418cSBarry Smith     sum18 = 0.0;
2647ed1c418cSBarry Smith     for (j=0; j<n; j++) {
2648ed1c418cSBarry Smith       sum1  += v[jrow]*x[18*idx[jrow]];
2649ed1c418cSBarry Smith       sum2  += v[jrow]*x[18*idx[jrow]+1];
2650ed1c418cSBarry Smith       sum3  += v[jrow]*x[18*idx[jrow]+2];
2651ed1c418cSBarry Smith       sum4  += v[jrow]*x[18*idx[jrow]+3];
2652ed1c418cSBarry Smith       sum5  += v[jrow]*x[18*idx[jrow]+4];
2653ed1c418cSBarry Smith       sum6  += v[jrow]*x[18*idx[jrow]+5];
2654ed1c418cSBarry Smith       sum7  += v[jrow]*x[18*idx[jrow]+6];
2655ed1c418cSBarry Smith       sum8  += v[jrow]*x[18*idx[jrow]+7];
2656ed1c418cSBarry Smith       sum9  += v[jrow]*x[18*idx[jrow]+8];
2657ed1c418cSBarry Smith       sum10 += v[jrow]*x[18*idx[jrow]+9];
2658ed1c418cSBarry Smith       sum11 += v[jrow]*x[18*idx[jrow]+10];
2659ed1c418cSBarry Smith       sum12 += v[jrow]*x[18*idx[jrow]+11];
2660ed1c418cSBarry Smith       sum13 += v[jrow]*x[18*idx[jrow]+12];
2661ed1c418cSBarry Smith       sum14 += v[jrow]*x[18*idx[jrow]+13];
2662ed1c418cSBarry Smith       sum15 += v[jrow]*x[18*idx[jrow]+14];
2663ed1c418cSBarry Smith       sum16 += v[jrow]*x[18*idx[jrow]+15];
2664ed1c418cSBarry Smith       sum17 += v[jrow]*x[18*idx[jrow]+16];
2665ed1c418cSBarry Smith       sum18 += v[jrow]*x[18*idx[jrow]+17];
2666ed1c418cSBarry Smith       jrow++;
2667ed1c418cSBarry Smith      }
2668ed1c418cSBarry Smith     y[18*i]    += sum1;
2669ed1c418cSBarry Smith     y[18*i+1]  += sum2;
2670ed1c418cSBarry Smith     y[18*i+2]  += sum3;
2671ed1c418cSBarry Smith     y[18*i+3]  += sum4;
2672ed1c418cSBarry Smith     y[18*i+4]  += sum5;
2673ed1c418cSBarry Smith     y[18*i+5]  += sum6;
2674ed1c418cSBarry Smith     y[18*i+6]  += sum7;
2675ed1c418cSBarry Smith     y[18*i+7]  += sum8;
2676ed1c418cSBarry Smith     y[18*i+8]  += sum9;
2677ed1c418cSBarry Smith     y[18*i+9]  += sum10;
2678ed1c418cSBarry Smith     y[18*i+10] += sum11;
2679ed1c418cSBarry Smith     y[18*i+11] += sum12;
2680ed1c418cSBarry Smith     y[18*i+12] += sum13;
2681ed1c418cSBarry Smith     y[18*i+13] += sum14;
2682ed1c418cSBarry Smith     y[18*i+14] += sum15;
2683ed1c418cSBarry Smith     y[18*i+15] += sum16;
2684ed1c418cSBarry Smith     y[18*i+16] += sum17;
2685ed1c418cSBarry Smith     y[18*i+17] += sum18;
2686ed1c418cSBarry Smith   }
2687ed1c418cSBarry Smith 
2688dc0b31edSSatish Balay   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
2689d2840e09SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
2690ed1c418cSBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2691ed1c418cSBarry Smith   PetscFunctionReturn(0);
2692ed1c418cSBarry Smith }
2693ed1c418cSBarry Smith 
2694ed1c418cSBarry Smith #undef __FUNCT__
2695ed1c418cSBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_18"
2696ed1c418cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2697ed1c418cSBarry Smith {
2698ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2699ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2700d2840e09SBarry Smith   const PetscScalar *x,*v;
2701d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2702ed1c418cSBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2703ed1c418cSBarry Smith   PetscErrorCode    ierr;
2704d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2705d2840e09SBarry Smith   PetscInt          n,i;
2706ed1c418cSBarry Smith 
2707ed1c418cSBarry Smith   PetscFunctionBegin;
2708ed1c418cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2709d2840e09SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
2710ed1c418cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2711ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2712ed1c418cSBarry Smith     idx    = a->j + a->i[i] ;
2713ed1c418cSBarry Smith     v      = a->a + a->i[i] ;
2714ed1c418cSBarry Smith     n      = a->i[i+1] - a->i[i];
2715ed1c418cSBarry Smith     alpha1 = x[18*i];
2716ed1c418cSBarry Smith     alpha2 = x[18*i+1];
2717ed1c418cSBarry Smith     alpha3 = x[18*i+2];
2718ed1c418cSBarry Smith     alpha4 = x[18*i+3];
2719ed1c418cSBarry Smith     alpha5 = x[18*i+4];
2720ed1c418cSBarry Smith     alpha6 = x[18*i+5];
2721ed1c418cSBarry Smith     alpha7 = x[18*i+6];
2722ed1c418cSBarry Smith     alpha8 = x[18*i+7];
2723ed1c418cSBarry Smith     alpha9  = x[18*i+8];
2724ed1c418cSBarry Smith     alpha10 = x[18*i+9];
2725ed1c418cSBarry Smith     alpha11 = x[18*i+10];
2726ed1c418cSBarry Smith     alpha12 = x[18*i+11];
2727ed1c418cSBarry Smith     alpha13 = x[18*i+12];
2728ed1c418cSBarry Smith     alpha14 = x[18*i+13];
2729ed1c418cSBarry Smith     alpha15 = x[18*i+14];
2730ed1c418cSBarry Smith     alpha16 = x[18*i+15];
2731ed1c418cSBarry Smith     alpha17 = x[18*i+16];
2732ed1c418cSBarry Smith     alpha18 = x[18*i+17];
2733ed1c418cSBarry Smith     while (n-->0) {
2734ed1c418cSBarry Smith       y[18*(*idx)]   += alpha1*(*v);
2735ed1c418cSBarry Smith       y[18*(*idx)+1] += alpha2*(*v);
2736ed1c418cSBarry Smith       y[18*(*idx)+2] += alpha3*(*v);
2737ed1c418cSBarry Smith       y[18*(*idx)+3] += alpha4*(*v);
2738ed1c418cSBarry Smith       y[18*(*idx)+4] += alpha5*(*v);
2739ed1c418cSBarry Smith       y[18*(*idx)+5] += alpha6*(*v);
2740ed1c418cSBarry Smith       y[18*(*idx)+6] += alpha7*(*v);
2741ed1c418cSBarry Smith       y[18*(*idx)+7] += alpha8*(*v);
2742ed1c418cSBarry Smith       y[18*(*idx)+8]  += alpha9*(*v);
2743ed1c418cSBarry Smith       y[18*(*idx)+9]  += alpha10*(*v);
2744ed1c418cSBarry Smith       y[18*(*idx)+10] += alpha11*(*v);
2745ed1c418cSBarry Smith       y[18*(*idx)+11] += alpha12*(*v);
2746ed1c418cSBarry Smith       y[18*(*idx)+12] += alpha13*(*v);
2747ed1c418cSBarry Smith       y[18*(*idx)+13] += alpha14*(*v);
2748ed1c418cSBarry Smith       y[18*(*idx)+14] += alpha15*(*v);
2749ed1c418cSBarry Smith       y[18*(*idx)+15] += alpha16*(*v);
2750ed1c418cSBarry Smith       y[18*(*idx)+16] += alpha17*(*v);
2751ed1c418cSBarry Smith       y[18*(*idx)+17] += alpha18*(*v);
2752ed1c418cSBarry Smith       idx++; v++;
2753ed1c418cSBarry Smith     }
2754ed1c418cSBarry Smith   }
2755dc0b31edSSatish Balay   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
2756d2840e09SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
2757ed1c418cSBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2758ed1c418cSBarry Smith   PetscFunctionReturn(0);
2759ed1c418cSBarry Smith }
2760ed1c418cSBarry Smith 
27616861f193SBarry Smith #undef __FUNCT__
27626861f193SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_N"
27636861f193SBarry Smith PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
27646861f193SBarry Smith {
27656861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
27666861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
27676861f193SBarry Smith   const PetscScalar *x,*v;
27686861f193SBarry Smith   PetscScalar       *y,*sums;
27696861f193SBarry Smith   PetscErrorCode    ierr;
27706861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
27716861f193SBarry Smith   PetscInt          n,i,jrow,j,dof = b->dof,k;
27726861f193SBarry Smith 
27736861f193SBarry Smith   PetscFunctionBegin;
27746861f193SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
27756861f193SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
27766861f193SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
27776861f193SBarry Smith   idx  = a->j;
27786861f193SBarry Smith   v    = a->a;
27796861f193SBarry Smith   ii   = a->i;
27806861f193SBarry Smith 
27816861f193SBarry Smith   for (i=0; i<m; i++) {
27826861f193SBarry Smith     jrow = ii[i];
27836861f193SBarry Smith     n    = ii[i+1] - jrow;
27846861f193SBarry Smith     sums = y + dof*i;
27856861f193SBarry Smith     for (j=0; j<n; j++) {
27866861f193SBarry Smith       for (k=0; k<dof; k++) {
27876861f193SBarry Smith         sums[k]  += v[jrow]*x[dof*idx[jrow]+k];
27886861f193SBarry Smith       }
27896861f193SBarry Smith       jrow++;
27906861f193SBarry Smith     }
27916861f193SBarry Smith   }
27926861f193SBarry Smith 
27936861f193SBarry Smith   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
27946861f193SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
27956861f193SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
27966861f193SBarry Smith   PetscFunctionReturn(0);
27976861f193SBarry Smith }
27986861f193SBarry Smith 
27996861f193SBarry Smith #undef __FUNCT__
28006861f193SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_N"
28016861f193SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
28026861f193SBarry Smith {
28036861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
28046861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
28056861f193SBarry Smith   const PetscScalar *x,*v;
28066861f193SBarry Smith   PetscScalar       *y,*sums;
28076861f193SBarry Smith   PetscErrorCode    ierr;
28086861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
28096861f193SBarry Smith   PetscInt          n,i,jrow,j,dof = b->dof,k;
28106861f193SBarry Smith 
28116861f193SBarry Smith   PetscFunctionBegin;
28126861f193SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
28136861f193SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
28146861f193SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
28156861f193SBarry Smith   idx  = a->j;
28166861f193SBarry Smith   v    = a->a;
28176861f193SBarry Smith   ii   = a->i;
28186861f193SBarry Smith 
28196861f193SBarry Smith   for (i=0; i<m; i++) {
28206861f193SBarry Smith     jrow = ii[i];
28216861f193SBarry Smith     n    = ii[i+1] - jrow;
28226861f193SBarry Smith     sums = y + dof*i;
28236861f193SBarry Smith     for (j=0; j<n; j++) {
28246861f193SBarry Smith       for (k=0; k<dof; k++) {
28256861f193SBarry Smith         sums[k]  += v[jrow]*x[dof*idx[jrow]+k];
28266861f193SBarry Smith       }
28276861f193SBarry Smith       jrow++;
28286861f193SBarry Smith     }
28296861f193SBarry Smith   }
28306861f193SBarry Smith 
28316861f193SBarry Smith   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
28326861f193SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
28336861f193SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
28346861f193SBarry Smith   PetscFunctionReturn(0);
28356861f193SBarry Smith }
28366861f193SBarry Smith 
28376861f193SBarry Smith #undef __FUNCT__
28386861f193SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_N"
28396861f193SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
28406861f193SBarry Smith {
28416861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
28426861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
28436861f193SBarry Smith   const PetscScalar *x,*v,*alpha;
28446861f193SBarry Smith   PetscScalar       *y;
28456861f193SBarry Smith   PetscErrorCode    ierr;
28466861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
28476861f193SBarry Smith   PetscInt          n,i,k;
28486861f193SBarry Smith 
28496861f193SBarry Smith   PetscFunctionBegin;
28506861f193SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
28516861f193SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
28526861f193SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
28536861f193SBarry Smith   for (i=0; i<m; i++) {
28546861f193SBarry Smith     idx    = a->j + a->i[i] ;
28556861f193SBarry Smith     v      = a->a + a->i[i] ;
28566861f193SBarry Smith     n      = a->i[i+1] - a->i[i];
28576861f193SBarry Smith     alpha  = x + dof*i;
28586861f193SBarry Smith     while (n-->0) {
28596861f193SBarry Smith       for (k=0; k<dof; k++) {
28606861f193SBarry Smith         y[dof*(*idx)+k] += alpha[k]*(*v);
28616861f193SBarry Smith       }
286283ce7366SBarry Smith       idx++; v++;
28636861f193SBarry Smith     }
28646861f193SBarry Smith   }
28656861f193SBarry Smith   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
28666861f193SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
28676861f193SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
28686861f193SBarry Smith   PetscFunctionReturn(0);
28696861f193SBarry Smith }
28706861f193SBarry Smith 
28716861f193SBarry Smith #undef __FUNCT__
28726861f193SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_N"
28736861f193SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
28746861f193SBarry Smith {
28756861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
28766861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
28776861f193SBarry Smith   const PetscScalar *x,*v,*alpha;
28786861f193SBarry Smith   PetscScalar       *y;
28796861f193SBarry Smith   PetscErrorCode    ierr;
28806861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
28816861f193SBarry Smith   PetscInt          n,i,k;
28826861f193SBarry Smith 
28836861f193SBarry Smith   PetscFunctionBegin;
28846861f193SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
28856861f193SBarry Smith   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
28866861f193SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
28876861f193SBarry Smith   for (i=0; i<m; i++) {
28886861f193SBarry Smith     idx    = a->j + a->i[i] ;
28896861f193SBarry Smith     v      = a->a + a->i[i] ;
28906861f193SBarry Smith     n      = a->i[i+1] - a->i[i];
28916861f193SBarry Smith     alpha  = x + dof*i;
28926861f193SBarry Smith     while (n-->0) {
28936861f193SBarry Smith       for (k=0; k<dof; k++) {
28946861f193SBarry Smith         y[dof*(*idx)+k] += alpha[k]*(*v);
28956861f193SBarry Smith       }
289683ce7366SBarry Smith       idx++; v++;
28976861f193SBarry Smith     }
28986861f193SBarry Smith   }
28996861f193SBarry Smith   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
29006861f193SBarry Smith   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
29016861f193SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
29026861f193SBarry Smith   PetscFunctionReturn(0);
29036861f193SBarry Smith }
29046861f193SBarry Smith 
2905f4a53059SBarry Smith /*===================================================================================*/
29064a2ae208SSatish Balay #undef __FUNCT__
29074a2ae208SSatish Balay #define __FUNCT__ "MatMult_MPIMAIJ_dof"
2908dfbe8321SBarry Smith PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2909f4a53059SBarry Smith {
29104cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2911dfbe8321SBarry Smith   PetscErrorCode ierr;
2912f4a53059SBarry Smith 
2913b24ad042SBarry Smith   PetscFunctionBegin;
2914f4a53059SBarry Smith   /* start the scatter */
2915ca9f406cSSatish Balay   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
29164cb9d9b8SBarry Smith   ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr);
2917ca9f406cSSatish Balay   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
29184cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr);
2919f4a53059SBarry Smith   PetscFunctionReturn(0);
2920f4a53059SBarry Smith }
2921f4a53059SBarry Smith 
29224a2ae208SSatish Balay #undef __FUNCT__
29234a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof"
2924dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
29254cb9d9b8SBarry Smith {
29264cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2927dfbe8321SBarry Smith   PetscErrorCode ierr;
2928b24ad042SBarry Smith 
29294cb9d9b8SBarry Smith   PetscFunctionBegin;
29304cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
29314cb9d9b8SBarry Smith   ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr);
2932ca9f406cSSatish Balay   ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2933ca9f406cSSatish Balay   ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
29344cb9d9b8SBarry Smith   PetscFunctionReturn(0);
29354cb9d9b8SBarry Smith }
29364cb9d9b8SBarry Smith 
29374a2ae208SSatish Balay #undef __FUNCT__
29384a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof"
2939dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
29404cb9d9b8SBarry Smith {
29414cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2942dfbe8321SBarry Smith   PetscErrorCode ierr;
29434cb9d9b8SBarry Smith 
2944b24ad042SBarry Smith   PetscFunctionBegin;
29454cb9d9b8SBarry Smith   /* start the scatter */
2946ca9f406cSSatish Balay   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2947d72c5c08SBarry Smith   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2948ca9f406cSSatish Balay   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2949717f2ec8SHong Zhang   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
29504cb9d9b8SBarry Smith   PetscFunctionReturn(0);
29514cb9d9b8SBarry Smith }
29524cb9d9b8SBarry Smith 
29534a2ae208SSatish Balay #undef __FUNCT__
29544a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof"
2955dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
29564cb9d9b8SBarry Smith {
29574cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2958dfbe8321SBarry Smith   PetscErrorCode ierr;
2959b24ad042SBarry Smith 
29604cb9d9b8SBarry Smith   PetscFunctionBegin;
29614cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2962ca9f406cSSatish Balay   ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2963d72c5c08SBarry Smith   ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2964ca9f406cSSatish Balay   ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
29654cb9d9b8SBarry Smith   PetscFunctionReturn(0);
29664cb9d9b8SBarry Smith }
29674cb9d9b8SBarry Smith 
296895ddefa2SHong Zhang /* ----------------------------------------------------------------*/
29699c4fc4c7SBarry Smith #undef __FUNCT__
29707ba1a0bfSKris Buschelman #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
29717ba1a0bfSKris Buschelman PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
29727ba1a0bfSKris Buschelman {
29737ba1a0bfSKris Buschelman   /* This routine requires testing -- but it's getting better. */
29747ba1a0bfSKris Buschelman   PetscErrorCode     ierr;
2975a1a86e44SBarry Smith   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
29767ba1a0bfSKris Buschelman   Mat_SeqMAIJ        *pp=(Mat_SeqMAIJ*)PP->data;
29777ba1a0bfSKris Buschelman   Mat                P=pp->AIJ;
29787ba1a0bfSKris Buschelman   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2979d2840e09SBarry Smith   PetscInt           *pti,*ptj,*ptJ;
29807ba1a0bfSKris Buschelman   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2981d2840e09SBarry Smith   const PetscInt     an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof;
2982d2840e09SBarry Smith   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn;
29837ba1a0bfSKris Buschelman   MatScalar          *ca;
2984d2840e09SBarry Smith   const PetscInt     *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj;
29857ba1a0bfSKris Buschelman 
29867ba1a0bfSKris Buschelman   PetscFunctionBegin;
29877ba1a0bfSKris Buschelman   /* Start timer */
29887ba1a0bfSKris Buschelman   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
29897ba1a0bfSKris Buschelman 
29907ba1a0bfSKris Buschelman   /* Get ij structure of P^T */
29917ba1a0bfSKris Buschelman   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
29927ba1a0bfSKris Buschelman 
29937ba1a0bfSKris Buschelman   cn = pn*ppdof;
29947ba1a0bfSKris Buschelman   /* Allocate ci array, arrays for fill computation and */
29957ba1a0bfSKris Buschelman   /* free space for accumulating nonzero column info */
29967ba1a0bfSKris Buschelman   ierr = PetscMalloc((cn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
29977ba1a0bfSKris Buschelman   ci[0] = 0;
29987ba1a0bfSKris Buschelman 
29997ba1a0bfSKris Buschelman   /* Work arrays for rows of P^T*A */
300074ed9c26SBarry Smith   ierr = PetscMalloc4(an,PetscInt,&ptadenserow,an,PetscInt,&ptasparserow,cn,PetscInt,&denserow,cn,PetscInt,&sparserow);CHKERRQ(ierr);
3001c43dabc9SBarry Smith   ierr = PetscMemzero(ptadenserow,an*sizeof(PetscInt));CHKERRQ(ierr);
3002c43dabc9SBarry Smith   ierr = PetscMemzero(denserow,cn*sizeof(PetscInt));CHKERRQ(ierr);
30037ba1a0bfSKris Buschelman 
30047ba1a0bfSKris Buschelman   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
30057ba1a0bfSKris Buschelman   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
30067ba1a0bfSKris Buschelman   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
3007a1a86e44SBarry Smith   ierr          = PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space);
30087ba1a0bfSKris Buschelman   current_space = free_space;
30097ba1a0bfSKris Buschelman 
30107ba1a0bfSKris Buschelman   /* Determine symbolic info for each row of C: */
30117ba1a0bfSKris Buschelman   for (i=0;i<pn;i++) {
30127ba1a0bfSKris Buschelman     ptnzi  = pti[i+1] - pti[i];
30137ba1a0bfSKris Buschelman     ptJ    = ptj + pti[i];
30147ba1a0bfSKris Buschelman     for (dof=0;dof<ppdof;dof++) {
30157ba1a0bfSKris Buschelman       ptanzi = 0;
30167ba1a0bfSKris Buschelman       /* Determine symbolic row of PtA: */
30177ba1a0bfSKris Buschelman       for (j=0;j<ptnzi;j++) {
30187ba1a0bfSKris Buschelman         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
30197ba1a0bfSKris Buschelman         arow = ptJ[j]*ppdof + dof;
30207ba1a0bfSKris Buschelman         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
30217ba1a0bfSKris Buschelman         anzj = ai[arow+1] - ai[arow];
30227ba1a0bfSKris Buschelman         ajj  = aj + ai[arow];
30237ba1a0bfSKris Buschelman         for (k=0;k<anzj;k++) {
30247ba1a0bfSKris Buschelman           if (!ptadenserow[ajj[k]]) {
30257ba1a0bfSKris Buschelman             ptadenserow[ajj[k]]    = -1;
30267ba1a0bfSKris Buschelman             ptasparserow[ptanzi++] = ajj[k];
30277ba1a0bfSKris Buschelman           }
30287ba1a0bfSKris Buschelman         }
30297ba1a0bfSKris Buschelman       }
30307ba1a0bfSKris Buschelman       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
30317ba1a0bfSKris Buschelman       ptaj = ptasparserow;
30327ba1a0bfSKris Buschelman       cnzi   = 0;
30337ba1a0bfSKris Buschelman       for (j=0;j<ptanzi;j++) {
30347ba1a0bfSKris Buschelman         /* Get offset within block of P */
30357ba1a0bfSKris Buschelman         pshift = *ptaj%ppdof;
30367ba1a0bfSKris Buschelman         /* Get block row of P */
30377ba1a0bfSKris Buschelman         prow = (*ptaj++)/ppdof; /* integer division */
30387ba1a0bfSKris Buschelman         /* P has same number of nonzeros per row as the compressed form */
30397ba1a0bfSKris Buschelman         pnzj = pi[prow+1] - pi[prow];
30407ba1a0bfSKris Buschelman         pjj  = pj + pi[prow];
30417ba1a0bfSKris Buschelman         for (k=0;k<pnzj;k++) {
30427ba1a0bfSKris Buschelman           /* Locations in C are shifted by the offset within the block */
30437ba1a0bfSKris Buschelman           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
30447ba1a0bfSKris Buschelman           if (!denserow[pjj[k]*ppdof+pshift]) {
30457ba1a0bfSKris Buschelman             denserow[pjj[k]*ppdof+pshift] = -1;
30467ba1a0bfSKris Buschelman             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
30477ba1a0bfSKris Buschelman           }
30487ba1a0bfSKris Buschelman         }
30497ba1a0bfSKris Buschelman       }
30507ba1a0bfSKris Buschelman 
30517ba1a0bfSKris Buschelman       /* sort sparserow */
30527ba1a0bfSKris Buschelman       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
30537ba1a0bfSKris Buschelman 
30547ba1a0bfSKris Buschelman       /* If free space is not available, make more free space */
30557ba1a0bfSKris Buschelman       /* Double the amount of total space in the list */
30567ba1a0bfSKris Buschelman       if (current_space->local_remaining<cnzi) {
30574238b7adSHong Zhang         ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
30587ba1a0bfSKris Buschelman       }
30597ba1a0bfSKris Buschelman 
30607ba1a0bfSKris Buschelman       /* Copy data into free space, and zero out denserows */
30617ba1a0bfSKris Buschelman       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
30627ba1a0bfSKris Buschelman       current_space->array           += cnzi;
30637ba1a0bfSKris Buschelman       current_space->local_used      += cnzi;
30647ba1a0bfSKris Buschelman       current_space->local_remaining -= cnzi;
30657ba1a0bfSKris Buschelman 
30667ba1a0bfSKris Buschelman       for (j=0;j<ptanzi;j++) {
30677ba1a0bfSKris Buschelman         ptadenserow[ptasparserow[j]] = 0;
30687ba1a0bfSKris Buschelman       }
30697ba1a0bfSKris Buschelman       for (j=0;j<cnzi;j++) {
30707ba1a0bfSKris Buschelman         denserow[sparserow[j]] = 0;
30717ba1a0bfSKris Buschelman       }
30727ba1a0bfSKris Buschelman       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
30737ba1a0bfSKris Buschelman       /*        For now, we will recompute what is needed. */
30747ba1a0bfSKris Buschelman       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
30757ba1a0bfSKris Buschelman     }
30767ba1a0bfSKris Buschelman   }
30777ba1a0bfSKris Buschelman   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
30787ba1a0bfSKris Buschelman   /* Allocate space for cj, initialize cj, and */
30797ba1a0bfSKris Buschelman   /* destroy list of free space and other temporary array(s) */
30807ba1a0bfSKris Buschelman   ierr = PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3081a1a86e44SBarry Smith   ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
308274ed9c26SBarry Smith   ierr = PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);CHKERRQ(ierr);
30837ba1a0bfSKris Buschelman 
30847ba1a0bfSKris Buschelman   /* Allocate space for ca */
30857ba1a0bfSKris Buschelman   ierr = PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
30867ba1a0bfSKris Buschelman   ierr = PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
30877ba1a0bfSKris Buschelman 
30887ba1a0bfSKris Buschelman   /* put together the new matrix */
30897adad957SLisandro Dalcin   ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,cn,cn,ci,cj,ca,C);CHKERRQ(ierr);
30907ba1a0bfSKris Buschelman 
30917ba1a0bfSKris Buschelman   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
30927ba1a0bfSKris Buschelman   /* Since these are PETSc arrays, change flags to free them as necessary. */
30937ba1a0bfSKris Buschelman   c          = (Mat_SeqAIJ *)((*C)->data);
3094e6b907acSBarry Smith   c->free_a  = PETSC_TRUE;
3095e6b907acSBarry Smith   c->free_ij = PETSC_TRUE;
30967ba1a0bfSKris Buschelman   c->nonew   = 0;
30977ba1a0bfSKris Buschelman 
30987ba1a0bfSKris Buschelman   /* Clean up. */
30997ba1a0bfSKris Buschelman   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
31007ba1a0bfSKris Buschelman 
31017ba1a0bfSKris Buschelman   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
31027ba1a0bfSKris Buschelman   PetscFunctionReturn(0);
31037ba1a0bfSKris Buschelman }
31047ba1a0bfSKris Buschelman 
31057ba1a0bfSKris Buschelman #undef __FUNCT__
31067ba1a0bfSKris Buschelman #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ"
31077ba1a0bfSKris Buschelman PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
31087ba1a0bfSKris Buschelman {
31097ba1a0bfSKris Buschelman   /* This routine requires testing -- first draft only */
31107ba1a0bfSKris Buschelman   PetscErrorCode  ierr;
31117ba1a0bfSKris Buschelman   Mat_SeqMAIJ     *pp=(Mat_SeqMAIJ*)PP->data;
31127ba1a0bfSKris Buschelman   Mat             P=pp->AIJ;
31137ba1a0bfSKris Buschelman   Mat_SeqAIJ      *a  = (Mat_SeqAIJ *) A->data;
31147ba1a0bfSKris Buschelman   Mat_SeqAIJ      *p  = (Mat_SeqAIJ *) P->data;
31157ba1a0bfSKris Buschelman   Mat_SeqAIJ      *c  = (Mat_SeqAIJ *) C->data;
3116d2840e09SBarry Smith   const PetscInt  *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
3117d2840e09SBarry Smith   const PetscInt  *ci=c->i,*cj=c->j,*cjj;
3118d2840e09SBarry Smith   const PetscInt  am=A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
3119d2840e09SBarry Smith   PetscInt        i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense;
3120d2840e09SBarry Smith   const MatScalar *aa=a->a,*pa=p->a,*pA=p->a,*paj;
3121d2840e09SBarry Smith   MatScalar       *ca=c->a,*caj,*apa;
31227ba1a0bfSKris Buschelman 
31237ba1a0bfSKris Buschelman   PetscFunctionBegin;
31247ba1a0bfSKris Buschelman   /* Allocate temporary array for storage of one row of A*P */
312574ed9c26SBarry Smith   ierr = PetscMalloc3(cn,MatScalar,&apa,cn,PetscInt,&apj,cn,PetscInt,&apjdense);CHKERRQ(ierr);
312674ed9c26SBarry Smith   ierr = PetscMemzero(apa,cn*sizeof(MatScalar));CHKERRQ(ierr);
312774ed9c26SBarry Smith   ierr = PetscMemzero(apj,cn*sizeof(PetscInt));CHKERRQ(ierr);
312874ed9c26SBarry Smith   ierr = PetscMemzero(apjdense,cn*sizeof(PetscInt));CHKERRQ(ierr);
31297ba1a0bfSKris Buschelman 
31307ba1a0bfSKris Buschelman   /* Clear old values in C */
31317ba1a0bfSKris Buschelman   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
31327ba1a0bfSKris Buschelman 
31337ba1a0bfSKris Buschelman   for (i=0;i<am;i++) {
31347ba1a0bfSKris Buschelman     /* Form sparse row of A*P */
31357ba1a0bfSKris Buschelman     anzi  = ai[i+1] - ai[i];
31367ba1a0bfSKris Buschelman     apnzj = 0;
31377ba1a0bfSKris Buschelman     for (j=0;j<anzi;j++) {
31387ba1a0bfSKris Buschelman       /* Get offset within block of P */
31397ba1a0bfSKris Buschelman       pshift = *aj%ppdof;
31407ba1a0bfSKris Buschelman       /* Get block row of P */
31417ba1a0bfSKris Buschelman       prow   = *aj++/ppdof; /* integer division */
31427ba1a0bfSKris Buschelman       pnzj = pi[prow+1] - pi[prow];
31437ba1a0bfSKris Buschelman       pjj  = pj + pi[prow];
31447ba1a0bfSKris Buschelman       paj  = pa + pi[prow];
31457ba1a0bfSKris Buschelman       for (k=0;k<pnzj;k++) {
31467ba1a0bfSKris Buschelman         poffset = pjj[k]*ppdof+pshift;
31477ba1a0bfSKris Buschelman         if (!apjdense[poffset]) {
31487ba1a0bfSKris Buschelman           apjdense[poffset] = -1;
31497ba1a0bfSKris Buschelman           apj[apnzj++]      = poffset;
31507ba1a0bfSKris Buschelman         }
31517ba1a0bfSKris Buschelman         apa[poffset] += (*aa)*paj[k];
31527ba1a0bfSKris Buschelman       }
3153dc0b31edSSatish Balay       ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr);
31547ba1a0bfSKris Buschelman       aa++;
31557ba1a0bfSKris Buschelman     }
31567ba1a0bfSKris Buschelman 
31577ba1a0bfSKris Buschelman     /* Sort the j index array for quick sparse axpy. */
31587ba1a0bfSKris Buschelman     /* Note: a array does not need sorting as it is in dense storage locations. */
31597ba1a0bfSKris Buschelman     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
31607ba1a0bfSKris Buschelman 
31617ba1a0bfSKris Buschelman     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
31627ba1a0bfSKris Buschelman     prow    = i/ppdof; /* integer division */
31637ba1a0bfSKris Buschelman     pshift  = i%ppdof;
31647ba1a0bfSKris Buschelman     poffset = pi[prow];
31657ba1a0bfSKris Buschelman     pnzi = pi[prow+1] - poffset;
31667ba1a0bfSKris Buschelman     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
31677ba1a0bfSKris Buschelman     pJ   = pj+poffset;
31687ba1a0bfSKris Buschelman     pA   = pa+poffset;
31697ba1a0bfSKris Buschelman     for (j=0;j<pnzi;j++) {
31707ba1a0bfSKris Buschelman       crow   = (*pJ)*ppdof+pshift;
31717ba1a0bfSKris Buschelman       cjj    = cj + ci[crow];
31727ba1a0bfSKris Buschelman       caj    = ca + ci[crow];
31737ba1a0bfSKris Buschelman       pJ++;
31747ba1a0bfSKris Buschelman       /* Perform sparse axpy operation.  Note cjj includes apj. */
31757ba1a0bfSKris Buschelman       for (k=0,nextap=0;nextap<apnzj;k++) {
31767ba1a0bfSKris Buschelman         if (cjj[k]==apj[nextap]) {
31777ba1a0bfSKris Buschelman           caj[k] += (*pA)*apa[apj[nextap++]];
31787ba1a0bfSKris Buschelman         }
31797ba1a0bfSKris Buschelman       }
3180dc0b31edSSatish Balay       ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr);
31817ba1a0bfSKris Buschelman       pA++;
31827ba1a0bfSKris Buschelman     }
31837ba1a0bfSKris Buschelman 
31847ba1a0bfSKris Buschelman     /* Zero the current row info for A*P */
31857ba1a0bfSKris Buschelman     for (j=0;j<apnzj;j++) {
31867ba1a0bfSKris Buschelman       apa[apj[j]]      = 0.;
31877ba1a0bfSKris Buschelman       apjdense[apj[j]] = 0;
31887ba1a0bfSKris Buschelman     }
31897ba1a0bfSKris Buschelman   }
31907ba1a0bfSKris Buschelman 
31917ba1a0bfSKris Buschelman   /* Assemble the final matrix and clean up */
31927ba1a0bfSKris Buschelman   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
31937ba1a0bfSKris Buschelman   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
319474ed9c26SBarry Smith   ierr = PetscFree3(apa,apj,apjdense);CHKERRQ(ierr);
319595ddefa2SHong Zhang   PetscFunctionReturn(0);
319695ddefa2SHong Zhang }
31977ba1a0bfSKris Buschelman 
319895ddefa2SHong Zhang #undef __FUNCT__
319995ddefa2SHong Zhang #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIMAIJ"
320095ddefa2SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
320195ddefa2SHong Zhang {
320295ddefa2SHong Zhang   PetscErrorCode    ierr;
320395ddefa2SHong Zhang 
320495ddefa2SHong Zhang   PetscFunctionBegin;
320595ddefa2SHong Zhang   /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */
320695ddefa2SHong Zhang   ierr = MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);CHKERRQ(ierr);
320795ddefa2SHong Zhang   ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);CHKERRQ(ierr);
320895ddefa2SHong Zhang   PetscFunctionReturn(0);
320995ddefa2SHong Zhang }
321095ddefa2SHong Zhang 
321195ddefa2SHong Zhang #undef __FUNCT__
321295ddefa2SHong Zhang #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIMAIJ"
321395ddefa2SHong Zhang PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
321495ddefa2SHong Zhang {
321595ddefa2SHong Zhang   PetscFunctionBegin;
3216*e32f2f54SBarry Smith   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
32177ba1a0bfSKris Buschelman   PetscFunctionReturn(0);
32187ba1a0bfSKris Buschelman }
32197ba1a0bfSKris Buschelman 
3220be1d678aSKris Buschelman EXTERN_C_BEGIN
32217ba1a0bfSKris Buschelman #undef __FUNCT__
32220fd73130SBarry Smith #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ"
3223f69a0ea3SMatthew Knepley PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
32249c4fc4c7SBarry Smith {
32259c4fc4c7SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
3226ceb03754SKris Buschelman   Mat               a = b->AIJ,B;
32279c4fc4c7SBarry Smith   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)a->data;
32289c4fc4c7SBarry Smith   PetscErrorCode    ierr;
32290fd73130SBarry Smith   PetscInt          m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
3230ba8c8a56SBarry Smith   PetscInt          *cols;
3231ba8c8a56SBarry Smith   PetscScalar       *vals;
32329c4fc4c7SBarry Smith 
32339c4fc4c7SBarry Smith   PetscFunctionBegin;
32349c4fc4c7SBarry Smith   ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr);
32357c307921SBarry Smith   ierr = PetscMalloc(dof*m*sizeof(PetscInt),&ilen);CHKERRQ(ierr);
32369c4fc4c7SBarry Smith   for (i=0; i<m; i++) {
32379c4fc4c7SBarry Smith     nmax = PetscMax(nmax,aij->ilen[i]);
32380fd73130SBarry Smith     for (j=0; j<dof; j++) {
32390fd73130SBarry Smith       ilen[dof*i+j] = aij->ilen[i];
32409c4fc4c7SBarry Smith     }
32419c4fc4c7SBarry Smith   }
3242ceb03754SKris Buschelman   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr);
32439c4fc4c7SBarry Smith   ierr = PetscFree(ilen);CHKERRQ(ierr);
32449c4fc4c7SBarry Smith   ierr = PetscMalloc(nmax*sizeof(PetscInt),&icols);CHKERRQ(ierr);
32459c4fc4c7SBarry Smith   ii   = 0;
32469c4fc4c7SBarry Smith   for (i=0; i<m; i++) {
3247ba8c8a56SBarry Smith     ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
32480fd73130SBarry Smith     for (j=0; j<dof; j++) {
32499c4fc4c7SBarry Smith       for (k=0; k<ncols; k++) {
32500fd73130SBarry Smith         icols[k] = dof*cols[k]+j;
32519c4fc4c7SBarry Smith       }
3252ceb03754SKris Buschelman       ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
32539c4fc4c7SBarry Smith       ii++;
32549c4fc4c7SBarry Smith     }
3255ba8c8a56SBarry Smith     ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
32569c4fc4c7SBarry Smith   }
32579c4fc4c7SBarry Smith   ierr = PetscFree(icols);CHKERRQ(ierr);
3258ceb03754SKris Buschelman   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3259ceb03754SKris Buschelman   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3260ceb03754SKris Buschelman 
3261ceb03754SKris Buschelman   if (reuse == MAT_REUSE_MATRIX) {
32628ab5b326SKris Buschelman     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
3263c3d102feSKris Buschelman   } else {
3264ceb03754SKris Buschelman     *newmat = B;
3265c3d102feSKris Buschelman   }
32669c4fc4c7SBarry Smith   PetscFunctionReturn(0);
32679c4fc4c7SBarry Smith }
3268be1d678aSKris Buschelman EXTERN_C_END
32699c4fc4c7SBarry Smith 
32707c4f633dSBarry Smith #include "../src/mat/impls/aij/mpi/mpiaij.h"
3271be1d678aSKris Buschelman 
3272be1d678aSKris Buschelman EXTERN_C_BEGIN
32730fd73130SBarry Smith #undef __FUNCT__
32740fd73130SBarry Smith #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ"
3275f69a0ea3SMatthew Knepley PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
32760fd73130SBarry Smith {
32770fd73130SBarry Smith   Mat_MPIMAIJ       *maij = (Mat_MPIMAIJ*)A->data;
3278ceb03754SKris Buschelman   Mat               MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
32790fd73130SBarry Smith   Mat               MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
32800fd73130SBarry Smith   Mat_SeqAIJ        *AIJ = (Mat_SeqAIJ*) MatAIJ->data;
32810fd73130SBarry Smith   Mat_SeqAIJ        *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data;
32820fd73130SBarry Smith   Mat_MPIAIJ        *mpiaij = (Mat_MPIAIJ*) maij->A->data;
3283910ba992SMatthew Knepley   PetscInt          dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0;
3284910ba992SMatthew Knepley   PetscInt          *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL;
32850fd73130SBarry Smith   PetscInt          rstart,cstart,*garray,ii,k;
32860fd73130SBarry Smith   PetscErrorCode    ierr;
32870fd73130SBarry Smith   PetscScalar       *vals,*ovals;
32880fd73130SBarry Smith 
32890fd73130SBarry Smith   PetscFunctionBegin;
3290d0f46423SBarry Smith   ierr = PetscMalloc2(A->rmap->n,PetscInt,&dnz,A->rmap->n,PetscInt,&onz);CHKERRQ(ierr);
3291d0f46423SBarry Smith   for (i=0; i<A->rmap->n/dof; i++) {
32920fd73130SBarry Smith     nmax  = PetscMax(nmax,AIJ->ilen[i]);
32930fd73130SBarry Smith     onmax = PetscMax(onmax,OAIJ->ilen[i]);
32940fd73130SBarry Smith     for (j=0; j<dof; j++) {
32950fd73130SBarry Smith       dnz[dof*i+j] = AIJ->ilen[i];
32960fd73130SBarry Smith       onz[dof*i+j] = OAIJ->ilen[i];
32970fd73130SBarry Smith     }
32980fd73130SBarry Smith   }
3299d0f46423SBarry Smith   ierr = MatCreateMPIAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);CHKERRQ(ierr);
33000fd73130SBarry Smith   ierr = PetscFree2(dnz,onz);CHKERRQ(ierr);
33010fd73130SBarry Smith 
33027a1a7badSBarry Smith   ierr   = PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);CHKERRQ(ierr);
3303d0f46423SBarry Smith   rstart = dof*maij->A->rmap->rstart;
3304d0f46423SBarry Smith   cstart = dof*maij->A->cmap->rstart;
33050fd73130SBarry Smith   garray = mpiaij->garray;
33060fd73130SBarry Smith 
33070fd73130SBarry Smith   ii = rstart;
3308d0f46423SBarry Smith   for (i=0; i<A->rmap->n/dof; i++) {
33090fd73130SBarry Smith     ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
33100fd73130SBarry Smith     ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
33110fd73130SBarry Smith     for (j=0; j<dof; j++) {
33120fd73130SBarry Smith       for (k=0; k<ncols; k++) {
33130fd73130SBarry Smith         icols[k] = cstart + dof*cols[k]+j;
33140fd73130SBarry Smith       }
33150fd73130SBarry Smith       for (k=0; k<oncols; k++) {
33160fd73130SBarry Smith         oicols[k] = dof*garray[ocols[k]]+j;
33170fd73130SBarry Smith       }
3318ceb03754SKris Buschelman       ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
3319ceb03754SKris Buschelman       ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr);
33200fd73130SBarry Smith       ii++;
33210fd73130SBarry Smith     }
33220fd73130SBarry Smith     ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
33230fd73130SBarry Smith     ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
33240fd73130SBarry Smith   }
33250fd73130SBarry Smith   ierr = PetscFree2(icols,oicols);CHKERRQ(ierr);
33260fd73130SBarry Smith 
3327ceb03754SKris Buschelman   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3328ceb03754SKris Buschelman   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3329ceb03754SKris Buschelman 
3330ceb03754SKris Buschelman   if (reuse == MAT_REUSE_MATRIX) {
33317adad957SLisandro Dalcin     PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
33327adad957SLisandro Dalcin     ((PetscObject)A)->refct = 1;
33338ab5b326SKris Buschelman     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
33347adad957SLisandro Dalcin     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3335c3d102feSKris Buschelman   } else {
3336ceb03754SKris Buschelman     *newmat = B;
3337c3d102feSKris Buschelman   }
33380fd73130SBarry Smith   PetscFunctionReturn(0);
33390fd73130SBarry Smith }
3340be1d678aSKris Buschelman EXTERN_C_END
33410fd73130SBarry Smith 
33420fd73130SBarry Smith 
3343bcc973b7SBarry Smith /* ---------------------------------------------------------------------------------- */
3344ff585edeSJed Brown #undef __FUNCT__
3345ff585edeSJed Brown #define __FUNCT__ "MatCreateMAIJ"
3346ff585edeSJed Brown /*@C
33470bad9183SKris Buschelman   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
33480bad9183SKris Buschelman   operations for multicomponent problems.  It interpolates each component the same
33490bad9183SKris Buschelman   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
33500bad9183SKris Buschelman   and MATMPIAIJ for distributed matrices.
33510bad9183SKris Buschelman 
3352ff585edeSJed Brown   Collective
3353ff585edeSJed Brown 
3354ff585edeSJed Brown   Input Parameters:
3355ff585edeSJed Brown + A - the AIJ matrix describing the action on blocks
3356ff585edeSJed Brown - dof - the block size (number of components per node)
3357ff585edeSJed Brown 
3358ff585edeSJed Brown   Output Parameter:
3359ff585edeSJed Brown . maij - the new MAIJ matrix
3360ff585edeSJed Brown 
33610bad9183SKris Buschelman   Operations provided:
33620fd73130SBarry Smith + MatMult
33630bad9183SKris Buschelman . MatMultTranspose
33640bad9183SKris Buschelman . MatMultAdd
33650bad9183SKris Buschelman . MatMultTransposeAdd
33660fd73130SBarry Smith - MatView
33670bad9183SKris Buschelman 
33680bad9183SKris Buschelman   Level: advanced
33690bad9183SKris Buschelman 
3370ff585edeSJed Brown .seealso: MatMAIJGetAIJ(), MatMAIJRedimension()
3371ff585edeSJed Brown @*/
3372be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
337382b1193eSBarry Smith {
3374dfbe8321SBarry Smith   PetscErrorCode ierr;
3375b24ad042SBarry Smith   PetscMPIInt    size;
3376b24ad042SBarry Smith   PetscInt       n;
337782b1193eSBarry Smith   Mat            B;
337882b1193eSBarry Smith 
337982b1193eSBarry Smith   PetscFunctionBegin;
3380d72c5c08SBarry Smith   ierr   = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3381d72c5c08SBarry Smith 
3382d72c5c08SBarry Smith   if (dof == 1) {
3383d72c5c08SBarry Smith     *maij = A;
3384d72c5c08SBarry Smith   } else {
33857adad957SLisandro Dalcin     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
3386d0f46423SBarry Smith     ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr);
3387bccba955SJed Brown     ierr = PetscLayoutSetBlockSize(B->rmap,dof);CHKERRQ(ierr);
3388bccba955SJed Brown     ierr = PetscLayoutSetBlockSize(B->cmap,dof);CHKERRQ(ierr);
3389bccba955SJed Brown     ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3390bccba955SJed Brown     ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3391cd3bbe55SBarry Smith     B->assembled    = PETSC_TRUE;
3392d72c5c08SBarry Smith 
33937adad957SLisandro Dalcin     ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
3394f4a53059SBarry Smith     if (size == 1) {
3395feb9fe23SJed Brown       Mat_SeqMAIJ    *b;
3396feb9fe23SJed Brown 
3397b9b97703SBarry Smith       ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr);
33984cb9d9b8SBarry Smith       B->ops->destroy = MatDestroy_SeqMAIJ;
33990fd73130SBarry Smith       B->ops->view    = MatView_SeqMAIJ;
3400feb9fe23SJed Brown       b      = (Mat_SeqMAIJ*)B->data;
3401b9b97703SBarry Smith       b->dof = dof;
34024cb9d9b8SBarry Smith       b->AIJ = A;
3403d72c5c08SBarry Smith       if (dof == 2) {
3404cd3bbe55SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_2;
3405cd3bbe55SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
3406cd3bbe55SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
3407cd3bbe55SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3408bcc973b7SBarry Smith       } else if (dof == 3) {
3409bcc973b7SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_3;
3410bcc973b7SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
3411bcc973b7SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
3412bcc973b7SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3413bcc973b7SBarry Smith       } else if (dof == 4) {
3414bcc973b7SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_4;
3415bcc973b7SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
3416bcc973b7SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
3417bcc973b7SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3418f9fae5adSBarry Smith       } else if (dof == 5) {
3419f9fae5adSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_5;
3420f9fae5adSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
3421f9fae5adSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
3422f9fae5adSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
34236cd98798SBarry Smith       } else if (dof == 6) {
34246cd98798SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_6;
34256cd98798SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
34266cd98798SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
34276cd98798SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3428ed8eea03SBarry Smith       } else if (dof == 7) {
3429ed8eea03SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_7;
3430ed8eea03SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
3431ed8eea03SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
3432ed8eea03SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
343366ed3db0SBarry Smith       } else if (dof == 8) {
343466ed3db0SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_8;
343566ed3db0SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
343666ed3db0SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
343766ed3db0SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
34382b692628SMatthew Knepley       } else if (dof == 9) {
34392b692628SMatthew Knepley         B->ops->mult             = MatMult_SeqMAIJ_9;
34402b692628SMatthew Knepley         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
34412b692628SMatthew Knepley         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
34422b692628SMatthew Knepley         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3443b9cda22cSBarry Smith       } else if (dof == 10) {
3444b9cda22cSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_10;
3445b9cda22cSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
3446b9cda22cSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
3447b9cda22cSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3448dbdd7285SBarry Smith       } else if (dof == 11) {
3449dbdd7285SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_11;
3450dbdd7285SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
3451dbdd7285SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
3452dbdd7285SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
34532f7816d4SBarry Smith       } else if (dof == 16) {
34542f7816d4SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_16;
34552f7816d4SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
34562f7816d4SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
34572f7816d4SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3458ed1c418cSBarry Smith       } else if (dof == 18) {
3459ed1c418cSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_18;
3460ed1c418cSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
3461ed1c418cSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
3462ed1c418cSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
346382b1193eSBarry Smith       } else {
34646861f193SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_N;
34656861f193SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
34666861f193SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
34676861f193SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
346882b1193eSBarry Smith       }
34697ba1a0bfSKris Buschelman       B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ;
34707ba1a0bfSKris Buschelman       B->ops->ptapnumeric_seqaij  = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
34719c4fc4c7SBarry Smith       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr);
3472f4a53059SBarry Smith     } else {
3473f4a53059SBarry Smith       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ *)A->data;
3474feb9fe23SJed Brown       Mat_MPIMAIJ *b;
3475f4a53059SBarry Smith       IS          from,to;
3476f4a53059SBarry Smith       Vec         gvec;
3477b24ad042SBarry Smith       PetscInt    *garray,i;
3478f4a53059SBarry Smith 
3479b9b97703SBarry Smith       ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr);
3480d72c5c08SBarry Smith       B->ops->destroy = MatDestroy_MPIMAIJ;
34810fd73130SBarry Smith       B->ops->view    = MatView_MPIMAIJ;
3482b9b97703SBarry Smith       b      = (Mat_MPIMAIJ*)B->data;
3483b9b97703SBarry Smith       b->dof = dof;
3484b9b97703SBarry Smith       b->A   = A;
34854cb9d9b8SBarry Smith       ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr);
34864cb9d9b8SBarry Smith       ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr);
34874cb9d9b8SBarry Smith 
3488f4a53059SBarry Smith       ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
3489f4a53059SBarry Smith       ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr);
349013288a74SBarry Smith       ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr);
3491f4a53059SBarry Smith 
3492f4a53059SBarry Smith       /* create two temporary Index sets for build scatter gather */
3493b24ad042SBarry Smith       ierr = PetscMalloc((n+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
3494f4a53059SBarry Smith       for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
34957adad957SLisandro Dalcin       ierr = ISCreateBlock(((PetscObject)A)->comm,dof,n,garray,&from);CHKERRQ(ierr);
3496f4a53059SBarry Smith       ierr = PetscFree(garray);CHKERRQ(ierr);
3497f4a53059SBarry Smith       ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);
3498f4a53059SBarry Smith 
3499f4a53059SBarry Smith       /* create temporary global vector to generate scatter context */
3500d0f46423SBarry Smith       ierr = VecCreateMPIWithArray(((PetscObject)A)->comm,dof*A->cmap->n,dof*A->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr);
350113288a74SBarry Smith       ierr = VecSetBlockSize(gvec,dof);CHKERRQ(ierr);
3502f4a53059SBarry Smith 
3503f4a53059SBarry Smith       /* generate the scatter context */
3504f4a53059SBarry Smith       ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
3505f4a53059SBarry Smith 
3506f4a53059SBarry Smith       ierr = ISDestroy(from);CHKERRQ(ierr);
3507f4a53059SBarry Smith       ierr = ISDestroy(to);CHKERRQ(ierr);
3508f4a53059SBarry Smith       ierr = VecDestroy(gvec);CHKERRQ(ierr);
3509f4a53059SBarry Smith 
3510f4a53059SBarry Smith       B->ops->mult             = MatMult_MPIMAIJ_dof;
35114cb9d9b8SBarry Smith       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
35124cb9d9b8SBarry Smith       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
35134cb9d9b8SBarry Smith       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
351495ddefa2SHong Zhang       B->ops->ptapsymbolic_mpiaij = MatPtAPSymbolic_MPIAIJ_MPIMAIJ;
351595ddefa2SHong Zhang       B->ops->ptapnumeric_mpiaij  = MatPtAPNumeric_MPIAIJ_MPIMAIJ;
35160fd73130SBarry Smith       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr);
3517f4a53059SBarry Smith     }
3518cd3bbe55SBarry Smith     *maij = B;
35190fd73130SBarry Smith     ierr = MatView_Private(B);CHKERRQ(ierr);
3520d72c5c08SBarry Smith   }
352182b1193eSBarry Smith   PetscFunctionReturn(0);
352282b1193eSBarry Smith }
3523