xref: /petsc/src/mat/impls/maij/maij.c (revision c5eb91543d2ee8daf880d93389b892228ddada03)
1be1d678aSKris Buschelman #define PETSCMAT_DLL
2be1d678aSKris Buschelman 
382b1193eSBarry Smith /*
4cd3bbe55SBarry Smith     Defines the basic matrix operations for the MAIJ  matrix storage format.
5cd3bbe55SBarry Smith   This format is used for restriction and interpolation operations for
6cd3bbe55SBarry Smith   multicomponent problems. It interpolates each component the same way
7cd3bbe55SBarry Smith   independently.
8cd3bbe55SBarry Smith 
9cd3bbe55SBarry Smith      We provide:
10cd3bbe55SBarry Smith          MatMult()
11cd3bbe55SBarry Smith          MatMultTranspose()
12cd3bbe55SBarry Smith          MatMultTransposeAdd()
13cd3bbe55SBarry Smith          MatMultAdd()
14cd3bbe55SBarry Smith           and
15cd3bbe55SBarry Smith          MatCreateMAIJ(Mat,dof,Mat*)
16f4a53059SBarry Smith 
17f4a53059SBarry Smith      This single directory handles both the sequential and parallel codes
1882b1193eSBarry Smith */
1982b1193eSBarry Smith 
20ff585edeSJed Brown #include "../src/mat/impls/maij/maij.h" /*I "petscmat.h" I*/
217c4f633dSBarry Smith #include "../src/mat/utils/freespace.h"
221d8d5f9aSSatish Balay #include "private/vecimpl.h"
2382b1193eSBarry Smith 
244a2ae208SSatish Balay #undef __FUNCT__
254a2ae208SSatish Balay #define __FUNCT__ "MatMAIJGetAIJ"
26ff585edeSJed Brown /*@C
27ff585edeSJed Brown    MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix
28ff585edeSJed Brown 
29ff585edeSJed Brown    Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel
30ff585edeSJed Brown 
31ff585edeSJed Brown    Input Parameter:
32ff585edeSJed Brown .  A - the MAIJ matrix
33ff585edeSJed Brown 
34ff585edeSJed Brown    Output Parameter:
35ff585edeSJed Brown .  B - the AIJ matrix
36ff585edeSJed Brown 
37ff585edeSJed Brown    Level: advanced
38ff585edeSJed Brown 
39ff585edeSJed Brown    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
40ff585edeSJed Brown 
41ff585edeSJed Brown .seealso: MatCreateMAIJ()
42ff585edeSJed Brown @*/
43be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJGetAIJ(Mat A,Mat *B)
44b9b97703SBarry Smith {
45dfbe8321SBarry Smith   PetscErrorCode ierr;
46b9b97703SBarry Smith   PetscTruth     ismpimaij,isseqmaij;
47b9b97703SBarry Smith 
48b9b97703SBarry Smith   PetscFunctionBegin;
49b9b97703SBarry Smith   ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr);
50b9b97703SBarry Smith   ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr);
51b9b97703SBarry Smith   if (ismpimaij) {
52b9b97703SBarry Smith     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
53b9b97703SBarry Smith 
54b9b97703SBarry Smith     *B = b->A;
55b9b97703SBarry Smith   } else if (isseqmaij) {
56b9b97703SBarry Smith     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
57b9b97703SBarry Smith 
58b9b97703SBarry Smith     *B = b->AIJ;
59b9b97703SBarry Smith   } else {
60b9b97703SBarry Smith     *B = A;
61b9b97703SBarry Smith   }
62b9b97703SBarry Smith   PetscFunctionReturn(0);
63b9b97703SBarry Smith }
64b9b97703SBarry Smith 
654a2ae208SSatish Balay #undef __FUNCT__
664a2ae208SSatish Balay #define __FUNCT__ "MatMAIJRedimension"
67ff585edeSJed Brown /*@C
68ff585edeSJed Brown    MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size
69ff585edeSJed Brown 
703f9fe445SBarry Smith    Logically Collective
71ff585edeSJed Brown 
72ff585edeSJed Brown    Input Parameter:
73ff585edeSJed Brown +  A - the MAIJ matrix
74ff585edeSJed Brown -  dof - the block size for the new matrix
75ff585edeSJed Brown 
76ff585edeSJed Brown    Output Parameter:
77ff585edeSJed Brown .  B - the new MAIJ matrix
78ff585edeSJed Brown 
79ff585edeSJed Brown    Level: advanced
80ff585edeSJed Brown 
81ff585edeSJed Brown .seealso: MatCreateMAIJ()
82ff585edeSJed Brown @*/
83be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
84b9b97703SBarry Smith {
85dfbe8321SBarry Smith   PetscErrorCode ierr;
863b98c0a2SBarry Smith   Mat            Aij = PETSC_NULL;
87b9b97703SBarry Smith 
88b9b97703SBarry Smith   PetscFunctionBegin;
89*c5eb9154SBarry Smith   PetscValidLogicalCollectiveInt(A,dof,2);
90b9b97703SBarry Smith   ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr);
91b9b97703SBarry Smith   ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr);
92b9b97703SBarry Smith   PetscFunctionReturn(0);
93b9b97703SBarry Smith }
94b9b97703SBarry Smith 
954a2ae208SSatish Balay #undef __FUNCT__
964a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_SeqMAIJ"
97dfbe8321SBarry Smith PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
9882b1193eSBarry Smith {
99dfbe8321SBarry Smith   PetscErrorCode ierr;
1004cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
10182b1193eSBarry Smith 
10282b1193eSBarry Smith   PetscFunctionBegin;
103cd3bbe55SBarry Smith   if (b->AIJ) {
104cd3bbe55SBarry Smith     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
10582b1193eSBarry Smith   }
1064cb9d9b8SBarry Smith   ierr = PetscFree(b);CHKERRQ(ierr);
1074cb9d9b8SBarry Smith   PetscFunctionReturn(0);
1084cb9d9b8SBarry Smith }
1094cb9d9b8SBarry Smith 
1104a2ae208SSatish Balay #undef __FUNCT__
1110fd73130SBarry Smith #define __FUNCT__ "MatView_SeqMAIJ"
1120fd73130SBarry Smith PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
1130fd73130SBarry Smith {
1140fd73130SBarry Smith   PetscErrorCode ierr;
1150fd73130SBarry Smith   Mat            B;
1160fd73130SBarry Smith 
1170fd73130SBarry Smith   PetscFunctionBegin;
118ceb03754SKris Buschelman   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
1190fd73130SBarry Smith   ierr = MatView(B,viewer);CHKERRQ(ierr);
1200fd73130SBarry Smith   ierr = MatDestroy(B);CHKERRQ(ierr);
1210fd73130SBarry Smith   PetscFunctionReturn(0);
1220fd73130SBarry Smith }
1230fd73130SBarry Smith 
1240fd73130SBarry Smith #undef __FUNCT__
1250fd73130SBarry Smith #define __FUNCT__ "MatView_MPIMAIJ"
1260fd73130SBarry Smith PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
1270fd73130SBarry Smith {
1280fd73130SBarry Smith   PetscErrorCode ierr;
1290fd73130SBarry Smith   Mat            B;
1300fd73130SBarry Smith 
1310fd73130SBarry Smith   PetscFunctionBegin;
132ceb03754SKris Buschelman   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
1330fd73130SBarry Smith   ierr = MatView(B,viewer);CHKERRQ(ierr);
1340fd73130SBarry Smith   ierr = MatDestroy(B);CHKERRQ(ierr);
1350fd73130SBarry Smith   PetscFunctionReturn(0);
1360fd73130SBarry Smith }
1370fd73130SBarry Smith 
1380fd73130SBarry Smith #undef __FUNCT__
1394a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_MPIMAIJ"
140dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
1414cb9d9b8SBarry Smith {
142dfbe8321SBarry Smith   PetscErrorCode ierr;
1434cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
1444cb9d9b8SBarry Smith 
1454cb9d9b8SBarry Smith   PetscFunctionBegin;
1464cb9d9b8SBarry Smith   if (b->AIJ) {
1474cb9d9b8SBarry Smith     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
1484cb9d9b8SBarry Smith   }
149f4a53059SBarry Smith   if (b->OAIJ) {
150f4a53059SBarry Smith     ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr);
151f4a53059SBarry Smith   }
152b9b97703SBarry Smith   if (b->A) {
153b9b97703SBarry Smith     ierr = MatDestroy(b->A);CHKERRQ(ierr);
154b9b97703SBarry Smith   }
155f4a53059SBarry Smith   if (b->ctx) {
156f4a53059SBarry Smith     ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr);
157f4a53059SBarry Smith   }
158f4a53059SBarry Smith   if (b->w) {
159f4a53059SBarry Smith     ierr = VecDestroy(b->w);CHKERRQ(ierr);
160f4a53059SBarry Smith   }
161cd3bbe55SBarry Smith   ierr = PetscFree(b);CHKERRQ(ierr);
162dbd8c25aSHong Zhang   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
163b9b97703SBarry Smith   PetscFunctionReturn(0);
164b9b97703SBarry Smith }
165b9b97703SBarry Smith 
1660bad9183SKris Buschelman /*MC
167fafad747SKris Buschelman   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
1680bad9183SKris Buschelman   multicomponent problems, interpolating or restricting each component the same way independently.
1690bad9183SKris Buschelman   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
1700bad9183SKris Buschelman 
1710bad9183SKris Buschelman   Operations provided:
1720bad9183SKris Buschelman . MatMult
1730bad9183SKris Buschelman . MatMultTranspose
1740bad9183SKris Buschelman . MatMultAdd
1750bad9183SKris Buschelman . MatMultTransposeAdd
1760bad9183SKris Buschelman 
1770bad9183SKris Buschelman   Level: advanced
1780bad9183SKris Buschelman 
1790bad9183SKris Buschelman .seealso: MatCreateSeqDense
1800bad9183SKris Buschelman M*/
1810bad9183SKris Buschelman 
18282b1193eSBarry Smith EXTERN_C_BEGIN
1834a2ae208SSatish Balay #undef __FUNCT__
1844a2ae208SSatish Balay #define __FUNCT__ "MatCreate_MAIJ"
185be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MAIJ(Mat A)
18682b1193eSBarry Smith {
187dfbe8321SBarry Smith   PetscErrorCode ierr;
1884cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b;
18964b52464SHong Zhang   PetscMPIInt    size;
19082b1193eSBarry Smith 
19182b1193eSBarry Smith   PetscFunctionBegin;
19238f2d2fdSLisandro Dalcin   ierr     = PetscNewLog(A,Mat_MPIMAIJ,&b);CHKERRQ(ierr);
193b0a32e0cSBarry Smith   A->data  = (void*)b;
194cd3bbe55SBarry Smith   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
195cd3bbe55SBarry Smith   A->mapping          = 0;
196f4a53059SBarry Smith 
197cd3bbe55SBarry Smith   b->AIJ  = 0;
198cd3bbe55SBarry Smith   b->dof  = 0;
199f4a53059SBarry Smith   b->OAIJ = 0;
200f4a53059SBarry Smith   b->ctx  = 0;
201f4a53059SBarry Smith   b->w    = 0;
2027adad957SLisandro Dalcin   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
20364b52464SHong Zhang   if (size == 1){
20464b52464SHong Zhang     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr);
20564b52464SHong Zhang   } else {
20664b52464SHong Zhang     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr);
20764b52464SHong Zhang   }
20882b1193eSBarry Smith   PetscFunctionReturn(0);
20982b1193eSBarry Smith }
21082b1193eSBarry Smith EXTERN_C_END
21182b1193eSBarry Smith 
212cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/
2134a2ae208SSatish Balay #undef __FUNCT__
214b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_2"
215dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
21682b1193eSBarry Smith {
2174cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
218bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
219d2840e09SBarry Smith   const PetscScalar *x,*v;
220d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2;
221dfbe8321SBarry Smith   PetscErrorCode    ierr;
222d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
223d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
22482b1193eSBarry Smith 
225bcc973b7SBarry Smith   PetscFunctionBegin;
2263649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2271ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
228bcc973b7SBarry Smith   idx  = a->j;
229bcc973b7SBarry Smith   v    = a->a;
230bcc973b7SBarry Smith   ii   = a->i;
231bcc973b7SBarry Smith 
232bcc973b7SBarry Smith   for (i=0; i<m; i++) {
233bcc973b7SBarry Smith     jrow = ii[i];
234bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
235bcc973b7SBarry Smith     sum1  = 0.0;
236bcc973b7SBarry Smith     sum2  = 0.0;
237b3c51c66SMatthew Knepley     nonzerorow += (n>0);
238bcc973b7SBarry Smith     for (j=0; j<n; j++) {
239bcc973b7SBarry Smith       sum1 += v[jrow]*x[2*idx[jrow]];
240bcc973b7SBarry Smith       sum2 += v[jrow]*x[2*idx[jrow]+1];
241bcc973b7SBarry Smith       jrow++;
242bcc973b7SBarry Smith      }
243bcc973b7SBarry Smith     y[2*i]   = sum1;
244bcc973b7SBarry Smith     y[2*i+1] = sum2;
245bcc973b7SBarry Smith   }
246bcc973b7SBarry Smith 
247dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr);
2483649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2491ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
25082b1193eSBarry Smith   PetscFunctionReturn(0);
25182b1193eSBarry Smith }
252bcc973b7SBarry Smith 
2534a2ae208SSatish Balay #undef __FUNCT__
254b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2"
255dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
25682b1193eSBarry Smith {
2574cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
258bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
259d2840e09SBarry Smith   const PetscScalar *x,*v;
260d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2;
261dfbe8321SBarry Smith   PetscErrorCode    ierr;
262d2840e09SBarry Smith   PetscInt          n,i;
263d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
26482b1193eSBarry Smith 
265bcc973b7SBarry Smith   PetscFunctionBegin;
266d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2673649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2681ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
269bfec09a0SHong Zhang 
270bcc973b7SBarry Smith   for (i=0; i<m; i++) {
271bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
272bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
273bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
274bcc973b7SBarry Smith     alpha1 = x[2*i];
275bcc973b7SBarry Smith     alpha2 = x[2*i+1];
276bcc973b7SBarry Smith     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
277bcc973b7SBarry Smith   }
278dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
2793649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2801ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
28182b1193eSBarry Smith   PetscFunctionReturn(0);
28282b1193eSBarry Smith }
283bcc973b7SBarry Smith 
2844a2ae208SSatish Balay #undef __FUNCT__
285b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_2"
286dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
28782b1193eSBarry Smith {
2884cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
289bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
290d2840e09SBarry Smith   const PetscScalar *x,*v;
291d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2;
292dfbe8321SBarry Smith   PetscErrorCode    ierr;
293b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
294d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
29582b1193eSBarry Smith 
296bcc973b7SBarry Smith   PetscFunctionBegin;
297f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2983649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2991ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
300bcc973b7SBarry Smith   idx  = a->j;
301bcc973b7SBarry Smith   v    = a->a;
302bcc973b7SBarry Smith   ii   = a->i;
303bcc973b7SBarry Smith 
304bcc973b7SBarry Smith   for (i=0; i<m; i++) {
305bcc973b7SBarry Smith     jrow = ii[i];
306bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
307bcc973b7SBarry Smith     sum1  = 0.0;
308bcc973b7SBarry Smith     sum2  = 0.0;
309bcc973b7SBarry Smith     for (j=0; j<n; j++) {
310bcc973b7SBarry Smith       sum1 += v[jrow]*x[2*idx[jrow]];
311bcc973b7SBarry Smith       sum2 += v[jrow]*x[2*idx[jrow]+1];
312bcc973b7SBarry Smith       jrow++;
313bcc973b7SBarry Smith      }
314bcc973b7SBarry Smith     y[2*i]   += sum1;
315bcc973b7SBarry Smith     y[2*i+1] += sum2;
316bcc973b7SBarry Smith   }
317bcc973b7SBarry Smith 
318dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
3193649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
3201ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
321bcc973b7SBarry Smith   PetscFunctionReturn(0);
32282b1193eSBarry Smith }
3234a2ae208SSatish Balay #undef __FUNCT__
324b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2"
325dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
32682b1193eSBarry Smith {
3274cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
328bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
329d2840e09SBarry Smith   const PetscScalar *x,*v;
330d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2;
331dfbe8321SBarry Smith   PetscErrorCode    ierr;
332d2840e09SBarry Smith   PetscInt          n,i;
333d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
33482b1193eSBarry Smith 
335bcc973b7SBarry Smith   PetscFunctionBegin;
336f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
3373649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
3381ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
339bfec09a0SHong Zhang 
340bcc973b7SBarry Smith   for (i=0; i<m; i++) {
341bfec09a0SHong Zhang     idx   = a->j + a->i[i] ;
342bfec09a0SHong Zhang     v     = a->a + a->i[i] ;
343bcc973b7SBarry Smith     n     = a->i[i+1] - a->i[i];
344bcc973b7SBarry Smith     alpha1 = x[2*i];
345bcc973b7SBarry Smith     alpha2 = x[2*i+1];
346bcc973b7SBarry Smith     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
347bcc973b7SBarry Smith   }
348dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
3493649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
3501ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
351bcc973b7SBarry Smith   PetscFunctionReturn(0);
35282b1193eSBarry Smith }
353cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/
3544a2ae208SSatish Balay #undef __FUNCT__
355b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_3"
356dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
357bcc973b7SBarry Smith {
3584cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
359bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
360d2840e09SBarry Smith   const PetscScalar *x,*v;
361d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3;
362dfbe8321SBarry Smith   PetscErrorCode    ierr;
363d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
364d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
36582b1193eSBarry Smith 
366bcc973b7SBarry Smith   PetscFunctionBegin;
3673649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
3681ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
369bcc973b7SBarry Smith   idx  = a->j;
370bcc973b7SBarry Smith   v    = a->a;
371bcc973b7SBarry Smith   ii   = a->i;
372bcc973b7SBarry Smith 
373bcc973b7SBarry Smith   for (i=0; i<m; i++) {
374bcc973b7SBarry Smith     jrow = ii[i];
375bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
376bcc973b7SBarry Smith     sum1  = 0.0;
377bcc973b7SBarry Smith     sum2  = 0.0;
378bcc973b7SBarry Smith     sum3  = 0.0;
379b3c51c66SMatthew Knepley     nonzerorow += (n>0);
380bcc973b7SBarry Smith     for (j=0; j<n; j++) {
381bcc973b7SBarry Smith       sum1 += v[jrow]*x[3*idx[jrow]];
382bcc973b7SBarry Smith       sum2 += v[jrow]*x[3*idx[jrow]+1];
383bcc973b7SBarry Smith       sum3 += v[jrow]*x[3*idx[jrow]+2];
384bcc973b7SBarry Smith       jrow++;
385bcc973b7SBarry Smith      }
386bcc973b7SBarry Smith     y[3*i]   = sum1;
387bcc973b7SBarry Smith     y[3*i+1] = sum2;
388bcc973b7SBarry Smith     y[3*i+2] = sum3;
389bcc973b7SBarry Smith   }
390bcc973b7SBarry Smith 
391dc0b31edSSatish Balay   ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr);
3923649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
3931ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
394bcc973b7SBarry Smith   PetscFunctionReturn(0);
395bcc973b7SBarry Smith }
396bcc973b7SBarry Smith 
3974a2ae208SSatish Balay #undef __FUNCT__
398b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3"
399dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
400bcc973b7SBarry Smith {
4014cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
402bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
403d2840e09SBarry Smith   const PetscScalar *x,*v;
404d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3;
405dfbe8321SBarry Smith   PetscErrorCode    ierr;
406d2840e09SBarry Smith   PetscInt          n,i;
407d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
408bcc973b7SBarry Smith 
409bcc973b7SBarry Smith   PetscFunctionBegin;
410d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
4113649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
4121ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
413bfec09a0SHong Zhang 
414bcc973b7SBarry Smith   for (i=0; i<m; i++) {
415bfec09a0SHong Zhang     idx    = a->j + a->i[i];
416bfec09a0SHong Zhang     v      = a->a + a->i[i];
417bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
418bcc973b7SBarry Smith     alpha1 = x[3*i];
419bcc973b7SBarry Smith     alpha2 = x[3*i+1];
420bcc973b7SBarry Smith     alpha3 = x[3*i+2];
421bcc973b7SBarry Smith     while (n-->0) {
422bcc973b7SBarry Smith       y[3*(*idx)]   += alpha1*(*v);
423bcc973b7SBarry Smith       y[3*(*idx)+1] += alpha2*(*v);
424bcc973b7SBarry Smith       y[3*(*idx)+2] += alpha3*(*v);
425bcc973b7SBarry Smith       idx++; v++;
426bcc973b7SBarry Smith     }
427bcc973b7SBarry Smith   }
428dc0b31edSSatish Balay   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
4293649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
4301ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
431bcc973b7SBarry Smith   PetscFunctionReturn(0);
432bcc973b7SBarry Smith }
433bcc973b7SBarry Smith 
4344a2ae208SSatish Balay #undef __FUNCT__
435b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_3"
436dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
437bcc973b7SBarry Smith {
4384cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
439bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
440d2840e09SBarry Smith   const PetscScalar *x,*v;
441d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3;
442dfbe8321SBarry Smith   PetscErrorCode    ierr;
443b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
444d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
445bcc973b7SBarry Smith 
446bcc973b7SBarry Smith   PetscFunctionBegin;
447f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
4483649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
4491ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
450bcc973b7SBarry Smith   idx  = a->j;
451bcc973b7SBarry Smith   v    = a->a;
452bcc973b7SBarry Smith   ii   = a->i;
453bcc973b7SBarry Smith 
454bcc973b7SBarry Smith   for (i=0; i<m; i++) {
455bcc973b7SBarry Smith     jrow = ii[i];
456bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
457bcc973b7SBarry Smith     sum1  = 0.0;
458bcc973b7SBarry Smith     sum2  = 0.0;
459bcc973b7SBarry Smith     sum3  = 0.0;
460bcc973b7SBarry Smith     for (j=0; j<n; j++) {
461bcc973b7SBarry Smith       sum1 += v[jrow]*x[3*idx[jrow]];
462bcc973b7SBarry Smith       sum2 += v[jrow]*x[3*idx[jrow]+1];
463bcc973b7SBarry Smith       sum3 += v[jrow]*x[3*idx[jrow]+2];
464bcc973b7SBarry Smith       jrow++;
465bcc973b7SBarry Smith      }
466bcc973b7SBarry Smith     y[3*i]   += sum1;
467bcc973b7SBarry Smith     y[3*i+1] += sum2;
468bcc973b7SBarry Smith     y[3*i+2] += sum3;
469bcc973b7SBarry Smith   }
470bcc973b7SBarry Smith 
471dc0b31edSSatish Balay   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
4723649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
4731ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
474bcc973b7SBarry Smith   PetscFunctionReturn(0);
475bcc973b7SBarry Smith }
4764a2ae208SSatish Balay #undef __FUNCT__
477b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3"
478dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
479bcc973b7SBarry Smith {
4804cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
481bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
482d2840e09SBarry Smith   const PetscScalar *x,*v;
483d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3;
484dfbe8321SBarry Smith   PetscErrorCode    ierr;
485d2840e09SBarry Smith   PetscInt          n,i;
486d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
487bcc973b7SBarry Smith 
488bcc973b7SBarry Smith   PetscFunctionBegin;
489f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
4903649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
4911ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
492bcc973b7SBarry Smith   for (i=0; i<m; i++) {
493bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
494bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
495bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
496bcc973b7SBarry Smith     alpha1 = x[3*i];
497bcc973b7SBarry Smith     alpha2 = x[3*i+1];
498bcc973b7SBarry Smith     alpha3 = x[3*i+2];
499bcc973b7SBarry Smith     while (n-->0) {
500bcc973b7SBarry Smith       y[3*(*idx)]   += alpha1*(*v);
501bcc973b7SBarry Smith       y[3*(*idx)+1] += alpha2*(*v);
502bcc973b7SBarry Smith       y[3*(*idx)+2] += alpha3*(*v);
503bcc973b7SBarry Smith       idx++; v++;
504bcc973b7SBarry Smith     }
505bcc973b7SBarry Smith   }
506dc0b31edSSatish Balay   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
5073649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
5081ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
509bcc973b7SBarry Smith   PetscFunctionReturn(0);
510bcc973b7SBarry Smith }
511bcc973b7SBarry Smith 
512bcc973b7SBarry Smith /* ------------------------------------------------------------------------------*/
5134a2ae208SSatish Balay #undef __FUNCT__
514b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_4"
515dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
516bcc973b7SBarry Smith {
5174cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
518bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
519d2840e09SBarry Smith   const PetscScalar *x,*v;
520d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4;
521dfbe8321SBarry Smith   PetscErrorCode    ierr;
522d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
523d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
524bcc973b7SBarry Smith 
525bcc973b7SBarry Smith   PetscFunctionBegin;
5263649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
5271ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
528bcc973b7SBarry Smith   idx  = a->j;
529bcc973b7SBarry Smith   v    = a->a;
530bcc973b7SBarry Smith   ii   = a->i;
531bcc973b7SBarry Smith 
532bcc973b7SBarry Smith   for (i=0; i<m; i++) {
533bcc973b7SBarry Smith     jrow = ii[i];
534bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
535bcc973b7SBarry Smith     sum1  = 0.0;
536bcc973b7SBarry Smith     sum2  = 0.0;
537bcc973b7SBarry Smith     sum3  = 0.0;
538bcc973b7SBarry Smith     sum4  = 0.0;
539b3c51c66SMatthew Knepley     nonzerorow += (n>0);
540bcc973b7SBarry Smith     for (j=0; j<n; j++) {
541bcc973b7SBarry Smith       sum1 += v[jrow]*x[4*idx[jrow]];
542bcc973b7SBarry Smith       sum2 += v[jrow]*x[4*idx[jrow]+1];
543bcc973b7SBarry Smith       sum3 += v[jrow]*x[4*idx[jrow]+2];
544bcc973b7SBarry Smith       sum4 += v[jrow]*x[4*idx[jrow]+3];
545bcc973b7SBarry Smith       jrow++;
546bcc973b7SBarry Smith      }
547bcc973b7SBarry Smith     y[4*i]   = sum1;
548bcc973b7SBarry Smith     y[4*i+1] = sum2;
549bcc973b7SBarry Smith     y[4*i+2] = sum3;
550bcc973b7SBarry Smith     y[4*i+3] = sum4;
551bcc973b7SBarry Smith   }
552bcc973b7SBarry Smith 
553dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr);
5543649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
5551ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
556bcc973b7SBarry Smith   PetscFunctionReturn(0);
557bcc973b7SBarry Smith }
558bcc973b7SBarry Smith 
5594a2ae208SSatish Balay #undef __FUNCT__
560b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4"
561dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
562bcc973b7SBarry Smith {
5634cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
564bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
565d2840e09SBarry Smith   const PetscScalar *x,*v;
566d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
567dfbe8321SBarry Smith   PetscErrorCode    ierr;
568d2840e09SBarry Smith   PetscInt          n,i;
569d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
570bcc973b7SBarry Smith 
571bcc973b7SBarry Smith   PetscFunctionBegin;
572d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
5733649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
5741ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
575bcc973b7SBarry Smith   for (i=0; i<m; i++) {
576bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
577bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
578bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
579bcc973b7SBarry Smith     alpha1 = x[4*i];
580bcc973b7SBarry Smith     alpha2 = x[4*i+1];
581bcc973b7SBarry Smith     alpha3 = x[4*i+2];
582bcc973b7SBarry Smith     alpha4 = x[4*i+3];
583bcc973b7SBarry Smith     while (n-->0) {
584bcc973b7SBarry Smith       y[4*(*idx)]   += alpha1*(*v);
585bcc973b7SBarry Smith       y[4*(*idx)+1] += alpha2*(*v);
586bcc973b7SBarry Smith       y[4*(*idx)+2] += alpha3*(*v);
587bcc973b7SBarry Smith       y[4*(*idx)+3] += alpha4*(*v);
588bcc973b7SBarry Smith       idx++; v++;
589bcc973b7SBarry Smith     }
590bcc973b7SBarry Smith   }
591dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
5923649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
5931ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
594bcc973b7SBarry Smith   PetscFunctionReturn(0);
595bcc973b7SBarry Smith }
596bcc973b7SBarry Smith 
5974a2ae208SSatish Balay #undef __FUNCT__
598b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_4"
599dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
600bcc973b7SBarry Smith {
6014cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
602f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
603d2840e09SBarry Smith   const PetscScalar *x,*v;
604d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4;
605dfbe8321SBarry Smith   PetscErrorCode    ierr;
606b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
607d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
608f9fae5adSBarry Smith 
609f9fae5adSBarry Smith   PetscFunctionBegin;
610f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
6113649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
6121ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
613f9fae5adSBarry Smith   idx  = a->j;
614f9fae5adSBarry Smith   v    = a->a;
615f9fae5adSBarry Smith   ii   = a->i;
616f9fae5adSBarry Smith 
617f9fae5adSBarry Smith   for (i=0; i<m; i++) {
618f9fae5adSBarry Smith     jrow = ii[i];
619f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
620f9fae5adSBarry Smith     sum1  = 0.0;
621f9fae5adSBarry Smith     sum2  = 0.0;
622f9fae5adSBarry Smith     sum3  = 0.0;
623f9fae5adSBarry Smith     sum4  = 0.0;
624f9fae5adSBarry Smith     for (j=0; j<n; j++) {
625f9fae5adSBarry Smith       sum1 += v[jrow]*x[4*idx[jrow]];
626f9fae5adSBarry Smith       sum2 += v[jrow]*x[4*idx[jrow]+1];
627f9fae5adSBarry Smith       sum3 += v[jrow]*x[4*idx[jrow]+2];
628f9fae5adSBarry Smith       sum4 += v[jrow]*x[4*idx[jrow]+3];
629f9fae5adSBarry Smith       jrow++;
630f9fae5adSBarry Smith      }
631f9fae5adSBarry Smith     y[4*i]   += sum1;
632f9fae5adSBarry Smith     y[4*i+1] += sum2;
633f9fae5adSBarry Smith     y[4*i+2] += sum3;
634f9fae5adSBarry Smith     y[4*i+3] += sum4;
635f9fae5adSBarry Smith   }
636f9fae5adSBarry Smith 
637dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
6383649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
6391ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
640f9fae5adSBarry Smith   PetscFunctionReturn(0);
641bcc973b7SBarry Smith }
6424a2ae208SSatish Balay #undef __FUNCT__
643b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4"
644dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
645bcc973b7SBarry Smith {
6464cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
647f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
648d2840e09SBarry Smith   const PetscScalar *x,*v;
649d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
650dfbe8321SBarry Smith   PetscErrorCode    ierr;
651d2840e09SBarry Smith   PetscInt          n,i;
652d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
653f9fae5adSBarry Smith 
654f9fae5adSBarry Smith   PetscFunctionBegin;
655f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
6563649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
6571ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
658bfec09a0SHong Zhang 
659f9fae5adSBarry Smith   for (i=0; i<m; i++) {
660bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
661bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
662f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
663f9fae5adSBarry Smith     alpha1 = x[4*i];
664f9fae5adSBarry Smith     alpha2 = x[4*i+1];
665f9fae5adSBarry Smith     alpha3 = x[4*i+2];
666f9fae5adSBarry Smith     alpha4 = x[4*i+3];
667f9fae5adSBarry Smith     while (n-->0) {
668f9fae5adSBarry Smith       y[4*(*idx)]   += alpha1*(*v);
669f9fae5adSBarry Smith       y[4*(*idx)+1] += alpha2*(*v);
670f9fae5adSBarry Smith       y[4*(*idx)+2] += alpha3*(*v);
671f9fae5adSBarry Smith       y[4*(*idx)+3] += alpha4*(*v);
672f9fae5adSBarry Smith       idx++; v++;
673f9fae5adSBarry Smith     }
674f9fae5adSBarry Smith   }
675dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
6763649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
6771ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
678f9fae5adSBarry Smith   PetscFunctionReturn(0);
679f9fae5adSBarry Smith }
680f9fae5adSBarry Smith /* ------------------------------------------------------------------------------*/
6816cd98798SBarry Smith 
6824a2ae208SSatish Balay #undef __FUNCT__
683b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_5"
684dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
685f9fae5adSBarry Smith {
6864cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
687f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
688d2840e09SBarry Smith   const PetscScalar *x,*v;
689d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
690dfbe8321SBarry Smith   PetscErrorCode    ierr;
691d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
692d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
693f9fae5adSBarry Smith 
694f9fae5adSBarry Smith   PetscFunctionBegin;
6953649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
6961ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
697f9fae5adSBarry Smith   idx  = a->j;
698f9fae5adSBarry Smith   v    = a->a;
699f9fae5adSBarry Smith   ii   = a->i;
700f9fae5adSBarry Smith 
701f9fae5adSBarry Smith   for (i=0; i<m; i++) {
702f9fae5adSBarry Smith     jrow = ii[i];
703f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
704f9fae5adSBarry Smith     sum1  = 0.0;
705f9fae5adSBarry Smith     sum2  = 0.0;
706f9fae5adSBarry Smith     sum3  = 0.0;
707f9fae5adSBarry Smith     sum4  = 0.0;
708f9fae5adSBarry Smith     sum5  = 0.0;
709b3c51c66SMatthew Knepley     nonzerorow += (n>0);
710f9fae5adSBarry Smith     for (j=0; j<n; j++) {
711f9fae5adSBarry Smith       sum1 += v[jrow]*x[5*idx[jrow]];
712f9fae5adSBarry Smith       sum2 += v[jrow]*x[5*idx[jrow]+1];
713f9fae5adSBarry Smith       sum3 += v[jrow]*x[5*idx[jrow]+2];
714f9fae5adSBarry Smith       sum4 += v[jrow]*x[5*idx[jrow]+3];
715f9fae5adSBarry Smith       sum5 += v[jrow]*x[5*idx[jrow]+4];
716f9fae5adSBarry Smith       jrow++;
717f9fae5adSBarry Smith      }
718f9fae5adSBarry Smith     y[5*i]   = sum1;
719f9fae5adSBarry Smith     y[5*i+1] = sum2;
720f9fae5adSBarry Smith     y[5*i+2] = sum3;
721f9fae5adSBarry Smith     y[5*i+3] = sum4;
722f9fae5adSBarry Smith     y[5*i+4] = sum5;
723f9fae5adSBarry Smith   }
724f9fae5adSBarry Smith 
725dc0b31edSSatish Balay   ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr);
7263649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
7271ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
728f9fae5adSBarry Smith   PetscFunctionReturn(0);
729f9fae5adSBarry Smith }
730f9fae5adSBarry Smith 
7314a2ae208SSatish Balay #undef __FUNCT__
732b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5"
733dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
734f9fae5adSBarry Smith {
7354cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
736f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
737d2840e09SBarry Smith   const PetscScalar *x,*v;
738d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
739dfbe8321SBarry Smith   PetscErrorCode    ierr;
740d2840e09SBarry Smith   PetscInt          n,i;
741d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
742f9fae5adSBarry Smith 
743f9fae5adSBarry Smith   PetscFunctionBegin;
744d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
7453649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
7461ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
747bfec09a0SHong Zhang 
748f9fae5adSBarry Smith   for (i=0; i<m; i++) {
749bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
750bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
751f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
752f9fae5adSBarry Smith     alpha1 = x[5*i];
753f9fae5adSBarry Smith     alpha2 = x[5*i+1];
754f9fae5adSBarry Smith     alpha3 = x[5*i+2];
755f9fae5adSBarry Smith     alpha4 = x[5*i+3];
756f9fae5adSBarry Smith     alpha5 = x[5*i+4];
757f9fae5adSBarry Smith     while (n-->0) {
758f9fae5adSBarry Smith       y[5*(*idx)]   += alpha1*(*v);
759f9fae5adSBarry Smith       y[5*(*idx)+1] += alpha2*(*v);
760f9fae5adSBarry Smith       y[5*(*idx)+2] += alpha3*(*v);
761f9fae5adSBarry Smith       y[5*(*idx)+3] += alpha4*(*v);
762f9fae5adSBarry Smith       y[5*(*idx)+4] += alpha5*(*v);
763f9fae5adSBarry Smith       idx++; v++;
764f9fae5adSBarry Smith     }
765f9fae5adSBarry Smith   }
766dc0b31edSSatish Balay   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
7673649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
7681ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
769f9fae5adSBarry Smith   PetscFunctionReturn(0);
770f9fae5adSBarry Smith }
771f9fae5adSBarry Smith 
7724a2ae208SSatish Balay #undef __FUNCT__
773b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_5"
774dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
775f9fae5adSBarry Smith {
7764cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
777f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
778d2840e09SBarry Smith   const PetscScalar *x,*v;
779d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
780dfbe8321SBarry Smith   PetscErrorCode    ierr;
781b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
782d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
783f9fae5adSBarry Smith 
784f9fae5adSBarry Smith   PetscFunctionBegin;
785f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
7863649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
7871ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
788f9fae5adSBarry Smith   idx  = a->j;
789f9fae5adSBarry Smith   v    = a->a;
790f9fae5adSBarry Smith   ii   = a->i;
791f9fae5adSBarry Smith 
792f9fae5adSBarry Smith   for (i=0; i<m; i++) {
793f9fae5adSBarry Smith     jrow = ii[i];
794f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
795f9fae5adSBarry Smith     sum1  = 0.0;
796f9fae5adSBarry Smith     sum2  = 0.0;
797f9fae5adSBarry Smith     sum3  = 0.0;
798f9fae5adSBarry Smith     sum4  = 0.0;
799f9fae5adSBarry Smith     sum5  = 0.0;
800f9fae5adSBarry Smith     for (j=0; j<n; j++) {
801f9fae5adSBarry Smith       sum1 += v[jrow]*x[5*idx[jrow]];
802f9fae5adSBarry Smith       sum2 += v[jrow]*x[5*idx[jrow]+1];
803f9fae5adSBarry Smith       sum3 += v[jrow]*x[5*idx[jrow]+2];
804f9fae5adSBarry Smith       sum4 += v[jrow]*x[5*idx[jrow]+3];
805f9fae5adSBarry Smith       sum5 += v[jrow]*x[5*idx[jrow]+4];
806f9fae5adSBarry Smith       jrow++;
807f9fae5adSBarry Smith      }
808f9fae5adSBarry Smith     y[5*i]   += sum1;
809f9fae5adSBarry Smith     y[5*i+1] += sum2;
810f9fae5adSBarry Smith     y[5*i+2] += sum3;
811f9fae5adSBarry Smith     y[5*i+3] += sum4;
812f9fae5adSBarry Smith     y[5*i+4] += sum5;
813f9fae5adSBarry Smith   }
814f9fae5adSBarry Smith 
815dc0b31edSSatish Balay   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
8163649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
8171ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
818f9fae5adSBarry Smith   PetscFunctionReturn(0);
819f9fae5adSBarry Smith }
820f9fae5adSBarry Smith 
8214a2ae208SSatish Balay #undef __FUNCT__
822b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5"
823dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
824f9fae5adSBarry Smith {
8254cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
826f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
827d2840e09SBarry Smith   const PetscScalar *x,*v;
828d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
829dfbe8321SBarry Smith   PetscErrorCode    ierr;
830d2840e09SBarry Smith   PetscInt          n,i;
831d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
832f9fae5adSBarry Smith 
833f9fae5adSBarry Smith   PetscFunctionBegin;
834f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
8353649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
8361ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
837bfec09a0SHong Zhang 
838f9fae5adSBarry Smith   for (i=0; i<m; i++) {
839bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
840bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
841f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
842f9fae5adSBarry Smith     alpha1 = x[5*i];
843f9fae5adSBarry Smith     alpha2 = x[5*i+1];
844f9fae5adSBarry Smith     alpha3 = x[5*i+2];
845f9fae5adSBarry Smith     alpha4 = x[5*i+3];
846f9fae5adSBarry Smith     alpha5 = x[5*i+4];
847f9fae5adSBarry Smith     while (n-->0) {
848f9fae5adSBarry Smith       y[5*(*idx)]   += alpha1*(*v);
849f9fae5adSBarry Smith       y[5*(*idx)+1] += alpha2*(*v);
850f9fae5adSBarry Smith       y[5*(*idx)+2] += alpha3*(*v);
851f9fae5adSBarry Smith       y[5*(*idx)+3] += alpha4*(*v);
852f9fae5adSBarry Smith       y[5*(*idx)+4] += alpha5*(*v);
853f9fae5adSBarry Smith       idx++; v++;
854f9fae5adSBarry Smith     }
855f9fae5adSBarry Smith   }
856dc0b31edSSatish Balay   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
8573649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
8581ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
859f9fae5adSBarry Smith   PetscFunctionReturn(0);
860bcc973b7SBarry Smith }
861bcc973b7SBarry Smith 
8626cd98798SBarry Smith /* ------------------------------------------------------------------------------*/
8634a2ae208SSatish Balay #undef __FUNCT__
864b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_6"
865dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
8666cd98798SBarry Smith {
8676cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
8686cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
869d2840e09SBarry Smith   const PetscScalar *x,*v;
870d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
871dfbe8321SBarry Smith   PetscErrorCode    ierr;
872d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
873d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
8746cd98798SBarry Smith 
8756cd98798SBarry Smith   PetscFunctionBegin;
8763649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
8771ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
8786cd98798SBarry Smith   idx  = a->j;
8796cd98798SBarry Smith   v    = a->a;
8806cd98798SBarry Smith   ii   = a->i;
8816cd98798SBarry Smith 
8826cd98798SBarry Smith   for (i=0; i<m; i++) {
8836cd98798SBarry Smith     jrow = ii[i];
8846cd98798SBarry Smith     n    = ii[i+1] - jrow;
8856cd98798SBarry Smith     sum1  = 0.0;
8866cd98798SBarry Smith     sum2  = 0.0;
8876cd98798SBarry Smith     sum3  = 0.0;
8886cd98798SBarry Smith     sum4  = 0.0;
8896cd98798SBarry Smith     sum5  = 0.0;
8906cd98798SBarry Smith     sum6  = 0.0;
891b3c51c66SMatthew Knepley     nonzerorow += (n>0);
8926cd98798SBarry Smith     for (j=0; j<n; j++) {
8936cd98798SBarry Smith       sum1 += v[jrow]*x[6*idx[jrow]];
8946cd98798SBarry Smith       sum2 += v[jrow]*x[6*idx[jrow]+1];
8956cd98798SBarry Smith       sum3 += v[jrow]*x[6*idx[jrow]+2];
8966cd98798SBarry Smith       sum4 += v[jrow]*x[6*idx[jrow]+3];
8976cd98798SBarry Smith       sum5 += v[jrow]*x[6*idx[jrow]+4];
8986cd98798SBarry Smith       sum6 += v[jrow]*x[6*idx[jrow]+5];
8996cd98798SBarry Smith       jrow++;
9006cd98798SBarry Smith      }
9016cd98798SBarry Smith     y[6*i]   = sum1;
9026cd98798SBarry Smith     y[6*i+1] = sum2;
9036cd98798SBarry Smith     y[6*i+2] = sum3;
9046cd98798SBarry Smith     y[6*i+3] = sum4;
9056cd98798SBarry Smith     y[6*i+4] = sum5;
9066cd98798SBarry Smith     y[6*i+5] = sum6;
9076cd98798SBarry Smith   }
9086cd98798SBarry Smith 
909dc0b31edSSatish Balay   ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr);
9103649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
9111ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
9126cd98798SBarry Smith   PetscFunctionReturn(0);
9136cd98798SBarry Smith }
9146cd98798SBarry Smith 
9154a2ae208SSatish Balay #undef __FUNCT__
916b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6"
917dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
9186cd98798SBarry Smith {
9196cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
9206cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
921d2840e09SBarry Smith   const PetscScalar *x,*v;
922d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
923dfbe8321SBarry Smith   PetscErrorCode    ierr;
924d2840e09SBarry Smith   PetscInt          n,i;
925d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
9266cd98798SBarry Smith 
9276cd98798SBarry Smith   PetscFunctionBegin;
928d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
9293649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
9301ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
931bfec09a0SHong Zhang 
9326cd98798SBarry Smith   for (i=0; i<m; i++) {
933bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
934bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
9356cd98798SBarry Smith     n      = a->i[i+1] - a->i[i];
9366cd98798SBarry Smith     alpha1 = x[6*i];
9376cd98798SBarry Smith     alpha2 = x[6*i+1];
9386cd98798SBarry Smith     alpha3 = x[6*i+2];
9396cd98798SBarry Smith     alpha4 = x[6*i+3];
9406cd98798SBarry Smith     alpha5 = x[6*i+4];
9416cd98798SBarry Smith     alpha6 = x[6*i+5];
9426cd98798SBarry Smith     while (n-->0) {
9436cd98798SBarry Smith       y[6*(*idx)]   += alpha1*(*v);
9446cd98798SBarry Smith       y[6*(*idx)+1] += alpha2*(*v);
9456cd98798SBarry Smith       y[6*(*idx)+2] += alpha3*(*v);
9466cd98798SBarry Smith       y[6*(*idx)+3] += alpha4*(*v);
9476cd98798SBarry Smith       y[6*(*idx)+4] += alpha5*(*v);
9486cd98798SBarry Smith       y[6*(*idx)+5] += alpha6*(*v);
9496cd98798SBarry Smith       idx++; v++;
9506cd98798SBarry Smith     }
9516cd98798SBarry Smith   }
952dc0b31edSSatish Balay   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
9533649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
9541ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
9556cd98798SBarry Smith   PetscFunctionReturn(0);
9566cd98798SBarry Smith }
9576cd98798SBarry Smith 
9584a2ae208SSatish Balay #undef __FUNCT__
959b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_6"
960dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
9616cd98798SBarry Smith {
9626cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
9636cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
964d2840e09SBarry Smith   const PetscScalar *x,*v;
965d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
966dfbe8321SBarry Smith   PetscErrorCode    ierr;
967b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
968d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
9696cd98798SBarry Smith 
9706cd98798SBarry Smith   PetscFunctionBegin;
9716cd98798SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
9723649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
9731ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
9746cd98798SBarry Smith   idx  = a->j;
9756cd98798SBarry Smith   v    = a->a;
9766cd98798SBarry Smith   ii   = a->i;
9776cd98798SBarry Smith 
9786cd98798SBarry Smith   for (i=0; i<m; i++) {
9796cd98798SBarry Smith     jrow = ii[i];
9806cd98798SBarry Smith     n    = ii[i+1] - jrow;
9816cd98798SBarry Smith     sum1  = 0.0;
9826cd98798SBarry Smith     sum2  = 0.0;
9836cd98798SBarry Smith     sum3  = 0.0;
9846cd98798SBarry Smith     sum4  = 0.0;
9856cd98798SBarry Smith     sum5  = 0.0;
9866cd98798SBarry Smith     sum6  = 0.0;
9876cd98798SBarry Smith     for (j=0; j<n; j++) {
9886cd98798SBarry Smith       sum1 += v[jrow]*x[6*idx[jrow]];
9896cd98798SBarry Smith       sum2 += v[jrow]*x[6*idx[jrow]+1];
9906cd98798SBarry Smith       sum3 += v[jrow]*x[6*idx[jrow]+2];
9916cd98798SBarry Smith       sum4 += v[jrow]*x[6*idx[jrow]+3];
9926cd98798SBarry Smith       sum5 += v[jrow]*x[6*idx[jrow]+4];
9936cd98798SBarry Smith       sum6 += v[jrow]*x[6*idx[jrow]+5];
9946cd98798SBarry Smith       jrow++;
9956cd98798SBarry Smith      }
9966cd98798SBarry Smith     y[6*i]   += sum1;
9976cd98798SBarry Smith     y[6*i+1] += sum2;
9986cd98798SBarry Smith     y[6*i+2] += sum3;
9996cd98798SBarry Smith     y[6*i+3] += sum4;
10006cd98798SBarry Smith     y[6*i+4] += sum5;
10016cd98798SBarry Smith     y[6*i+5] += sum6;
10026cd98798SBarry Smith   }
10036cd98798SBarry Smith 
1004dc0b31edSSatish Balay   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
10053649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
10061ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
10076cd98798SBarry Smith   PetscFunctionReturn(0);
10086cd98798SBarry Smith }
10096cd98798SBarry Smith 
10104a2ae208SSatish Balay #undef __FUNCT__
1011b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6"
1012dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
10136cd98798SBarry Smith {
10146cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
10156cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1016d2840e09SBarry Smith   const PetscScalar *x,*v;
1017d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
1018dfbe8321SBarry Smith   PetscErrorCode    ierr;
1019d2840e09SBarry Smith   PetscInt          n,i;
1020d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
10216cd98798SBarry Smith 
10226cd98798SBarry Smith   PetscFunctionBegin;
10236cd98798SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
10243649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
10251ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1026bfec09a0SHong Zhang 
10276cd98798SBarry Smith   for (i=0; i<m; i++) {
1028bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
1029bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
10306cd98798SBarry Smith     n      = a->i[i+1] - a->i[i];
10316cd98798SBarry Smith     alpha1 = x[6*i];
10326cd98798SBarry Smith     alpha2 = x[6*i+1];
10336cd98798SBarry Smith     alpha3 = x[6*i+2];
10346cd98798SBarry Smith     alpha4 = x[6*i+3];
10356cd98798SBarry Smith     alpha5 = x[6*i+4];
10366cd98798SBarry Smith     alpha6 = x[6*i+5];
10376cd98798SBarry Smith     while (n-->0) {
10386cd98798SBarry Smith       y[6*(*idx)]   += alpha1*(*v);
10396cd98798SBarry Smith       y[6*(*idx)+1] += alpha2*(*v);
10406cd98798SBarry Smith       y[6*(*idx)+2] += alpha3*(*v);
10416cd98798SBarry Smith       y[6*(*idx)+3] += alpha4*(*v);
10426cd98798SBarry Smith       y[6*(*idx)+4] += alpha5*(*v);
10436cd98798SBarry Smith       y[6*(*idx)+5] += alpha6*(*v);
10446cd98798SBarry Smith       idx++; v++;
10456cd98798SBarry Smith     }
10466cd98798SBarry Smith   }
1047dc0b31edSSatish Balay   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
10483649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
10491ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
10506cd98798SBarry Smith   PetscFunctionReturn(0);
10516cd98798SBarry Smith }
10526cd98798SBarry Smith 
105366ed3db0SBarry Smith /* ------------------------------------------------------------------------------*/
105466ed3db0SBarry Smith #undef __FUNCT__
1055ed8eea03SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_7"
1056ed8eea03SBarry Smith PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1057ed8eea03SBarry Smith {
1058ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1059ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1060d2840e09SBarry Smith   const PetscScalar *x,*v;
1061d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1062ed8eea03SBarry Smith   PetscErrorCode    ierr;
1063d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1064d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
1065ed8eea03SBarry Smith 
1066ed8eea03SBarry Smith   PetscFunctionBegin;
10673649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1068ed8eea03SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1069ed8eea03SBarry Smith   idx  = a->j;
1070ed8eea03SBarry Smith   v    = a->a;
1071ed8eea03SBarry Smith   ii   = a->i;
1072ed8eea03SBarry Smith 
1073ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1074ed8eea03SBarry Smith     jrow = ii[i];
1075ed8eea03SBarry Smith     n    = ii[i+1] - jrow;
1076ed8eea03SBarry Smith     sum1  = 0.0;
1077ed8eea03SBarry Smith     sum2  = 0.0;
1078ed8eea03SBarry Smith     sum3  = 0.0;
1079ed8eea03SBarry Smith     sum4  = 0.0;
1080ed8eea03SBarry Smith     sum5  = 0.0;
1081ed8eea03SBarry Smith     sum6  = 0.0;
1082ed8eea03SBarry Smith     sum7  = 0.0;
1083b3c51c66SMatthew Knepley     nonzerorow += (n>0);
1084ed8eea03SBarry Smith     for (j=0; j<n; j++) {
1085ed8eea03SBarry Smith       sum1 += v[jrow]*x[7*idx[jrow]];
1086ed8eea03SBarry Smith       sum2 += v[jrow]*x[7*idx[jrow]+1];
1087ed8eea03SBarry Smith       sum3 += v[jrow]*x[7*idx[jrow]+2];
1088ed8eea03SBarry Smith       sum4 += v[jrow]*x[7*idx[jrow]+3];
1089ed8eea03SBarry Smith       sum5 += v[jrow]*x[7*idx[jrow]+4];
1090ed8eea03SBarry Smith       sum6 += v[jrow]*x[7*idx[jrow]+5];
1091ed8eea03SBarry Smith       sum7 += v[jrow]*x[7*idx[jrow]+6];
1092ed8eea03SBarry Smith       jrow++;
1093ed8eea03SBarry Smith      }
1094ed8eea03SBarry Smith     y[7*i]   = sum1;
1095ed8eea03SBarry Smith     y[7*i+1] = sum2;
1096ed8eea03SBarry Smith     y[7*i+2] = sum3;
1097ed8eea03SBarry Smith     y[7*i+3] = sum4;
1098ed8eea03SBarry Smith     y[7*i+4] = sum5;
1099ed8eea03SBarry Smith     y[7*i+5] = sum6;
1100ed8eea03SBarry Smith     y[7*i+6] = sum7;
1101ed8eea03SBarry Smith   }
1102ed8eea03SBarry Smith 
1103dc0b31edSSatish Balay   ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr);
11043649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1105ed8eea03SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1106ed8eea03SBarry Smith   PetscFunctionReturn(0);
1107ed8eea03SBarry Smith }
1108ed8eea03SBarry Smith 
1109ed8eea03SBarry Smith #undef __FUNCT__
1110ed8eea03SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_7"
1111ed8eea03SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1112ed8eea03SBarry Smith {
1113ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1114ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1115d2840e09SBarry Smith   const PetscScalar *x,*v;
1116d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1117ed8eea03SBarry Smith   PetscErrorCode    ierr;
1118d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1119d2840e09SBarry Smith   PetscInt          n,i;
1120ed8eea03SBarry Smith 
1121ed8eea03SBarry Smith   PetscFunctionBegin;
1122d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
11233649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1124ed8eea03SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1125ed8eea03SBarry Smith 
1126ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1127ed8eea03SBarry Smith     idx    = a->j + a->i[i] ;
1128ed8eea03SBarry Smith     v      = a->a + a->i[i] ;
1129ed8eea03SBarry Smith     n      = a->i[i+1] - a->i[i];
1130ed8eea03SBarry Smith     alpha1 = x[7*i];
1131ed8eea03SBarry Smith     alpha2 = x[7*i+1];
1132ed8eea03SBarry Smith     alpha3 = x[7*i+2];
1133ed8eea03SBarry Smith     alpha4 = x[7*i+3];
1134ed8eea03SBarry Smith     alpha5 = x[7*i+4];
1135ed8eea03SBarry Smith     alpha6 = x[7*i+5];
1136ed8eea03SBarry Smith     alpha7 = x[7*i+6];
1137ed8eea03SBarry Smith     while (n-->0) {
1138ed8eea03SBarry Smith       y[7*(*idx)]   += alpha1*(*v);
1139ed8eea03SBarry Smith       y[7*(*idx)+1] += alpha2*(*v);
1140ed8eea03SBarry Smith       y[7*(*idx)+2] += alpha3*(*v);
1141ed8eea03SBarry Smith       y[7*(*idx)+3] += alpha4*(*v);
1142ed8eea03SBarry Smith       y[7*(*idx)+4] += alpha5*(*v);
1143ed8eea03SBarry Smith       y[7*(*idx)+5] += alpha6*(*v);
1144ed8eea03SBarry Smith       y[7*(*idx)+6] += alpha7*(*v);
1145ed8eea03SBarry Smith       idx++; v++;
1146ed8eea03SBarry Smith     }
1147ed8eea03SBarry Smith   }
1148dc0b31edSSatish Balay   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
11493649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1150ed8eea03SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1151ed8eea03SBarry Smith   PetscFunctionReturn(0);
1152ed8eea03SBarry Smith }
1153ed8eea03SBarry Smith 
1154ed8eea03SBarry Smith #undef __FUNCT__
1155ed8eea03SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_7"
1156ed8eea03SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1157ed8eea03SBarry Smith {
1158ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1159ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1160d2840e09SBarry Smith   const PetscScalar *x,*v;
1161d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1162ed8eea03SBarry Smith   PetscErrorCode    ierr;
1163d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1164ed8eea03SBarry Smith   PetscInt          n,i,jrow,j;
1165ed8eea03SBarry Smith 
1166ed8eea03SBarry Smith   PetscFunctionBegin;
1167ed8eea03SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
11683649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1169ed8eea03SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1170ed8eea03SBarry Smith   idx  = a->j;
1171ed8eea03SBarry Smith   v    = a->a;
1172ed8eea03SBarry Smith   ii   = a->i;
1173ed8eea03SBarry Smith 
1174ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1175ed8eea03SBarry Smith     jrow = ii[i];
1176ed8eea03SBarry Smith     n    = ii[i+1] - jrow;
1177ed8eea03SBarry Smith     sum1  = 0.0;
1178ed8eea03SBarry Smith     sum2  = 0.0;
1179ed8eea03SBarry Smith     sum3  = 0.0;
1180ed8eea03SBarry Smith     sum4  = 0.0;
1181ed8eea03SBarry Smith     sum5  = 0.0;
1182ed8eea03SBarry Smith     sum6  = 0.0;
1183ed8eea03SBarry Smith     sum7  = 0.0;
1184ed8eea03SBarry Smith     for (j=0; j<n; j++) {
1185ed8eea03SBarry Smith       sum1 += v[jrow]*x[7*idx[jrow]];
1186ed8eea03SBarry Smith       sum2 += v[jrow]*x[7*idx[jrow]+1];
1187ed8eea03SBarry Smith       sum3 += v[jrow]*x[7*idx[jrow]+2];
1188ed8eea03SBarry Smith       sum4 += v[jrow]*x[7*idx[jrow]+3];
1189ed8eea03SBarry Smith       sum5 += v[jrow]*x[7*idx[jrow]+4];
1190ed8eea03SBarry Smith       sum6 += v[jrow]*x[7*idx[jrow]+5];
1191ed8eea03SBarry Smith       sum7 += v[jrow]*x[7*idx[jrow]+6];
1192ed8eea03SBarry Smith       jrow++;
1193ed8eea03SBarry Smith      }
1194ed8eea03SBarry Smith     y[7*i]   += sum1;
1195ed8eea03SBarry Smith     y[7*i+1] += sum2;
1196ed8eea03SBarry Smith     y[7*i+2] += sum3;
1197ed8eea03SBarry Smith     y[7*i+3] += sum4;
1198ed8eea03SBarry Smith     y[7*i+4] += sum5;
1199ed8eea03SBarry Smith     y[7*i+5] += sum6;
1200ed8eea03SBarry Smith     y[7*i+6] += sum7;
1201ed8eea03SBarry Smith   }
1202ed8eea03SBarry Smith 
1203dc0b31edSSatish Balay   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
12043649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1205ed8eea03SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1206ed8eea03SBarry Smith   PetscFunctionReturn(0);
1207ed8eea03SBarry Smith }
1208ed8eea03SBarry Smith 
1209ed8eea03SBarry Smith #undef __FUNCT__
1210ed8eea03SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_7"
1211ed8eea03SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1212ed8eea03SBarry Smith {
1213ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1214ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1215d2840e09SBarry Smith   const PetscScalar *x,*v;
1216d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1217ed8eea03SBarry Smith   PetscErrorCode    ierr;
1218d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1219d2840e09SBarry Smith   PetscInt          n,i;
1220ed8eea03SBarry Smith 
1221ed8eea03SBarry Smith   PetscFunctionBegin;
1222ed8eea03SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
12233649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1224ed8eea03SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1225ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1226ed8eea03SBarry Smith     idx    = a->j + a->i[i] ;
1227ed8eea03SBarry Smith     v      = a->a + a->i[i] ;
1228ed8eea03SBarry Smith     n      = a->i[i+1] - a->i[i];
1229ed8eea03SBarry Smith     alpha1 = x[7*i];
1230ed8eea03SBarry Smith     alpha2 = x[7*i+1];
1231ed8eea03SBarry Smith     alpha3 = x[7*i+2];
1232ed8eea03SBarry Smith     alpha4 = x[7*i+3];
1233ed8eea03SBarry Smith     alpha5 = x[7*i+4];
1234ed8eea03SBarry Smith     alpha6 = x[7*i+5];
1235ed8eea03SBarry Smith     alpha7 = x[7*i+6];
1236ed8eea03SBarry Smith     while (n-->0) {
1237ed8eea03SBarry Smith       y[7*(*idx)]   += alpha1*(*v);
1238ed8eea03SBarry Smith       y[7*(*idx)+1] += alpha2*(*v);
1239ed8eea03SBarry Smith       y[7*(*idx)+2] += alpha3*(*v);
1240ed8eea03SBarry Smith       y[7*(*idx)+3] += alpha4*(*v);
1241ed8eea03SBarry Smith       y[7*(*idx)+4] += alpha5*(*v);
1242ed8eea03SBarry Smith       y[7*(*idx)+5] += alpha6*(*v);
1243ed8eea03SBarry Smith       y[7*(*idx)+6] += alpha7*(*v);
1244ed8eea03SBarry Smith       idx++; v++;
1245ed8eea03SBarry Smith     }
1246ed8eea03SBarry Smith   }
1247dc0b31edSSatish Balay   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
12483649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1249ed8eea03SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1250ed8eea03SBarry Smith   PetscFunctionReturn(0);
1251ed8eea03SBarry Smith }
1252ed8eea03SBarry Smith 
1253ed8eea03SBarry Smith #undef __FUNCT__
125466ed3db0SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_8"
1255dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
125666ed3db0SBarry Smith {
125766ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
125866ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1259d2840e09SBarry Smith   const PetscScalar *x,*v;
1260d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1261dfbe8321SBarry Smith   PetscErrorCode    ierr;
1262d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1263d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
126466ed3db0SBarry Smith 
126566ed3db0SBarry Smith   PetscFunctionBegin;
12663649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
12671ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
126866ed3db0SBarry Smith   idx  = a->j;
126966ed3db0SBarry Smith   v    = a->a;
127066ed3db0SBarry Smith   ii   = a->i;
127166ed3db0SBarry Smith 
127266ed3db0SBarry Smith   for (i=0; i<m; i++) {
127366ed3db0SBarry Smith     jrow = ii[i];
127466ed3db0SBarry Smith     n    = ii[i+1] - jrow;
127566ed3db0SBarry Smith     sum1  = 0.0;
127666ed3db0SBarry Smith     sum2  = 0.0;
127766ed3db0SBarry Smith     sum3  = 0.0;
127866ed3db0SBarry Smith     sum4  = 0.0;
127966ed3db0SBarry Smith     sum5  = 0.0;
128066ed3db0SBarry Smith     sum6  = 0.0;
128166ed3db0SBarry Smith     sum7  = 0.0;
128266ed3db0SBarry Smith     sum8  = 0.0;
1283b3c51c66SMatthew Knepley     nonzerorow += (n>0);
128466ed3db0SBarry Smith     for (j=0; j<n; j++) {
128566ed3db0SBarry Smith       sum1 += v[jrow]*x[8*idx[jrow]];
128666ed3db0SBarry Smith       sum2 += v[jrow]*x[8*idx[jrow]+1];
128766ed3db0SBarry Smith       sum3 += v[jrow]*x[8*idx[jrow]+2];
128866ed3db0SBarry Smith       sum4 += v[jrow]*x[8*idx[jrow]+3];
128966ed3db0SBarry Smith       sum5 += v[jrow]*x[8*idx[jrow]+4];
129066ed3db0SBarry Smith       sum6 += v[jrow]*x[8*idx[jrow]+5];
129166ed3db0SBarry Smith       sum7 += v[jrow]*x[8*idx[jrow]+6];
129266ed3db0SBarry Smith       sum8 += v[jrow]*x[8*idx[jrow]+7];
129366ed3db0SBarry Smith       jrow++;
129466ed3db0SBarry Smith      }
129566ed3db0SBarry Smith     y[8*i]   = sum1;
129666ed3db0SBarry Smith     y[8*i+1] = sum2;
129766ed3db0SBarry Smith     y[8*i+2] = sum3;
129866ed3db0SBarry Smith     y[8*i+3] = sum4;
129966ed3db0SBarry Smith     y[8*i+4] = sum5;
130066ed3db0SBarry Smith     y[8*i+5] = sum6;
130166ed3db0SBarry Smith     y[8*i+6] = sum7;
130266ed3db0SBarry Smith     y[8*i+7] = sum8;
130366ed3db0SBarry Smith   }
130466ed3db0SBarry Smith 
1305dc0b31edSSatish Balay   ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr);
13063649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
13071ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
130866ed3db0SBarry Smith   PetscFunctionReturn(0);
130966ed3db0SBarry Smith }
131066ed3db0SBarry Smith 
131166ed3db0SBarry Smith #undef __FUNCT__
131266ed3db0SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8"
1313dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
131466ed3db0SBarry Smith {
131566ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
131666ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1317d2840e09SBarry Smith   const PetscScalar *x,*v;
1318d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1319dfbe8321SBarry Smith   PetscErrorCode    ierr;
1320d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1321d2840e09SBarry Smith   PetscInt          n,i;
132266ed3db0SBarry Smith 
132366ed3db0SBarry Smith   PetscFunctionBegin;
1324d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
13253649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
13261ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1327bfec09a0SHong Zhang 
132866ed3db0SBarry Smith   for (i=0; i<m; i++) {
1329bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
1330bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
133166ed3db0SBarry Smith     n      = a->i[i+1] - a->i[i];
133266ed3db0SBarry Smith     alpha1 = x[8*i];
133366ed3db0SBarry Smith     alpha2 = x[8*i+1];
133466ed3db0SBarry Smith     alpha3 = x[8*i+2];
133566ed3db0SBarry Smith     alpha4 = x[8*i+3];
133666ed3db0SBarry Smith     alpha5 = x[8*i+4];
133766ed3db0SBarry Smith     alpha6 = x[8*i+5];
133866ed3db0SBarry Smith     alpha7 = x[8*i+6];
133966ed3db0SBarry Smith     alpha8 = x[8*i+7];
134066ed3db0SBarry Smith     while (n-->0) {
134166ed3db0SBarry Smith       y[8*(*idx)]   += alpha1*(*v);
134266ed3db0SBarry Smith       y[8*(*idx)+1] += alpha2*(*v);
134366ed3db0SBarry Smith       y[8*(*idx)+2] += alpha3*(*v);
134466ed3db0SBarry Smith       y[8*(*idx)+3] += alpha4*(*v);
134566ed3db0SBarry Smith       y[8*(*idx)+4] += alpha5*(*v);
134666ed3db0SBarry Smith       y[8*(*idx)+5] += alpha6*(*v);
134766ed3db0SBarry Smith       y[8*(*idx)+6] += alpha7*(*v);
134866ed3db0SBarry Smith       y[8*(*idx)+7] += alpha8*(*v);
134966ed3db0SBarry Smith       idx++; v++;
135066ed3db0SBarry Smith     }
135166ed3db0SBarry Smith   }
1352dc0b31edSSatish Balay   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
13533649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
13541ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
135566ed3db0SBarry Smith   PetscFunctionReturn(0);
135666ed3db0SBarry Smith }
135766ed3db0SBarry Smith 
135866ed3db0SBarry Smith #undef __FUNCT__
135966ed3db0SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_8"
1360dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
136166ed3db0SBarry Smith {
136266ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
136366ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1364d2840e09SBarry Smith   const PetscScalar *x,*v;
1365d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1366dfbe8321SBarry Smith   PetscErrorCode    ierr;
1367d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1368b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
136966ed3db0SBarry Smith 
137066ed3db0SBarry Smith   PetscFunctionBegin;
137166ed3db0SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
13723649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
13731ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
137466ed3db0SBarry Smith   idx  = a->j;
137566ed3db0SBarry Smith   v    = a->a;
137666ed3db0SBarry Smith   ii   = a->i;
137766ed3db0SBarry Smith 
137866ed3db0SBarry Smith   for (i=0; i<m; i++) {
137966ed3db0SBarry Smith     jrow = ii[i];
138066ed3db0SBarry Smith     n    = ii[i+1] - jrow;
138166ed3db0SBarry Smith     sum1  = 0.0;
138266ed3db0SBarry Smith     sum2  = 0.0;
138366ed3db0SBarry Smith     sum3  = 0.0;
138466ed3db0SBarry Smith     sum4  = 0.0;
138566ed3db0SBarry Smith     sum5  = 0.0;
138666ed3db0SBarry Smith     sum6  = 0.0;
138766ed3db0SBarry Smith     sum7  = 0.0;
138866ed3db0SBarry Smith     sum8  = 0.0;
138966ed3db0SBarry Smith     for (j=0; j<n; j++) {
139066ed3db0SBarry Smith       sum1 += v[jrow]*x[8*idx[jrow]];
139166ed3db0SBarry Smith       sum2 += v[jrow]*x[8*idx[jrow]+1];
139266ed3db0SBarry Smith       sum3 += v[jrow]*x[8*idx[jrow]+2];
139366ed3db0SBarry Smith       sum4 += v[jrow]*x[8*idx[jrow]+3];
139466ed3db0SBarry Smith       sum5 += v[jrow]*x[8*idx[jrow]+4];
139566ed3db0SBarry Smith       sum6 += v[jrow]*x[8*idx[jrow]+5];
139666ed3db0SBarry Smith       sum7 += v[jrow]*x[8*idx[jrow]+6];
139766ed3db0SBarry Smith       sum8 += v[jrow]*x[8*idx[jrow]+7];
139866ed3db0SBarry Smith       jrow++;
139966ed3db0SBarry Smith      }
140066ed3db0SBarry Smith     y[8*i]   += sum1;
140166ed3db0SBarry Smith     y[8*i+1] += sum2;
140266ed3db0SBarry Smith     y[8*i+2] += sum3;
140366ed3db0SBarry Smith     y[8*i+3] += sum4;
140466ed3db0SBarry Smith     y[8*i+4] += sum5;
140566ed3db0SBarry Smith     y[8*i+5] += sum6;
140666ed3db0SBarry Smith     y[8*i+6] += sum7;
140766ed3db0SBarry Smith     y[8*i+7] += sum8;
140866ed3db0SBarry Smith   }
140966ed3db0SBarry Smith 
1410dc0b31edSSatish Balay   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
14113649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
14121ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
141366ed3db0SBarry Smith   PetscFunctionReturn(0);
141466ed3db0SBarry Smith }
141566ed3db0SBarry Smith 
141666ed3db0SBarry Smith #undef __FUNCT__
141766ed3db0SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8"
1418dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
141966ed3db0SBarry Smith {
142066ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
142166ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1422d2840e09SBarry Smith   const PetscScalar *x,*v;
1423d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1424dfbe8321SBarry Smith   PetscErrorCode    ierr;
1425d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1426d2840e09SBarry Smith   PetscInt          n,i;
142766ed3db0SBarry Smith 
142866ed3db0SBarry Smith   PetscFunctionBegin;
142966ed3db0SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
14303649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
14311ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
143266ed3db0SBarry Smith   for (i=0; i<m; i++) {
1433bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
1434bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
143566ed3db0SBarry Smith     n      = a->i[i+1] - a->i[i];
143666ed3db0SBarry Smith     alpha1 = x[8*i];
143766ed3db0SBarry Smith     alpha2 = x[8*i+1];
143866ed3db0SBarry Smith     alpha3 = x[8*i+2];
143966ed3db0SBarry Smith     alpha4 = x[8*i+3];
144066ed3db0SBarry Smith     alpha5 = x[8*i+4];
144166ed3db0SBarry Smith     alpha6 = x[8*i+5];
144266ed3db0SBarry Smith     alpha7 = x[8*i+6];
144366ed3db0SBarry Smith     alpha8 = x[8*i+7];
144466ed3db0SBarry Smith     while (n-->0) {
144566ed3db0SBarry Smith       y[8*(*idx)]   += alpha1*(*v);
144666ed3db0SBarry Smith       y[8*(*idx)+1] += alpha2*(*v);
144766ed3db0SBarry Smith       y[8*(*idx)+2] += alpha3*(*v);
144866ed3db0SBarry Smith       y[8*(*idx)+3] += alpha4*(*v);
144966ed3db0SBarry Smith       y[8*(*idx)+4] += alpha5*(*v);
145066ed3db0SBarry Smith       y[8*(*idx)+5] += alpha6*(*v);
145166ed3db0SBarry Smith       y[8*(*idx)+6] += alpha7*(*v);
145266ed3db0SBarry Smith       y[8*(*idx)+7] += alpha8*(*v);
145366ed3db0SBarry Smith       idx++; v++;
145466ed3db0SBarry Smith     }
145566ed3db0SBarry Smith   }
1456dc0b31edSSatish Balay   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
14573649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
14581ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
14592f7816d4SBarry Smith   PetscFunctionReturn(0);
14602f7816d4SBarry Smith }
14612f7816d4SBarry Smith 
14622b692628SMatthew Knepley /* ------------------------------------------------------------------------------*/
14632b692628SMatthew Knepley #undef __FUNCT__
14642b692628SMatthew Knepley #define __FUNCT__ "MatMult_SeqMAIJ_9"
1465dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
14662b692628SMatthew Knepley {
14672b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
14682b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1469d2840e09SBarry Smith   const PetscScalar *x,*v;
1470d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1471dfbe8321SBarry Smith   PetscErrorCode    ierr;
1472d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1473d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
14742b692628SMatthew Knepley 
14752b692628SMatthew Knepley   PetscFunctionBegin;
14763649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
14771ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
14782b692628SMatthew Knepley   idx  = a->j;
14792b692628SMatthew Knepley   v    = a->a;
14802b692628SMatthew Knepley   ii   = a->i;
14812b692628SMatthew Knepley 
14822b692628SMatthew Knepley   for (i=0; i<m; i++) {
14832b692628SMatthew Knepley     jrow = ii[i];
14842b692628SMatthew Knepley     n    = ii[i+1] - jrow;
14852b692628SMatthew Knepley     sum1  = 0.0;
14862b692628SMatthew Knepley     sum2  = 0.0;
14872b692628SMatthew Knepley     sum3  = 0.0;
14882b692628SMatthew Knepley     sum4  = 0.0;
14892b692628SMatthew Knepley     sum5  = 0.0;
14902b692628SMatthew Knepley     sum6  = 0.0;
14912b692628SMatthew Knepley     sum7  = 0.0;
14922b692628SMatthew Knepley     sum8  = 0.0;
14932b692628SMatthew Knepley     sum9  = 0.0;
1494b3c51c66SMatthew Knepley     nonzerorow += (n>0);
14952b692628SMatthew Knepley     for (j=0; j<n; j++) {
14962b692628SMatthew Knepley       sum1 += v[jrow]*x[9*idx[jrow]];
14972b692628SMatthew Knepley       sum2 += v[jrow]*x[9*idx[jrow]+1];
14982b692628SMatthew Knepley       sum3 += v[jrow]*x[9*idx[jrow]+2];
14992b692628SMatthew Knepley       sum4 += v[jrow]*x[9*idx[jrow]+3];
15002b692628SMatthew Knepley       sum5 += v[jrow]*x[9*idx[jrow]+4];
15012b692628SMatthew Knepley       sum6 += v[jrow]*x[9*idx[jrow]+5];
15022b692628SMatthew Knepley       sum7 += v[jrow]*x[9*idx[jrow]+6];
15032b692628SMatthew Knepley       sum8 += v[jrow]*x[9*idx[jrow]+7];
15042b692628SMatthew Knepley       sum9 += v[jrow]*x[9*idx[jrow]+8];
15052b692628SMatthew Knepley       jrow++;
15062b692628SMatthew Knepley      }
15072b692628SMatthew Knepley     y[9*i]   = sum1;
15082b692628SMatthew Knepley     y[9*i+1] = sum2;
15092b692628SMatthew Knepley     y[9*i+2] = sum3;
15102b692628SMatthew Knepley     y[9*i+3] = sum4;
15112b692628SMatthew Knepley     y[9*i+4] = sum5;
15122b692628SMatthew Knepley     y[9*i+5] = sum6;
15132b692628SMatthew Knepley     y[9*i+6] = sum7;
15142b692628SMatthew Knepley     y[9*i+7] = sum8;
15152b692628SMatthew Knepley     y[9*i+8] = sum9;
15162b692628SMatthew Knepley   }
15172b692628SMatthew Knepley 
1518dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr);
15193649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
15201ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
15212b692628SMatthew Knepley   PetscFunctionReturn(0);
15222b692628SMatthew Knepley }
15232b692628SMatthew Knepley 
1524b9cda22cSBarry Smith /* ------------------------------------------------------------------------------*/
1525b9cda22cSBarry Smith 
15262b692628SMatthew Knepley #undef __FUNCT__
15272b692628SMatthew Knepley #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9"
1528dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
15292b692628SMatthew Knepley {
15302b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
15312b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1532d2840e09SBarry Smith   const PetscScalar *x,*v;
1533d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1534dfbe8321SBarry Smith   PetscErrorCode    ierr;
1535d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1536d2840e09SBarry Smith   PetscInt          n,i;
15372b692628SMatthew Knepley 
15382b692628SMatthew Knepley   PetscFunctionBegin;
1539d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
15403649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
15411ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
15422b692628SMatthew Knepley 
15432b692628SMatthew Knepley   for (i=0; i<m; i++) {
15442b692628SMatthew Knepley     idx    = a->j + a->i[i] ;
15452b692628SMatthew Knepley     v      = a->a + a->i[i] ;
15462b692628SMatthew Knepley     n      = a->i[i+1] - a->i[i];
15472b692628SMatthew Knepley     alpha1 = x[9*i];
15482b692628SMatthew Knepley     alpha2 = x[9*i+1];
15492b692628SMatthew Knepley     alpha3 = x[9*i+2];
15502b692628SMatthew Knepley     alpha4 = x[9*i+3];
15512b692628SMatthew Knepley     alpha5 = x[9*i+4];
15522b692628SMatthew Knepley     alpha6 = x[9*i+5];
15532b692628SMatthew Knepley     alpha7 = x[9*i+6];
15542b692628SMatthew Knepley     alpha8 = x[9*i+7];
15552f6bd0c6SMatthew Knepley     alpha9 = x[9*i+8];
15562b692628SMatthew Knepley     while (n-->0) {
15572b692628SMatthew Knepley       y[9*(*idx)]   += alpha1*(*v);
15582b692628SMatthew Knepley       y[9*(*idx)+1] += alpha2*(*v);
15592b692628SMatthew Knepley       y[9*(*idx)+2] += alpha3*(*v);
15602b692628SMatthew Knepley       y[9*(*idx)+3] += alpha4*(*v);
15612b692628SMatthew Knepley       y[9*(*idx)+4] += alpha5*(*v);
15622b692628SMatthew Knepley       y[9*(*idx)+5] += alpha6*(*v);
15632b692628SMatthew Knepley       y[9*(*idx)+6] += alpha7*(*v);
15642b692628SMatthew Knepley       y[9*(*idx)+7] += alpha8*(*v);
15652b692628SMatthew Knepley       y[9*(*idx)+8] += alpha9*(*v);
15662b692628SMatthew Knepley       idx++; v++;
15672b692628SMatthew Knepley     }
15682b692628SMatthew Knepley   }
1569dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
15703649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
15711ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
15722b692628SMatthew Knepley   PetscFunctionReturn(0);
15732b692628SMatthew Knepley }
15742b692628SMatthew Knepley 
15752b692628SMatthew Knepley #undef __FUNCT__
15762b692628SMatthew Knepley #define __FUNCT__ "MatMultAdd_SeqMAIJ_9"
1577dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
15782b692628SMatthew Knepley {
15792b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
15802b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1581d2840e09SBarry Smith   const PetscScalar *x,*v;
1582d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1583dfbe8321SBarry Smith   PetscErrorCode    ierr;
1584d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1585b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
15862b692628SMatthew Knepley 
15872b692628SMatthew Knepley   PetscFunctionBegin;
15882b692628SMatthew Knepley   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
15893649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
15901ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
15912b692628SMatthew Knepley   idx  = a->j;
15922b692628SMatthew Knepley   v    = a->a;
15932b692628SMatthew Knepley   ii   = a->i;
15942b692628SMatthew Knepley 
15952b692628SMatthew Knepley   for (i=0; i<m; i++) {
15962b692628SMatthew Knepley     jrow = ii[i];
15972b692628SMatthew Knepley     n    = ii[i+1] - jrow;
15982b692628SMatthew Knepley     sum1  = 0.0;
15992b692628SMatthew Knepley     sum2  = 0.0;
16002b692628SMatthew Knepley     sum3  = 0.0;
16012b692628SMatthew Knepley     sum4  = 0.0;
16022b692628SMatthew Knepley     sum5  = 0.0;
16032b692628SMatthew Knepley     sum6  = 0.0;
16042b692628SMatthew Knepley     sum7  = 0.0;
16052b692628SMatthew Knepley     sum8  = 0.0;
16062b692628SMatthew Knepley     sum9  = 0.0;
16072b692628SMatthew Knepley     for (j=0; j<n; j++) {
16082b692628SMatthew Knepley       sum1 += v[jrow]*x[9*idx[jrow]];
16092b692628SMatthew Knepley       sum2 += v[jrow]*x[9*idx[jrow]+1];
16102b692628SMatthew Knepley       sum3 += v[jrow]*x[9*idx[jrow]+2];
16112b692628SMatthew Knepley       sum4 += v[jrow]*x[9*idx[jrow]+3];
16122b692628SMatthew Knepley       sum5 += v[jrow]*x[9*idx[jrow]+4];
16132b692628SMatthew Knepley       sum6 += v[jrow]*x[9*idx[jrow]+5];
16142b692628SMatthew Knepley       sum7 += v[jrow]*x[9*idx[jrow]+6];
16152b692628SMatthew Knepley       sum8 += v[jrow]*x[9*idx[jrow]+7];
16162b692628SMatthew Knepley       sum9 += v[jrow]*x[9*idx[jrow]+8];
16172b692628SMatthew Knepley       jrow++;
16182b692628SMatthew Knepley      }
16192b692628SMatthew Knepley     y[9*i]   += sum1;
16202b692628SMatthew Knepley     y[9*i+1] += sum2;
16212b692628SMatthew Knepley     y[9*i+2] += sum3;
16222b692628SMatthew Knepley     y[9*i+3] += sum4;
16232b692628SMatthew Knepley     y[9*i+4] += sum5;
16242b692628SMatthew Knepley     y[9*i+5] += sum6;
16252b692628SMatthew Knepley     y[9*i+6] += sum7;
16262b692628SMatthew Knepley     y[9*i+7] += sum8;
16272b692628SMatthew Knepley     y[9*i+8] += sum9;
16282b692628SMatthew Knepley   }
16292b692628SMatthew Knepley 
1630efee365bSSatish Balay   ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr);
16313649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
16321ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
16332b692628SMatthew Knepley   PetscFunctionReturn(0);
16342b692628SMatthew Knepley }
16352b692628SMatthew Knepley 
16362b692628SMatthew Knepley #undef __FUNCT__
16372b692628SMatthew Knepley #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9"
1638dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
16392b692628SMatthew Knepley {
16402b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
16412b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1642d2840e09SBarry Smith   const PetscScalar *x,*v;
1643d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1644dfbe8321SBarry Smith   PetscErrorCode    ierr;
1645d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1646d2840e09SBarry Smith   PetscInt          n,i;
16472b692628SMatthew Knepley 
16482b692628SMatthew Knepley   PetscFunctionBegin;
16492b692628SMatthew Knepley   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
16503649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
16511ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
16522b692628SMatthew Knepley   for (i=0; i<m; i++) {
16532b692628SMatthew Knepley     idx    = a->j + a->i[i] ;
16542b692628SMatthew Knepley     v      = a->a + a->i[i] ;
16552b692628SMatthew Knepley     n      = a->i[i+1] - a->i[i];
16562b692628SMatthew Knepley     alpha1 = x[9*i];
16572b692628SMatthew Knepley     alpha2 = x[9*i+1];
16582b692628SMatthew Knepley     alpha3 = x[9*i+2];
16592b692628SMatthew Knepley     alpha4 = x[9*i+3];
16602b692628SMatthew Knepley     alpha5 = x[9*i+4];
16612b692628SMatthew Knepley     alpha6 = x[9*i+5];
16622b692628SMatthew Knepley     alpha7 = x[9*i+6];
16632b692628SMatthew Knepley     alpha8 = x[9*i+7];
16642b692628SMatthew Knepley     alpha9 = x[9*i+8];
16652b692628SMatthew Knepley     while (n-->0) {
16662b692628SMatthew Knepley       y[9*(*idx)]   += alpha1*(*v);
16672b692628SMatthew Knepley       y[9*(*idx)+1] += alpha2*(*v);
16682b692628SMatthew Knepley       y[9*(*idx)+2] += alpha3*(*v);
16692b692628SMatthew Knepley       y[9*(*idx)+3] += alpha4*(*v);
16702b692628SMatthew Knepley       y[9*(*idx)+4] += alpha5*(*v);
16712b692628SMatthew Knepley       y[9*(*idx)+5] += alpha6*(*v);
16722b692628SMatthew Knepley       y[9*(*idx)+6] += alpha7*(*v);
16732b692628SMatthew Knepley       y[9*(*idx)+7] += alpha8*(*v);
16742b692628SMatthew Knepley       y[9*(*idx)+8] += alpha9*(*v);
16752b692628SMatthew Knepley       idx++; v++;
16762b692628SMatthew Knepley     }
16772b692628SMatthew Knepley   }
1678dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
16793649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
16801ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
16812b692628SMatthew Knepley   PetscFunctionReturn(0);
16822b692628SMatthew Knepley }
1683b9cda22cSBarry Smith #undef __FUNCT__
1684b9cda22cSBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_10"
1685b9cda22cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1686b9cda22cSBarry Smith {
1687b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1688b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1689d2840e09SBarry Smith   const PetscScalar *x,*v;
1690d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1691b9cda22cSBarry Smith   PetscErrorCode    ierr;
1692d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1693d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
1694b9cda22cSBarry Smith 
1695b9cda22cSBarry Smith   PetscFunctionBegin;
16963649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1697b9cda22cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1698b9cda22cSBarry Smith   idx  = a->j;
1699b9cda22cSBarry Smith   v    = a->a;
1700b9cda22cSBarry Smith   ii   = a->i;
1701b9cda22cSBarry Smith 
1702b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1703b9cda22cSBarry Smith     jrow = ii[i];
1704b9cda22cSBarry Smith     n    = ii[i+1] - jrow;
1705b9cda22cSBarry Smith     sum1  = 0.0;
1706b9cda22cSBarry Smith     sum2  = 0.0;
1707b9cda22cSBarry Smith     sum3  = 0.0;
1708b9cda22cSBarry Smith     sum4  = 0.0;
1709b9cda22cSBarry Smith     sum5  = 0.0;
1710b9cda22cSBarry Smith     sum6  = 0.0;
1711b9cda22cSBarry Smith     sum7  = 0.0;
1712b9cda22cSBarry Smith     sum8  = 0.0;
1713b9cda22cSBarry Smith     sum9  = 0.0;
1714b9cda22cSBarry Smith     sum10 = 0.0;
1715b3c51c66SMatthew Knepley     nonzerorow += (n>0);
1716b9cda22cSBarry Smith     for (j=0; j<n; j++) {
1717b9cda22cSBarry Smith       sum1  += v[jrow]*x[10*idx[jrow]];
1718b9cda22cSBarry Smith       sum2  += v[jrow]*x[10*idx[jrow]+1];
1719b9cda22cSBarry Smith       sum3  += v[jrow]*x[10*idx[jrow]+2];
1720b9cda22cSBarry Smith       sum4  += v[jrow]*x[10*idx[jrow]+3];
1721b9cda22cSBarry Smith       sum5  += v[jrow]*x[10*idx[jrow]+4];
1722b9cda22cSBarry Smith       sum6  += v[jrow]*x[10*idx[jrow]+5];
1723b9cda22cSBarry Smith       sum7  += v[jrow]*x[10*idx[jrow]+6];
1724b9cda22cSBarry Smith       sum8  += v[jrow]*x[10*idx[jrow]+7];
1725b9cda22cSBarry Smith       sum9  += v[jrow]*x[10*idx[jrow]+8];
1726b9cda22cSBarry Smith       sum10 += v[jrow]*x[10*idx[jrow]+9];
1727b9cda22cSBarry Smith       jrow++;
1728b9cda22cSBarry Smith      }
1729b9cda22cSBarry Smith     y[10*i]   = sum1;
1730b9cda22cSBarry Smith     y[10*i+1] = sum2;
1731b9cda22cSBarry Smith     y[10*i+2] = sum3;
1732b9cda22cSBarry Smith     y[10*i+3] = sum4;
1733b9cda22cSBarry Smith     y[10*i+4] = sum5;
1734b9cda22cSBarry Smith     y[10*i+5] = sum6;
1735b9cda22cSBarry Smith     y[10*i+6] = sum7;
1736b9cda22cSBarry Smith     y[10*i+7] = sum8;
1737b9cda22cSBarry Smith     y[10*i+8] = sum9;
1738b9cda22cSBarry Smith     y[10*i+9] = sum10;
1739b9cda22cSBarry Smith   }
1740b9cda22cSBarry Smith 
1741dc0b31edSSatish Balay   ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr);
17423649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1743b9cda22cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1744b9cda22cSBarry Smith   PetscFunctionReturn(0);
1745b9cda22cSBarry Smith }
1746b9cda22cSBarry Smith 
1747b9cda22cSBarry Smith #undef __FUNCT__
1748dbdd7285SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_10"
1749b9cda22cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1750b9cda22cSBarry Smith {
1751b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1752b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1753d2840e09SBarry Smith   const PetscScalar *x,*v;
1754d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1755b9cda22cSBarry Smith   PetscErrorCode    ierr;
1756d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1757b9cda22cSBarry Smith   PetscInt          n,i,jrow,j;
1758b9cda22cSBarry Smith 
1759b9cda22cSBarry Smith   PetscFunctionBegin;
1760b9cda22cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
17613649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1762b9cda22cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1763b9cda22cSBarry Smith   idx  = a->j;
1764b9cda22cSBarry Smith   v    = a->a;
1765b9cda22cSBarry Smith   ii   = a->i;
1766b9cda22cSBarry Smith 
1767b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1768b9cda22cSBarry Smith     jrow = ii[i];
1769b9cda22cSBarry Smith     n    = ii[i+1] - jrow;
1770b9cda22cSBarry Smith     sum1  = 0.0;
1771b9cda22cSBarry Smith     sum2  = 0.0;
1772b9cda22cSBarry Smith     sum3  = 0.0;
1773b9cda22cSBarry Smith     sum4  = 0.0;
1774b9cda22cSBarry Smith     sum5  = 0.0;
1775b9cda22cSBarry Smith     sum6  = 0.0;
1776b9cda22cSBarry Smith     sum7  = 0.0;
1777b9cda22cSBarry Smith     sum8  = 0.0;
1778b9cda22cSBarry Smith     sum9  = 0.0;
1779b9cda22cSBarry Smith     sum10 = 0.0;
1780b9cda22cSBarry Smith     for (j=0; j<n; j++) {
1781b9cda22cSBarry Smith       sum1  += v[jrow]*x[10*idx[jrow]];
1782b9cda22cSBarry Smith       sum2  += v[jrow]*x[10*idx[jrow]+1];
1783b9cda22cSBarry Smith       sum3  += v[jrow]*x[10*idx[jrow]+2];
1784b9cda22cSBarry Smith       sum4  += v[jrow]*x[10*idx[jrow]+3];
1785b9cda22cSBarry Smith       sum5  += v[jrow]*x[10*idx[jrow]+4];
1786b9cda22cSBarry Smith       sum6  += v[jrow]*x[10*idx[jrow]+5];
1787b9cda22cSBarry Smith       sum7  += v[jrow]*x[10*idx[jrow]+6];
1788b9cda22cSBarry Smith       sum8  += v[jrow]*x[10*idx[jrow]+7];
1789b9cda22cSBarry Smith       sum9  += v[jrow]*x[10*idx[jrow]+8];
1790b9cda22cSBarry Smith       sum10 += v[jrow]*x[10*idx[jrow]+9];
1791b9cda22cSBarry Smith       jrow++;
1792b9cda22cSBarry Smith      }
1793b9cda22cSBarry Smith     y[10*i]   += sum1;
1794b9cda22cSBarry Smith     y[10*i+1] += sum2;
1795b9cda22cSBarry Smith     y[10*i+2] += sum3;
1796b9cda22cSBarry Smith     y[10*i+3] += sum4;
1797b9cda22cSBarry Smith     y[10*i+4] += sum5;
1798b9cda22cSBarry Smith     y[10*i+5] += sum6;
1799b9cda22cSBarry Smith     y[10*i+6] += sum7;
1800b9cda22cSBarry Smith     y[10*i+7] += sum8;
1801b9cda22cSBarry Smith     y[10*i+8] += sum9;
1802b9cda22cSBarry Smith     y[10*i+9] += sum10;
1803b9cda22cSBarry Smith   }
1804b9cda22cSBarry Smith 
1805dc0b31edSSatish Balay   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
18063649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1807b9cda22cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1808b9cda22cSBarry Smith   PetscFunctionReturn(0);
1809b9cda22cSBarry Smith }
1810b9cda22cSBarry Smith 
1811b9cda22cSBarry Smith #undef __FUNCT__
1812b9cda22cSBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_10"
1813b9cda22cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1814b9cda22cSBarry Smith {
1815b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1816b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1817d2840e09SBarry Smith   const PetscScalar *x,*v;
1818d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1819b9cda22cSBarry Smith   PetscErrorCode    ierr;
1820d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1821d2840e09SBarry Smith   PetscInt          n,i;
1822b9cda22cSBarry Smith 
1823b9cda22cSBarry Smith   PetscFunctionBegin;
1824d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
18253649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1826b9cda22cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1827b9cda22cSBarry Smith 
1828b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1829b9cda22cSBarry Smith     idx    = a->j + a->i[i] ;
1830b9cda22cSBarry Smith     v      = a->a + a->i[i] ;
1831b9cda22cSBarry Smith     n      = a->i[i+1] - a->i[i];
1832e29fdc22SBarry Smith     alpha1 = x[10*i];
1833e29fdc22SBarry Smith     alpha2 = x[10*i+1];
1834e29fdc22SBarry Smith     alpha3 = x[10*i+2];
1835e29fdc22SBarry Smith     alpha4 = x[10*i+3];
1836e29fdc22SBarry Smith     alpha5 = x[10*i+4];
1837e29fdc22SBarry Smith     alpha6 = x[10*i+5];
1838e29fdc22SBarry Smith     alpha7 = x[10*i+6];
1839e29fdc22SBarry Smith     alpha8 = x[10*i+7];
1840e29fdc22SBarry Smith     alpha9 = x[10*i+8];
1841b9cda22cSBarry Smith     alpha10 = x[10*i+9];
1842b9cda22cSBarry Smith     while (n-->0) {
1843e29fdc22SBarry Smith       y[10*(*idx)]   += alpha1*(*v);
1844e29fdc22SBarry Smith       y[10*(*idx)+1] += alpha2*(*v);
1845e29fdc22SBarry Smith       y[10*(*idx)+2] += alpha3*(*v);
1846e29fdc22SBarry Smith       y[10*(*idx)+3] += alpha4*(*v);
1847e29fdc22SBarry Smith       y[10*(*idx)+4] += alpha5*(*v);
1848e29fdc22SBarry Smith       y[10*(*idx)+5] += alpha6*(*v);
1849e29fdc22SBarry Smith       y[10*(*idx)+6] += alpha7*(*v);
1850e29fdc22SBarry Smith       y[10*(*idx)+7] += alpha8*(*v);
1851e29fdc22SBarry Smith       y[10*(*idx)+8] += alpha9*(*v);
1852b9cda22cSBarry Smith       y[10*(*idx)+9] += alpha10*(*v);
1853b9cda22cSBarry Smith       idx++; v++;
1854b9cda22cSBarry Smith     }
1855b9cda22cSBarry Smith   }
1856dc0b31edSSatish Balay   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
18573649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1858b9cda22cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1859b9cda22cSBarry Smith   PetscFunctionReturn(0);
1860b9cda22cSBarry Smith }
1861b9cda22cSBarry Smith 
1862b9cda22cSBarry Smith #undef __FUNCT__
1863b9cda22cSBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_10"
1864b9cda22cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1865b9cda22cSBarry Smith {
1866b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1867b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1868d2840e09SBarry Smith   const PetscScalar *x,*v;
1869d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1870b9cda22cSBarry Smith   PetscErrorCode    ierr;
1871d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1872d2840e09SBarry Smith   PetscInt          n,i;
1873b9cda22cSBarry Smith 
1874b9cda22cSBarry Smith   PetscFunctionBegin;
1875b9cda22cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
18763649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1877b9cda22cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1878b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1879b9cda22cSBarry Smith     idx    = a->j + a->i[i] ;
1880b9cda22cSBarry Smith     v      = a->a + a->i[i] ;
1881b9cda22cSBarry Smith     n      = a->i[i+1] - a->i[i];
1882b9cda22cSBarry Smith     alpha1 = x[10*i];
1883b9cda22cSBarry Smith     alpha2 = x[10*i+1];
1884b9cda22cSBarry Smith     alpha3 = x[10*i+2];
1885b9cda22cSBarry Smith     alpha4 = x[10*i+3];
1886b9cda22cSBarry Smith     alpha5 = x[10*i+4];
1887b9cda22cSBarry Smith     alpha6 = x[10*i+5];
1888b9cda22cSBarry Smith     alpha7 = x[10*i+6];
1889b9cda22cSBarry Smith     alpha8 = x[10*i+7];
1890b9cda22cSBarry Smith     alpha9 = x[10*i+8];
1891b9cda22cSBarry Smith     alpha10 = x[10*i+9];
1892b9cda22cSBarry Smith     while (n-->0) {
1893b9cda22cSBarry Smith       y[10*(*idx)]   += alpha1*(*v);
1894b9cda22cSBarry Smith       y[10*(*idx)+1] += alpha2*(*v);
1895b9cda22cSBarry Smith       y[10*(*idx)+2] += alpha3*(*v);
1896b9cda22cSBarry Smith       y[10*(*idx)+3] += alpha4*(*v);
1897b9cda22cSBarry Smith       y[10*(*idx)+4] += alpha5*(*v);
1898b9cda22cSBarry Smith       y[10*(*idx)+5] += alpha6*(*v);
1899b9cda22cSBarry Smith       y[10*(*idx)+6] += alpha7*(*v);
1900b9cda22cSBarry Smith       y[10*(*idx)+7] += alpha8*(*v);
1901b9cda22cSBarry Smith       y[10*(*idx)+8] += alpha9*(*v);
1902b9cda22cSBarry Smith       y[10*(*idx)+9] += alpha10*(*v);
1903b9cda22cSBarry Smith       idx++; v++;
1904b9cda22cSBarry Smith     }
1905b9cda22cSBarry Smith   }
1906dc0b31edSSatish Balay   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
19073649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1908b9cda22cSBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1909b9cda22cSBarry Smith   PetscFunctionReturn(0);
1910b9cda22cSBarry Smith }
1911b9cda22cSBarry Smith 
19122b692628SMatthew Knepley 
19132f7816d4SBarry Smith /*--------------------------------------------------------------------------------------------*/
19142f7816d4SBarry Smith #undef __FUNCT__
1915dbdd7285SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_11"
1916dbdd7285SBarry Smith PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1917dbdd7285SBarry Smith {
1918dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1919dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1920d2840e09SBarry Smith   const PetscScalar *x,*v;
1921d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1922dbdd7285SBarry Smith   PetscErrorCode    ierr;
1923d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1924d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
1925dbdd7285SBarry Smith 
1926dbdd7285SBarry Smith   PetscFunctionBegin;
19273649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1928dbdd7285SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1929dbdd7285SBarry Smith   idx  = a->j;
1930dbdd7285SBarry Smith   v    = a->a;
1931dbdd7285SBarry Smith   ii   = a->i;
1932dbdd7285SBarry Smith 
1933dbdd7285SBarry Smith   for (i=0; i<m; i++) {
1934dbdd7285SBarry Smith     jrow = ii[i];
1935dbdd7285SBarry Smith     n    = ii[i+1] - jrow;
1936dbdd7285SBarry Smith     sum1  = 0.0;
1937dbdd7285SBarry Smith     sum2  = 0.0;
1938dbdd7285SBarry Smith     sum3  = 0.0;
1939dbdd7285SBarry Smith     sum4  = 0.0;
1940dbdd7285SBarry Smith     sum5  = 0.0;
1941dbdd7285SBarry Smith     sum6  = 0.0;
1942dbdd7285SBarry Smith     sum7  = 0.0;
1943dbdd7285SBarry Smith     sum8  = 0.0;
1944dbdd7285SBarry Smith     sum9  = 0.0;
1945dbdd7285SBarry Smith     sum10 = 0.0;
1946dbdd7285SBarry Smith     sum11 = 0.0;
1947dbdd7285SBarry Smith     nonzerorow += (n>0);
1948dbdd7285SBarry Smith     for (j=0; j<n; j++) {
1949dbdd7285SBarry Smith       sum1  += v[jrow]*x[11*idx[jrow]];
1950dbdd7285SBarry Smith       sum2  += v[jrow]*x[11*idx[jrow]+1];
1951dbdd7285SBarry Smith       sum3  += v[jrow]*x[11*idx[jrow]+2];
1952dbdd7285SBarry Smith       sum4  += v[jrow]*x[11*idx[jrow]+3];
1953dbdd7285SBarry Smith       sum5  += v[jrow]*x[11*idx[jrow]+4];
1954dbdd7285SBarry Smith       sum6  += v[jrow]*x[11*idx[jrow]+5];
1955dbdd7285SBarry Smith       sum7  += v[jrow]*x[11*idx[jrow]+6];
1956dbdd7285SBarry Smith       sum8  += v[jrow]*x[11*idx[jrow]+7];
1957dbdd7285SBarry Smith       sum9  += v[jrow]*x[11*idx[jrow]+8];
1958dbdd7285SBarry Smith       sum10 += v[jrow]*x[11*idx[jrow]+9];
1959dbdd7285SBarry Smith       sum11 += v[jrow]*x[11*idx[jrow]+10];
1960dbdd7285SBarry Smith       jrow++;
1961dbdd7285SBarry Smith      }
1962dbdd7285SBarry Smith     y[11*i]   = sum1;
1963dbdd7285SBarry Smith     y[11*i+1] = sum2;
1964dbdd7285SBarry Smith     y[11*i+2] = sum3;
1965dbdd7285SBarry Smith     y[11*i+3] = sum4;
1966dbdd7285SBarry Smith     y[11*i+4] = sum5;
1967dbdd7285SBarry Smith     y[11*i+5] = sum6;
1968dbdd7285SBarry Smith     y[11*i+6] = sum7;
1969dbdd7285SBarry Smith     y[11*i+7] = sum8;
1970dbdd7285SBarry Smith     y[11*i+8] = sum9;
1971dbdd7285SBarry Smith     y[11*i+9] = sum10;
1972dbdd7285SBarry Smith     y[11*i+10] = sum11;
1973dbdd7285SBarry Smith   }
1974dbdd7285SBarry Smith 
1975dbdd7285SBarry Smith   ierr = PetscLogFlops(22*a->nz - 11*nonzerorow);CHKERRQ(ierr);
19763649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1977dbdd7285SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1978dbdd7285SBarry Smith   PetscFunctionReturn(0);
1979dbdd7285SBarry Smith }
1980dbdd7285SBarry Smith 
1981dbdd7285SBarry Smith #undef __FUNCT__
1982dbdd7285SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_11"
1983dbdd7285SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1984dbdd7285SBarry Smith {
1985dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1986dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1987d2840e09SBarry Smith   const PetscScalar *x,*v;
1988d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1989dbdd7285SBarry Smith   PetscErrorCode    ierr;
1990d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1991dbdd7285SBarry Smith   PetscInt          n,i,jrow,j;
1992dbdd7285SBarry Smith 
1993dbdd7285SBarry Smith   PetscFunctionBegin;
1994dbdd7285SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
19953649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1996dbdd7285SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1997dbdd7285SBarry Smith   idx  = a->j;
1998dbdd7285SBarry Smith   v    = a->a;
1999dbdd7285SBarry Smith   ii   = a->i;
2000dbdd7285SBarry Smith 
2001dbdd7285SBarry Smith   for (i=0; i<m; i++) {
2002dbdd7285SBarry Smith     jrow = ii[i];
2003dbdd7285SBarry Smith     n    = ii[i+1] - jrow;
2004dbdd7285SBarry Smith     sum1  = 0.0;
2005dbdd7285SBarry Smith     sum2  = 0.0;
2006dbdd7285SBarry Smith     sum3  = 0.0;
2007dbdd7285SBarry Smith     sum4  = 0.0;
2008dbdd7285SBarry Smith     sum5  = 0.0;
2009dbdd7285SBarry Smith     sum6  = 0.0;
2010dbdd7285SBarry Smith     sum7  = 0.0;
2011dbdd7285SBarry Smith     sum8  = 0.0;
2012dbdd7285SBarry Smith     sum9  = 0.0;
2013dbdd7285SBarry Smith     sum10 = 0.0;
2014dbdd7285SBarry Smith     sum11 = 0.0;
2015dbdd7285SBarry Smith     for (j=0; j<n; j++) {
2016dbdd7285SBarry Smith       sum1  += v[jrow]*x[11*idx[jrow]];
2017dbdd7285SBarry Smith       sum2  += v[jrow]*x[11*idx[jrow]+1];
2018dbdd7285SBarry Smith       sum3  += v[jrow]*x[11*idx[jrow]+2];
2019dbdd7285SBarry Smith       sum4  += v[jrow]*x[11*idx[jrow]+3];
2020dbdd7285SBarry Smith       sum5  += v[jrow]*x[11*idx[jrow]+4];
2021dbdd7285SBarry Smith       sum6  += v[jrow]*x[11*idx[jrow]+5];
2022dbdd7285SBarry Smith       sum7  += v[jrow]*x[11*idx[jrow]+6];
2023dbdd7285SBarry Smith       sum8  += v[jrow]*x[11*idx[jrow]+7];
2024dbdd7285SBarry Smith       sum9  += v[jrow]*x[11*idx[jrow]+8];
2025dbdd7285SBarry Smith       sum10 += v[jrow]*x[11*idx[jrow]+9];
2026dbdd7285SBarry Smith       sum11 += v[jrow]*x[11*idx[jrow]+10];
2027dbdd7285SBarry Smith       jrow++;
2028dbdd7285SBarry Smith      }
2029dbdd7285SBarry Smith     y[11*i]   += sum1;
2030dbdd7285SBarry Smith     y[11*i+1] += sum2;
2031dbdd7285SBarry Smith     y[11*i+2] += sum3;
2032dbdd7285SBarry Smith     y[11*i+3] += sum4;
2033dbdd7285SBarry Smith     y[11*i+4] += sum5;
2034dbdd7285SBarry Smith     y[11*i+5] += sum6;
2035dbdd7285SBarry Smith     y[11*i+6] += sum7;
2036dbdd7285SBarry Smith     y[11*i+7] += sum8;
2037dbdd7285SBarry Smith     y[11*i+8] += sum9;
2038dbdd7285SBarry Smith     y[11*i+9] += sum10;
2039dbdd7285SBarry Smith     y[11*i+10] += sum11;
2040dbdd7285SBarry Smith   }
2041dbdd7285SBarry Smith 
2042dbdd7285SBarry Smith   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
20433649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2044dbdd7285SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2045dbdd7285SBarry Smith   PetscFunctionReturn(0);
2046dbdd7285SBarry Smith }
2047dbdd7285SBarry Smith 
2048dbdd7285SBarry Smith #undef __FUNCT__
2049dbdd7285SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_11"
2050dbdd7285SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
2051dbdd7285SBarry Smith {
2052dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2053dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2054d2840e09SBarry Smith   const PetscScalar *x,*v;
2055d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2056dbdd7285SBarry Smith   PetscErrorCode    ierr;
2057d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2058d2840e09SBarry Smith   PetscInt          n,i;
2059dbdd7285SBarry Smith 
2060dbdd7285SBarry Smith   PetscFunctionBegin;
2061d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
20623649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2063dbdd7285SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2064dbdd7285SBarry Smith 
2065dbdd7285SBarry Smith   for (i=0; i<m; i++) {
2066dbdd7285SBarry Smith     idx    = a->j + a->i[i] ;
2067dbdd7285SBarry Smith     v      = a->a + a->i[i] ;
2068dbdd7285SBarry Smith     n      = a->i[i+1] - a->i[i];
2069dbdd7285SBarry Smith     alpha1 = x[11*i];
2070dbdd7285SBarry Smith     alpha2 = x[11*i+1];
2071dbdd7285SBarry Smith     alpha3 = x[11*i+2];
2072dbdd7285SBarry Smith     alpha4 = x[11*i+3];
2073dbdd7285SBarry Smith     alpha5 = x[11*i+4];
2074dbdd7285SBarry Smith     alpha6 = x[11*i+5];
2075dbdd7285SBarry Smith     alpha7 = x[11*i+6];
2076dbdd7285SBarry Smith     alpha8 = x[11*i+7];
2077dbdd7285SBarry Smith     alpha9 = x[11*i+8];
2078dbdd7285SBarry Smith     alpha10 = x[11*i+9];
2079dbdd7285SBarry Smith     alpha11 = x[11*i+10];
2080dbdd7285SBarry Smith     while (n-->0) {
2081dbdd7285SBarry Smith       y[11*(*idx)]   += alpha1*(*v);
2082dbdd7285SBarry Smith       y[11*(*idx)+1] += alpha2*(*v);
2083dbdd7285SBarry Smith       y[11*(*idx)+2] += alpha3*(*v);
2084dbdd7285SBarry Smith       y[11*(*idx)+3] += alpha4*(*v);
2085dbdd7285SBarry Smith       y[11*(*idx)+4] += alpha5*(*v);
2086dbdd7285SBarry Smith       y[11*(*idx)+5] += alpha6*(*v);
2087dbdd7285SBarry Smith       y[11*(*idx)+6] += alpha7*(*v);
2088dbdd7285SBarry Smith       y[11*(*idx)+7] += alpha8*(*v);
2089dbdd7285SBarry Smith       y[11*(*idx)+8] += alpha9*(*v);
2090dbdd7285SBarry Smith       y[11*(*idx)+9] += alpha10*(*v);
2091dbdd7285SBarry Smith       y[11*(*idx)+10] += alpha11*(*v);
2092dbdd7285SBarry Smith       idx++; v++;
2093dbdd7285SBarry Smith     }
2094dbdd7285SBarry Smith   }
2095dbdd7285SBarry Smith   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
20963649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2097dbdd7285SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2098dbdd7285SBarry Smith   PetscFunctionReturn(0);
2099dbdd7285SBarry Smith }
2100dbdd7285SBarry Smith 
2101dbdd7285SBarry Smith #undef __FUNCT__
2102dbdd7285SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_11"
2103dbdd7285SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2104dbdd7285SBarry Smith {
2105dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2106dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2107d2840e09SBarry Smith   const PetscScalar *x,*v;
2108d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2109dbdd7285SBarry Smith   PetscErrorCode    ierr;
2110d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2111d2840e09SBarry Smith   PetscInt          n,i;
2112dbdd7285SBarry Smith 
2113dbdd7285SBarry Smith   PetscFunctionBegin;
2114dbdd7285SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
21153649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2116dbdd7285SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2117dbdd7285SBarry Smith   for (i=0; i<m; i++) {
2118dbdd7285SBarry Smith     idx    = a->j + a->i[i] ;
2119dbdd7285SBarry Smith     v      = a->a + a->i[i] ;
2120dbdd7285SBarry Smith     n      = a->i[i+1] - a->i[i];
2121dbdd7285SBarry Smith     alpha1 = x[11*i];
2122dbdd7285SBarry Smith     alpha2 = x[11*i+1];
2123dbdd7285SBarry Smith     alpha3 = x[11*i+2];
2124dbdd7285SBarry Smith     alpha4 = x[11*i+3];
2125dbdd7285SBarry Smith     alpha5 = x[11*i+4];
2126dbdd7285SBarry Smith     alpha6 = x[11*i+5];
2127dbdd7285SBarry Smith     alpha7 = x[11*i+6];
2128dbdd7285SBarry Smith     alpha8 = x[11*i+7];
2129dbdd7285SBarry Smith     alpha9 = x[11*i+8];
2130dbdd7285SBarry Smith     alpha10 = x[11*i+9];
2131dbdd7285SBarry Smith     alpha11 = x[11*i+10];
2132dbdd7285SBarry Smith     while (n-->0) {
2133dbdd7285SBarry Smith       y[11*(*idx)]   += alpha1*(*v);
2134dbdd7285SBarry Smith       y[11*(*idx)+1] += alpha2*(*v);
2135dbdd7285SBarry Smith       y[11*(*idx)+2] += alpha3*(*v);
2136dbdd7285SBarry Smith       y[11*(*idx)+3] += alpha4*(*v);
2137dbdd7285SBarry Smith       y[11*(*idx)+4] += alpha5*(*v);
2138dbdd7285SBarry Smith       y[11*(*idx)+5] += alpha6*(*v);
2139dbdd7285SBarry Smith       y[11*(*idx)+6] += alpha7*(*v);
2140dbdd7285SBarry Smith       y[11*(*idx)+7] += alpha8*(*v);
2141dbdd7285SBarry Smith       y[11*(*idx)+8] += alpha9*(*v);
2142dbdd7285SBarry Smith       y[11*(*idx)+9] += alpha10*(*v);
2143dbdd7285SBarry Smith       y[11*(*idx)+10] += alpha11*(*v);
2144dbdd7285SBarry Smith       idx++; v++;
2145dbdd7285SBarry Smith     }
2146dbdd7285SBarry Smith   }
2147dbdd7285SBarry Smith   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
21483649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2149dbdd7285SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2150dbdd7285SBarry Smith   PetscFunctionReturn(0);
2151dbdd7285SBarry Smith }
2152dbdd7285SBarry Smith 
2153dbdd7285SBarry Smith 
2154dbdd7285SBarry Smith /*--------------------------------------------------------------------------------------------*/
2155dbdd7285SBarry Smith #undef __FUNCT__
21562f7816d4SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_16"
2157dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
21582f7816d4SBarry Smith {
21592f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
21602f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2161d2840e09SBarry Smith   const PetscScalar *x,*v;
2162d2840e09SBarry Smith   PetscScalar        *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
21632f7816d4SBarry Smith   PetscScalar        sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2164dfbe8321SBarry Smith   PetscErrorCode     ierr;
2165d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n,*idx,*ii;
2166d2840e09SBarry Smith   PetscInt           nonzerorow=0,n,i,jrow,j;
21672f7816d4SBarry Smith 
21682f7816d4SBarry Smith   PetscFunctionBegin;
21693649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
21701ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
21712f7816d4SBarry Smith   idx  = a->j;
21722f7816d4SBarry Smith   v    = a->a;
21732f7816d4SBarry Smith   ii   = a->i;
21742f7816d4SBarry Smith 
21752f7816d4SBarry Smith   for (i=0; i<m; i++) {
21762f7816d4SBarry Smith     jrow = ii[i];
21772f7816d4SBarry Smith     n    = ii[i+1] - jrow;
21782f7816d4SBarry Smith     sum1  = 0.0;
21792f7816d4SBarry Smith     sum2  = 0.0;
21802f7816d4SBarry Smith     sum3  = 0.0;
21812f7816d4SBarry Smith     sum4  = 0.0;
21822f7816d4SBarry Smith     sum5  = 0.0;
21832f7816d4SBarry Smith     sum6  = 0.0;
21842f7816d4SBarry Smith     sum7  = 0.0;
21852f7816d4SBarry Smith     sum8  = 0.0;
21862f7816d4SBarry Smith     sum9  = 0.0;
21872f7816d4SBarry Smith     sum10 = 0.0;
21882f7816d4SBarry Smith     sum11 = 0.0;
21892f7816d4SBarry Smith     sum12 = 0.0;
21902f7816d4SBarry Smith     sum13 = 0.0;
21912f7816d4SBarry Smith     sum14 = 0.0;
21922f7816d4SBarry Smith     sum15 = 0.0;
21932f7816d4SBarry Smith     sum16 = 0.0;
2194b3c51c66SMatthew Knepley     nonzerorow += (n>0);
21952f7816d4SBarry Smith     for (j=0; j<n; j++) {
21962f7816d4SBarry Smith       sum1  += v[jrow]*x[16*idx[jrow]];
21972f7816d4SBarry Smith       sum2  += v[jrow]*x[16*idx[jrow]+1];
21982f7816d4SBarry Smith       sum3  += v[jrow]*x[16*idx[jrow]+2];
21992f7816d4SBarry Smith       sum4  += v[jrow]*x[16*idx[jrow]+3];
22002f7816d4SBarry Smith       sum5  += v[jrow]*x[16*idx[jrow]+4];
22012f7816d4SBarry Smith       sum6  += v[jrow]*x[16*idx[jrow]+5];
22022f7816d4SBarry Smith       sum7  += v[jrow]*x[16*idx[jrow]+6];
22032f7816d4SBarry Smith       sum8  += v[jrow]*x[16*idx[jrow]+7];
22042f7816d4SBarry Smith       sum9  += v[jrow]*x[16*idx[jrow]+8];
22052f7816d4SBarry Smith       sum10 += v[jrow]*x[16*idx[jrow]+9];
22062f7816d4SBarry Smith       sum11 += v[jrow]*x[16*idx[jrow]+10];
22072f7816d4SBarry Smith       sum12 += v[jrow]*x[16*idx[jrow]+11];
22082f7816d4SBarry Smith       sum13 += v[jrow]*x[16*idx[jrow]+12];
22092f7816d4SBarry Smith       sum14 += v[jrow]*x[16*idx[jrow]+13];
22102f7816d4SBarry Smith       sum15 += v[jrow]*x[16*idx[jrow]+14];
22112f7816d4SBarry Smith       sum16 += v[jrow]*x[16*idx[jrow]+15];
22122f7816d4SBarry Smith       jrow++;
22132f7816d4SBarry Smith      }
22142f7816d4SBarry Smith     y[16*i]    = sum1;
22152f7816d4SBarry Smith     y[16*i+1]  = sum2;
22162f7816d4SBarry Smith     y[16*i+2]  = sum3;
22172f7816d4SBarry Smith     y[16*i+3]  = sum4;
22182f7816d4SBarry Smith     y[16*i+4]  = sum5;
22192f7816d4SBarry Smith     y[16*i+5]  = sum6;
22202f7816d4SBarry Smith     y[16*i+6]  = sum7;
22212f7816d4SBarry Smith     y[16*i+7]  = sum8;
22222f7816d4SBarry Smith     y[16*i+8]  = sum9;
22232f7816d4SBarry Smith     y[16*i+9]  = sum10;
22242f7816d4SBarry Smith     y[16*i+10] = sum11;
22252f7816d4SBarry Smith     y[16*i+11] = sum12;
22262f7816d4SBarry Smith     y[16*i+12] = sum13;
22272f7816d4SBarry Smith     y[16*i+13] = sum14;
22282f7816d4SBarry Smith     y[16*i+14] = sum15;
22292f7816d4SBarry Smith     y[16*i+15] = sum16;
22302f7816d4SBarry Smith   }
22312f7816d4SBarry Smith 
2232dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr);
22333649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
22341ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
22352f7816d4SBarry Smith   PetscFunctionReturn(0);
22362f7816d4SBarry Smith }
22372f7816d4SBarry Smith 
22382f7816d4SBarry Smith #undef __FUNCT__
22392f7816d4SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16"
2240dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
22412f7816d4SBarry Smith {
22422f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
22432f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2244d2840e09SBarry Smith   const PetscScalar *x,*v;
2245d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
22462f7816d4SBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2247dfbe8321SBarry Smith   PetscErrorCode    ierr;
2248d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2249d2840e09SBarry Smith   PetscInt          n,i;
22502f7816d4SBarry Smith 
22512f7816d4SBarry Smith   PetscFunctionBegin;
2252d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
22533649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
22541ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2255bfec09a0SHong Zhang 
22562f7816d4SBarry Smith   for (i=0; i<m; i++) {
2257bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
2258bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
22592f7816d4SBarry Smith     n      = a->i[i+1] - a->i[i];
22602f7816d4SBarry Smith     alpha1  = x[16*i];
22612f7816d4SBarry Smith     alpha2  = x[16*i+1];
22622f7816d4SBarry Smith     alpha3  = x[16*i+2];
22632f7816d4SBarry Smith     alpha4  = x[16*i+3];
22642f7816d4SBarry Smith     alpha5  = x[16*i+4];
22652f7816d4SBarry Smith     alpha6  = x[16*i+5];
22662f7816d4SBarry Smith     alpha7  = x[16*i+6];
22672f7816d4SBarry Smith     alpha8  = x[16*i+7];
22682f7816d4SBarry Smith     alpha9  = x[16*i+8];
22692f7816d4SBarry Smith     alpha10 = x[16*i+9];
22702f7816d4SBarry Smith     alpha11 = x[16*i+10];
22712f7816d4SBarry Smith     alpha12 = x[16*i+11];
22722f7816d4SBarry Smith     alpha13 = x[16*i+12];
22732f7816d4SBarry Smith     alpha14 = x[16*i+13];
22742f7816d4SBarry Smith     alpha15 = x[16*i+14];
22752f7816d4SBarry Smith     alpha16 = x[16*i+15];
22762f7816d4SBarry Smith     while (n-->0) {
22772f7816d4SBarry Smith       y[16*(*idx)]    += alpha1*(*v);
22782f7816d4SBarry Smith       y[16*(*idx)+1]  += alpha2*(*v);
22792f7816d4SBarry Smith       y[16*(*idx)+2]  += alpha3*(*v);
22802f7816d4SBarry Smith       y[16*(*idx)+3]  += alpha4*(*v);
22812f7816d4SBarry Smith       y[16*(*idx)+4]  += alpha5*(*v);
22822f7816d4SBarry Smith       y[16*(*idx)+5]  += alpha6*(*v);
22832f7816d4SBarry Smith       y[16*(*idx)+6]  += alpha7*(*v);
22842f7816d4SBarry Smith       y[16*(*idx)+7]  += alpha8*(*v);
22852f7816d4SBarry Smith       y[16*(*idx)+8]  += alpha9*(*v);
22862f7816d4SBarry Smith       y[16*(*idx)+9]  += alpha10*(*v);
22872f7816d4SBarry Smith       y[16*(*idx)+10] += alpha11*(*v);
22882f7816d4SBarry Smith       y[16*(*idx)+11] += alpha12*(*v);
22892f7816d4SBarry Smith       y[16*(*idx)+12] += alpha13*(*v);
22902f7816d4SBarry Smith       y[16*(*idx)+13] += alpha14*(*v);
22912f7816d4SBarry Smith       y[16*(*idx)+14] += alpha15*(*v);
22922f7816d4SBarry Smith       y[16*(*idx)+15] += alpha16*(*v);
22932f7816d4SBarry Smith       idx++; v++;
22942f7816d4SBarry Smith     }
22952f7816d4SBarry Smith   }
2296dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
22973649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
22981ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
22992f7816d4SBarry Smith   PetscFunctionReturn(0);
23002f7816d4SBarry Smith }
23012f7816d4SBarry Smith 
23022f7816d4SBarry Smith #undef __FUNCT__
23032f7816d4SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_16"
2304dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
23052f7816d4SBarry Smith {
23062f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
23072f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2308d2840e09SBarry Smith   const PetscScalar *x,*v;
2309d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
23102f7816d4SBarry Smith   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2311dfbe8321SBarry Smith   PetscErrorCode    ierr;
2312d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2313b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
23142f7816d4SBarry Smith 
23152f7816d4SBarry Smith   PetscFunctionBegin;
23162f7816d4SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
23173649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
23181ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
23192f7816d4SBarry Smith   idx  = a->j;
23202f7816d4SBarry Smith   v    = a->a;
23212f7816d4SBarry Smith   ii   = a->i;
23222f7816d4SBarry Smith 
23232f7816d4SBarry Smith   for (i=0; i<m; i++) {
23242f7816d4SBarry Smith     jrow = ii[i];
23252f7816d4SBarry Smith     n    = ii[i+1] - jrow;
23262f7816d4SBarry Smith     sum1  = 0.0;
23272f7816d4SBarry Smith     sum2  = 0.0;
23282f7816d4SBarry Smith     sum3  = 0.0;
23292f7816d4SBarry Smith     sum4  = 0.0;
23302f7816d4SBarry Smith     sum5  = 0.0;
23312f7816d4SBarry Smith     sum6  = 0.0;
23322f7816d4SBarry Smith     sum7  = 0.0;
23332f7816d4SBarry Smith     sum8  = 0.0;
23342f7816d4SBarry Smith     sum9  = 0.0;
23352f7816d4SBarry Smith     sum10 = 0.0;
23362f7816d4SBarry Smith     sum11 = 0.0;
23372f7816d4SBarry Smith     sum12 = 0.0;
23382f7816d4SBarry Smith     sum13 = 0.0;
23392f7816d4SBarry Smith     sum14 = 0.0;
23402f7816d4SBarry Smith     sum15 = 0.0;
23412f7816d4SBarry Smith     sum16 = 0.0;
23422f7816d4SBarry Smith     for (j=0; j<n; j++) {
23432f7816d4SBarry Smith       sum1  += v[jrow]*x[16*idx[jrow]];
23442f7816d4SBarry Smith       sum2  += v[jrow]*x[16*idx[jrow]+1];
23452f7816d4SBarry Smith       sum3  += v[jrow]*x[16*idx[jrow]+2];
23462f7816d4SBarry Smith       sum4  += v[jrow]*x[16*idx[jrow]+3];
23472f7816d4SBarry Smith       sum5  += v[jrow]*x[16*idx[jrow]+4];
23482f7816d4SBarry Smith       sum6  += v[jrow]*x[16*idx[jrow]+5];
23492f7816d4SBarry Smith       sum7  += v[jrow]*x[16*idx[jrow]+6];
23502f7816d4SBarry Smith       sum8  += v[jrow]*x[16*idx[jrow]+7];
23512f7816d4SBarry Smith       sum9  += v[jrow]*x[16*idx[jrow]+8];
23522f7816d4SBarry Smith       sum10 += v[jrow]*x[16*idx[jrow]+9];
23532f7816d4SBarry Smith       sum11 += v[jrow]*x[16*idx[jrow]+10];
23542f7816d4SBarry Smith       sum12 += v[jrow]*x[16*idx[jrow]+11];
23552f7816d4SBarry Smith       sum13 += v[jrow]*x[16*idx[jrow]+12];
23562f7816d4SBarry Smith       sum14 += v[jrow]*x[16*idx[jrow]+13];
23572f7816d4SBarry Smith       sum15 += v[jrow]*x[16*idx[jrow]+14];
23582f7816d4SBarry Smith       sum16 += v[jrow]*x[16*idx[jrow]+15];
23592f7816d4SBarry Smith       jrow++;
23602f7816d4SBarry Smith      }
23612f7816d4SBarry Smith     y[16*i]    += sum1;
23622f7816d4SBarry Smith     y[16*i+1]  += sum2;
23632f7816d4SBarry Smith     y[16*i+2]  += sum3;
23642f7816d4SBarry Smith     y[16*i+3]  += sum4;
23652f7816d4SBarry Smith     y[16*i+4]  += sum5;
23662f7816d4SBarry Smith     y[16*i+5]  += sum6;
23672f7816d4SBarry Smith     y[16*i+6]  += sum7;
23682f7816d4SBarry Smith     y[16*i+7]  += sum8;
23692f7816d4SBarry Smith     y[16*i+8]  += sum9;
23702f7816d4SBarry Smith     y[16*i+9]  += sum10;
23712f7816d4SBarry Smith     y[16*i+10] += sum11;
23722f7816d4SBarry Smith     y[16*i+11] += sum12;
23732f7816d4SBarry Smith     y[16*i+12] += sum13;
23742f7816d4SBarry Smith     y[16*i+13] += sum14;
23752f7816d4SBarry Smith     y[16*i+14] += sum15;
23762f7816d4SBarry Smith     y[16*i+15] += sum16;
23772f7816d4SBarry Smith   }
23782f7816d4SBarry Smith 
2379dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
23803649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
23811ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
23822f7816d4SBarry Smith   PetscFunctionReturn(0);
23832f7816d4SBarry Smith }
23842f7816d4SBarry Smith 
23852f7816d4SBarry Smith #undef __FUNCT__
23862f7816d4SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16"
2387dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
23882f7816d4SBarry Smith {
23892f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
23902f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2391d2840e09SBarry Smith   const PetscScalar *x,*v;
2392d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
23932f7816d4SBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2394dfbe8321SBarry Smith   PetscErrorCode    ierr;
2395d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2396d2840e09SBarry Smith   PetscInt          n,i;
23972f7816d4SBarry Smith 
23982f7816d4SBarry Smith   PetscFunctionBegin;
23992f7816d4SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
24003649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
24011ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
24022f7816d4SBarry Smith   for (i=0; i<m; i++) {
2403bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
2404bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
24052f7816d4SBarry Smith     n      = a->i[i+1] - a->i[i];
24062f7816d4SBarry Smith     alpha1 = x[16*i];
24072f7816d4SBarry Smith     alpha2 = x[16*i+1];
24082f7816d4SBarry Smith     alpha3 = x[16*i+2];
24092f7816d4SBarry Smith     alpha4 = x[16*i+3];
24102f7816d4SBarry Smith     alpha5 = x[16*i+4];
24112f7816d4SBarry Smith     alpha6 = x[16*i+5];
24122f7816d4SBarry Smith     alpha7 = x[16*i+6];
24132f7816d4SBarry Smith     alpha8 = x[16*i+7];
24142f7816d4SBarry Smith     alpha9  = x[16*i+8];
24152f7816d4SBarry Smith     alpha10 = x[16*i+9];
24162f7816d4SBarry Smith     alpha11 = x[16*i+10];
24172f7816d4SBarry Smith     alpha12 = x[16*i+11];
24182f7816d4SBarry Smith     alpha13 = x[16*i+12];
24192f7816d4SBarry Smith     alpha14 = x[16*i+13];
24202f7816d4SBarry Smith     alpha15 = x[16*i+14];
24212f7816d4SBarry Smith     alpha16 = x[16*i+15];
24222f7816d4SBarry Smith     while (n-->0) {
24232f7816d4SBarry Smith       y[16*(*idx)]   += alpha1*(*v);
24242f7816d4SBarry Smith       y[16*(*idx)+1] += alpha2*(*v);
24252f7816d4SBarry Smith       y[16*(*idx)+2] += alpha3*(*v);
24262f7816d4SBarry Smith       y[16*(*idx)+3] += alpha4*(*v);
24272f7816d4SBarry Smith       y[16*(*idx)+4] += alpha5*(*v);
24282f7816d4SBarry Smith       y[16*(*idx)+5] += alpha6*(*v);
24292f7816d4SBarry Smith       y[16*(*idx)+6] += alpha7*(*v);
24302f7816d4SBarry Smith       y[16*(*idx)+7] += alpha8*(*v);
24312f7816d4SBarry Smith       y[16*(*idx)+8]  += alpha9*(*v);
24322f7816d4SBarry Smith       y[16*(*idx)+9]  += alpha10*(*v);
24332f7816d4SBarry Smith       y[16*(*idx)+10] += alpha11*(*v);
24342f7816d4SBarry Smith       y[16*(*idx)+11] += alpha12*(*v);
24352f7816d4SBarry Smith       y[16*(*idx)+12] += alpha13*(*v);
24362f7816d4SBarry Smith       y[16*(*idx)+13] += alpha14*(*v);
24372f7816d4SBarry Smith       y[16*(*idx)+14] += alpha15*(*v);
24382f7816d4SBarry Smith       y[16*(*idx)+15] += alpha16*(*v);
24392f7816d4SBarry Smith       idx++; v++;
24402f7816d4SBarry Smith     }
24412f7816d4SBarry Smith   }
2442dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
24433649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
24441ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
244566ed3db0SBarry Smith   PetscFunctionReturn(0);
244666ed3db0SBarry Smith }
244766ed3db0SBarry Smith 
2448ed1c418cSBarry Smith /*--------------------------------------------------------------------------------------------*/
2449ed1c418cSBarry Smith #undef __FUNCT__
2450ed1c418cSBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_18"
2451ed1c418cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2452ed1c418cSBarry Smith {
2453ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2454ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2455d2840e09SBarry Smith   const PetscScalar *x,*v;
2456d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2457ed1c418cSBarry Smith   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2458ed1c418cSBarry Smith   PetscErrorCode    ierr;
2459d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2460d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
2461ed1c418cSBarry Smith 
2462ed1c418cSBarry Smith   PetscFunctionBegin;
24633649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2464ed1c418cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2465ed1c418cSBarry Smith   idx  = a->j;
2466ed1c418cSBarry Smith   v    = a->a;
2467ed1c418cSBarry Smith   ii   = a->i;
2468ed1c418cSBarry Smith 
2469ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2470ed1c418cSBarry Smith     jrow = ii[i];
2471ed1c418cSBarry Smith     n    = ii[i+1] - jrow;
2472ed1c418cSBarry Smith     sum1  = 0.0;
2473ed1c418cSBarry Smith     sum2  = 0.0;
2474ed1c418cSBarry Smith     sum3  = 0.0;
2475ed1c418cSBarry Smith     sum4  = 0.0;
2476ed1c418cSBarry Smith     sum5  = 0.0;
2477ed1c418cSBarry Smith     sum6  = 0.0;
2478ed1c418cSBarry Smith     sum7  = 0.0;
2479ed1c418cSBarry Smith     sum8  = 0.0;
2480ed1c418cSBarry Smith     sum9  = 0.0;
2481ed1c418cSBarry Smith     sum10 = 0.0;
2482ed1c418cSBarry Smith     sum11 = 0.0;
2483ed1c418cSBarry Smith     sum12 = 0.0;
2484ed1c418cSBarry Smith     sum13 = 0.0;
2485ed1c418cSBarry Smith     sum14 = 0.0;
2486ed1c418cSBarry Smith     sum15 = 0.0;
2487ed1c418cSBarry Smith     sum16 = 0.0;
2488ed1c418cSBarry Smith     sum17 = 0.0;
2489ed1c418cSBarry Smith     sum18 = 0.0;
2490ed1c418cSBarry Smith     nonzerorow += (n>0);
2491ed1c418cSBarry Smith     for (j=0; j<n; j++) {
2492ed1c418cSBarry Smith       sum1  += v[jrow]*x[18*idx[jrow]];
2493ed1c418cSBarry Smith       sum2  += v[jrow]*x[18*idx[jrow]+1];
2494ed1c418cSBarry Smith       sum3  += v[jrow]*x[18*idx[jrow]+2];
2495ed1c418cSBarry Smith       sum4  += v[jrow]*x[18*idx[jrow]+3];
2496ed1c418cSBarry Smith       sum5  += v[jrow]*x[18*idx[jrow]+4];
2497ed1c418cSBarry Smith       sum6  += v[jrow]*x[18*idx[jrow]+5];
2498ed1c418cSBarry Smith       sum7  += v[jrow]*x[18*idx[jrow]+6];
2499ed1c418cSBarry Smith       sum8  += v[jrow]*x[18*idx[jrow]+7];
2500ed1c418cSBarry Smith       sum9  += v[jrow]*x[18*idx[jrow]+8];
2501ed1c418cSBarry Smith       sum10 += v[jrow]*x[18*idx[jrow]+9];
2502ed1c418cSBarry Smith       sum11 += v[jrow]*x[18*idx[jrow]+10];
2503ed1c418cSBarry Smith       sum12 += v[jrow]*x[18*idx[jrow]+11];
2504ed1c418cSBarry Smith       sum13 += v[jrow]*x[18*idx[jrow]+12];
2505ed1c418cSBarry Smith       sum14 += v[jrow]*x[18*idx[jrow]+13];
2506ed1c418cSBarry Smith       sum15 += v[jrow]*x[18*idx[jrow]+14];
2507ed1c418cSBarry Smith       sum16 += v[jrow]*x[18*idx[jrow]+15];
2508ed1c418cSBarry Smith       sum17 += v[jrow]*x[18*idx[jrow]+16];
2509ed1c418cSBarry Smith       sum18 += v[jrow]*x[18*idx[jrow]+17];
2510ed1c418cSBarry Smith       jrow++;
2511ed1c418cSBarry Smith      }
2512ed1c418cSBarry Smith     y[18*i]    = sum1;
2513ed1c418cSBarry Smith     y[18*i+1]  = sum2;
2514ed1c418cSBarry Smith     y[18*i+2]  = sum3;
2515ed1c418cSBarry Smith     y[18*i+3]  = sum4;
2516ed1c418cSBarry Smith     y[18*i+4]  = sum5;
2517ed1c418cSBarry Smith     y[18*i+5]  = sum6;
2518ed1c418cSBarry Smith     y[18*i+6]  = sum7;
2519ed1c418cSBarry Smith     y[18*i+7]  = sum8;
2520ed1c418cSBarry Smith     y[18*i+8]  = sum9;
2521ed1c418cSBarry Smith     y[18*i+9]  = sum10;
2522ed1c418cSBarry Smith     y[18*i+10] = sum11;
2523ed1c418cSBarry Smith     y[18*i+11] = sum12;
2524ed1c418cSBarry Smith     y[18*i+12] = sum13;
2525ed1c418cSBarry Smith     y[18*i+13] = sum14;
2526ed1c418cSBarry Smith     y[18*i+14] = sum15;
2527ed1c418cSBarry Smith     y[18*i+15] = sum16;
2528ed1c418cSBarry Smith     y[18*i+16] = sum17;
2529ed1c418cSBarry Smith     y[18*i+17] = sum18;
2530ed1c418cSBarry Smith   }
2531ed1c418cSBarry Smith 
2532dc0b31edSSatish Balay   ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr);
25333649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2534ed1c418cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2535ed1c418cSBarry Smith   PetscFunctionReturn(0);
2536ed1c418cSBarry Smith }
2537ed1c418cSBarry Smith 
2538ed1c418cSBarry Smith #undef __FUNCT__
2539ed1c418cSBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_18"
2540ed1c418cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2541ed1c418cSBarry Smith {
2542ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2543ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2544d2840e09SBarry Smith   const PetscScalar *x,*v;
2545d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2546ed1c418cSBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2547ed1c418cSBarry Smith   PetscErrorCode    ierr;
2548d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2549d2840e09SBarry Smith   PetscInt          n,i;
2550ed1c418cSBarry Smith 
2551ed1c418cSBarry Smith   PetscFunctionBegin;
2552d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
25533649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2554ed1c418cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2555ed1c418cSBarry Smith 
2556ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2557ed1c418cSBarry Smith     idx    = a->j + a->i[i] ;
2558ed1c418cSBarry Smith     v      = a->a + a->i[i] ;
2559ed1c418cSBarry Smith     n      = a->i[i+1] - a->i[i];
2560ed1c418cSBarry Smith     alpha1  = x[18*i];
2561ed1c418cSBarry Smith     alpha2  = x[18*i+1];
2562ed1c418cSBarry Smith     alpha3  = x[18*i+2];
2563ed1c418cSBarry Smith     alpha4  = x[18*i+3];
2564ed1c418cSBarry Smith     alpha5  = x[18*i+4];
2565ed1c418cSBarry Smith     alpha6  = x[18*i+5];
2566ed1c418cSBarry Smith     alpha7  = x[18*i+6];
2567ed1c418cSBarry Smith     alpha8  = x[18*i+7];
2568ed1c418cSBarry Smith     alpha9  = x[18*i+8];
2569ed1c418cSBarry Smith     alpha10 = x[18*i+9];
2570ed1c418cSBarry Smith     alpha11 = x[18*i+10];
2571ed1c418cSBarry Smith     alpha12 = x[18*i+11];
2572ed1c418cSBarry Smith     alpha13 = x[18*i+12];
2573ed1c418cSBarry Smith     alpha14 = x[18*i+13];
2574ed1c418cSBarry Smith     alpha15 = x[18*i+14];
2575ed1c418cSBarry Smith     alpha16 = x[18*i+15];
2576ed1c418cSBarry Smith     alpha17 = x[18*i+16];
2577ed1c418cSBarry Smith     alpha18 = x[18*i+17];
2578ed1c418cSBarry Smith     while (n-->0) {
2579ed1c418cSBarry Smith       y[18*(*idx)]    += alpha1*(*v);
2580ed1c418cSBarry Smith       y[18*(*idx)+1]  += alpha2*(*v);
2581ed1c418cSBarry Smith       y[18*(*idx)+2]  += alpha3*(*v);
2582ed1c418cSBarry Smith       y[18*(*idx)+3]  += alpha4*(*v);
2583ed1c418cSBarry Smith       y[18*(*idx)+4]  += alpha5*(*v);
2584ed1c418cSBarry Smith       y[18*(*idx)+5]  += alpha6*(*v);
2585ed1c418cSBarry Smith       y[18*(*idx)+6]  += alpha7*(*v);
2586ed1c418cSBarry Smith       y[18*(*idx)+7]  += alpha8*(*v);
2587ed1c418cSBarry Smith       y[18*(*idx)+8]  += alpha9*(*v);
2588ed1c418cSBarry Smith       y[18*(*idx)+9]  += alpha10*(*v);
2589ed1c418cSBarry Smith       y[18*(*idx)+10] += alpha11*(*v);
2590ed1c418cSBarry Smith       y[18*(*idx)+11] += alpha12*(*v);
2591ed1c418cSBarry Smith       y[18*(*idx)+12] += alpha13*(*v);
2592ed1c418cSBarry Smith       y[18*(*idx)+13] += alpha14*(*v);
2593ed1c418cSBarry Smith       y[18*(*idx)+14] += alpha15*(*v);
2594ed1c418cSBarry Smith       y[18*(*idx)+15] += alpha16*(*v);
2595ed1c418cSBarry Smith       y[18*(*idx)+16] += alpha17*(*v);
2596ed1c418cSBarry Smith       y[18*(*idx)+17] += alpha18*(*v);
2597ed1c418cSBarry Smith       idx++; v++;
2598ed1c418cSBarry Smith     }
2599ed1c418cSBarry Smith   }
2600dc0b31edSSatish Balay   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
26013649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2602ed1c418cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2603ed1c418cSBarry Smith   PetscFunctionReturn(0);
2604ed1c418cSBarry Smith }
2605ed1c418cSBarry Smith 
2606ed1c418cSBarry Smith #undef __FUNCT__
2607ed1c418cSBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_18"
2608ed1c418cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2609ed1c418cSBarry Smith {
2610ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2611ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2612d2840e09SBarry Smith   const PetscScalar *x,*v;
2613d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2614ed1c418cSBarry Smith   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2615ed1c418cSBarry Smith   PetscErrorCode    ierr;
2616d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2617ed1c418cSBarry Smith   PetscInt          n,i,jrow,j;
2618ed1c418cSBarry Smith 
2619ed1c418cSBarry Smith   PetscFunctionBegin;
2620ed1c418cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
26213649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2622ed1c418cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2623ed1c418cSBarry Smith   idx  = a->j;
2624ed1c418cSBarry Smith   v    = a->a;
2625ed1c418cSBarry Smith   ii   = a->i;
2626ed1c418cSBarry Smith 
2627ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2628ed1c418cSBarry Smith     jrow = ii[i];
2629ed1c418cSBarry Smith     n    = ii[i+1] - jrow;
2630ed1c418cSBarry Smith     sum1  = 0.0;
2631ed1c418cSBarry Smith     sum2  = 0.0;
2632ed1c418cSBarry Smith     sum3  = 0.0;
2633ed1c418cSBarry Smith     sum4  = 0.0;
2634ed1c418cSBarry Smith     sum5  = 0.0;
2635ed1c418cSBarry Smith     sum6  = 0.0;
2636ed1c418cSBarry Smith     sum7  = 0.0;
2637ed1c418cSBarry Smith     sum8  = 0.0;
2638ed1c418cSBarry Smith     sum9  = 0.0;
2639ed1c418cSBarry Smith     sum10 = 0.0;
2640ed1c418cSBarry Smith     sum11 = 0.0;
2641ed1c418cSBarry Smith     sum12 = 0.0;
2642ed1c418cSBarry Smith     sum13 = 0.0;
2643ed1c418cSBarry Smith     sum14 = 0.0;
2644ed1c418cSBarry Smith     sum15 = 0.0;
2645ed1c418cSBarry Smith     sum16 = 0.0;
2646ed1c418cSBarry Smith     sum17 = 0.0;
2647ed1c418cSBarry Smith     sum18 = 0.0;
2648ed1c418cSBarry Smith     for (j=0; j<n; j++) {
2649ed1c418cSBarry Smith       sum1  += v[jrow]*x[18*idx[jrow]];
2650ed1c418cSBarry Smith       sum2  += v[jrow]*x[18*idx[jrow]+1];
2651ed1c418cSBarry Smith       sum3  += v[jrow]*x[18*idx[jrow]+2];
2652ed1c418cSBarry Smith       sum4  += v[jrow]*x[18*idx[jrow]+3];
2653ed1c418cSBarry Smith       sum5  += v[jrow]*x[18*idx[jrow]+4];
2654ed1c418cSBarry Smith       sum6  += v[jrow]*x[18*idx[jrow]+5];
2655ed1c418cSBarry Smith       sum7  += v[jrow]*x[18*idx[jrow]+6];
2656ed1c418cSBarry Smith       sum8  += v[jrow]*x[18*idx[jrow]+7];
2657ed1c418cSBarry Smith       sum9  += v[jrow]*x[18*idx[jrow]+8];
2658ed1c418cSBarry Smith       sum10 += v[jrow]*x[18*idx[jrow]+9];
2659ed1c418cSBarry Smith       sum11 += v[jrow]*x[18*idx[jrow]+10];
2660ed1c418cSBarry Smith       sum12 += v[jrow]*x[18*idx[jrow]+11];
2661ed1c418cSBarry Smith       sum13 += v[jrow]*x[18*idx[jrow]+12];
2662ed1c418cSBarry Smith       sum14 += v[jrow]*x[18*idx[jrow]+13];
2663ed1c418cSBarry Smith       sum15 += v[jrow]*x[18*idx[jrow]+14];
2664ed1c418cSBarry Smith       sum16 += v[jrow]*x[18*idx[jrow]+15];
2665ed1c418cSBarry Smith       sum17 += v[jrow]*x[18*idx[jrow]+16];
2666ed1c418cSBarry Smith       sum18 += v[jrow]*x[18*idx[jrow]+17];
2667ed1c418cSBarry Smith       jrow++;
2668ed1c418cSBarry Smith      }
2669ed1c418cSBarry Smith     y[18*i]    += sum1;
2670ed1c418cSBarry Smith     y[18*i+1]  += sum2;
2671ed1c418cSBarry Smith     y[18*i+2]  += sum3;
2672ed1c418cSBarry Smith     y[18*i+3]  += sum4;
2673ed1c418cSBarry Smith     y[18*i+4]  += sum5;
2674ed1c418cSBarry Smith     y[18*i+5]  += sum6;
2675ed1c418cSBarry Smith     y[18*i+6]  += sum7;
2676ed1c418cSBarry Smith     y[18*i+7]  += sum8;
2677ed1c418cSBarry Smith     y[18*i+8]  += sum9;
2678ed1c418cSBarry Smith     y[18*i+9]  += sum10;
2679ed1c418cSBarry Smith     y[18*i+10] += sum11;
2680ed1c418cSBarry Smith     y[18*i+11] += sum12;
2681ed1c418cSBarry Smith     y[18*i+12] += sum13;
2682ed1c418cSBarry Smith     y[18*i+13] += sum14;
2683ed1c418cSBarry Smith     y[18*i+14] += sum15;
2684ed1c418cSBarry Smith     y[18*i+15] += sum16;
2685ed1c418cSBarry Smith     y[18*i+16] += sum17;
2686ed1c418cSBarry Smith     y[18*i+17] += sum18;
2687ed1c418cSBarry Smith   }
2688ed1c418cSBarry Smith 
2689dc0b31edSSatish Balay   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
26903649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2691ed1c418cSBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2692ed1c418cSBarry Smith   PetscFunctionReturn(0);
2693ed1c418cSBarry Smith }
2694ed1c418cSBarry Smith 
2695ed1c418cSBarry Smith #undef __FUNCT__
2696ed1c418cSBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_18"
2697ed1c418cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2698ed1c418cSBarry Smith {
2699ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2700ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2701d2840e09SBarry Smith   const PetscScalar *x,*v;
2702d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2703ed1c418cSBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2704ed1c418cSBarry Smith   PetscErrorCode    ierr;
2705d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2706d2840e09SBarry Smith   PetscInt          n,i;
2707ed1c418cSBarry Smith 
2708ed1c418cSBarry Smith   PetscFunctionBegin;
2709ed1c418cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
27103649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2711ed1c418cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2712ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2713ed1c418cSBarry Smith     idx    = a->j + a->i[i] ;
2714ed1c418cSBarry Smith     v      = a->a + a->i[i] ;
2715ed1c418cSBarry Smith     n      = a->i[i+1] - a->i[i];
2716ed1c418cSBarry Smith     alpha1 = x[18*i];
2717ed1c418cSBarry Smith     alpha2 = x[18*i+1];
2718ed1c418cSBarry Smith     alpha3 = x[18*i+2];
2719ed1c418cSBarry Smith     alpha4 = x[18*i+3];
2720ed1c418cSBarry Smith     alpha5 = x[18*i+4];
2721ed1c418cSBarry Smith     alpha6 = x[18*i+5];
2722ed1c418cSBarry Smith     alpha7 = x[18*i+6];
2723ed1c418cSBarry Smith     alpha8 = x[18*i+7];
2724ed1c418cSBarry Smith     alpha9  = x[18*i+8];
2725ed1c418cSBarry Smith     alpha10 = x[18*i+9];
2726ed1c418cSBarry Smith     alpha11 = x[18*i+10];
2727ed1c418cSBarry Smith     alpha12 = x[18*i+11];
2728ed1c418cSBarry Smith     alpha13 = x[18*i+12];
2729ed1c418cSBarry Smith     alpha14 = x[18*i+13];
2730ed1c418cSBarry Smith     alpha15 = x[18*i+14];
2731ed1c418cSBarry Smith     alpha16 = x[18*i+15];
2732ed1c418cSBarry Smith     alpha17 = x[18*i+16];
2733ed1c418cSBarry Smith     alpha18 = x[18*i+17];
2734ed1c418cSBarry Smith     while (n-->0) {
2735ed1c418cSBarry Smith       y[18*(*idx)]   += alpha1*(*v);
2736ed1c418cSBarry Smith       y[18*(*idx)+1] += alpha2*(*v);
2737ed1c418cSBarry Smith       y[18*(*idx)+2] += alpha3*(*v);
2738ed1c418cSBarry Smith       y[18*(*idx)+3] += alpha4*(*v);
2739ed1c418cSBarry Smith       y[18*(*idx)+4] += alpha5*(*v);
2740ed1c418cSBarry Smith       y[18*(*idx)+5] += alpha6*(*v);
2741ed1c418cSBarry Smith       y[18*(*idx)+6] += alpha7*(*v);
2742ed1c418cSBarry Smith       y[18*(*idx)+7] += alpha8*(*v);
2743ed1c418cSBarry Smith       y[18*(*idx)+8]  += alpha9*(*v);
2744ed1c418cSBarry Smith       y[18*(*idx)+9]  += alpha10*(*v);
2745ed1c418cSBarry Smith       y[18*(*idx)+10] += alpha11*(*v);
2746ed1c418cSBarry Smith       y[18*(*idx)+11] += alpha12*(*v);
2747ed1c418cSBarry Smith       y[18*(*idx)+12] += alpha13*(*v);
2748ed1c418cSBarry Smith       y[18*(*idx)+13] += alpha14*(*v);
2749ed1c418cSBarry Smith       y[18*(*idx)+14] += alpha15*(*v);
2750ed1c418cSBarry Smith       y[18*(*idx)+15] += alpha16*(*v);
2751ed1c418cSBarry Smith       y[18*(*idx)+16] += alpha17*(*v);
2752ed1c418cSBarry Smith       y[18*(*idx)+17] += alpha18*(*v);
2753ed1c418cSBarry Smith       idx++; v++;
2754ed1c418cSBarry Smith     }
2755ed1c418cSBarry Smith   }
2756dc0b31edSSatish Balay   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
27573649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2758ed1c418cSBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2759ed1c418cSBarry Smith   PetscFunctionReturn(0);
2760ed1c418cSBarry Smith }
2761ed1c418cSBarry Smith 
27626861f193SBarry Smith #undef __FUNCT__
27636861f193SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_N"
27646861f193SBarry Smith PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
27656861f193SBarry Smith {
27666861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
27676861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
27686861f193SBarry Smith   const PetscScalar *x,*v;
27696861f193SBarry Smith   PetscScalar       *y,*sums;
27706861f193SBarry Smith   PetscErrorCode    ierr;
27716861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
27726861f193SBarry Smith   PetscInt          n,i,jrow,j,dof = b->dof,k;
27736861f193SBarry Smith 
27746861f193SBarry Smith   PetscFunctionBegin;
27753649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
27766861f193SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
27776861f193SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
27786861f193SBarry Smith   idx  = a->j;
27796861f193SBarry Smith   v    = a->a;
27806861f193SBarry Smith   ii   = a->i;
27816861f193SBarry Smith 
27826861f193SBarry Smith   for (i=0; i<m; i++) {
27836861f193SBarry Smith     jrow = ii[i];
27846861f193SBarry Smith     n    = ii[i+1] - jrow;
27856861f193SBarry Smith     sums = y + dof*i;
27866861f193SBarry Smith     for (j=0; j<n; j++) {
27876861f193SBarry Smith       for (k=0; k<dof; k++) {
27886861f193SBarry Smith         sums[k]  += v[jrow]*x[dof*idx[jrow]+k];
27896861f193SBarry Smith       }
27906861f193SBarry Smith       jrow++;
27916861f193SBarry Smith     }
27926861f193SBarry Smith   }
27936861f193SBarry Smith 
27946861f193SBarry Smith   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
27953649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
27966861f193SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
27976861f193SBarry Smith   PetscFunctionReturn(0);
27986861f193SBarry Smith }
27996861f193SBarry Smith 
28006861f193SBarry Smith #undef __FUNCT__
28016861f193SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_N"
28026861f193SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
28036861f193SBarry Smith {
28046861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
28056861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
28066861f193SBarry Smith   const PetscScalar *x,*v;
28076861f193SBarry Smith   PetscScalar       *y,*sums;
28086861f193SBarry Smith   PetscErrorCode    ierr;
28096861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
28106861f193SBarry Smith   PetscInt          n,i,jrow,j,dof = b->dof,k;
28116861f193SBarry Smith 
28126861f193SBarry Smith   PetscFunctionBegin;
28136861f193SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
28143649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
28156861f193SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
28166861f193SBarry Smith   idx  = a->j;
28176861f193SBarry Smith   v    = a->a;
28186861f193SBarry Smith   ii   = a->i;
28196861f193SBarry Smith 
28206861f193SBarry Smith   for (i=0; i<m; i++) {
28216861f193SBarry Smith     jrow = ii[i];
28226861f193SBarry Smith     n    = ii[i+1] - jrow;
28236861f193SBarry Smith     sums = y + dof*i;
28246861f193SBarry Smith     for (j=0; j<n; j++) {
28256861f193SBarry Smith       for (k=0; k<dof; k++) {
28266861f193SBarry Smith         sums[k]  += v[jrow]*x[dof*idx[jrow]+k];
28276861f193SBarry Smith       }
28286861f193SBarry Smith       jrow++;
28296861f193SBarry Smith     }
28306861f193SBarry Smith   }
28316861f193SBarry Smith 
28326861f193SBarry Smith   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
28333649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
28346861f193SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
28356861f193SBarry Smith   PetscFunctionReturn(0);
28366861f193SBarry Smith }
28376861f193SBarry Smith 
28386861f193SBarry Smith #undef __FUNCT__
28396861f193SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_N"
28406861f193SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
28416861f193SBarry Smith {
28426861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
28436861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
28446861f193SBarry Smith   const PetscScalar *x,*v,*alpha;
28456861f193SBarry Smith   PetscScalar       *y;
28466861f193SBarry Smith   PetscErrorCode    ierr;
28476861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
28486861f193SBarry Smith   PetscInt          n,i,k;
28496861f193SBarry Smith 
28506861f193SBarry Smith   PetscFunctionBegin;
28513649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
28526861f193SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
28536861f193SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
28546861f193SBarry Smith   for (i=0; i<m; i++) {
28556861f193SBarry Smith     idx    = a->j + a->i[i] ;
28566861f193SBarry Smith     v      = a->a + a->i[i] ;
28576861f193SBarry Smith     n      = a->i[i+1] - a->i[i];
28586861f193SBarry Smith     alpha  = x + dof*i;
28596861f193SBarry Smith     while (n-->0) {
28606861f193SBarry Smith       for (k=0; k<dof; k++) {
28616861f193SBarry Smith         y[dof*(*idx)+k] += alpha[k]*(*v);
28626861f193SBarry Smith       }
286383ce7366SBarry Smith       idx++; v++;
28646861f193SBarry Smith     }
28656861f193SBarry Smith   }
28666861f193SBarry Smith   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
28673649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
28686861f193SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
28696861f193SBarry Smith   PetscFunctionReturn(0);
28706861f193SBarry Smith }
28716861f193SBarry Smith 
28726861f193SBarry Smith #undef __FUNCT__
28736861f193SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_N"
28746861f193SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
28756861f193SBarry Smith {
28766861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
28776861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
28786861f193SBarry Smith   const PetscScalar *x,*v,*alpha;
28796861f193SBarry Smith   PetscScalar       *y;
28806861f193SBarry Smith   PetscErrorCode    ierr;
28816861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
28826861f193SBarry Smith   PetscInt          n,i,k;
28836861f193SBarry Smith 
28846861f193SBarry Smith   PetscFunctionBegin;
28856861f193SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
28863649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
28876861f193SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
28886861f193SBarry Smith   for (i=0; i<m; i++) {
28896861f193SBarry Smith     idx    = a->j + a->i[i] ;
28906861f193SBarry Smith     v      = a->a + a->i[i] ;
28916861f193SBarry Smith     n      = a->i[i+1] - a->i[i];
28926861f193SBarry Smith     alpha  = x + dof*i;
28936861f193SBarry Smith     while (n-->0) {
28946861f193SBarry Smith       for (k=0; k<dof; k++) {
28956861f193SBarry Smith         y[dof*(*idx)+k] += alpha[k]*(*v);
28966861f193SBarry Smith       }
289783ce7366SBarry Smith       idx++; v++;
28986861f193SBarry Smith     }
28996861f193SBarry Smith   }
29006861f193SBarry Smith   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
29013649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
29026861f193SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
29036861f193SBarry Smith   PetscFunctionReturn(0);
29046861f193SBarry Smith }
29056861f193SBarry Smith 
2906f4a53059SBarry Smith /*===================================================================================*/
29074a2ae208SSatish Balay #undef __FUNCT__
29084a2ae208SSatish Balay #define __FUNCT__ "MatMult_MPIMAIJ_dof"
2909dfbe8321SBarry Smith PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2910f4a53059SBarry Smith {
29114cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2912dfbe8321SBarry Smith   PetscErrorCode ierr;
2913f4a53059SBarry Smith 
2914b24ad042SBarry Smith   PetscFunctionBegin;
2915f4a53059SBarry Smith   /* start the scatter */
2916ca9f406cSSatish Balay   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
29174cb9d9b8SBarry Smith   ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr);
2918ca9f406cSSatish Balay   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
29194cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr);
2920f4a53059SBarry Smith   PetscFunctionReturn(0);
2921f4a53059SBarry Smith }
2922f4a53059SBarry Smith 
29234a2ae208SSatish Balay #undef __FUNCT__
29244a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof"
2925dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
29264cb9d9b8SBarry Smith {
29274cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2928dfbe8321SBarry Smith   PetscErrorCode ierr;
2929b24ad042SBarry Smith 
29304cb9d9b8SBarry Smith   PetscFunctionBegin;
29314cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
29324cb9d9b8SBarry Smith   ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr);
2933ca9f406cSSatish Balay   ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2934ca9f406cSSatish Balay   ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
29354cb9d9b8SBarry Smith   PetscFunctionReturn(0);
29364cb9d9b8SBarry Smith }
29374cb9d9b8SBarry Smith 
29384a2ae208SSatish Balay #undef __FUNCT__
29394a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof"
2940dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
29414cb9d9b8SBarry Smith {
29424cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2943dfbe8321SBarry Smith   PetscErrorCode ierr;
29444cb9d9b8SBarry Smith 
2945b24ad042SBarry Smith   PetscFunctionBegin;
29464cb9d9b8SBarry Smith   /* start the scatter */
2947ca9f406cSSatish Balay   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2948d72c5c08SBarry Smith   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2949ca9f406cSSatish Balay   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2950717f2ec8SHong Zhang   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
29514cb9d9b8SBarry Smith   PetscFunctionReturn(0);
29524cb9d9b8SBarry Smith }
29534cb9d9b8SBarry Smith 
29544a2ae208SSatish Balay #undef __FUNCT__
29554a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof"
2956dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
29574cb9d9b8SBarry Smith {
29584cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2959dfbe8321SBarry Smith   PetscErrorCode ierr;
2960b24ad042SBarry Smith 
29614cb9d9b8SBarry Smith   PetscFunctionBegin;
29624cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2963ca9f406cSSatish Balay   ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2964d72c5c08SBarry Smith   ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2965ca9f406cSSatish Balay   ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
29664cb9d9b8SBarry Smith   PetscFunctionReturn(0);
29674cb9d9b8SBarry Smith }
29684cb9d9b8SBarry Smith 
296995ddefa2SHong Zhang /* ----------------------------------------------------------------*/
29709c4fc4c7SBarry Smith #undef __FUNCT__
29717ba1a0bfSKris Buschelman #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
29727ba1a0bfSKris Buschelman PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
29737ba1a0bfSKris Buschelman {
29747ba1a0bfSKris Buschelman   /* This routine requires testing -- but it's getting better. */
29757ba1a0bfSKris Buschelman   PetscErrorCode     ierr;
2976a1a86e44SBarry Smith   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
29777ba1a0bfSKris Buschelman   Mat_SeqMAIJ        *pp=(Mat_SeqMAIJ*)PP->data;
29787ba1a0bfSKris Buschelman   Mat                P=pp->AIJ;
29797ba1a0bfSKris Buschelman   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2980d2840e09SBarry Smith   PetscInt           *pti,*ptj,*ptJ;
29817ba1a0bfSKris Buschelman   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2982d2840e09SBarry Smith   const PetscInt     an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof;
2983d2840e09SBarry Smith   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn;
29847ba1a0bfSKris Buschelman   MatScalar          *ca;
2985d2840e09SBarry Smith   const PetscInt     *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj;
29867ba1a0bfSKris Buschelman 
29877ba1a0bfSKris Buschelman   PetscFunctionBegin;
29887ba1a0bfSKris Buschelman   /* Start timer */
29897ba1a0bfSKris Buschelman   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
29907ba1a0bfSKris Buschelman 
29917ba1a0bfSKris Buschelman   /* Get ij structure of P^T */
29927ba1a0bfSKris Buschelman   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
29937ba1a0bfSKris Buschelman 
29947ba1a0bfSKris Buschelman   cn = pn*ppdof;
29957ba1a0bfSKris Buschelman   /* Allocate ci array, arrays for fill computation and */
29967ba1a0bfSKris Buschelman   /* free space for accumulating nonzero column info */
29977ba1a0bfSKris Buschelman   ierr = PetscMalloc((cn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
29987ba1a0bfSKris Buschelman   ci[0] = 0;
29997ba1a0bfSKris Buschelman 
30007ba1a0bfSKris Buschelman   /* Work arrays for rows of P^T*A */
300174ed9c26SBarry Smith   ierr = PetscMalloc4(an,PetscInt,&ptadenserow,an,PetscInt,&ptasparserow,cn,PetscInt,&denserow,cn,PetscInt,&sparserow);CHKERRQ(ierr);
3002c43dabc9SBarry Smith   ierr = PetscMemzero(ptadenserow,an*sizeof(PetscInt));CHKERRQ(ierr);
3003c43dabc9SBarry Smith   ierr = PetscMemzero(denserow,cn*sizeof(PetscInt));CHKERRQ(ierr);
30047ba1a0bfSKris Buschelman 
30057ba1a0bfSKris Buschelman   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
30067ba1a0bfSKris Buschelman   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
30077ba1a0bfSKris Buschelman   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
3008a1a86e44SBarry Smith   ierr          = PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space);
30097ba1a0bfSKris Buschelman   current_space = free_space;
30107ba1a0bfSKris Buschelman 
30117ba1a0bfSKris Buschelman   /* Determine symbolic info for each row of C: */
30127ba1a0bfSKris Buschelman   for (i=0;i<pn;i++) {
30137ba1a0bfSKris Buschelman     ptnzi  = pti[i+1] - pti[i];
30147ba1a0bfSKris Buschelman     ptJ    = ptj + pti[i];
30157ba1a0bfSKris Buschelman     for (dof=0;dof<ppdof;dof++) {
30167ba1a0bfSKris Buschelman       ptanzi = 0;
30177ba1a0bfSKris Buschelman       /* Determine symbolic row of PtA: */
30187ba1a0bfSKris Buschelman       for (j=0;j<ptnzi;j++) {
30197ba1a0bfSKris Buschelman         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
30207ba1a0bfSKris Buschelman         arow = ptJ[j]*ppdof + dof;
30217ba1a0bfSKris Buschelman         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
30227ba1a0bfSKris Buschelman         anzj = ai[arow+1] - ai[arow];
30237ba1a0bfSKris Buschelman         ajj  = aj + ai[arow];
30247ba1a0bfSKris Buschelman         for (k=0;k<anzj;k++) {
30257ba1a0bfSKris Buschelman           if (!ptadenserow[ajj[k]]) {
30267ba1a0bfSKris Buschelman             ptadenserow[ajj[k]]    = -1;
30277ba1a0bfSKris Buschelman             ptasparserow[ptanzi++] = ajj[k];
30287ba1a0bfSKris Buschelman           }
30297ba1a0bfSKris Buschelman         }
30307ba1a0bfSKris Buschelman       }
30317ba1a0bfSKris Buschelman       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
30327ba1a0bfSKris Buschelman       ptaj = ptasparserow;
30337ba1a0bfSKris Buschelman       cnzi   = 0;
30347ba1a0bfSKris Buschelman       for (j=0;j<ptanzi;j++) {
30357ba1a0bfSKris Buschelman         /* Get offset within block of P */
30367ba1a0bfSKris Buschelman         pshift = *ptaj%ppdof;
30377ba1a0bfSKris Buschelman         /* Get block row of P */
30387ba1a0bfSKris Buschelman         prow = (*ptaj++)/ppdof; /* integer division */
30397ba1a0bfSKris Buschelman         /* P has same number of nonzeros per row as the compressed form */
30407ba1a0bfSKris Buschelman         pnzj = pi[prow+1] - pi[prow];
30417ba1a0bfSKris Buschelman         pjj  = pj + pi[prow];
30427ba1a0bfSKris Buschelman         for (k=0;k<pnzj;k++) {
30437ba1a0bfSKris Buschelman           /* Locations in C are shifted by the offset within the block */
30447ba1a0bfSKris Buschelman           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
30457ba1a0bfSKris Buschelman           if (!denserow[pjj[k]*ppdof+pshift]) {
30467ba1a0bfSKris Buschelman             denserow[pjj[k]*ppdof+pshift] = -1;
30477ba1a0bfSKris Buschelman             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
30487ba1a0bfSKris Buschelman           }
30497ba1a0bfSKris Buschelman         }
30507ba1a0bfSKris Buschelman       }
30517ba1a0bfSKris Buschelman 
30527ba1a0bfSKris Buschelman       /* sort sparserow */
30537ba1a0bfSKris Buschelman       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
30547ba1a0bfSKris Buschelman 
30557ba1a0bfSKris Buschelman       /* If free space is not available, make more free space */
30567ba1a0bfSKris Buschelman       /* Double the amount of total space in the list */
30577ba1a0bfSKris Buschelman       if (current_space->local_remaining<cnzi) {
30584238b7adSHong Zhang         ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
30597ba1a0bfSKris Buschelman       }
30607ba1a0bfSKris Buschelman 
30617ba1a0bfSKris Buschelman       /* Copy data into free space, and zero out denserows */
30627ba1a0bfSKris Buschelman       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
30637ba1a0bfSKris Buschelman       current_space->array           += cnzi;
30647ba1a0bfSKris Buschelman       current_space->local_used      += cnzi;
30657ba1a0bfSKris Buschelman       current_space->local_remaining -= cnzi;
30667ba1a0bfSKris Buschelman 
30677ba1a0bfSKris Buschelman       for (j=0;j<ptanzi;j++) {
30687ba1a0bfSKris Buschelman         ptadenserow[ptasparserow[j]] = 0;
30697ba1a0bfSKris Buschelman       }
30707ba1a0bfSKris Buschelman       for (j=0;j<cnzi;j++) {
30717ba1a0bfSKris Buschelman         denserow[sparserow[j]] = 0;
30727ba1a0bfSKris Buschelman       }
30737ba1a0bfSKris Buschelman       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
30747ba1a0bfSKris Buschelman       /*        For now, we will recompute what is needed. */
30757ba1a0bfSKris Buschelman       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
30767ba1a0bfSKris Buschelman     }
30777ba1a0bfSKris Buschelman   }
30787ba1a0bfSKris Buschelman   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
30797ba1a0bfSKris Buschelman   /* Allocate space for cj, initialize cj, and */
30807ba1a0bfSKris Buschelman   /* destroy list of free space and other temporary array(s) */
30817ba1a0bfSKris Buschelman   ierr = PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3082a1a86e44SBarry Smith   ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
308374ed9c26SBarry Smith   ierr = PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);CHKERRQ(ierr);
30847ba1a0bfSKris Buschelman 
30857ba1a0bfSKris Buschelman   /* Allocate space for ca */
30867ba1a0bfSKris Buschelman   ierr = PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
30877ba1a0bfSKris Buschelman   ierr = PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
30887ba1a0bfSKris Buschelman 
30897ba1a0bfSKris Buschelman   /* put together the new matrix */
30907adad957SLisandro Dalcin   ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,cn,cn,ci,cj,ca,C);CHKERRQ(ierr);
30917ba1a0bfSKris Buschelman 
30927ba1a0bfSKris Buschelman   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
30937ba1a0bfSKris Buschelman   /* Since these are PETSc arrays, change flags to free them as necessary. */
30947ba1a0bfSKris Buschelman   c          = (Mat_SeqAIJ *)((*C)->data);
3095e6b907acSBarry Smith   c->free_a  = PETSC_TRUE;
3096e6b907acSBarry Smith   c->free_ij = PETSC_TRUE;
30977ba1a0bfSKris Buschelman   c->nonew   = 0;
30987ba1a0bfSKris Buschelman 
30997ba1a0bfSKris Buschelman   /* Clean up. */
31007ba1a0bfSKris Buschelman   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
31017ba1a0bfSKris Buschelman 
31027ba1a0bfSKris Buschelman   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
31037ba1a0bfSKris Buschelman   PetscFunctionReturn(0);
31047ba1a0bfSKris Buschelman }
31057ba1a0bfSKris Buschelman 
31067ba1a0bfSKris Buschelman #undef __FUNCT__
31077ba1a0bfSKris Buschelman #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ"
31087ba1a0bfSKris Buschelman PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
31097ba1a0bfSKris Buschelman {
31107ba1a0bfSKris Buschelman   /* This routine requires testing -- first draft only */
31117ba1a0bfSKris Buschelman   PetscErrorCode  ierr;
31127ba1a0bfSKris Buschelman   Mat_SeqMAIJ     *pp=(Mat_SeqMAIJ*)PP->data;
31137ba1a0bfSKris Buschelman   Mat             P=pp->AIJ;
31147ba1a0bfSKris Buschelman   Mat_SeqAIJ      *a  = (Mat_SeqAIJ *) A->data;
31157ba1a0bfSKris Buschelman   Mat_SeqAIJ      *p  = (Mat_SeqAIJ *) P->data;
31167ba1a0bfSKris Buschelman   Mat_SeqAIJ      *c  = (Mat_SeqAIJ *) C->data;
3117d2840e09SBarry Smith   const PetscInt  *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
3118d2840e09SBarry Smith   const PetscInt  *ci=c->i,*cj=c->j,*cjj;
3119d2840e09SBarry Smith   const PetscInt  am=A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
3120d2840e09SBarry Smith   PetscInt        i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense;
3121d2840e09SBarry Smith   const MatScalar *aa=a->a,*pa=p->a,*pA=p->a,*paj;
3122d2840e09SBarry Smith   MatScalar       *ca=c->a,*caj,*apa;
31237ba1a0bfSKris Buschelman 
31247ba1a0bfSKris Buschelman   PetscFunctionBegin;
31257ba1a0bfSKris Buschelman   /* Allocate temporary array for storage of one row of A*P */
312674ed9c26SBarry Smith   ierr = PetscMalloc3(cn,MatScalar,&apa,cn,PetscInt,&apj,cn,PetscInt,&apjdense);CHKERRQ(ierr);
312774ed9c26SBarry Smith   ierr = PetscMemzero(apa,cn*sizeof(MatScalar));CHKERRQ(ierr);
312874ed9c26SBarry Smith   ierr = PetscMemzero(apj,cn*sizeof(PetscInt));CHKERRQ(ierr);
312974ed9c26SBarry Smith   ierr = PetscMemzero(apjdense,cn*sizeof(PetscInt));CHKERRQ(ierr);
31307ba1a0bfSKris Buschelman 
31317ba1a0bfSKris Buschelman   /* Clear old values in C */
31327ba1a0bfSKris Buschelman   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
31337ba1a0bfSKris Buschelman 
31347ba1a0bfSKris Buschelman   for (i=0;i<am;i++) {
31357ba1a0bfSKris Buschelman     /* Form sparse row of A*P */
31367ba1a0bfSKris Buschelman     anzi  = ai[i+1] - ai[i];
31377ba1a0bfSKris Buschelman     apnzj = 0;
31387ba1a0bfSKris Buschelman     for (j=0;j<anzi;j++) {
31397ba1a0bfSKris Buschelman       /* Get offset within block of P */
31407ba1a0bfSKris Buschelman       pshift = *aj%ppdof;
31417ba1a0bfSKris Buschelman       /* Get block row of P */
31427ba1a0bfSKris Buschelman       prow   = *aj++/ppdof; /* integer division */
31437ba1a0bfSKris Buschelman       pnzj = pi[prow+1] - pi[prow];
31447ba1a0bfSKris Buschelman       pjj  = pj + pi[prow];
31457ba1a0bfSKris Buschelman       paj  = pa + pi[prow];
31467ba1a0bfSKris Buschelman       for (k=0;k<pnzj;k++) {
31477ba1a0bfSKris Buschelman         poffset = pjj[k]*ppdof+pshift;
31487ba1a0bfSKris Buschelman         if (!apjdense[poffset]) {
31497ba1a0bfSKris Buschelman           apjdense[poffset] = -1;
31507ba1a0bfSKris Buschelman           apj[apnzj++]      = poffset;
31517ba1a0bfSKris Buschelman         }
31527ba1a0bfSKris Buschelman         apa[poffset] += (*aa)*paj[k];
31537ba1a0bfSKris Buschelman       }
3154dc0b31edSSatish Balay       ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr);
31557ba1a0bfSKris Buschelman       aa++;
31567ba1a0bfSKris Buschelman     }
31577ba1a0bfSKris Buschelman 
31587ba1a0bfSKris Buschelman     /* Sort the j index array for quick sparse axpy. */
31597ba1a0bfSKris Buschelman     /* Note: a array does not need sorting as it is in dense storage locations. */
31607ba1a0bfSKris Buschelman     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
31617ba1a0bfSKris Buschelman 
31627ba1a0bfSKris Buschelman     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
31637ba1a0bfSKris Buschelman     prow    = i/ppdof; /* integer division */
31647ba1a0bfSKris Buschelman     pshift  = i%ppdof;
31657ba1a0bfSKris Buschelman     poffset = pi[prow];
31667ba1a0bfSKris Buschelman     pnzi = pi[prow+1] - poffset;
31677ba1a0bfSKris Buschelman     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
31687ba1a0bfSKris Buschelman     pJ   = pj+poffset;
31697ba1a0bfSKris Buschelman     pA   = pa+poffset;
31707ba1a0bfSKris Buschelman     for (j=0;j<pnzi;j++) {
31717ba1a0bfSKris Buschelman       crow   = (*pJ)*ppdof+pshift;
31727ba1a0bfSKris Buschelman       cjj    = cj + ci[crow];
31737ba1a0bfSKris Buschelman       caj    = ca + ci[crow];
31747ba1a0bfSKris Buschelman       pJ++;
31757ba1a0bfSKris Buschelman       /* Perform sparse axpy operation.  Note cjj includes apj. */
31767ba1a0bfSKris Buschelman       for (k=0,nextap=0;nextap<apnzj;k++) {
31777ba1a0bfSKris Buschelman         if (cjj[k]==apj[nextap]) {
31787ba1a0bfSKris Buschelman           caj[k] += (*pA)*apa[apj[nextap++]];
31797ba1a0bfSKris Buschelman         }
31807ba1a0bfSKris Buschelman       }
3181dc0b31edSSatish Balay       ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr);
31827ba1a0bfSKris Buschelman       pA++;
31837ba1a0bfSKris Buschelman     }
31847ba1a0bfSKris Buschelman 
31857ba1a0bfSKris Buschelman     /* Zero the current row info for A*P */
31867ba1a0bfSKris Buschelman     for (j=0;j<apnzj;j++) {
31877ba1a0bfSKris Buschelman       apa[apj[j]]      = 0.;
31887ba1a0bfSKris Buschelman       apjdense[apj[j]] = 0;
31897ba1a0bfSKris Buschelman     }
31907ba1a0bfSKris Buschelman   }
31917ba1a0bfSKris Buschelman 
31927ba1a0bfSKris Buschelman   /* Assemble the final matrix and clean up */
31937ba1a0bfSKris Buschelman   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
31947ba1a0bfSKris Buschelman   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
319574ed9c26SBarry Smith   ierr = PetscFree3(apa,apj,apjdense);CHKERRQ(ierr);
319695ddefa2SHong Zhang   PetscFunctionReturn(0);
319795ddefa2SHong Zhang }
31987ba1a0bfSKris Buschelman 
319995ddefa2SHong Zhang #undef __FUNCT__
320095ddefa2SHong Zhang #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIMAIJ"
320195ddefa2SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
320295ddefa2SHong Zhang {
320395ddefa2SHong Zhang   PetscErrorCode    ierr;
320495ddefa2SHong Zhang 
320595ddefa2SHong Zhang   PetscFunctionBegin;
320695ddefa2SHong Zhang   /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */
320795ddefa2SHong Zhang   ierr = MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);CHKERRQ(ierr);
320895ddefa2SHong Zhang   ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);CHKERRQ(ierr);
320995ddefa2SHong Zhang   PetscFunctionReturn(0);
321095ddefa2SHong Zhang }
321195ddefa2SHong Zhang 
321295ddefa2SHong Zhang #undef __FUNCT__
321395ddefa2SHong Zhang #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIMAIJ"
321495ddefa2SHong Zhang PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
321595ddefa2SHong Zhang {
321695ddefa2SHong Zhang   PetscFunctionBegin;
3217e32f2f54SBarry Smith   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
32187ba1a0bfSKris Buschelman   PetscFunctionReturn(0);
32197ba1a0bfSKris Buschelman }
32207ba1a0bfSKris Buschelman 
3221be1d678aSKris Buschelman EXTERN_C_BEGIN
32227ba1a0bfSKris Buschelman #undef __FUNCT__
32230fd73130SBarry Smith #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ"
3224f69a0ea3SMatthew Knepley PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
32259c4fc4c7SBarry Smith {
32269c4fc4c7SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
3227ceb03754SKris Buschelman   Mat               a = b->AIJ,B;
32289c4fc4c7SBarry Smith   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)a->data;
32299c4fc4c7SBarry Smith   PetscErrorCode    ierr;
32300fd73130SBarry Smith   PetscInt          m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
3231ba8c8a56SBarry Smith   PetscInt          *cols;
3232ba8c8a56SBarry Smith   PetscScalar       *vals;
32339c4fc4c7SBarry Smith 
32349c4fc4c7SBarry Smith   PetscFunctionBegin;
32359c4fc4c7SBarry Smith   ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr);
32367c307921SBarry Smith   ierr = PetscMalloc(dof*m*sizeof(PetscInt),&ilen);CHKERRQ(ierr);
32379c4fc4c7SBarry Smith   for (i=0; i<m; i++) {
32389c4fc4c7SBarry Smith     nmax = PetscMax(nmax,aij->ilen[i]);
32390fd73130SBarry Smith     for (j=0; j<dof; j++) {
32400fd73130SBarry Smith       ilen[dof*i+j] = aij->ilen[i];
32419c4fc4c7SBarry Smith     }
32429c4fc4c7SBarry Smith   }
3243ceb03754SKris Buschelman   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr);
32449c4fc4c7SBarry Smith   ierr = PetscFree(ilen);CHKERRQ(ierr);
32459c4fc4c7SBarry Smith   ierr = PetscMalloc(nmax*sizeof(PetscInt),&icols);CHKERRQ(ierr);
32469c4fc4c7SBarry Smith   ii   = 0;
32479c4fc4c7SBarry Smith   for (i=0; i<m; i++) {
3248ba8c8a56SBarry Smith     ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
32490fd73130SBarry Smith     for (j=0; j<dof; j++) {
32509c4fc4c7SBarry Smith       for (k=0; k<ncols; k++) {
32510fd73130SBarry Smith         icols[k] = dof*cols[k]+j;
32529c4fc4c7SBarry Smith       }
3253ceb03754SKris Buschelman       ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
32549c4fc4c7SBarry Smith       ii++;
32559c4fc4c7SBarry Smith     }
3256ba8c8a56SBarry Smith     ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
32579c4fc4c7SBarry Smith   }
32589c4fc4c7SBarry Smith   ierr = PetscFree(icols);CHKERRQ(ierr);
3259ceb03754SKris Buschelman   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3260ceb03754SKris Buschelman   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3261ceb03754SKris Buschelman 
3262ceb03754SKris Buschelman   if (reuse == MAT_REUSE_MATRIX) {
32638ab5b326SKris Buschelman     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
3264c3d102feSKris Buschelman   } else {
3265ceb03754SKris Buschelman     *newmat = B;
3266c3d102feSKris Buschelman   }
32679c4fc4c7SBarry Smith   PetscFunctionReturn(0);
32689c4fc4c7SBarry Smith }
3269be1d678aSKris Buschelman EXTERN_C_END
32709c4fc4c7SBarry Smith 
32717c4f633dSBarry Smith #include "../src/mat/impls/aij/mpi/mpiaij.h"
3272be1d678aSKris Buschelman 
3273be1d678aSKris Buschelman EXTERN_C_BEGIN
32740fd73130SBarry Smith #undef __FUNCT__
32750fd73130SBarry Smith #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ"
3276f69a0ea3SMatthew Knepley PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
32770fd73130SBarry Smith {
32780fd73130SBarry Smith   Mat_MPIMAIJ       *maij = (Mat_MPIMAIJ*)A->data;
3279ceb03754SKris Buschelman   Mat               MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
32800fd73130SBarry Smith   Mat               MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
32810fd73130SBarry Smith   Mat_SeqAIJ        *AIJ = (Mat_SeqAIJ*) MatAIJ->data;
32820fd73130SBarry Smith   Mat_SeqAIJ        *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data;
32830fd73130SBarry Smith   Mat_MPIAIJ        *mpiaij = (Mat_MPIAIJ*) maij->A->data;
3284910ba992SMatthew Knepley   PetscInt          dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0;
3285910ba992SMatthew Knepley   PetscInt          *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL;
32860fd73130SBarry Smith   PetscInt          rstart,cstart,*garray,ii,k;
32870fd73130SBarry Smith   PetscErrorCode    ierr;
32880fd73130SBarry Smith   PetscScalar       *vals,*ovals;
32890fd73130SBarry Smith 
32900fd73130SBarry Smith   PetscFunctionBegin;
3291d0f46423SBarry Smith   ierr = PetscMalloc2(A->rmap->n,PetscInt,&dnz,A->rmap->n,PetscInt,&onz);CHKERRQ(ierr);
3292d0f46423SBarry Smith   for (i=0; i<A->rmap->n/dof; i++) {
32930fd73130SBarry Smith     nmax  = PetscMax(nmax,AIJ->ilen[i]);
32940fd73130SBarry Smith     onmax = PetscMax(onmax,OAIJ->ilen[i]);
32950fd73130SBarry Smith     for (j=0; j<dof; j++) {
32960fd73130SBarry Smith       dnz[dof*i+j] = AIJ->ilen[i];
32970fd73130SBarry Smith       onz[dof*i+j] = OAIJ->ilen[i];
32980fd73130SBarry Smith     }
32990fd73130SBarry Smith   }
3300d0f46423SBarry Smith   ierr = MatCreateMPIAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);CHKERRQ(ierr);
33010fd73130SBarry Smith   ierr = PetscFree2(dnz,onz);CHKERRQ(ierr);
33020fd73130SBarry Smith 
33037a1a7badSBarry Smith   ierr   = PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);CHKERRQ(ierr);
3304d0f46423SBarry Smith   rstart = dof*maij->A->rmap->rstart;
3305d0f46423SBarry Smith   cstart = dof*maij->A->cmap->rstart;
33060fd73130SBarry Smith   garray = mpiaij->garray;
33070fd73130SBarry Smith 
33080fd73130SBarry Smith   ii = rstart;
3309d0f46423SBarry Smith   for (i=0; i<A->rmap->n/dof; i++) {
33100fd73130SBarry Smith     ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
33110fd73130SBarry Smith     ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
33120fd73130SBarry Smith     for (j=0; j<dof; j++) {
33130fd73130SBarry Smith       for (k=0; k<ncols; k++) {
33140fd73130SBarry Smith         icols[k] = cstart + dof*cols[k]+j;
33150fd73130SBarry Smith       }
33160fd73130SBarry Smith       for (k=0; k<oncols; k++) {
33170fd73130SBarry Smith         oicols[k] = dof*garray[ocols[k]]+j;
33180fd73130SBarry Smith       }
3319ceb03754SKris Buschelman       ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
3320ceb03754SKris Buschelman       ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr);
33210fd73130SBarry Smith       ii++;
33220fd73130SBarry Smith     }
33230fd73130SBarry Smith     ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
33240fd73130SBarry Smith     ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
33250fd73130SBarry Smith   }
33260fd73130SBarry Smith   ierr = PetscFree2(icols,oicols);CHKERRQ(ierr);
33270fd73130SBarry Smith 
3328ceb03754SKris Buschelman   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3329ceb03754SKris Buschelman   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3330ceb03754SKris Buschelman 
3331ceb03754SKris Buschelman   if (reuse == MAT_REUSE_MATRIX) {
33327adad957SLisandro Dalcin     PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
33337adad957SLisandro Dalcin     ((PetscObject)A)->refct = 1;
33348ab5b326SKris Buschelman     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
33357adad957SLisandro Dalcin     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3336c3d102feSKris Buschelman   } else {
3337ceb03754SKris Buschelman     *newmat = B;
3338c3d102feSKris Buschelman   }
33390fd73130SBarry Smith   PetscFunctionReturn(0);
33400fd73130SBarry Smith }
3341be1d678aSKris Buschelman EXTERN_C_END
33420fd73130SBarry Smith 
33430fd73130SBarry Smith 
3344bcc973b7SBarry Smith /* ---------------------------------------------------------------------------------- */
3345ff585edeSJed Brown #undef __FUNCT__
3346ff585edeSJed Brown #define __FUNCT__ "MatCreateMAIJ"
3347ff585edeSJed Brown /*@C
33480bad9183SKris Buschelman   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
33490bad9183SKris Buschelman   operations for multicomponent problems.  It interpolates each component the same
33500bad9183SKris Buschelman   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
33510bad9183SKris Buschelman   and MATMPIAIJ for distributed matrices.
33520bad9183SKris Buschelman 
3353ff585edeSJed Brown   Collective
3354ff585edeSJed Brown 
3355ff585edeSJed Brown   Input Parameters:
3356ff585edeSJed Brown + A - the AIJ matrix describing the action on blocks
3357ff585edeSJed Brown - dof - the block size (number of components per node)
3358ff585edeSJed Brown 
3359ff585edeSJed Brown   Output Parameter:
3360ff585edeSJed Brown . maij - the new MAIJ matrix
3361ff585edeSJed Brown 
33620bad9183SKris Buschelman   Operations provided:
33630fd73130SBarry Smith + MatMult
33640bad9183SKris Buschelman . MatMultTranspose
33650bad9183SKris Buschelman . MatMultAdd
33660bad9183SKris Buschelman . MatMultTransposeAdd
33670fd73130SBarry Smith - MatView
33680bad9183SKris Buschelman 
33690bad9183SKris Buschelman   Level: advanced
33700bad9183SKris Buschelman 
3371ff585edeSJed Brown .seealso: MatMAIJGetAIJ(), MatMAIJRedimension()
3372ff585edeSJed Brown @*/
3373be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
337482b1193eSBarry Smith {
3375dfbe8321SBarry Smith   PetscErrorCode ierr;
3376b24ad042SBarry Smith   PetscMPIInt    size;
3377b24ad042SBarry Smith   PetscInt       n;
337882b1193eSBarry Smith   Mat            B;
337982b1193eSBarry Smith 
338082b1193eSBarry Smith   PetscFunctionBegin;
3381d72c5c08SBarry Smith   ierr   = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3382d72c5c08SBarry Smith 
3383d72c5c08SBarry Smith   if (dof == 1) {
3384d72c5c08SBarry Smith     *maij = A;
3385d72c5c08SBarry Smith   } else {
33867adad957SLisandro Dalcin     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
3387d0f46423SBarry Smith     ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr);
3388bccba955SJed Brown     ierr = PetscLayoutSetBlockSize(B->rmap,dof);CHKERRQ(ierr);
3389bccba955SJed Brown     ierr = PetscLayoutSetBlockSize(B->cmap,dof);CHKERRQ(ierr);
3390bccba955SJed Brown     ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3391bccba955SJed Brown     ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3392cd3bbe55SBarry Smith     B->assembled    = PETSC_TRUE;
3393d72c5c08SBarry Smith 
33947adad957SLisandro Dalcin     ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
3395f4a53059SBarry Smith     if (size == 1) {
3396feb9fe23SJed Brown       Mat_SeqMAIJ    *b;
3397feb9fe23SJed Brown 
3398b9b97703SBarry Smith       ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr);
33994cb9d9b8SBarry Smith       B->ops->destroy = MatDestroy_SeqMAIJ;
34000fd73130SBarry Smith       B->ops->view    = MatView_SeqMAIJ;
3401feb9fe23SJed Brown       b      = (Mat_SeqMAIJ*)B->data;
3402b9b97703SBarry Smith       b->dof = dof;
34034cb9d9b8SBarry Smith       b->AIJ = A;
3404d72c5c08SBarry Smith       if (dof == 2) {
3405cd3bbe55SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_2;
3406cd3bbe55SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
3407cd3bbe55SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
3408cd3bbe55SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3409bcc973b7SBarry Smith       } else if (dof == 3) {
3410bcc973b7SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_3;
3411bcc973b7SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
3412bcc973b7SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
3413bcc973b7SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3414bcc973b7SBarry Smith       } else if (dof == 4) {
3415bcc973b7SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_4;
3416bcc973b7SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
3417bcc973b7SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
3418bcc973b7SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3419f9fae5adSBarry Smith       } else if (dof == 5) {
3420f9fae5adSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_5;
3421f9fae5adSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
3422f9fae5adSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
3423f9fae5adSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
34246cd98798SBarry Smith       } else if (dof == 6) {
34256cd98798SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_6;
34266cd98798SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
34276cd98798SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
34286cd98798SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3429ed8eea03SBarry Smith       } else if (dof == 7) {
3430ed8eea03SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_7;
3431ed8eea03SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
3432ed8eea03SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
3433ed8eea03SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
343466ed3db0SBarry Smith       } else if (dof == 8) {
343566ed3db0SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_8;
343666ed3db0SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
343766ed3db0SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
343866ed3db0SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
34392b692628SMatthew Knepley       } else if (dof == 9) {
34402b692628SMatthew Knepley         B->ops->mult             = MatMult_SeqMAIJ_9;
34412b692628SMatthew Knepley         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
34422b692628SMatthew Knepley         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
34432b692628SMatthew Knepley         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3444b9cda22cSBarry Smith       } else if (dof == 10) {
3445b9cda22cSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_10;
3446b9cda22cSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
3447b9cda22cSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
3448b9cda22cSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3449dbdd7285SBarry Smith       } else if (dof == 11) {
3450dbdd7285SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_11;
3451dbdd7285SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
3452dbdd7285SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
3453dbdd7285SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
34542f7816d4SBarry Smith       } else if (dof == 16) {
34552f7816d4SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_16;
34562f7816d4SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
34572f7816d4SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
34582f7816d4SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3459ed1c418cSBarry Smith       } else if (dof == 18) {
3460ed1c418cSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_18;
3461ed1c418cSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
3462ed1c418cSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
3463ed1c418cSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
346482b1193eSBarry Smith       } else {
34656861f193SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_N;
34666861f193SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
34676861f193SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
34686861f193SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
346982b1193eSBarry Smith       }
34707ba1a0bfSKris Buschelman       B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ;
34717ba1a0bfSKris Buschelman       B->ops->ptapnumeric_seqaij  = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
34729c4fc4c7SBarry Smith       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr);
3473f4a53059SBarry Smith     } else {
3474f4a53059SBarry Smith       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ *)A->data;
3475feb9fe23SJed Brown       Mat_MPIMAIJ *b;
3476f4a53059SBarry Smith       IS          from,to;
3477f4a53059SBarry Smith       Vec         gvec;
3478b24ad042SBarry Smith       PetscInt    *garray,i;
3479f4a53059SBarry Smith 
3480b9b97703SBarry Smith       ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr);
3481d72c5c08SBarry Smith       B->ops->destroy = MatDestroy_MPIMAIJ;
34820fd73130SBarry Smith       B->ops->view    = MatView_MPIMAIJ;
3483b9b97703SBarry Smith       b      = (Mat_MPIMAIJ*)B->data;
3484b9b97703SBarry Smith       b->dof = dof;
3485b9b97703SBarry Smith       b->A   = A;
34864cb9d9b8SBarry Smith       ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr);
34874cb9d9b8SBarry Smith       ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr);
34884cb9d9b8SBarry Smith 
3489f4a53059SBarry Smith       ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
3490f4a53059SBarry Smith       ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr);
349113288a74SBarry Smith       ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr);
3492f4a53059SBarry Smith 
3493f4a53059SBarry Smith       /* create two temporary Index sets for build scatter gather */
3494b24ad042SBarry Smith       ierr = PetscMalloc((n+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
3495f4a53059SBarry Smith       for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
34967adad957SLisandro Dalcin       ierr = ISCreateBlock(((PetscObject)A)->comm,dof,n,garray,&from);CHKERRQ(ierr);
3497f4a53059SBarry Smith       ierr = PetscFree(garray);CHKERRQ(ierr);
3498f4a53059SBarry Smith       ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);
3499f4a53059SBarry Smith 
3500f4a53059SBarry Smith       /* create temporary global vector to generate scatter context */
3501d0f46423SBarry Smith       ierr = VecCreateMPIWithArray(((PetscObject)A)->comm,dof*A->cmap->n,dof*A->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr);
350213288a74SBarry Smith       ierr = VecSetBlockSize(gvec,dof);CHKERRQ(ierr);
3503f4a53059SBarry Smith 
3504f4a53059SBarry Smith       /* generate the scatter context */
3505f4a53059SBarry Smith       ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
3506f4a53059SBarry Smith 
3507f4a53059SBarry Smith       ierr = ISDestroy(from);CHKERRQ(ierr);
3508f4a53059SBarry Smith       ierr = ISDestroy(to);CHKERRQ(ierr);
3509f4a53059SBarry Smith       ierr = VecDestroy(gvec);CHKERRQ(ierr);
3510f4a53059SBarry Smith 
3511f4a53059SBarry Smith       B->ops->mult             = MatMult_MPIMAIJ_dof;
35124cb9d9b8SBarry Smith       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
35134cb9d9b8SBarry Smith       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
35144cb9d9b8SBarry Smith       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
351595ddefa2SHong Zhang       B->ops->ptapsymbolic_mpiaij = MatPtAPSymbolic_MPIAIJ_MPIMAIJ;
351695ddefa2SHong Zhang       B->ops->ptapnumeric_mpiaij  = MatPtAPNumeric_MPIAIJ_MPIMAIJ;
35170fd73130SBarry Smith       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr);
3518f4a53059SBarry Smith     }
3519cd3bbe55SBarry Smith     *maij = B;
35200fd73130SBarry Smith     ierr = MatView_Private(B);CHKERRQ(ierr);
3521d72c5c08SBarry Smith   }
352282b1193eSBarry Smith   PetscFunctionReturn(0);
352382b1193eSBarry Smith }
3524