xref: /petsc/src/mat/impls/maij/maij.c (revision 8cc058d9cd56c1ccb3be12a47760ddfc446aaffc)
1be1d678aSKris Buschelman 
282b1193eSBarry Smith /*
3cd3bbe55SBarry Smith     Defines the basic matrix operations for the MAIJ  matrix storage format.
4cd3bbe55SBarry Smith   This format is used for restriction and interpolation operations for
5cd3bbe55SBarry Smith   multicomponent problems. It interpolates each component the same way
6cd3bbe55SBarry Smith   independently.
7cd3bbe55SBarry Smith 
8cd3bbe55SBarry Smith      We provide:
9cd3bbe55SBarry Smith          MatMult()
10cd3bbe55SBarry Smith          MatMultTranspose()
11cd3bbe55SBarry Smith          MatMultTransposeAdd()
12cd3bbe55SBarry Smith          MatMultAdd()
13cd3bbe55SBarry Smith           and
14cd3bbe55SBarry Smith          MatCreateMAIJ(Mat,dof,Mat*)
15f4a53059SBarry Smith 
16f4a53059SBarry Smith      This single directory handles both the sequential and parallel codes
1782b1193eSBarry Smith */
1882b1193eSBarry Smith 
19c6db04a5SJed Brown #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/
20c6db04a5SJed Brown #include <../src/mat/utils/freespace.h>
21b45d2f2cSJed Brown #include <petsc-private/vecimpl.h>
2282b1193eSBarry Smith 
234a2ae208SSatish Balay #undef __FUNCT__
244a2ae208SSatish Balay #define __FUNCT__ "MatMAIJGetAIJ"
25ff585edeSJed Brown /*@C
26ff585edeSJed Brown    MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix
27ff585edeSJed Brown 
28ff585edeSJed Brown    Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel
29ff585edeSJed Brown 
30ff585edeSJed Brown    Input Parameter:
31ff585edeSJed Brown .  A - the MAIJ matrix
32ff585edeSJed Brown 
33ff585edeSJed Brown    Output Parameter:
34ff585edeSJed Brown .  B - the AIJ matrix
35ff585edeSJed Brown 
36ff585edeSJed Brown    Level: advanced
37ff585edeSJed Brown 
38ff585edeSJed Brown    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
39ff585edeSJed Brown 
40ff585edeSJed Brown .seealso: MatCreateMAIJ()
41ff585edeSJed Brown @*/
427087cfbeSBarry Smith PetscErrorCode  MatMAIJGetAIJ(Mat A,Mat *B)
43b9b97703SBarry Smith {
44dfbe8321SBarry Smith   PetscErrorCode ierr;
45ace3abfcSBarry Smith   PetscBool      ismpimaij,isseqmaij;
46b9b97703SBarry Smith 
47b9b97703SBarry Smith   PetscFunctionBegin;
48251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr);
49251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr);
50b9b97703SBarry Smith   if (ismpimaij) {
51b9b97703SBarry Smith     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
52b9b97703SBarry Smith 
53b9b97703SBarry Smith     *B = b->A;
54b9b97703SBarry Smith   } else if (isseqmaij) {
55b9b97703SBarry Smith     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
56b9b97703SBarry Smith 
57b9b97703SBarry Smith     *B = b->AIJ;
58b9b97703SBarry Smith   } else {
59b9b97703SBarry Smith     *B = A;
60b9b97703SBarry Smith   }
61b9b97703SBarry Smith   PetscFunctionReturn(0);
62b9b97703SBarry Smith }
63b9b97703SBarry Smith 
644a2ae208SSatish Balay #undef __FUNCT__
654a2ae208SSatish Balay #define __FUNCT__ "MatMAIJRedimension"
66ff585edeSJed Brown /*@C
67ff585edeSJed Brown    MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size
68ff585edeSJed Brown 
693f9fe445SBarry Smith    Logically Collective
70ff585edeSJed Brown 
71ff585edeSJed Brown    Input Parameter:
72ff585edeSJed Brown +  A - the MAIJ matrix
73ff585edeSJed Brown -  dof - the block size for the new matrix
74ff585edeSJed Brown 
75ff585edeSJed Brown    Output Parameter:
76ff585edeSJed Brown .  B - the new MAIJ matrix
77ff585edeSJed Brown 
78ff585edeSJed Brown    Level: advanced
79ff585edeSJed Brown 
80ff585edeSJed Brown .seealso: MatCreateMAIJ()
81ff585edeSJed Brown @*/
827087cfbeSBarry Smith PetscErrorCode  MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
83b9b97703SBarry Smith {
84dfbe8321SBarry Smith   PetscErrorCode ierr;
850298fd71SBarry Smith   Mat            Aij = NULL;
86b9b97703SBarry Smith 
87b9b97703SBarry Smith   PetscFunctionBegin;
88c5eb9154SBarry Smith   PetscValidLogicalCollectiveInt(A,dof,2);
89b9b97703SBarry Smith   ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr);
90b9b97703SBarry Smith   ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr);
91b9b97703SBarry Smith   PetscFunctionReturn(0);
92b9b97703SBarry Smith }
93b9b97703SBarry Smith 
944a2ae208SSatish Balay #undef __FUNCT__
954a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_SeqMAIJ"
96dfbe8321SBarry Smith PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
9782b1193eSBarry Smith {
98dfbe8321SBarry Smith   PetscErrorCode ierr;
994cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
10082b1193eSBarry Smith 
10182b1193eSBarry Smith   PetscFunctionBegin;
1026bf464f9SBarry Smith   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
103bf0cc555SLisandro Dalcin   ierr = PetscFree(A->data);CHKERRQ(ierr);
10400de8ff0SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C","",NULL);CHKERRQ(ierr);
10500de8ff0SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaij_seqmaij_C","",NULL);CHKERRQ(ierr);
1064cb9d9b8SBarry Smith   PetscFunctionReturn(0);
1074cb9d9b8SBarry Smith }
1084cb9d9b8SBarry Smith 
1094a2ae208SSatish Balay #undef __FUNCT__
110356c569eSBarry Smith #define __FUNCT__ "MatSetUp_MAIJ"
111356c569eSBarry Smith PetscErrorCode MatSetUp_MAIJ(Mat A)
112356c569eSBarry Smith {
113356c569eSBarry Smith   PetscFunctionBegin;
114ce94432eSBarry Smith   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices");
115356c569eSBarry Smith   PetscFunctionReturn(0);
116356c569eSBarry Smith }
117356c569eSBarry Smith 
118356c569eSBarry Smith #undef __FUNCT__
1190fd73130SBarry Smith #define __FUNCT__ "MatView_SeqMAIJ"
1200fd73130SBarry Smith PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
1210fd73130SBarry Smith {
1220fd73130SBarry Smith   PetscErrorCode ierr;
1230fd73130SBarry Smith   Mat            B;
1240fd73130SBarry Smith 
1250fd73130SBarry Smith   PetscFunctionBegin;
126a2ea699eSBarry Smith   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1270fd73130SBarry Smith   ierr = MatView(B,viewer);CHKERRQ(ierr);
1286bf464f9SBarry Smith   ierr = MatDestroy(&B);CHKERRQ(ierr);
1290fd73130SBarry Smith   PetscFunctionReturn(0);
1300fd73130SBarry Smith }
1310fd73130SBarry Smith 
1320fd73130SBarry Smith #undef __FUNCT__
1330fd73130SBarry Smith #define __FUNCT__ "MatView_MPIMAIJ"
1340fd73130SBarry Smith PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
1350fd73130SBarry Smith {
1360fd73130SBarry Smith   PetscErrorCode ierr;
1370fd73130SBarry Smith   Mat            B;
1380fd73130SBarry Smith 
1390fd73130SBarry Smith   PetscFunctionBegin;
140a2ea699eSBarry Smith   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1410fd73130SBarry Smith   ierr = MatView(B,viewer);CHKERRQ(ierr);
1426bf464f9SBarry Smith   ierr = MatDestroy(&B);CHKERRQ(ierr);
1430fd73130SBarry Smith   PetscFunctionReturn(0);
1440fd73130SBarry Smith }
1450fd73130SBarry Smith 
1460fd73130SBarry Smith #undef __FUNCT__
1474a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_MPIMAIJ"
148dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
1494cb9d9b8SBarry Smith {
150dfbe8321SBarry Smith   PetscErrorCode ierr;
1514cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
1524cb9d9b8SBarry Smith 
1534cb9d9b8SBarry Smith   PetscFunctionBegin;
1546bf464f9SBarry Smith   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
1556bf464f9SBarry Smith   ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
1566bf464f9SBarry Smith   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
1576bf464f9SBarry Smith   ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
1586bf464f9SBarry Smith   ierr = VecDestroy(&b->w);CHKERRQ(ierr);
159bf0cc555SLisandro Dalcin   ierr = PetscFree(A->data);CHKERRQ(ierr);
16000de8ff0SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C","",NULL);CHKERRQ(ierr);
16100de8ff0SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_mpiaij_mpimaij_C","",NULL);CHKERRQ(ierr);
162dbd8c25aSHong Zhang   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
163b9b97703SBarry Smith   PetscFunctionReturn(0);
164b9b97703SBarry Smith }
165b9b97703SBarry Smith 
1660bad9183SKris Buschelman /*MC
167fafad747SKris Buschelman   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
1680bad9183SKris Buschelman   multicomponent problems, interpolating or restricting each component the same way independently.
1690bad9183SKris Buschelman   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
1700bad9183SKris Buschelman 
1710bad9183SKris Buschelman   Operations provided:
1720bad9183SKris Buschelman . MatMult
1730bad9183SKris Buschelman . MatMultTranspose
1740bad9183SKris Buschelman . MatMultAdd
1750bad9183SKris Buschelman . MatMultTransposeAdd
1760bad9183SKris Buschelman 
1770bad9183SKris Buschelman   Level: advanced
1780bad9183SKris Buschelman 
179b409243cSBarry Smith .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ()
1800bad9183SKris Buschelman M*/
1810bad9183SKris Buschelman 
1824a2ae208SSatish Balay #undef __FUNCT__
1834a2ae208SSatish Balay #define __FUNCT__ "MatCreate_MAIJ"
184*8cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
18582b1193eSBarry Smith {
186dfbe8321SBarry Smith   PetscErrorCode ierr;
1874cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b;
18864b52464SHong Zhang   PetscMPIInt    size;
18982b1193eSBarry Smith 
19082b1193eSBarry Smith   PetscFunctionBegin;
19138f2d2fdSLisandro Dalcin   ierr     = PetscNewLog(A,Mat_MPIMAIJ,&b);CHKERRQ(ierr);
192b0a32e0cSBarry Smith   A->data  = (void*)b;
19326fbe8dcSKarl Rupp 
194cd3bbe55SBarry Smith   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
19526fbe8dcSKarl Rupp 
196356c569eSBarry Smith   A->ops->setup = MatSetUp_MAIJ;
197f4a53059SBarry Smith 
198cd3bbe55SBarry Smith   b->AIJ  = 0;
199cd3bbe55SBarry Smith   b->dof  = 0;
200f4a53059SBarry Smith   b->OAIJ = 0;
201f4a53059SBarry Smith   b->ctx  = 0;
202f4a53059SBarry Smith   b->w    = 0;
203ce94432eSBarry Smith   ierr    = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
20464b52464SHong Zhang   if (size == 1) {
20564b52464SHong Zhang     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr);
20664b52464SHong Zhang   } else {
20764b52464SHong Zhang     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr);
20864b52464SHong Zhang   }
20982b1193eSBarry Smith   PetscFunctionReturn(0);
21082b1193eSBarry Smith }
21182b1193eSBarry Smith 
212cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/
2134a2ae208SSatish Balay #undef __FUNCT__
214b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_2"
215dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
21682b1193eSBarry Smith {
2174cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
218bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
219d2840e09SBarry Smith   const PetscScalar *x,*v;
220d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2;
221dfbe8321SBarry Smith   PetscErrorCode    ierr;
222d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
223d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
22482b1193eSBarry Smith 
225bcc973b7SBarry Smith   PetscFunctionBegin;
2263649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2271ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
228bcc973b7SBarry Smith   idx  = a->j;
229bcc973b7SBarry Smith   v    = a->a;
230bcc973b7SBarry Smith   ii   = a->i;
231bcc973b7SBarry Smith 
232bcc973b7SBarry Smith   for (i=0; i<m; i++) {
233bcc973b7SBarry Smith     jrow  = ii[i];
234bcc973b7SBarry Smith     n     = ii[i+1] - jrow;
235bcc973b7SBarry Smith     sum1  = 0.0;
236bcc973b7SBarry Smith     sum2  = 0.0;
23726fbe8dcSKarl Rupp 
238b3c51c66SMatthew Knepley     nonzerorow += (n>0);
239bcc973b7SBarry Smith     for (j=0; j<n; j++) {
240bcc973b7SBarry Smith       sum1 += v[jrow]*x[2*idx[jrow]];
241bcc973b7SBarry Smith       sum2 += v[jrow]*x[2*idx[jrow]+1];
242bcc973b7SBarry Smith       jrow++;
243bcc973b7SBarry Smith     }
244bcc973b7SBarry Smith     y[2*i]   = sum1;
245bcc973b7SBarry Smith     y[2*i+1] = sum2;
246bcc973b7SBarry Smith   }
247bcc973b7SBarry Smith 
248dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr);
2493649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2501ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
25182b1193eSBarry Smith   PetscFunctionReturn(0);
25282b1193eSBarry Smith }
253bcc973b7SBarry Smith 
2544a2ae208SSatish Balay #undef __FUNCT__
255b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2"
256dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
25782b1193eSBarry Smith {
2584cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
259bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
260d2840e09SBarry Smith   const PetscScalar *x,*v;
261d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2;
262dfbe8321SBarry Smith   PetscErrorCode    ierr;
263d2840e09SBarry Smith   PetscInt          n,i;
264d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
26582b1193eSBarry Smith 
266bcc973b7SBarry Smith   PetscFunctionBegin;
267d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2683649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2691ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
270bfec09a0SHong Zhang 
271bcc973b7SBarry Smith   for (i=0; i<m; i++) {
272bfec09a0SHong Zhang     idx    = a->j + a->i[i];
273bfec09a0SHong Zhang     v      = a->a + a->i[i];
274bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
275bcc973b7SBarry Smith     alpha1 = x[2*i];
276bcc973b7SBarry Smith     alpha2 = x[2*i+1];
27726fbe8dcSKarl Rupp     while (n-->0) {
27826fbe8dcSKarl Rupp       y[2*(*idx)]   += alpha1*(*v);
27926fbe8dcSKarl Rupp       y[2*(*idx)+1] += alpha2*(*v);
28026fbe8dcSKarl Rupp       idx++; v++;
28126fbe8dcSKarl Rupp     }
282bcc973b7SBarry Smith   }
283dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
2843649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2851ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
28682b1193eSBarry Smith   PetscFunctionReturn(0);
28782b1193eSBarry Smith }
288bcc973b7SBarry Smith 
2894a2ae208SSatish Balay #undef __FUNCT__
290b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_2"
291dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
29282b1193eSBarry Smith {
2934cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
294bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
295d2840e09SBarry Smith   const PetscScalar *x,*v;
296d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2;
297dfbe8321SBarry Smith   PetscErrorCode    ierr;
298b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
299d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
30082b1193eSBarry Smith 
301bcc973b7SBarry Smith   PetscFunctionBegin;
302f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
3033649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
3041ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
305bcc973b7SBarry Smith   idx  = a->j;
306bcc973b7SBarry Smith   v    = a->a;
307bcc973b7SBarry Smith   ii   = a->i;
308bcc973b7SBarry Smith 
309bcc973b7SBarry Smith   for (i=0; i<m; i++) {
310bcc973b7SBarry Smith     jrow = ii[i];
311bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
312bcc973b7SBarry Smith     sum1 = 0.0;
313bcc973b7SBarry Smith     sum2 = 0.0;
314bcc973b7SBarry Smith     for (j=0; j<n; j++) {
315bcc973b7SBarry Smith       sum1 += v[jrow]*x[2*idx[jrow]];
316bcc973b7SBarry Smith       sum2 += v[jrow]*x[2*idx[jrow]+1];
317bcc973b7SBarry Smith       jrow++;
318bcc973b7SBarry Smith     }
319bcc973b7SBarry Smith     y[2*i]   += sum1;
320bcc973b7SBarry Smith     y[2*i+1] += sum2;
321bcc973b7SBarry Smith   }
322bcc973b7SBarry Smith 
323dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
3243649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
3251ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
326bcc973b7SBarry Smith   PetscFunctionReturn(0);
32782b1193eSBarry Smith }
3284a2ae208SSatish Balay #undef __FUNCT__
329b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2"
330dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
33182b1193eSBarry Smith {
3324cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
333bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
334d2840e09SBarry Smith   const PetscScalar *x,*v;
335d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2;
336dfbe8321SBarry Smith   PetscErrorCode    ierr;
337d2840e09SBarry Smith   PetscInt          n,i;
338d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
33982b1193eSBarry Smith 
340bcc973b7SBarry Smith   PetscFunctionBegin;
341f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
3423649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
3431ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
344bfec09a0SHong Zhang 
345bcc973b7SBarry Smith   for (i=0; i<m; i++) {
346bfec09a0SHong Zhang     idx    = a->j + a->i[i];
347bfec09a0SHong Zhang     v      = a->a + a->i[i];
348bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
349bcc973b7SBarry Smith     alpha1 = x[2*i];
350bcc973b7SBarry Smith     alpha2 = x[2*i+1];
35126fbe8dcSKarl Rupp     while (n-->0) {
35226fbe8dcSKarl Rupp       y[2*(*idx)]   += alpha1*(*v);
35326fbe8dcSKarl Rupp       y[2*(*idx)+1] += alpha2*(*v);
35426fbe8dcSKarl Rupp       idx++; v++;
35526fbe8dcSKarl Rupp     }
356bcc973b7SBarry Smith   }
357dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
3583649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
3591ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
360bcc973b7SBarry Smith   PetscFunctionReturn(0);
36182b1193eSBarry Smith }
362cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/
3634a2ae208SSatish Balay #undef __FUNCT__
364b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_3"
365dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
366bcc973b7SBarry Smith {
3674cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
368bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
369d2840e09SBarry Smith   const PetscScalar *x,*v;
370d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3;
371dfbe8321SBarry Smith   PetscErrorCode    ierr;
372d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
373d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
37482b1193eSBarry Smith 
375bcc973b7SBarry Smith   PetscFunctionBegin;
3763649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
3771ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
378bcc973b7SBarry Smith   idx  = a->j;
379bcc973b7SBarry Smith   v    = a->a;
380bcc973b7SBarry Smith   ii   = a->i;
381bcc973b7SBarry Smith 
382bcc973b7SBarry Smith   for (i=0; i<m; i++) {
383bcc973b7SBarry Smith     jrow  = ii[i];
384bcc973b7SBarry Smith     n     = ii[i+1] - jrow;
385bcc973b7SBarry Smith     sum1  = 0.0;
386bcc973b7SBarry Smith     sum2  = 0.0;
387bcc973b7SBarry Smith     sum3  = 0.0;
38826fbe8dcSKarl Rupp 
389b3c51c66SMatthew Knepley     nonzerorow += (n>0);
390bcc973b7SBarry Smith     for (j=0; j<n; j++) {
391bcc973b7SBarry Smith       sum1 += v[jrow]*x[3*idx[jrow]];
392bcc973b7SBarry Smith       sum2 += v[jrow]*x[3*idx[jrow]+1];
393bcc973b7SBarry Smith       sum3 += v[jrow]*x[3*idx[jrow]+2];
394bcc973b7SBarry Smith       jrow++;
395bcc973b7SBarry Smith      }
396bcc973b7SBarry Smith     y[3*i]   = sum1;
397bcc973b7SBarry Smith     y[3*i+1] = sum2;
398bcc973b7SBarry Smith     y[3*i+2] = sum3;
399bcc973b7SBarry Smith   }
400bcc973b7SBarry Smith 
401dc0b31edSSatish Balay   ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr);
4023649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
4031ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
404bcc973b7SBarry Smith   PetscFunctionReturn(0);
405bcc973b7SBarry Smith }
406bcc973b7SBarry Smith 
4074a2ae208SSatish Balay #undef __FUNCT__
408b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3"
409dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
410bcc973b7SBarry Smith {
4114cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
412bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
413d2840e09SBarry Smith   const PetscScalar *x,*v;
414d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3;
415dfbe8321SBarry Smith   PetscErrorCode    ierr;
416d2840e09SBarry Smith   PetscInt          n,i;
417d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
418bcc973b7SBarry Smith 
419bcc973b7SBarry Smith   PetscFunctionBegin;
420d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
4213649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
4221ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
423bfec09a0SHong Zhang 
424bcc973b7SBarry Smith   for (i=0; i<m; i++) {
425bfec09a0SHong Zhang     idx    = a->j + a->i[i];
426bfec09a0SHong Zhang     v      = a->a + a->i[i];
427bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
428bcc973b7SBarry Smith     alpha1 = x[3*i];
429bcc973b7SBarry Smith     alpha2 = x[3*i+1];
430bcc973b7SBarry Smith     alpha3 = x[3*i+2];
431bcc973b7SBarry Smith     while (n-->0) {
432bcc973b7SBarry Smith       y[3*(*idx)]   += alpha1*(*v);
433bcc973b7SBarry Smith       y[3*(*idx)+1] += alpha2*(*v);
434bcc973b7SBarry Smith       y[3*(*idx)+2] += alpha3*(*v);
435bcc973b7SBarry Smith       idx++; v++;
436bcc973b7SBarry Smith     }
437bcc973b7SBarry Smith   }
438dc0b31edSSatish Balay   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
4393649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
4401ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
441bcc973b7SBarry Smith   PetscFunctionReturn(0);
442bcc973b7SBarry Smith }
443bcc973b7SBarry Smith 
4444a2ae208SSatish Balay #undef __FUNCT__
445b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_3"
446dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
447bcc973b7SBarry Smith {
4484cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
449bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
450d2840e09SBarry Smith   const PetscScalar *x,*v;
451d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3;
452dfbe8321SBarry Smith   PetscErrorCode    ierr;
453b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
454d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
455bcc973b7SBarry Smith 
456bcc973b7SBarry Smith   PetscFunctionBegin;
457f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
4583649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
4591ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
460bcc973b7SBarry Smith   idx  = a->j;
461bcc973b7SBarry Smith   v    = a->a;
462bcc973b7SBarry Smith   ii   = a->i;
463bcc973b7SBarry Smith 
464bcc973b7SBarry Smith   for (i=0; i<m; i++) {
465bcc973b7SBarry Smith     jrow = ii[i];
466bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
467bcc973b7SBarry Smith     sum1 = 0.0;
468bcc973b7SBarry Smith     sum2 = 0.0;
469bcc973b7SBarry Smith     sum3 = 0.0;
470bcc973b7SBarry Smith     for (j=0; j<n; j++) {
471bcc973b7SBarry Smith       sum1 += v[jrow]*x[3*idx[jrow]];
472bcc973b7SBarry Smith       sum2 += v[jrow]*x[3*idx[jrow]+1];
473bcc973b7SBarry Smith       sum3 += v[jrow]*x[3*idx[jrow]+2];
474bcc973b7SBarry Smith       jrow++;
475bcc973b7SBarry Smith     }
476bcc973b7SBarry Smith     y[3*i]   += sum1;
477bcc973b7SBarry Smith     y[3*i+1] += sum2;
478bcc973b7SBarry Smith     y[3*i+2] += sum3;
479bcc973b7SBarry Smith   }
480bcc973b7SBarry Smith 
481dc0b31edSSatish Balay   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
4823649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
4831ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
484bcc973b7SBarry Smith   PetscFunctionReturn(0);
485bcc973b7SBarry Smith }
4864a2ae208SSatish Balay #undef __FUNCT__
487b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3"
488dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
489bcc973b7SBarry Smith {
4904cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
491bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
492d2840e09SBarry Smith   const PetscScalar *x,*v;
493d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3;
494dfbe8321SBarry Smith   PetscErrorCode    ierr;
495d2840e09SBarry Smith   PetscInt          n,i;
496d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
497bcc973b7SBarry Smith 
498bcc973b7SBarry Smith   PetscFunctionBegin;
499f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
5003649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
5011ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
502bcc973b7SBarry Smith   for (i=0; i<m; i++) {
503bfec09a0SHong Zhang     idx    = a->j + a->i[i];
504bfec09a0SHong Zhang     v      = a->a + a->i[i];
505bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
506bcc973b7SBarry Smith     alpha1 = x[3*i];
507bcc973b7SBarry Smith     alpha2 = x[3*i+1];
508bcc973b7SBarry Smith     alpha3 = x[3*i+2];
509bcc973b7SBarry Smith     while (n-->0) {
510bcc973b7SBarry Smith       y[3*(*idx)]   += alpha1*(*v);
511bcc973b7SBarry Smith       y[3*(*idx)+1] += alpha2*(*v);
512bcc973b7SBarry Smith       y[3*(*idx)+2] += alpha3*(*v);
513bcc973b7SBarry Smith       idx++; v++;
514bcc973b7SBarry Smith     }
515bcc973b7SBarry Smith   }
516dc0b31edSSatish Balay   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
5173649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
5181ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
519bcc973b7SBarry Smith   PetscFunctionReturn(0);
520bcc973b7SBarry Smith }
521bcc973b7SBarry Smith 
522bcc973b7SBarry Smith /* ------------------------------------------------------------------------------*/
5234a2ae208SSatish Balay #undef __FUNCT__
524b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_4"
525dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
526bcc973b7SBarry Smith {
5274cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
528bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
529d2840e09SBarry Smith   const PetscScalar *x,*v;
530d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4;
531dfbe8321SBarry Smith   PetscErrorCode    ierr;
532d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
533d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
534bcc973b7SBarry Smith 
535bcc973b7SBarry Smith   PetscFunctionBegin;
5363649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
5371ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
538bcc973b7SBarry Smith   idx  = a->j;
539bcc973b7SBarry Smith   v    = a->a;
540bcc973b7SBarry Smith   ii   = a->i;
541bcc973b7SBarry Smith 
542bcc973b7SBarry Smith   for (i=0; i<m; i++) {
543bcc973b7SBarry Smith     jrow        = ii[i];
544bcc973b7SBarry Smith     n           = ii[i+1] - jrow;
545bcc973b7SBarry Smith     sum1        = 0.0;
546bcc973b7SBarry Smith     sum2        = 0.0;
547bcc973b7SBarry Smith     sum3        = 0.0;
548bcc973b7SBarry Smith     sum4        = 0.0;
549b3c51c66SMatthew Knepley     nonzerorow += (n>0);
550bcc973b7SBarry Smith     for (j=0; j<n; j++) {
551bcc973b7SBarry Smith       sum1 += v[jrow]*x[4*idx[jrow]];
552bcc973b7SBarry Smith       sum2 += v[jrow]*x[4*idx[jrow]+1];
553bcc973b7SBarry Smith       sum3 += v[jrow]*x[4*idx[jrow]+2];
554bcc973b7SBarry Smith       sum4 += v[jrow]*x[4*idx[jrow]+3];
555bcc973b7SBarry Smith       jrow++;
556bcc973b7SBarry Smith     }
557bcc973b7SBarry Smith     y[4*i]   = sum1;
558bcc973b7SBarry Smith     y[4*i+1] = sum2;
559bcc973b7SBarry Smith     y[4*i+2] = sum3;
560bcc973b7SBarry Smith     y[4*i+3] = sum4;
561bcc973b7SBarry Smith   }
562bcc973b7SBarry Smith 
563dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr);
5643649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
5651ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
566bcc973b7SBarry Smith   PetscFunctionReturn(0);
567bcc973b7SBarry Smith }
568bcc973b7SBarry Smith 
5694a2ae208SSatish Balay #undef __FUNCT__
570b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4"
571dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
572bcc973b7SBarry Smith {
5734cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
574bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
575d2840e09SBarry Smith   const PetscScalar *x,*v;
576d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
577dfbe8321SBarry Smith   PetscErrorCode    ierr;
578d2840e09SBarry Smith   PetscInt          n,i;
579d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
580bcc973b7SBarry Smith 
581bcc973b7SBarry Smith   PetscFunctionBegin;
582d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
5833649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
5841ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
585bcc973b7SBarry Smith   for (i=0; i<m; i++) {
586bfec09a0SHong Zhang     idx    = a->j + a->i[i];
587bfec09a0SHong Zhang     v      = a->a + a->i[i];
588bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
589bcc973b7SBarry Smith     alpha1 = x[4*i];
590bcc973b7SBarry Smith     alpha2 = x[4*i+1];
591bcc973b7SBarry Smith     alpha3 = x[4*i+2];
592bcc973b7SBarry Smith     alpha4 = x[4*i+3];
593bcc973b7SBarry Smith     while (n-->0) {
594bcc973b7SBarry Smith       y[4*(*idx)]   += alpha1*(*v);
595bcc973b7SBarry Smith       y[4*(*idx)+1] += alpha2*(*v);
596bcc973b7SBarry Smith       y[4*(*idx)+2] += alpha3*(*v);
597bcc973b7SBarry Smith       y[4*(*idx)+3] += alpha4*(*v);
598bcc973b7SBarry Smith       idx++; v++;
599bcc973b7SBarry Smith     }
600bcc973b7SBarry Smith   }
601dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
6023649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
6031ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
604bcc973b7SBarry Smith   PetscFunctionReturn(0);
605bcc973b7SBarry Smith }
606bcc973b7SBarry Smith 
6074a2ae208SSatish Balay #undef __FUNCT__
608b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_4"
609dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
610bcc973b7SBarry Smith {
6114cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
612f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
613d2840e09SBarry Smith   const PetscScalar *x,*v;
614d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4;
615dfbe8321SBarry Smith   PetscErrorCode    ierr;
616b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
617d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
618f9fae5adSBarry Smith 
619f9fae5adSBarry Smith   PetscFunctionBegin;
620f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
6213649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
6221ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
623f9fae5adSBarry Smith   idx  = a->j;
624f9fae5adSBarry Smith   v    = a->a;
625f9fae5adSBarry Smith   ii   = a->i;
626f9fae5adSBarry Smith 
627f9fae5adSBarry Smith   for (i=0; i<m; i++) {
628f9fae5adSBarry Smith     jrow = ii[i];
629f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
630f9fae5adSBarry Smith     sum1 = 0.0;
631f9fae5adSBarry Smith     sum2 = 0.0;
632f9fae5adSBarry Smith     sum3 = 0.0;
633f9fae5adSBarry Smith     sum4 = 0.0;
634f9fae5adSBarry Smith     for (j=0; j<n; j++) {
635f9fae5adSBarry Smith       sum1 += v[jrow]*x[4*idx[jrow]];
636f9fae5adSBarry Smith       sum2 += v[jrow]*x[4*idx[jrow]+1];
637f9fae5adSBarry Smith       sum3 += v[jrow]*x[4*idx[jrow]+2];
638f9fae5adSBarry Smith       sum4 += v[jrow]*x[4*idx[jrow]+3];
639f9fae5adSBarry Smith       jrow++;
640f9fae5adSBarry Smith     }
641f9fae5adSBarry Smith     y[4*i]   += sum1;
642f9fae5adSBarry Smith     y[4*i+1] += sum2;
643f9fae5adSBarry Smith     y[4*i+2] += sum3;
644f9fae5adSBarry Smith     y[4*i+3] += sum4;
645f9fae5adSBarry Smith   }
646f9fae5adSBarry Smith 
647dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
6483649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
6491ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
650f9fae5adSBarry Smith   PetscFunctionReturn(0);
651bcc973b7SBarry Smith }
6524a2ae208SSatish Balay #undef __FUNCT__
653b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4"
654dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
655bcc973b7SBarry Smith {
6564cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
657f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
658d2840e09SBarry Smith   const PetscScalar *x,*v;
659d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
660dfbe8321SBarry Smith   PetscErrorCode    ierr;
661d2840e09SBarry Smith   PetscInt          n,i;
662d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
663f9fae5adSBarry Smith 
664f9fae5adSBarry Smith   PetscFunctionBegin;
665f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
6663649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
6671ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
668bfec09a0SHong Zhang 
669f9fae5adSBarry Smith   for (i=0; i<m; i++) {
670bfec09a0SHong Zhang     idx    = a->j + a->i[i];
671bfec09a0SHong Zhang     v      = a->a + a->i[i];
672f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
673f9fae5adSBarry Smith     alpha1 = x[4*i];
674f9fae5adSBarry Smith     alpha2 = x[4*i+1];
675f9fae5adSBarry Smith     alpha3 = x[4*i+2];
676f9fae5adSBarry Smith     alpha4 = x[4*i+3];
677f9fae5adSBarry Smith     while (n-->0) {
678f9fae5adSBarry Smith       y[4*(*idx)]   += alpha1*(*v);
679f9fae5adSBarry Smith       y[4*(*idx)+1] += alpha2*(*v);
680f9fae5adSBarry Smith       y[4*(*idx)+2] += alpha3*(*v);
681f9fae5adSBarry Smith       y[4*(*idx)+3] += alpha4*(*v);
682f9fae5adSBarry Smith       idx++; v++;
683f9fae5adSBarry Smith     }
684f9fae5adSBarry Smith   }
685dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
6863649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
6871ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
688f9fae5adSBarry Smith   PetscFunctionReturn(0);
689f9fae5adSBarry Smith }
690f9fae5adSBarry Smith /* ------------------------------------------------------------------------------*/
6916cd98798SBarry Smith 
6924a2ae208SSatish Balay #undef __FUNCT__
693b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_5"
694dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
695f9fae5adSBarry Smith {
6964cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
697f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
698d2840e09SBarry Smith   const PetscScalar *x,*v;
699d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
700dfbe8321SBarry Smith   PetscErrorCode    ierr;
701d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
702d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
703f9fae5adSBarry Smith 
704f9fae5adSBarry Smith   PetscFunctionBegin;
7053649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
7061ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
707f9fae5adSBarry Smith   idx  = a->j;
708f9fae5adSBarry Smith   v    = a->a;
709f9fae5adSBarry Smith   ii   = a->i;
710f9fae5adSBarry Smith 
711f9fae5adSBarry Smith   for (i=0; i<m; i++) {
712f9fae5adSBarry Smith     jrow  = ii[i];
713f9fae5adSBarry Smith     n     = ii[i+1] - jrow;
714f9fae5adSBarry Smith     sum1  = 0.0;
715f9fae5adSBarry Smith     sum2  = 0.0;
716f9fae5adSBarry Smith     sum3  = 0.0;
717f9fae5adSBarry Smith     sum4  = 0.0;
718f9fae5adSBarry Smith     sum5  = 0.0;
71926fbe8dcSKarl Rupp 
720b3c51c66SMatthew Knepley     nonzerorow += (n>0);
721f9fae5adSBarry Smith     for (j=0; j<n; j++) {
722f9fae5adSBarry Smith       sum1 += v[jrow]*x[5*idx[jrow]];
723f9fae5adSBarry Smith       sum2 += v[jrow]*x[5*idx[jrow]+1];
724f9fae5adSBarry Smith       sum3 += v[jrow]*x[5*idx[jrow]+2];
725f9fae5adSBarry Smith       sum4 += v[jrow]*x[5*idx[jrow]+3];
726f9fae5adSBarry Smith       sum5 += v[jrow]*x[5*idx[jrow]+4];
727f9fae5adSBarry Smith       jrow++;
728f9fae5adSBarry Smith     }
729f9fae5adSBarry Smith     y[5*i]   = sum1;
730f9fae5adSBarry Smith     y[5*i+1] = sum2;
731f9fae5adSBarry Smith     y[5*i+2] = sum3;
732f9fae5adSBarry Smith     y[5*i+3] = sum4;
733f9fae5adSBarry Smith     y[5*i+4] = sum5;
734f9fae5adSBarry Smith   }
735f9fae5adSBarry Smith 
736dc0b31edSSatish Balay   ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr);
7373649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
7381ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
739f9fae5adSBarry Smith   PetscFunctionReturn(0);
740f9fae5adSBarry Smith }
741f9fae5adSBarry Smith 
7424a2ae208SSatish Balay #undef __FUNCT__
743b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5"
744dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
745f9fae5adSBarry Smith {
7464cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
747f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
748d2840e09SBarry Smith   const PetscScalar *x,*v;
749d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
750dfbe8321SBarry Smith   PetscErrorCode    ierr;
751d2840e09SBarry Smith   PetscInt          n,i;
752d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
753f9fae5adSBarry Smith 
754f9fae5adSBarry Smith   PetscFunctionBegin;
755d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
7563649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
7571ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
758bfec09a0SHong Zhang 
759f9fae5adSBarry Smith   for (i=0; i<m; i++) {
760bfec09a0SHong Zhang     idx    = a->j + a->i[i];
761bfec09a0SHong Zhang     v      = a->a + a->i[i];
762f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
763f9fae5adSBarry Smith     alpha1 = x[5*i];
764f9fae5adSBarry Smith     alpha2 = x[5*i+1];
765f9fae5adSBarry Smith     alpha3 = x[5*i+2];
766f9fae5adSBarry Smith     alpha4 = x[5*i+3];
767f9fae5adSBarry Smith     alpha5 = x[5*i+4];
768f9fae5adSBarry Smith     while (n-->0) {
769f9fae5adSBarry Smith       y[5*(*idx)]   += alpha1*(*v);
770f9fae5adSBarry Smith       y[5*(*idx)+1] += alpha2*(*v);
771f9fae5adSBarry Smith       y[5*(*idx)+2] += alpha3*(*v);
772f9fae5adSBarry Smith       y[5*(*idx)+3] += alpha4*(*v);
773f9fae5adSBarry Smith       y[5*(*idx)+4] += alpha5*(*v);
774f9fae5adSBarry Smith       idx++; v++;
775f9fae5adSBarry Smith     }
776f9fae5adSBarry Smith   }
777dc0b31edSSatish Balay   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
7783649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
7791ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
780f9fae5adSBarry Smith   PetscFunctionReturn(0);
781f9fae5adSBarry Smith }
782f9fae5adSBarry Smith 
7834a2ae208SSatish Balay #undef __FUNCT__
784b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_5"
785dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
786f9fae5adSBarry Smith {
7874cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
788f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
789d2840e09SBarry Smith   const PetscScalar *x,*v;
790d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
791dfbe8321SBarry Smith   PetscErrorCode    ierr;
792b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
793d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
794f9fae5adSBarry Smith 
795f9fae5adSBarry Smith   PetscFunctionBegin;
796f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
7973649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
7981ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
799f9fae5adSBarry Smith   idx  = a->j;
800f9fae5adSBarry Smith   v    = a->a;
801f9fae5adSBarry Smith   ii   = a->i;
802f9fae5adSBarry Smith 
803f9fae5adSBarry Smith   for (i=0; i<m; i++) {
804f9fae5adSBarry Smith     jrow = ii[i];
805f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
806f9fae5adSBarry Smith     sum1 = 0.0;
807f9fae5adSBarry Smith     sum2 = 0.0;
808f9fae5adSBarry Smith     sum3 = 0.0;
809f9fae5adSBarry Smith     sum4 = 0.0;
810f9fae5adSBarry Smith     sum5 = 0.0;
811f9fae5adSBarry Smith     for (j=0; j<n; j++) {
812f9fae5adSBarry Smith       sum1 += v[jrow]*x[5*idx[jrow]];
813f9fae5adSBarry Smith       sum2 += v[jrow]*x[5*idx[jrow]+1];
814f9fae5adSBarry Smith       sum3 += v[jrow]*x[5*idx[jrow]+2];
815f9fae5adSBarry Smith       sum4 += v[jrow]*x[5*idx[jrow]+3];
816f9fae5adSBarry Smith       sum5 += v[jrow]*x[5*idx[jrow]+4];
817f9fae5adSBarry Smith       jrow++;
818f9fae5adSBarry Smith     }
819f9fae5adSBarry Smith     y[5*i]   += sum1;
820f9fae5adSBarry Smith     y[5*i+1] += sum2;
821f9fae5adSBarry Smith     y[5*i+2] += sum3;
822f9fae5adSBarry Smith     y[5*i+3] += sum4;
823f9fae5adSBarry Smith     y[5*i+4] += sum5;
824f9fae5adSBarry Smith   }
825f9fae5adSBarry Smith 
826dc0b31edSSatish Balay   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
8273649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
8281ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
829f9fae5adSBarry Smith   PetscFunctionReturn(0);
830f9fae5adSBarry Smith }
831f9fae5adSBarry Smith 
8324a2ae208SSatish Balay #undef __FUNCT__
833b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5"
834dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
835f9fae5adSBarry Smith {
8364cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
837f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
838d2840e09SBarry Smith   const PetscScalar *x,*v;
839d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
840dfbe8321SBarry Smith   PetscErrorCode    ierr;
841d2840e09SBarry Smith   PetscInt          n,i;
842d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
843f9fae5adSBarry Smith 
844f9fae5adSBarry Smith   PetscFunctionBegin;
845f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
8463649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
8471ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
848bfec09a0SHong Zhang 
849f9fae5adSBarry Smith   for (i=0; i<m; i++) {
850bfec09a0SHong Zhang     idx    = a->j + a->i[i];
851bfec09a0SHong Zhang     v      = a->a + a->i[i];
852f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
853f9fae5adSBarry Smith     alpha1 = x[5*i];
854f9fae5adSBarry Smith     alpha2 = x[5*i+1];
855f9fae5adSBarry Smith     alpha3 = x[5*i+2];
856f9fae5adSBarry Smith     alpha4 = x[5*i+3];
857f9fae5adSBarry Smith     alpha5 = x[5*i+4];
858f9fae5adSBarry Smith     while (n-->0) {
859f9fae5adSBarry Smith       y[5*(*idx)]   += alpha1*(*v);
860f9fae5adSBarry Smith       y[5*(*idx)+1] += alpha2*(*v);
861f9fae5adSBarry Smith       y[5*(*idx)+2] += alpha3*(*v);
862f9fae5adSBarry Smith       y[5*(*idx)+3] += alpha4*(*v);
863f9fae5adSBarry Smith       y[5*(*idx)+4] += alpha5*(*v);
864f9fae5adSBarry Smith       idx++; v++;
865f9fae5adSBarry Smith     }
866f9fae5adSBarry Smith   }
867dc0b31edSSatish Balay   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
8683649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
8691ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
870f9fae5adSBarry Smith   PetscFunctionReturn(0);
871bcc973b7SBarry Smith }
872bcc973b7SBarry Smith 
8736cd98798SBarry Smith /* ------------------------------------------------------------------------------*/
8744a2ae208SSatish Balay #undef __FUNCT__
875b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_6"
876dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
8776cd98798SBarry Smith {
8786cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
8796cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
880d2840e09SBarry Smith   const PetscScalar *x,*v;
881d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
882dfbe8321SBarry Smith   PetscErrorCode    ierr;
883d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
884d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
8856cd98798SBarry Smith 
8866cd98798SBarry Smith   PetscFunctionBegin;
8873649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
8881ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
8896cd98798SBarry Smith   idx  = a->j;
8906cd98798SBarry Smith   v    = a->a;
8916cd98798SBarry Smith   ii   = a->i;
8926cd98798SBarry Smith 
8936cd98798SBarry Smith   for (i=0; i<m; i++) {
8946cd98798SBarry Smith     jrow  = ii[i];
8956cd98798SBarry Smith     n     = ii[i+1] - jrow;
8966cd98798SBarry Smith     sum1  = 0.0;
8976cd98798SBarry Smith     sum2  = 0.0;
8986cd98798SBarry Smith     sum3  = 0.0;
8996cd98798SBarry Smith     sum4  = 0.0;
9006cd98798SBarry Smith     sum5  = 0.0;
9016cd98798SBarry Smith     sum6  = 0.0;
90226fbe8dcSKarl Rupp 
903b3c51c66SMatthew Knepley     nonzerorow += (n>0);
9046cd98798SBarry Smith     for (j=0; j<n; j++) {
9056cd98798SBarry Smith       sum1 += v[jrow]*x[6*idx[jrow]];
9066cd98798SBarry Smith       sum2 += v[jrow]*x[6*idx[jrow]+1];
9076cd98798SBarry Smith       sum3 += v[jrow]*x[6*idx[jrow]+2];
9086cd98798SBarry Smith       sum4 += v[jrow]*x[6*idx[jrow]+3];
9096cd98798SBarry Smith       sum5 += v[jrow]*x[6*idx[jrow]+4];
9106cd98798SBarry Smith       sum6 += v[jrow]*x[6*idx[jrow]+5];
9116cd98798SBarry Smith       jrow++;
9126cd98798SBarry Smith     }
9136cd98798SBarry Smith     y[6*i]   = sum1;
9146cd98798SBarry Smith     y[6*i+1] = sum2;
9156cd98798SBarry Smith     y[6*i+2] = sum3;
9166cd98798SBarry Smith     y[6*i+3] = sum4;
9176cd98798SBarry Smith     y[6*i+4] = sum5;
9186cd98798SBarry Smith     y[6*i+5] = sum6;
9196cd98798SBarry Smith   }
9206cd98798SBarry Smith 
921dc0b31edSSatish Balay   ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr);
9223649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
9231ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
9246cd98798SBarry Smith   PetscFunctionReturn(0);
9256cd98798SBarry Smith }
9266cd98798SBarry Smith 
9274a2ae208SSatish Balay #undef __FUNCT__
928b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6"
929dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
9306cd98798SBarry Smith {
9316cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
9326cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
933d2840e09SBarry Smith   const PetscScalar *x,*v;
934d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
935dfbe8321SBarry Smith   PetscErrorCode    ierr;
936d2840e09SBarry Smith   PetscInt          n,i;
937d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
9386cd98798SBarry Smith 
9396cd98798SBarry Smith   PetscFunctionBegin;
940d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
9413649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
9421ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
943bfec09a0SHong Zhang 
9446cd98798SBarry Smith   for (i=0; i<m; i++) {
945bfec09a0SHong Zhang     idx    = a->j + a->i[i];
946bfec09a0SHong Zhang     v      = a->a + a->i[i];
9476cd98798SBarry Smith     n      = a->i[i+1] - a->i[i];
9486cd98798SBarry Smith     alpha1 = x[6*i];
9496cd98798SBarry Smith     alpha2 = x[6*i+1];
9506cd98798SBarry Smith     alpha3 = x[6*i+2];
9516cd98798SBarry Smith     alpha4 = x[6*i+3];
9526cd98798SBarry Smith     alpha5 = x[6*i+4];
9536cd98798SBarry Smith     alpha6 = x[6*i+5];
9546cd98798SBarry Smith     while (n-->0) {
9556cd98798SBarry Smith       y[6*(*idx)]   += alpha1*(*v);
9566cd98798SBarry Smith       y[6*(*idx)+1] += alpha2*(*v);
9576cd98798SBarry Smith       y[6*(*idx)+2] += alpha3*(*v);
9586cd98798SBarry Smith       y[6*(*idx)+3] += alpha4*(*v);
9596cd98798SBarry Smith       y[6*(*idx)+4] += alpha5*(*v);
9606cd98798SBarry Smith       y[6*(*idx)+5] += alpha6*(*v);
9616cd98798SBarry Smith       idx++; v++;
9626cd98798SBarry Smith     }
9636cd98798SBarry Smith   }
964dc0b31edSSatish Balay   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
9653649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
9661ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
9676cd98798SBarry Smith   PetscFunctionReturn(0);
9686cd98798SBarry Smith }
9696cd98798SBarry Smith 
9704a2ae208SSatish Balay #undef __FUNCT__
971b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_6"
972dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
9736cd98798SBarry Smith {
9746cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
9756cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
976d2840e09SBarry Smith   const PetscScalar *x,*v;
977d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
978dfbe8321SBarry Smith   PetscErrorCode    ierr;
979b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
980d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
9816cd98798SBarry Smith 
9826cd98798SBarry Smith   PetscFunctionBegin;
9836cd98798SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
9843649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
9851ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
9866cd98798SBarry Smith   idx  = a->j;
9876cd98798SBarry Smith   v    = a->a;
9886cd98798SBarry Smith   ii   = a->i;
9896cd98798SBarry Smith 
9906cd98798SBarry Smith   for (i=0; i<m; i++) {
9916cd98798SBarry Smith     jrow = ii[i];
9926cd98798SBarry Smith     n    = ii[i+1] - jrow;
9936cd98798SBarry Smith     sum1 = 0.0;
9946cd98798SBarry Smith     sum2 = 0.0;
9956cd98798SBarry Smith     sum3 = 0.0;
9966cd98798SBarry Smith     sum4 = 0.0;
9976cd98798SBarry Smith     sum5 = 0.0;
9986cd98798SBarry Smith     sum6 = 0.0;
9996cd98798SBarry Smith     for (j=0; j<n; j++) {
10006cd98798SBarry Smith       sum1 += v[jrow]*x[6*idx[jrow]];
10016cd98798SBarry Smith       sum2 += v[jrow]*x[6*idx[jrow]+1];
10026cd98798SBarry Smith       sum3 += v[jrow]*x[6*idx[jrow]+2];
10036cd98798SBarry Smith       sum4 += v[jrow]*x[6*idx[jrow]+3];
10046cd98798SBarry Smith       sum5 += v[jrow]*x[6*idx[jrow]+4];
10056cd98798SBarry Smith       sum6 += v[jrow]*x[6*idx[jrow]+5];
10066cd98798SBarry Smith       jrow++;
10076cd98798SBarry Smith     }
10086cd98798SBarry Smith     y[6*i]   += sum1;
10096cd98798SBarry Smith     y[6*i+1] += sum2;
10106cd98798SBarry Smith     y[6*i+2] += sum3;
10116cd98798SBarry Smith     y[6*i+3] += sum4;
10126cd98798SBarry Smith     y[6*i+4] += sum5;
10136cd98798SBarry Smith     y[6*i+5] += sum6;
10146cd98798SBarry Smith   }
10156cd98798SBarry Smith 
1016dc0b31edSSatish Balay   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
10173649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
10181ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
10196cd98798SBarry Smith   PetscFunctionReturn(0);
10206cd98798SBarry Smith }
10216cd98798SBarry Smith 
10224a2ae208SSatish Balay #undef __FUNCT__
1023b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6"
1024dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
10256cd98798SBarry Smith {
10266cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
10276cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1028d2840e09SBarry Smith   const PetscScalar *x,*v;
1029d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
1030dfbe8321SBarry Smith   PetscErrorCode    ierr;
1031d2840e09SBarry Smith   PetscInt          n,i;
1032d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
10336cd98798SBarry Smith 
10346cd98798SBarry Smith   PetscFunctionBegin;
10356cd98798SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
10363649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
10371ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1038bfec09a0SHong Zhang 
10396cd98798SBarry Smith   for (i=0; i<m; i++) {
1040bfec09a0SHong Zhang     idx    = a->j + a->i[i];
1041bfec09a0SHong Zhang     v      = a->a + a->i[i];
10426cd98798SBarry Smith     n      = a->i[i+1] - a->i[i];
10436cd98798SBarry Smith     alpha1 = x[6*i];
10446cd98798SBarry Smith     alpha2 = x[6*i+1];
10456cd98798SBarry Smith     alpha3 = x[6*i+2];
10466cd98798SBarry Smith     alpha4 = x[6*i+3];
10476cd98798SBarry Smith     alpha5 = x[6*i+4];
10486cd98798SBarry Smith     alpha6 = x[6*i+5];
10496cd98798SBarry Smith     while (n-->0) {
10506cd98798SBarry Smith       y[6*(*idx)]   += alpha1*(*v);
10516cd98798SBarry Smith       y[6*(*idx)+1] += alpha2*(*v);
10526cd98798SBarry Smith       y[6*(*idx)+2] += alpha3*(*v);
10536cd98798SBarry Smith       y[6*(*idx)+3] += alpha4*(*v);
10546cd98798SBarry Smith       y[6*(*idx)+4] += alpha5*(*v);
10556cd98798SBarry Smith       y[6*(*idx)+5] += alpha6*(*v);
10566cd98798SBarry Smith       idx++; v++;
10576cd98798SBarry Smith     }
10586cd98798SBarry Smith   }
1059dc0b31edSSatish Balay   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
10603649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
10611ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
10626cd98798SBarry Smith   PetscFunctionReturn(0);
10636cd98798SBarry Smith }
10646cd98798SBarry Smith 
106566ed3db0SBarry Smith /* ------------------------------------------------------------------------------*/
106666ed3db0SBarry Smith #undef __FUNCT__
1067ed8eea03SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_7"
1068ed8eea03SBarry Smith PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1069ed8eea03SBarry Smith {
1070ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1071ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1072d2840e09SBarry Smith   const PetscScalar *x,*v;
1073d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1074ed8eea03SBarry Smith   PetscErrorCode    ierr;
1075d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1076d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
1077ed8eea03SBarry Smith 
1078ed8eea03SBarry Smith   PetscFunctionBegin;
10793649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1080ed8eea03SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1081ed8eea03SBarry Smith   idx  = a->j;
1082ed8eea03SBarry Smith   v    = a->a;
1083ed8eea03SBarry Smith   ii   = a->i;
1084ed8eea03SBarry Smith 
1085ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1086ed8eea03SBarry Smith     jrow  = ii[i];
1087ed8eea03SBarry Smith     n     = ii[i+1] - jrow;
1088ed8eea03SBarry Smith     sum1  = 0.0;
1089ed8eea03SBarry Smith     sum2  = 0.0;
1090ed8eea03SBarry Smith     sum3  = 0.0;
1091ed8eea03SBarry Smith     sum4  = 0.0;
1092ed8eea03SBarry Smith     sum5  = 0.0;
1093ed8eea03SBarry Smith     sum6  = 0.0;
1094ed8eea03SBarry Smith     sum7  = 0.0;
109526fbe8dcSKarl Rupp 
1096b3c51c66SMatthew Knepley     nonzerorow += (n>0);
1097ed8eea03SBarry Smith     for (j=0; j<n; j++) {
1098ed8eea03SBarry Smith       sum1 += v[jrow]*x[7*idx[jrow]];
1099ed8eea03SBarry Smith       sum2 += v[jrow]*x[7*idx[jrow]+1];
1100ed8eea03SBarry Smith       sum3 += v[jrow]*x[7*idx[jrow]+2];
1101ed8eea03SBarry Smith       sum4 += v[jrow]*x[7*idx[jrow]+3];
1102ed8eea03SBarry Smith       sum5 += v[jrow]*x[7*idx[jrow]+4];
1103ed8eea03SBarry Smith       sum6 += v[jrow]*x[7*idx[jrow]+5];
1104ed8eea03SBarry Smith       sum7 += v[jrow]*x[7*idx[jrow]+6];
1105ed8eea03SBarry Smith       jrow++;
1106ed8eea03SBarry Smith     }
1107ed8eea03SBarry Smith     y[7*i]   = sum1;
1108ed8eea03SBarry Smith     y[7*i+1] = sum2;
1109ed8eea03SBarry Smith     y[7*i+2] = sum3;
1110ed8eea03SBarry Smith     y[7*i+3] = sum4;
1111ed8eea03SBarry Smith     y[7*i+4] = sum5;
1112ed8eea03SBarry Smith     y[7*i+5] = sum6;
1113ed8eea03SBarry Smith     y[7*i+6] = sum7;
1114ed8eea03SBarry Smith   }
1115ed8eea03SBarry Smith 
1116dc0b31edSSatish Balay   ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr);
11173649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1118ed8eea03SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1119ed8eea03SBarry Smith   PetscFunctionReturn(0);
1120ed8eea03SBarry Smith }
1121ed8eea03SBarry Smith 
1122ed8eea03SBarry Smith #undef __FUNCT__
1123ed8eea03SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_7"
1124ed8eea03SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1125ed8eea03SBarry Smith {
1126ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1127ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1128d2840e09SBarry Smith   const PetscScalar *x,*v;
1129d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1130ed8eea03SBarry Smith   PetscErrorCode    ierr;
1131d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1132d2840e09SBarry Smith   PetscInt          n,i;
1133ed8eea03SBarry Smith 
1134ed8eea03SBarry Smith   PetscFunctionBegin;
1135d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
11363649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1137ed8eea03SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1138ed8eea03SBarry Smith 
1139ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1140ed8eea03SBarry Smith     idx    = a->j + a->i[i];
1141ed8eea03SBarry Smith     v      = a->a + a->i[i];
1142ed8eea03SBarry Smith     n      = a->i[i+1] - a->i[i];
1143ed8eea03SBarry Smith     alpha1 = x[7*i];
1144ed8eea03SBarry Smith     alpha2 = x[7*i+1];
1145ed8eea03SBarry Smith     alpha3 = x[7*i+2];
1146ed8eea03SBarry Smith     alpha4 = x[7*i+3];
1147ed8eea03SBarry Smith     alpha5 = x[7*i+4];
1148ed8eea03SBarry Smith     alpha6 = x[7*i+5];
1149ed8eea03SBarry Smith     alpha7 = x[7*i+6];
1150ed8eea03SBarry Smith     while (n-->0) {
1151ed8eea03SBarry Smith       y[7*(*idx)]   += alpha1*(*v);
1152ed8eea03SBarry Smith       y[7*(*idx)+1] += alpha2*(*v);
1153ed8eea03SBarry Smith       y[7*(*idx)+2] += alpha3*(*v);
1154ed8eea03SBarry Smith       y[7*(*idx)+3] += alpha4*(*v);
1155ed8eea03SBarry Smith       y[7*(*idx)+4] += alpha5*(*v);
1156ed8eea03SBarry Smith       y[7*(*idx)+5] += alpha6*(*v);
1157ed8eea03SBarry Smith       y[7*(*idx)+6] += alpha7*(*v);
1158ed8eea03SBarry Smith       idx++; v++;
1159ed8eea03SBarry Smith     }
1160ed8eea03SBarry Smith   }
1161dc0b31edSSatish Balay   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
11623649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1163ed8eea03SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1164ed8eea03SBarry Smith   PetscFunctionReturn(0);
1165ed8eea03SBarry Smith }
1166ed8eea03SBarry Smith 
1167ed8eea03SBarry Smith #undef __FUNCT__
1168ed8eea03SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_7"
1169ed8eea03SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1170ed8eea03SBarry Smith {
1171ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1172ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1173d2840e09SBarry Smith   const PetscScalar *x,*v;
1174d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1175ed8eea03SBarry Smith   PetscErrorCode    ierr;
1176d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1177ed8eea03SBarry Smith   PetscInt          n,i,jrow,j;
1178ed8eea03SBarry Smith 
1179ed8eea03SBarry Smith   PetscFunctionBegin;
1180ed8eea03SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
11813649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1182ed8eea03SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1183ed8eea03SBarry Smith   idx  = a->j;
1184ed8eea03SBarry Smith   v    = a->a;
1185ed8eea03SBarry Smith   ii   = a->i;
1186ed8eea03SBarry Smith 
1187ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1188ed8eea03SBarry Smith     jrow = ii[i];
1189ed8eea03SBarry Smith     n    = ii[i+1] - jrow;
1190ed8eea03SBarry Smith     sum1 = 0.0;
1191ed8eea03SBarry Smith     sum2 = 0.0;
1192ed8eea03SBarry Smith     sum3 = 0.0;
1193ed8eea03SBarry Smith     sum4 = 0.0;
1194ed8eea03SBarry Smith     sum5 = 0.0;
1195ed8eea03SBarry Smith     sum6 = 0.0;
1196ed8eea03SBarry Smith     sum7 = 0.0;
1197ed8eea03SBarry Smith     for (j=0; j<n; j++) {
1198ed8eea03SBarry Smith       sum1 += v[jrow]*x[7*idx[jrow]];
1199ed8eea03SBarry Smith       sum2 += v[jrow]*x[7*idx[jrow]+1];
1200ed8eea03SBarry Smith       sum3 += v[jrow]*x[7*idx[jrow]+2];
1201ed8eea03SBarry Smith       sum4 += v[jrow]*x[7*idx[jrow]+3];
1202ed8eea03SBarry Smith       sum5 += v[jrow]*x[7*idx[jrow]+4];
1203ed8eea03SBarry Smith       sum6 += v[jrow]*x[7*idx[jrow]+5];
1204ed8eea03SBarry Smith       sum7 += v[jrow]*x[7*idx[jrow]+6];
1205ed8eea03SBarry Smith       jrow++;
1206ed8eea03SBarry Smith     }
1207ed8eea03SBarry Smith     y[7*i]   += sum1;
1208ed8eea03SBarry Smith     y[7*i+1] += sum2;
1209ed8eea03SBarry Smith     y[7*i+2] += sum3;
1210ed8eea03SBarry Smith     y[7*i+3] += sum4;
1211ed8eea03SBarry Smith     y[7*i+4] += sum5;
1212ed8eea03SBarry Smith     y[7*i+5] += sum6;
1213ed8eea03SBarry Smith     y[7*i+6] += sum7;
1214ed8eea03SBarry Smith   }
1215ed8eea03SBarry Smith 
1216dc0b31edSSatish Balay   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
12173649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1218ed8eea03SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1219ed8eea03SBarry Smith   PetscFunctionReturn(0);
1220ed8eea03SBarry Smith }
1221ed8eea03SBarry Smith 
1222ed8eea03SBarry Smith #undef __FUNCT__
1223ed8eea03SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_7"
1224ed8eea03SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1225ed8eea03SBarry Smith {
1226ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1227ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1228d2840e09SBarry Smith   const PetscScalar *x,*v;
1229d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1230ed8eea03SBarry Smith   PetscErrorCode    ierr;
1231d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1232d2840e09SBarry Smith   PetscInt          n,i;
1233ed8eea03SBarry Smith 
1234ed8eea03SBarry Smith   PetscFunctionBegin;
1235ed8eea03SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
12363649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1237ed8eea03SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1238ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1239ed8eea03SBarry Smith     idx    = a->j + a->i[i];
1240ed8eea03SBarry Smith     v      = a->a + a->i[i];
1241ed8eea03SBarry Smith     n      = a->i[i+1] - a->i[i];
1242ed8eea03SBarry Smith     alpha1 = x[7*i];
1243ed8eea03SBarry Smith     alpha2 = x[7*i+1];
1244ed8eea03SBarry Smith     alpha3 = x[7*i+2];
1245ed8eea03SBarry Smith     alpha4 = x[7*i+3];
1246ed8eea03SBarry Smith     alpha5 = x[7*i+4];
1247ed8eea03SBarry Smith     alpha6 = x[7*i+5];
1248ed8eea03SBarry Smith     alpha7 = x[7*i+6];
1249ed8eea03SBarry Smith     while (n-->0) {
1250ed8eea03SBarry Smith       y[7*(*idx)]   += alpha1*(*v);
1251ed8eea03SBarry Smith       y[7*(*idx)+1] += alpha2*(*v);
1252ed8eea03SBarry Smith       y[7*(*idx)+2] += alpha3*(*v);
1253ed8eea03SBarry Smith       y[7*(*idx)+3] += alpha4*(*v);
1254ed8eea03SBarry Smith       y[7*(*idx)+4] += alpha5*(*v);
1255ed8eea03SBarry Smith       y[7*(*idx)+5] += alpha6*(*v);
1256ed8eea03SBarry Smith       y[7*(*idx)+6] += alpha7*(*v);
1257ed8eea03SBarry Smith       idx++; v++;
1258ed8eea03SBarry Smith     }
1259ed8eea03SBarry Smith   }
1260dc0b31edSSatish Balay   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
12613649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1262ed8eea03SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1263ed8eea03SBarry Smith   PetscFunctionReturn(0);
1264ed8eea03SBarry Smith }
1265ed8eea03SBarry Smith 
1266ed8eea03SBarry Smith #undef __FUNCT__
126766ed3db0SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_8"
1268dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
126966ed3db0SBarry Smith {
127066ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
127166ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1272d2840e09SBarry Smith   const PetscScalar *x,*v;
1273d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1274dfbe8321SBarry Smith   PetscErrorCode    ierr;
1275d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1276d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
127766ed3db0SBarry Smith 
127866ed3db0SBarry Smith   PetscFunctionBegin;
12793649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
12801ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
128166ed3db0SBarry Smith   idx  = a->j;
128266ed3db0SBarry Smith   v    = a->a;
128366ed3db0SBarry Smith   ii   = a->i;
128466ed3db0SBarry Smith 
128566ed3db0SBarry Smith   for (i=0; i<m; i++) {
128666ed3db0SBarry Smith     jrow  = ii[i];
128766ed3db0SBarry Smith     n     = ii[i+1] - jrow;
128866ed3db0SBarry Smith     sum1  = 0.0;
128966ed3db0SBarry Smith     sum2  = 0.0;
129066ed3db0SBarry Smith     sum3  = 0.0;
129166ed3db0SBarry Smith     sum4  = 0.0;
129266ed3db0SBarry Smith     sum5  = 0.0;
129366ed3db0SBarry Smith     sum6  = 0.0;
129466ed3db0SBarry Smith     sum7  = 0.0;
129566ed3db0SBarry Smith     sum8  = 0.0;
129626fbe8dcSKarl Rupp 
1297b3c51c66SMatthew Knepley     nonzerorow += (n>0);
129866ed3db0SBarry Smith     for (j=0; j<n; j++) {
129966ed3db0SBarry Smith       sum1 += v[jrow]*x[8*idx[jrow]];
130066ed3db0SBarry Smith       sum2 += v[jrow]*x[8*idx[jrow]+1];
130166ed3db0SBarry Smith       sum3 += v[jrow]*x[8*idx[jrow]+2];
130266ed3db0SBarry Smith       sum4 += v[jrow]*x[8*idx[jrow]+3];
130366ed3db0SBarry Smith       sum5 += v[jrow]*x[8*idx[jrow]+4];
130466ed3db0SBarry Smith       sum6 += v[jrow]*x[8*idx[jrow]+5];
130566ed3db0SBarry Smith       sum7 += v[jrow]*x[8*idx[jrow]+6];
130666ed3db0SBarry Smith       sum8 += v[jrow]*x[8*idx[jrow]+7];
130766ed3db0SBarry Smith       jrow++;
130866ed3db0SBarry Smith     }
130966ed3db0SBarry Smith     y[8*i]   = sum1;
131066ed3db0SBarry Smith     y[8*i+1] = sum2;
131166ed3db0SBarry Smith     y[8*i+2] = sum3;
131266ed3db0SBarry Smith     y[8*i+3] = sum4;
131366ed3db0SBarry Smith     y[8*i+4] = sum5;
131466ed3db0SBarry Smith     y[8*i+5] = sum6;
131566ed3db0SBarry Smith     y[8*i+6] = sum7;
131666ed3db0SBarry Smith     y[8*i+7] = sum8;
131766ed3db0SBarry Smith   }
131866ed3db0SBarry Smith 
1319dc0b31edSSatish Balay   ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr);
13203649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
13211ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
132266ed3db0SBarry Smith   PetscFunctionReturn(0);
132366ed3db0SBarry Smith }
132466ed3db0SBarry Smith 
132566ed3db0SBarry Smith #undef __FUNCT__
132666ed3db0SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8"
1327dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
132866ed3db0SBarry Smith {
132966ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
133066ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1331d2840e09SBarry Smith   const PetscScalar *x,*v;
1332d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1333dfbe8321SBarry Smith   PetscErrorCode    ierr;
1334d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1335d2840e09SBarry Smith   PetscInt          n,i;
133666ed3db0SBarry Smith 
133766ed3db0SBarry Smith   PetscFunctionBegin;
1338d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
13393649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
13401ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1341bfec09a0SHong Zhang 
134266ed3db0SBarry Smith   for (i=0; i<m; i++) {
1343bfec09a0SHong Zhang     idx    = a->j + a->i[i];
1344bfec09a0SHong Zhang     v      = a->a + a->i[i];
134566ed3db0SBarry Smith     n      = a->i[i+1] - a->i[i];
134666ed3db0SBarry Smith     alpha1 = x[8*i];
134766ed3db0SBarry Smith     alpha2 = x[8*i+1];
134866ed3db0SBarry Smith     alpha3 = x[8*i+2];
134966ed3db0SBarry Smith     alpha4 = x[8*i+3];
135066ed3db0SBarry Smith     alpha5 = x[8*i+4];
135166ed3db0SBarry Smith     alpha6 = x[8*i+5];
135266ed3db0SBarry Smith     alpha7 = x[8*i+6];
135366ed3db0SBarry Smith     alpha8 = x[8*i+7];
135466ed3db0SBarry Smith     while (n-->0) {
135566ed3db0SBarry Smith       y[8*(*idx)]   += alpha1*(*v);
135666ed3db0SBarry Smith       y[8*(*idx)+1] += alpha2*(*v);
135766ed3db0SBarry Smith       y[8*(*idx)+2] += alpha3*(*v);
135866ed3db0SBarry Smith       y[8*(*idx)+3] += alpha4*(*v);
135966ed3db0SBarry Smith       y[8*(*idx)+4] += alpha5*(*v);
136066ed3db0SBarry Smith       y[8*(*idx)+5] += alpha6*(*v);
136166ed3db0SBarry Smith       y[8*(*idx)+6] += alpha7*(*v);
136266ed3db0SBarry Smith       y[8*(*idx)+7] += alpha8*(*v);
136366ed3db0SBarry Smith       idx++; v++;
136466ed3db0SBarry Smith     }
136566ed3db0SBarry Smith   }
1366dc0b31edSSatish Balay   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
13673649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
13681ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
136966ed3db0SBarry Smith   PetscFunctionReturn(0);
137066ed3db0SBarry Smith }
137166ed3db0SBarry Smith 
137266ed3db0SBarry Smith #undef __FUNCT__
137366ed3db0SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_8"
1374dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
137566ed3db0SBarry Smith {
137666ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
137766ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1378d2840e09SBarry Smith   const PetscScalar *x,*v;
1379d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1380dfbe8321SBarry Smith   PetscErrorCode    ierr;
1381d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1382b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
138366ed3db0SBarry Smith 
138466ed3db0SBarry Smith   PetscFunctionBegin;
138566ed3db0SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
13863649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
13871ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
138866ed3db0SBarry Smith   idx  = a->j;
138966ed3db0SBarry Smith   v    = a->a;
139066ed3db0SBarry Smith   ii   = a->i;
139166ed3db0SBarry Smith 
139266ed3db0SBarry Smith   for (i=0; i<m; i++) {
139366ed3db0SBarry Smith     jrow = ii[i];
139466ed3db0SBarry Smith     n    = ii[i+1] - jrow;
139566ed3db0SBarry Smith     sum1 = 0.0;
139666ed3db0SBarry Smith     sum2 = 0.0;
139766ed3db0SBarry Smith     sum3 = 0.0;
139866ed3db0SBarry Smith     sum4 = 0.0;
139966ed3db0SBarry Smith     sum5 = 0.0;
140066ed3db0SBarry Smith     sum6 = 0.0;
140166ed3db0SBarry Smith     sum7 = 0.0;
140266ed3db0SBarry Smith     sum8 = 0.0;
140366ed3db0SBarry Smith     for (j=0; j<n; j++) {
140466ed3db0SBarry Smith       sum1 += v[jrow]*x[8*idx[jrow]];
140566ed3db0SBarry Smith       sum2 += v[jrow]*x[8*idx[jrow]+1];
140666ed3db0SBarry Smith       sum3 += v[jrow]*x[8*idx[jrow]+2];
140766ed3db0SBarry Smith       sum4 += v[jrow]*x[8*idx[jrow]+3];
140866ed3db0SBarry Smith       sum5 += v[jrow]*x[8*idx[jrow]+4];
140966ed3db0SBarry Smith       sum6 += v[jrow]*x[8*idx[jrow]+5];
141066ed3db0SBarry Smith       sum7 += v[jrow]*x[8*idx[jrow]+6];
141166ed3db0SBarry Smith       sum8 += v[jrow]*x[8*idx[jrow]+7];
141266ed3db0SBarry Smith       jrow++;
141366ed3db0SBarry Smith     }
141466ed3db0SBarry Smith     y[8*i]   += sum1;
141566ed3db0SBarry Smith     y[8*i+1] += sum2;
141666ed3db0SBarry Smith     y[8*i+2] += sum3;
141766ed3db0SBarry Smith     y[8*i+3] += sum4;
141866ed3db0SBarry Smith     y[8*i+4] += sum5;
141966ed3db0SBarry Smith     y[8*i+5] += sum6;
142066ed3db0SBarry Smith     y[8*i+6] += sum7;
142166ed3db0SBarry Smith     y[8*i+7] += sum8;
142266ed3db0SBarry Smith   }
142366ed3db0SBarry Smith 
1424dc0b31edSSatish Balay   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
14253649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
14261ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
142766ed3db0SBarry Smith   PetscFunctionReturn(0);
142866ed3db0SBarry Smith }
142966ed3db0SBarry Smith 
143066ed3db0SBarry Smith #undef __FUNCT__
143166ed3db0SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8"
1432dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
143366ed3db0SBarry Smith {
143466ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
143566ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1436d2840e09SBarry Smith   const PetscScalar *x,*v;
1437d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1438dfbe8321SBarry Smith   PetscErrorCode    ierr;
1439d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1440d2840e09SBarry Smith   PetscInt          n,i;
144166ed3db0SBarry Smith 
144266ed3db0SBarry Smith   PetscFunctionBegin;
144366ed3db0SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
14443649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
14451ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
144666ed3db0SBarry Smith   for (i=0; i<m; i++) {
1447bfec09a0SHong Zhang     idx    = a->j + a->i[i];
1448bfec09a0SHong Zhang     v      = a->a + a->i[i];
144966ed3db0SBarry Smith     n      = a->i[i+1] - a->i[i];
145066ed3db0SBarry Smith     alpha1 = x[8*i];
145166ed3db0SBarry Smith     alpha2 = x[8*i+1];
145266ed3db0SBarry Smith     alpha3 = x[8*i+2];
145366ed3db0SBarry Smith     alpha4 = x[8*i+3];
145466ed3db0SBarry Smith     alpha5 = x[8*i+4];
145566ed3db0SBarry Smith     alpha6 = x[8*i+5];
145666ed3db0SBarry Smith     alpha7 = x[8*i+6];
145766ed3db0SBarry Smith     alpha8 = x[8*i+7];
145866ed3db0SBarry Smith     while (n-->0) {
145966ed3db0SBarry Smith       y[8*(*idx)]   += alpha1*(*v);
146066ed3db0SBarry Smith       y[8*(*idx)+1] += alpha2*(*v);
146166ed3db0SBarry Smith       y[8*(*idx)+2] += alpha3*(*v);
146266ed3db0SBarry Smith       y[8*(*idx)+3] += alpha4*(*v);
146366ed3db0SBarry Smith       y[8*(*idx)+4] += alpha5*(*v);
146466ed3db0SBarry Smith       y[8*(*idx)+5] += alpha6*(*v);
146566ed3db0SBarry Smith       y[8*(*idx)+6] += alpha7*(*v);
146666ed3db0SBarry Smith       y[8*(*idx)+7] += alpha8*(*v);
146766ed3db0SBarry Smith       idx++; v++;
146866ed3db0SBarry Smith     }
146966ed3db0SBarry Smith   }
1470dc0b31edSSatish Balay   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
14713649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
14721ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
14732f7816d4SBarry Smith   PetscFunctionReturn(0);
14742f7816d4SBarry Smith }
14752f7816d4SBarry Smith 
14762b692628SMatthew Knepley /* ------------------------------------------------------------------------------*/
14772b692628SMatthew Knepley #undef __FUNCT__
14782b692628SMatthew Knepley #define __FUNCT__ "MatMult_SeqMAIJ_9"
1479dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
14802b692628SMatthew Knepley {
14812b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
14822b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1483d2840e09SBarry Smith   const PetscScalar *x,*v;
1484d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1485dfbe8321SBarry Smith   PetscErrorCode    ierr;
1486d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1487d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
14882b692628SMatthew Knepley 
14892b692628SMatthew Knepley   PetscFunctionBegin;
14903649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
14911ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
14922b692628SMatthew Knepley   idx  = a->j;
14932b692628SMatthew Knepley   v    = a->a;
14942b692628SMatthew Knepley   ii   = a->i;
14952b692628SMatthew Knepley 
14962b692628SMatthew Knepley   for (i=0; i<m; i++) {
14972b692628SMatthew Knepley     jrow  = ii[i];
14982b692628SMatthew Knepley     n     = ii[i+1] - jrow;
14992b692628SMatthew Knepley     sum1  = 0.0;
15002b692628SMatthew Knepley     sum2  = 0.0;
15012b692628SMatthew Knepley     sum3  = 0.0;
15022b692628SMatthew Knepley     sum4  = 0.0;
15032b692628SMatthew Knepley     sum5  = 0.0;
15042b692628SMatthew Knepley     sum6  = 0.0;
15052b692628SMatthew Knepley     sum7  = 0.0;
15062b692628SMatthew Knepley     sum8  = 0.0;
15072b692628SMatthew Knepley     sum9  = 0.0;
150826fbe8dcSKarl Rupp 
1509b3c51c66SMatthew Knepley     nonzerorow += (n>0);
15102b692628SMatthew Knepley     for (j=0; j<n; j++) {
15112b692628SMatthew Knepley       sum1 += v[jrow]*x[9*idx[jrow]];
15122b692628SMatthew Knepley       sum2 += v[jrow]*x[9*idx[jrow]+1];
15132b692628SMatthew Knepley       sum3 += v[jrow]*x[9*idx[jrow]+2];
15142b692628SMatthew Knepley       sum4 += v[jrow]*x[9*idx[jrow]+3];
15152b692628SMatthew Knepley       sum5 += v[jrow]*x[9*idx[jrow]+4];
15162b692628SMatthew Knepley       sum6 += v[jrow]*x[9*idx[jrow]+5];
15172b692628SMatthew Knepley       sum7 += v[jrow]*x[9*idx[jrow]+6];
15182b692628SMatthew Knepley       sum8 += v[jrow]*x[9*idx[jrow]+7];
15192b692628SMatthew Knepley       sum9 += v[jrow]*x[9*idx[jrow]+8];
15202b692628SMatthew Knepley       jrow++;
15212b692628SMatthew Knepley     }
15222b692628SMatthew Knepley     y[9*i]   = sum1;
15232b692628SMatthew Knepley     y[9*i+1] = sum2;
15242b692628SMatthew Knepley     y[9*i+2] = sum3;
15252b692628SMatthew Knepley     y[9*i+3] = sum4;
15262b692628SMatthew Knepley     y[9*i+4] = sum5;
15272b692628SMatthew Knepley     y[9*i+5] = sum6;
15282b692628SMatthew Knepley     y[9*i+6] = sum7;
15292b692628SMatthew Knepley     y[9*i+7] = sum8;
15302b692628SMatthew Knepley     y[9*i+8] = sum9;
15312b692628SMatthew Knepley   }
15322b692628SMatthew Knepley 
1533dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr);
15343649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
15351ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
15362b692628SMatthew Knepley   PetscFunctionReturn(0);
15372b692628SMatthew Knepley }
15382b692628SMatthew Knepley 
1539b9cda22cSBarry Smith /* ------------------------------------------------------------------------------*/
1540b9cda22cSBarry Smith 
15412b692628SMatthew Knepley #undef __FUNCT__
15422b692628SMatthew Knepley #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9"
1543dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
15442b692628SMatthew Knepley {
15452b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
15462b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1547d2840e09SBarry Smith   const PetscScalar *x,*v;
1548d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1549dfbe8321SBarry Smith   PetscErrorCode    ierr;
1550d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1551d2840e09SBarry Smith   PetscInt          n,i;
15522b692628SMatthew Knepley 
15532b692628SMatthew Knepley   PetscFunctionBegin;
1554d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
15553649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
15561ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
15572b692628SMatthew Knepley 
15582b692628SMatthew Knepley   for (i=0; i<m; i++) {
15592b692628SMatthew Knepley     idx    = a->j + a->i[i];
15602b692628SMatthew Knepley     v      = a->a + a->i[i];
15612b692628SMatthew Knepley     n      = a->i[i+1] - a->i[i];
15622b692628SMatthew Knepley     alpha1 = x[9*i];
15632b692628SMatthew Knepley     alpha2 = x[9*i+1];
15642b692628SMatthew Knepley     alpha3 = x[9*i+2];
15652b692628SMatthew Knepley     alpha4 = x[9*i+3];
15662b692628SMatthew Knepley     alpha5 = x[9*i+4];
15672b692628SMatthew Knepley     alpha6 = x[9*i+5];
15682b692628SMatthew Knepley     alpha7 = x[9*i+6];
15692b692628SMatthew Knepley     alpha8 = x[9*i+7];
15702f6bd0c6SMatthew Knepley     alpha9 = x[9*i+8];
15712b692628SMatthew Knepley     while (n-->0) {
15722b692628SMatthew Knepley       y[9*(*idx)]   += alpha1*(*v);
15732b692628SMatthew Knepley       y[9*(*idx)+1] += alpha2*(*v);
15742b692628SMatthew Knepley       y[9*(*idx)+2] += alpha3*(*v);
15752b692628SMatthew Knepley       y[9*(*idx)+3] += alpha4*(*v);
15762b692628SMatthew Knepley       y[9*(*idx)+4] += alpha5*(*v);
15772b692628SMatthew Knepley       y[9*(*idx)+5] += alpha6*(*v);
15782b692628SMatthew Knepley       y[9*(*idx)+6] += alpha7*(*v);
15792b692628SMatthew Knepley       y[9*(*idx)+7] += alpha8*(*v);
15802b692628SMatthew Knepley       y[9*(*idx)+8] += alpha9*(*v);
15812b692628SMatthew Knepley       idx++; v++;
15822b692628SMatthew Knepley     }
15832b692628SMatthew Knepley   }
1584dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
15853649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
15861ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
15872b692628SMatthew Knepley   PetscFunctionReturn(0);
15882b692628SMatthew Knepley }
15892b692628SMatthew Knepley 
15902b692628SMatthew Knepley #undef __FUNCT__
15912b692628SMatthew Knepley #define __FUNCT__ "MatMultAdd_SeqMAIJ_9"
1592dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
15932b692628SMatthew Knepley {
15942b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
15952b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1596d2840e09SBarry Smith   const PetscScalar *x,*v;
1597d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1598dfbe8321SBarry Smith   PetscErrorCode    ierr;
1599d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1600b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
16012b692628SMatthew Knepley 
16022b692628SMatthew Knepley   PetscFunctionBegin;
16032b692628SMatthew Knepley   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
16043649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
16051ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
16062b692628SMatthew Knepley   idx  = a->j;
16072b692628SMatthew Knepley   v    = a->a;
16082b692628SMatthew Knepley   ii   = a->i;
16092b692628SMatthew Knepley 
16102b692628SMatthew Knepley   for (i=0; i<m; i++) {
16112b692628SMatthew Knepley     jrow = ii[i];
16122b692628SMatthew Knepley     n    = ii[i+1] - jrow;
16132b692628SMatthew Knepley     sum1 = 0.0;
16142b692628SMatthew Knepley     sum2 = 0.0;
16152b692628SMatthew Knepley     sum3 = 0.0;
16162b692628SMatthew Knepley     sum4 = 0.0;
16172b692628SMatthew Knepley     sum5 = 0.0;
16182b692628SMatthew Knepley     sum6 = 0.0;
16192b692628SMatthew Knepley     sum7 = 0.0;
16202b692628SMatthew Knepley     sum8 = 0.0;
16212b692628SMatthew Knepley     sum9 = 0.0;
16222b692628SMatthew Knepley     for (j=0; j<n; j++) {
16232b692628SMatthew Knepley       sum1 += v[jrow]*x[9*idx[jrow]];
16242b692628SMatthew Knepley       sum2 += v[jrow]*x[9*idx[jrow]+1];
16252b692628SMatthew Knepley       sum3 += v[jrow]*x[9*idx[jrow]+2];
16262b692628SMatthew Knepley       sum4 += v[jrow]*x[9*idx[jrow]+3];
16272b692628SMatthew Knepley       sum5 += v[jrow]*x[9*idx[jrow]+4];
16282b692628SMatthew Knepley       sum6 += v[jrow]*x[9*idx[jrow]+5];
16292b692628SMatthew Knepley       sum7 += v[jrow]*x[9*idx[jrow]+6];
16302b692628SMatthew Knepley       sum8 += v[jrow]*x[9*idx[jrow]+7];
16312b692628SMatthew Knepley       sum9 += v[jrow]*x[9*idx[jrow]+8];
16322b692628SMatthew Knepley       jrow++;
16332b692628SMatthew Knepley     }
16342b692628SMatthew Knepley     y[9*i]   += sum1;
16352b692628SMatthew Knepley     y[9*i+1] += sum2;
16362b692628SMatthew Knepley     y[9*i+2] += sum3;
16372b692628SMatthew Knepley     y[9*i+3] += sum4;
16382b692628SMatthew Knepley     y[9*i+4] += sum5;
16392b692628SMatthew Knepley     y[9*i+5] += sum6;
16402b692628SMatthew Knepley     y[9*i+6] += sum7;
16412b692628SMatthew Knepley     y[9*i+7] += sum8;
16422b692628SMatthew Knepley     y[9*i+8] += sum9;
16432b692628SMatthew Knepley   }
16442b692628SMatthew Knepley 
1645efee365bSSatish Balay   ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr);
16463649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
16471ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
16482b692628SMatthew Knepley   PetscFunctionReturn(0);
16492b692628SMatthew Knepley }
16502b692628SMatthew Knepley 
16512b692628SMatthew Knepley #undef __FUNCT__
16522b692628SMatthew Knepley #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9"
1653dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
16542b692628SMatthew Knepley {
16552b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
16562b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1657d2840e09SBarry Smith   const PetscScalar *x,*v;
1658d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1659dfbe8321SBarry Smith   PetscErrorCode    ierr;
1660d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1661d2840e09SBarry Smith   PetscInt          n,i;
16622b692628SMatthew Knepley 
16632b692628SMatthew Knepley   PetscFunctionBegin;
16642b692628SMatthew Knepley   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
16653649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
16661ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
16672b692628SMatthew Knepley   for (i=0; i<m; i++) {
16682b692628SMatthew Knepley     idx    = a->j + a->i[i];
16692b692628SMatthew Knepley     v      = a->a + a->i[i];
16702b692628SMatthew Knepley     n      = a->i[i+1] - a->i[i];
16712b692628SMatthew Knepley     alpha1 = x[9*i];
16722b692628SMatthew Knepley     alpha2 = x[9*i+1];
16732b692628SMatthew Knepley     alpha3 = x[9*i+2];
16742b692628SMatthew Knepley     alpha4 = x[9*i+3];
16752b692628SMatthew Knepley     alpha5 = x[9*i+4];
16762b692628SMatthew Knepley     alpha6 = x[9*i+5];
16772b692628SMatthew Knepley     alpha7 = x[9*i+6];
16782b692628SMatthew Knepley     alpha8 = x[9*i+7];
16792b692628SMatthew Knepley     alpha9 = x[9*i+8];
16802b692628SMatthew Knepley     while (n-->0) {
16812b692628SMatthew Knepley       y[9*(*idx)]   += alpha1*(*v);
16822b692628SMatthew Knepley       y[9*(*idx)+1] += alpha2*(*v);
16832b692628SMatthew Knepley       y[9*(*idx)+2] += alpha3*(*v);
16842b692628SMatthew Knepley       y[9*(*idx)+3] += alpha4*(*v);
16852b692628SMatthew Knepley       y[9*(*idx)+4] += alpha5*(*v);
16862b692628SMatthew Knepley       y[9*(*idx)+5] += alpha6*(*v);
16872b692628SMatthew Knepley       y[9*(*idx)+6] += alpha7*(*v);
16882b692628SMatthew Knepley       y[9*(*idx)+7] += alpha8*(*v);
16892b692628SMatthew Knepley       y[9*(*idx)+8] += alpha9*(*v);
16902b692628SMatthew Knepley       idx++; v++;
16912b692628SMatthew Knepley     }
16922b692628SMatthew Knepley   }
1693dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
16943649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
16951ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
16962b692628SMatthew Knepley   PetscFunctionReturn(0);
16972b692628SMatthew Knepley }
1698b9cda22cSBarry Smith #undef __FUNCT__
1699b9cda22cSBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_10"
1700b9cda22cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1701b9cda22cSBarry Smith {
1702b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1703b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1704d2840e09SBarry Smith   const PetscScalar *x,*v;
1705d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1706b9cda22cSBarry Smith   PetscErrorCode    ierr;
1707d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1708d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
1709b9cda22cSBarry Smith 
1710b9cda22cSBarry Smith   PetscFunctionBegin;
17113649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1712b9cda22cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1713b9cda22cSBarry Smith   idx  = a->j;
1714b9cda22cSBarry Smith   v    = a->a;
1715b9cda22cSBarry Smith   ii   = a->i;
1716b9cda22cSBarry Smith 
1717b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1718b9cda22cSBarry Smith     jrow  = ii[i];
1719b9cda22cSBarry Smith     n     = ii[i+1] - jrow;
1720b9cda22cSBarry Smith     sum1  = 0.0;
1721b9cda22cSBarry Smith     sum2  = 0.0;
1722b9cda22cSBarry Smith     sum3  = 0.0;
1723b9cda22cSBarry Smith     sum4  = 0.0;
1724b9cda22cSBarry Smith     sum5  = 0.0;
1725b9cda22cSBarry Smith     sum6  = 0.0;
1726b9cda22cSBarry Smith     sum7  = 0.0;
1727b9cda22cSBarry Smith     sum8  = 0.0;
1728b9cda22cSBarry Smith     sum9  = 0.0;
1729b9cda22cSBarry Smith     sum10 = 0.0;
173026fbe8dcSKarl Rupp 
1731b3c51c66SMatthew Knepley     nonzerorow += (n>0);
1732b9cda22cSBarry Smith     for (j=0; j<n; j++) {
1733b9cda22cSBarry Smith       sum1  += v[jrow]*x[10*idx[jrow]];
1734b9cda22cSBarry Smith       sum2  += v[jrow]*x[10*idx[jrow]+1];
1735b9cda22cSBarry Smith       sum3  += v[jrow]*x[10*idx[jrow]+2];
1736b9cda22cSBarry Smith       sum4  += v[jrow]*x[10*idx[jrow]+3];
1737b9cda22cSBarry Smith       sum5  += v[jrow]*x[10*idx[jrow]+4];
1738b9cda22cSBarry Smith       sum6  += v[jrow]*x[10*idx[jrow]+5];
1739b9cda22cSBarry Smith       sum7  += v[jrow]*x[10*idx[jrow]+6];
1740b9cda22cSBarry Smith       sum8  += v[jrow]*x[10*idx[jrow]+7];
1741b9cda22cSBarry Smith       sum9  += v[jrow]*x[10*idx[jrow]+8];
1742b9cda22cSBarry Smith       sum10 += v[jrow]*x[10*idx[jrow]+9];
1743b9cda22cSBarry Smith       jrow++;
1744b9cda22cSBarry Smith     }
1745b9cda22cSBarry Smith     y[10*i]   = sum1;
1746b9cda22cSBarry Smith     y[10*i+1] = sum2;
1747b9cda22cSBarry Smith     y[10*i+2] = sum3;
1748b9cda22cSBarry Smith     y[10*i+3] = sum4;
1749b9cda22cSBarry Smith     y[10*i+4] = sum5;
1750b9cda22cSBarry Smith     y[10*i+5] = sum6;
1751b9cda22cSBarry Smith     y[10*i+6] = sum7;
1752b9cda22cSBarry Smith     y[10*i+7] = sum8;
1753b9cda22cSBarry Smith     y[10*i+8] = sum9;
1754b9cda22cSBarry Smith     y[10*i+9] = sum10;
1755b9cda22cSBarry Smith   }
1756b9cda22cSBarry Smith 
1757dc0b31edSSatish Balay   ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr);
17583649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1759b9cda22cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1760b9cda22cSBarry Smith   PetscFunctionReturn(0);
1761b9cda22cSBarry Smith }
1762b9cda22cSBarry Smith 
1763b9cda22cSBarry Smith #undef __FUNCT__
1764dbdd7285SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_10"
1765b9cda22cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1766b9cda22cSBarry Smith {
1767b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1768b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1769d2840e09SBarry Smith   const PetscScalar *x,*v;
1770d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1771b9cda22cSBarry Smith   PetscErrorCode    ierr;
1772d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1773b9cda22cSBarry Smith   PetscInt          n,i,jrow,j;
1774b9cda22cSBarry Smith 
1775b9cda22cSBarry Smith   PetscFunctionBegin;
1776b9cda22cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
17773649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1778b9cda22cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1779b9cda22cSBarry Smith   idx  = a->j;
1780b9cda22cSBarry Smith   v    = a->a;
1781b9cda22cSBarry Smith   ii   = a->i;
1782b9cda22cSBarry Smith 
1783b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1784b9cda22cSBarry Smith     jrow  = ii[i];
1785b9cda22cSBarry Smith     n     = ii[i+1] - jrow;
1786b9cda22cSBarry Smith     sum1  = 0.0;
1787b9cda22cSBarry Smith     sum2  = 0.0;
1788b9cda22cSBarry Smith     sum3  = 0.0;
1789b9cda22cSBarry Smith     sum4  = 0.0;
1790b9cda22cSBarry Smith     sum5  = 0.0;
1791b9cda22cSBarry Smith     sum6  = 0.0;
1792b9cda22cSBarry Smith     sum7  = 0.0;
1793b9cda22cSBarry Smith     sum8  = 0.0;
1794b9cda22cSBarry Smith     sum9  = 0.0;
1795b9cda22cSBarry Smith     sum10 = 0.0;
1796b9cda22cSBarry Smith     for (j=0; j<n; j++) {
1797b9cda22cSBarry Smith       sum1  += v[jrow]*x[10*idx[jrow]];
1798b9cda22cSBarry Smith       sum2  += v[jrow]*x[10*idx[jrow]+1];
1799b9cda22cSBarry Smith       sum3  += v[jrow]*x[10*idx[jrow]+2];
1800b9cda22cSBarry Smith       sum4  += v[jrow]*x[10*idx[jrow]+3];
1801b9cda22cSBarry Smith       sum5  += v[jrow]*x[10*idx[jrow]+4];
1802b9cda22cSBarry Smith       sum6  += v[jrow]*x[10*idx[jrow]+5];
1803b9cda22cSBarry Smith       sum7  += v[jrow]*x[10*idx[jrow]+6];
1804b9cda22cSBarry Smith       sum8  += v[jrow]*x[10*idx[jrow]+7];
1805b9cda22cSBarry Smith       sum9  += v[jrow]*x[10*idx[jrow]+8];
1806b9cda22cSBarry Smith       sum10 += v[jrow]*x[10*idx[jrow]+9];
1807b9cda22cSBarry Smith       jrow++;
1808b9cda22cSBarry Smith     }
1809b9cda22cSBarry Smith     y[10*i]   += sum1;
1810b9cda22cSBarry Smith     y[10*i+1] += sum2;
1811b9cda22cSBarry Smith     y[10*i+2] += sum3;
1812b9cda22cSBarry Smith     y[10*i+3] += sum4;
1813b9cda22cSBarry Smith     y[10*i+4] += sum5;
1814b9cda22cSBarry Smith     y[10*i+5] += sum6;
1815b9cda22cSBarry Smith     y[10*i+6] += sum7;
1816b9cda22cSBarry Smith     y[10*i+7] += sum8;
1817b9cda22cSBarry Smith     y[10*i+8] += sum9;
1818b9cda22cSBarry Smith     y[10*i+9] += sum10;
1819b9cda22cSBarry Smith   }
1820b9cda22cSBarry Smith 
1821dc0b31edSSatish Balay   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
18223649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1823b9cda22cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1824b9cda22cSBarry Smith   PetscFunctionReturn(0);
1825b9cda22cSBarry Smith }
1826b9cda22cSBarry Smith 
1827b9cda22cSBarry Smith #undef __FUNCT__
1828b9cda22cSBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_10"
1829b9cda22cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1830b9cda22cSBarry Smith {
1831b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1832b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1833d2840e09SBarry Smith   const PetscScalar *x,*v;
1834d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1835b9cda22cSBarry Smith   PetscErrorCode    ierr;
1836d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1837d2840e09SBarry Smith   PetscInt          n,i;
1838b9cda22cSBarry Smith 
1839b9cda22cSBarry Smith   PetscFunctionBegin;
1840d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
18413649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1842b9cda22cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1843b9cda22cSBarry Smith 
1844b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1845b9cda22cSBarry Smith     idx     = a->j + a->i[i];
1846b9cda22cSBarry Smith     v       = a->a + a->i[i];
1847b9cda22cSBarry Smith     n       = a->i[i+1] - a->i[i];
1848e29fdc22SBarry Smith     alpha1  = x[10*i];
1849e29fdc22SBarry Smith     alpha2  = x[10*i+1];
1850e29fdc22SBarry Smith     alpha3  = x[10*i+2];
1851e29fdc22SBarry Smith     alpha4  = x[10*i+3];
1852e29fdc22SBarry Smith     alpha5  = x[10*i+4];
1853e29fdc22SBarry Smith     alpha6  = x[10*i+5];
1854e29fdc22SBarry Smith     alpha7  = x[10*i+6];
1855e29fdc22SBarry Smith     alpha8  = x[10*i+7];
1856e29fdc22SBarry Smith     alpha9  = x[10*i+8];
1857b9cda22cSBarry Smith     alpha10 = x[10*i+9];
1858b9cda22cSBarry Smith     while (n-->0) {
1859e29fdc22SBarry Smith       y[10*(*idx)]   += alpha1*(*v);
1860e29fdc22SBarry Smith       y[10*(*idx)+1] += alpha2*(*v);
1861e29fdc22SBarry Smith       y[10*(*idx)+2] += alpha3*(*v);
1862e29fdc22SBarry Smith       y[10*(*idx)+3] += alpha4*(*v);
1863e29fdc22SBarry Smith       y[10*(*idx)+4] += alpha5*(*v);
1864e29fdc22SBarry Smith       y[10*(*idx)+5] += alpha6*(*v);
1865e29fdc22SBarry Smith       y[10*(*idx)+6] += alpha7*(*v);
1866e29fdc22SBarry Smith       y[10*(*idx)+7] += alpha8*(*v);
1867e29fdc22SBarry Smith       y[10*(*idx)+8] += alpha9*(*v);
1868b9cda22cSBarry Smith       y[10*(*idx)+9] += alpha10*(*v);
1869b9cda22cSBarry Smith       idx++; v++;
1870b9cda22cSBarry Smith     }
1871b9cda22cSBarry Smith   }
1872dc0b31edSSatish Balay   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
18733649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1874b9cda22cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1875b9cda22cSBarry Smith   PetscFunctionReturn(0);
1876b9cda22cSBarry Smith }
1877b9cda22cSBarry Smith 
1878b9cda22cSBarry Smith #undef __FUNCT__
1879b9cda22cSBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_10"
1880b9cda22cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1881b9cda22cSBarry Smith {
1882b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1883b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1884d2840e09SBarry Smith   const PetscScalar *x,*v;
1885d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1886b9cda22cSBarry Smith   PetscErrorCode    ierr;
1887d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1888d2840e09SBarry Smith   PetscInt          n,i;
1889b9cda22cSBarry Smith 
1890b9cda22cSBarry Smith   PetscFunctionBegin;
1891b9cda22cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
18923649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1893b9cda22cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1894b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1895b9cda22cSBarry Smith     idx     = a->j + a->i[i];
1896b9cda22cSBarry Smith     v       = a->a + a->i[i];
1897b9cda22cSBarry Smith     n       = a->i[i+1] - a->i[i];
1898b9cda22cSBarry Smith     alpha1  = x[10*i];
1899b9cda22cSBarry Smith     alpha2  = x[10*i+1];
1900b9cda22cSBarry Smith     alpha3  = x[10*i+2];
1901b9cda22cSBarry Smith     alpha4  = x[10*i+3];
1902b9cda22cSBarry Smith     alpha5  = x[10*i+4];
1903b9cda22cSBarry Smith     alpha6  = x[10*i+5];
1904b9cda22cSBarry Smith     alpha7  = x[10*i+6];
1905b9cda22cSBarry Smith     alpha8  = x[10*i+7];
1906b9cda22cSBarry Smith     alpha9  = x[10*i+8];
1907b9cda22cSBarry Smith     alpha10 = x[10*i+9];
1908b9cda22cSBarry Smith     while (n-->0) {
1909b9cda22cSBarry Smith       y[10*(*idx)]   += alpha1*(*v);
1910b9cda22cSBarry Smith       y[10*(*idx)+1] += alpha2*(*v);
1911b9cda22cSBarry Smith       y[10*(*idx)+2] += alpha3*(*v);
1912b9cda22cSBarry Smith       y[10*(*idx)+3] += alpha4*(*v);
1913b9cda22cSBarry Smith       y[10*(*idx)+4] += alpha5*(*v);
1914b9cda22cSBarry Smith       y[10*(*idx)+5] += alpha6*(*v);
1915b9cda22cSBarry Smith       y[10*(*idx)+6] += alpha7*(*v);
1916b9cda22cSBarry Smith       y[10*(*idx)+7] += alpha8*(*v);
1917b9cda22cSBarry Smith       y[10*(*idx)+8] += alpha9*(*v);
1918b9cda22cSBarry Smith       y[10*(*idx)+9] += alpha10*(*v);
1919b9cda22cSBarry Smith       idx++; v++;
1920b9cda22cSBarry Smith     }
1921b9cda22cSBarry Smith   }
1922dc0b31edSSatish Balay   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
19233649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1924b9cda22cSBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1925b9cda22cSBarry Smith   PetscFunctionReturn(0);
1926b9cda22cSBarry Smith }
1927b9cda22cSBarry Smith 
19282b692628SMatthew Knepley 
19292f7816d4SBarry Smith /*--------------------------------------------------------------------------------------------*/
19302f7816d4SBarry Smith #undef __FUNCT__
1931dbdd7285SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_11"
1932dbdd7285SBarry Smith PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1933dbdd7285SBarry Smith {
1934dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1935dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1936d2840e09SBarry Smith   const PetscScalar *x,*v;
1937d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1938dbdd7285SBarry Smith   PetscErrorCode    ierr;
1939d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1940d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
1941dbdd7285SBarry Smith 
1942dbdd7285SBarry Smith   PetscFunctionBegin;
19433649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1944dbdd7285SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1945dbdd7285SBarry Smith   idx  = a->j;
1946dbdd7285SBarry Smith   v    = a->a;
1947dbdd7285SBarry Smith   ii   = a->i;
1948dbdd7285SBarry Smith 
1949dbdd7285SBarry Smith   for (i=0; i<m; i++) {
1950dbdd7285SBarry Smith     jrow  = ii[i];
1951dbdd7285SBarry Smith     n     = ii[i+1] - jrow;
1952dbdd7285SBarry Smith     sum1  = 0.0;
1953dbdd7285SBarry Smith     sum2  = 0.0;
1954dbdd7285SBarry Smith     sum3  = 0.0;
1955dbdd7285SBarry Smith     sum4  = 0.0;
1956dbdd7285SBarry Smith     sum5  = 0.0;
1957dbdd7285SBarry Smith     sum6  = 0.0;
1958dbdd7285SBarry Smith     sum7  = 0.0;
1959dbdd7285SBarry Smith     sum8  = 0.0;
1960dbdd7285SBarry Smith     sum9  = 0.0;
1961dbdd7285SBarry Smith     sum10 = 0.0;
1962dbdd7285SBarry Smith     sum11 = 0.0;
196326fbe8dcSKarl Rupp 
1964dbdd7285SBarry Smith     nonzerorow += (n>0);
1965dbdd7285SBarry Smith     for (j=0; j<n; j++) {
1966dbdd7285SBarry Smith       sum1  += v[jrow]*x[11*idx[jrow]];
1967dbdd7285SBarry Smith       sum2  += v[jrow]*x[11*idx[jrow]+1];
1968dbdd7285SBarry Smith       sum3  += v[jrow]*x[11*idx[jrow]+2];
1969dbdd7285SBarry Smith       sum4  += v[jrow]*x[11*idx[jrow]+3];
1970dbdd7285SBarry Smith       sum5  += v[jrow]*x[11*idx[jrow]+4];
1971dbdd7285SBarry Smith       sum6  += v[jrow]*x[11*idx[jrow]+5];
1972dbdd7285SBarry Smith       sum7  += v[jrow]*x[11*idx[jrow]+6];
1973dbdd7285SBarry Smith       sum8  += v[jrow]*x[11*idx[jrow]+7];
1974dbdd7285SBarry Smith       sum9  += v[jrow]*x[11*idx[jrow]+8];
1975dbdd7285SBarry Smith       sum10 += v[jrow]*x[11*idx[jrow]+9];
1976dbdd7285SBarry Smith       sum11 += v[jrow]*x[11*idx[jrow]+10];
1977dbdd7285SBarry Smith       jrow++;
1978dbdd7285SBarry Smith     }
1979dbdd7285SBarry Smith     y[11*i]    = sum1;
1980dbdd7285SBarry Smith     y[11*i+1]  = sum2;
1981dbdd7285SBarry Smith     y[11*i+2]  = sum3;
1982dbdd7285SBarry Smith     y[11*i+3]  = sum4;
1983dbdd7285SBarry Smith     y[11*i+4]  = sum5;
1984dbdd7285SBarry Smith     y[11*i+5]  = sum6;
1985dbdd7285SBarry Smith     y[11*i+6]  = sum7;
1986dbdd7285SBarry Smith     y[11*i+7]  = sum8;
1987dbdd7285SBarry Smith     y[11*i+8]  = sum9;
1988dbdd7285SBarry Smith     y[11*i+9]  = sum10;
1989dbdd7285SBarry Smith     y[11*i+10] = sum11;
1990dbdd7285SBarry Smith   }
1991dbdd7285SBarry Smith 
1992dbdd7285SBarry Smith   ierr = PetscLogFlops(22*a->nz - 11*nonzerorow);CHKERRQ(ierr);
19933649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1994dbdd7285SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1995dbdd7285SBarry Smith   PetscFunctionReturn(0);
1996dbdd7285SBarry Smith }
1997dbdd7285SBarry Smith 
1998dbdd7285SBarry Smith #undef __FUNCT__
1999dbdd7285SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_11"
2000dbdd7285SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2001dbdd7285SBarry Smith {
2002dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2003dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2004d2840e09SBarry Smith   const PetscScalar *x,*v;
2005d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
2006dbdd7285SBarry Smith   PetscErrorCode    ierr;
2007d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2008dbdd7285SBarry Smith   PetscInt          n,i,jrow,j;
2009dbdd7285SBarry Smith 
2010dbdd7285SBarry Smith   PetscFunctionBegin;
2011dbdd7285SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
20123649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2013dbdd7285SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2014dbdd7285SBarry Smith   idx  = a->j;
2015dbdd7285SBarry Smith   v    = a->a;
2016dbdd7285SBarry Smith   ii   = a->i;
2017dbdd7285SBarry Smith 
2018dbdd7285SBarry Smith   for (i=0; i<m; i++) {
2019dbdd7285SBarry Smith     jrow  = ii[i];
2020dbdd7285SBarry Smith     n     = ii[i+1] - jrow;
2021dbdd7285SBarry Smith     sum1  = 0.0;
2022dbdd7285SBarry Smith     sum2  = 0.0;
2023dbdd7285SBarry Smith     sum3  = 0.0;
2024dbdd7285SBarry Smith     sum4  = 0.0;
2025dbdd7285SBarry Smith     sum5  = 0.0;
2026dbdd7285SBarry Smith     sum6  = 0.0;
2027dbdd7285SBarry Smith     sum7  = 0.0;
2028dbdd7285SBarry Smith     sum8  = 0.0;
2029dbdd7285SBarry Smith     sum9  = 0.0;
2030dbdd7285SBarry Smith     sum10 = 0.0;
2031dbdd7285SBarry Smith     sum11 = 0.0;
2032dbdd7285SBarry Smith     for (j=0; j<n; j++) {
2033dbdd7285SBarry Smith       sum1  += v[jrow]*x[11*idx[jrow]];
2034dbdd7285SBarry Smith       sum2  += v[jrow]*x[11*idx[jrow]+1];
2035dbdd7285SBarry Smith       sum3  += v[jrow]*x[11*idx[jrow]+2];
2036dbdd7285SBarry Smith       sum4  += v[jrow]*x[11*idx[jrow]+3];
2037dbdd7285SBarry Smith       sum5  += v[jrow]*x[11*idx[jrow]+4];
2038dbdd7285SBarry Smith       sum6  += v[jrow]*x[11*idx[jrow]+5];
2039dbdd7285SBarry Smith       sum7  += v[jrow]*x[11*idx[jrow]+6];
2040dbdd7285SBarry Smith       sum8  += v[jrow]*x[11*idx[jrow]+7];
2041dbdd7285SBarry Smith       sum9  += v[jrow]*x[11*idx[jrow]+8];
2042dbdd7285SBarry Smith       sum10 += v[jrow]*x[11*idx[jrow]+9];
2043dbdd7285SBarry Smith       sum11 += v[jrow]*x[11*idx[jrow]+10];
2044dbdd7285SBarry Smith       jrow++;
2045dbdd7285SBarry Smith     }
2046dbdd7285SBarry Smith     y[11*i]    += sum1;
2047dbdd7285SBarry Smith     y[11*i+1]  += sum2;
2048dbdd7285SBarry Smith     y[11*i+2]  += sum3;
2049dbdd7285SBarry Smith     y[11*i+3]  += sum4;
2050dbdd7285SBarry Smith     y[11*i+4]  += sum5;
2051dbdd7285SBarry Smith     y[11*i+5]  += sum6;
2052dbdd7285SBarry Smith     y[11*i+6]  += sum7;
2053dbdd7285SBarry Smith     y[11*i+7]  += sum8;
2054dbdd7285SBarry Smith     y[11*i+8]  += sum9;
2055dbdd7285SBarry Smith     y[11*i+9]  += sum10;
2056dbdd7285SBarry Smith     y[11*i+10] += sum11;
2057dbdd7285SBarry Smith   }
2058dbdd7285SBarry Smith 
2059dbdd7285SBarry Smith   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
20603649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2061dbdd7285SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2062dbdd7285SBarry Smith   PetscFunctionReturn(0);
2063dbdd7285SBarry Smith }
2064dbdd7285SBarry Smith 
2065dbdd7285SBarry Smith #undef __FUNCT__
2066dbdd7285SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_11"
2067dbdd7285SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
2068dbdd7285SBarry Smith {
2069dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2070dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2071d2840e09SBarry Smith   const PetscScalar *x,*v;
2072d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2073dbdd7285SBarry Smith   PetscErrorCode    ierr;
2074d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2075d2840e09SBarry Smith   PetscInt          n,i;
2076dbdd7285SBarry Smith 
2077dbdd7285SBarry Smith   PetscFunctionBegin;
2078d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
20793649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2080dbdd7285SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2081dbdd7285SBarry Smith 
2082dbdd7285SBarry Smith   for (i=0; i<m; i++) {
2083dbdd7285SBarry Smith     idx     = a->j + a->i[i];
2084dbdd7285SBarry Smith     v       = a->a + a->i[i];
2085dbdd7285SBarry Smith     n       = a->i[i+1] - a->i[i];
2086dbdd7285SBarry Smith     alpha1  = x[11*i];
2087dbdd7285SBarry Smith     alpha2  = x[11*i+1];
2088dbdd7285SBarry Smith     alpha3  = x[11*i+2];
2089dbdd7285SBarry Smith     alpha4  = x[11*i+3];
2090dbdd7285SBarry Smith     alpha5  = x[11*i+4];
2091dbdd7285SBarry Smith     alpha6  = x[11*i+5];
2092dbdd7285SBarry Smith     alpha7  = x[11*i+6];
2093dbdd7285SBarry Smith     alpha8  = x[11*i+7];
2094dbdd7285SBarry Smith     alpha9  = x[11*i+8];
2095dbdd7285SBarry Smith     alpha10 = x[11*i+9];
2096dbdd7285SBarry Smith     alpha11 = x[11*i+10];
2097dbdd7285SBarry Smith     while (n-->0) {
2098dbdd7285SBarry Smith       y[11*(*idx)]    += alpha1*(*v);
2099dbdd7285SBarry Smith       y[11*(*idx)+1]  += alpha2*(*v);
2100dbdd7285SBarry Smith       y[11*(*idx)+2]  += alpha3*(*v);
2101dbdd7285SBarry Smith       y[11*(*idx)+3]  += alpha4*(*v);
2102dbdd7285SBarry Smith       y[11*(*idx)+4]  += alpha5*(*v);
2103dbdd7285SBarry Smith       y[11*(*idx)+5]  += alpha6*(*v);
2104dbdd7285SBarry Smith       y[11*(*idx)+6]  += alpha7*(*v);
2105dbdd7285SBarry Smith       y[11*(*idx)+7]  += alpha8*(*v);
2106dbdd7285SBarry Smith       y[11*(*idx)+8]  += alpha9*(*v);
2107dbdd7285SBarry Smith       y[11*(*idx)+9]  += alpha10*(*v);
2108dbdd7285SBarry Smith       y[11*(*idx)+10] += alpha11*(*v);
2109dbdd7285SBarry Smith       idx++; v++;
2110dbdd7285SBarry Smith     }
2111dbdd7285SBarry Smith   }
2112dbdd7285SBarry Smith   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
21133649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2114dbdd7285SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2115dbdd7285SBarry Smith   PetscFunctionReturn(0);
2116dbdd7285SBarry Smith }
2117dbdd7285SBarry Smith 
2118dbdd7285SBarry Smith #undef __FUNCT__
2119dbdd7285SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_11"
2120dbdd7285SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2121dbdd7285SBarry Smith {
2122dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2123dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2124d2840e09SBarry Smith   const PetscScalar *x,*v;
2125d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2126dbdd7285SBarry Smith   PetscErrorCode    ierr;
2127d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2128d2840e09SBarry Smith   PetscInt          n,i;
2129dbdd7285SBarry Smith 
2130dbdd7285SBarry Smith   PetscFunctionBegin;
2131dbdd7285SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
21323649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2133dbdd7285SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2134dbdd7285SBarry Smith   for (i=0; i<m; i++) {
2135dbdd7285SBarry Smith     idx     = a->j + a->i[i];
2136dbdd7285SBarry Smith     v       = a->a + a->i[i];
2137dbdd7285SBarry Smith     n       = a->i[i+1] - a->i[i];
2138dbdd7285SBarry Smith     alpha1  = x[11*i];
2139dbdd7285SBarry Smith     alpha2  = x[11*i+1];
2140dbdd7285SBarry Smith     alpha3  = x[11*i+2];
2141dbdd7285SBarry Smith     alpha4  = x[11*i+3];
2142dbdd7285SBarry Smith     alpha5  = x[11*i+4];
2143dbdd7285SBarry Smith     alpha6  = x[11*i+5];
2144dbdd7285SBarry Smith     alpha7  = x[11*i+6];
2145dbdd7285SBarry Smith     alpha8  = x[11*i+7];
2146dbdd7285SBarry Smith     alpha9  = x[11*i+8];
2147dbdd7285SBarry Smith     alpha10 = x[11*i+9];
2148dbdd7285SBarry Smith     alpha11 = x[11*i+10];
2149dbdd7285SBarry Smith     while (n-->0) {
2150dbdd7285SBarry Smith       y[11*(*idx)]    += alpha1*(*v);
2151dbdd7285SBarry Smith       y[11*(*idx)+1]  += alpha2*(*v);
2152dbdd7285SBarry Smith       y[11*(*idx)+2]  += alpha3*(*v);
2153dbdd7285SBarry Smith       y[11*(*idx)+3]  += alpha4*(*v);
2154dbdd7285SBarry Smith       y[11*(*idx)+4]  += alpha5*(*v);
2155dbdd7285SBarry Smith       y[11*(*idx)+5]  += alpha6*(*v);
2156dbdd7285SBarry Smith       y[11*(*idx)+6]  += alpha7*(*v);
2157dbdd7285SBarry Smith       y[11*(*idx)+7]  += alpha8*(*v);
2158dbdd7285SBarry Smith       y[11*(*idx)+8]  += alpha9*(*v);
2159dbdd7285SBarry Smith       y[11*(*idx)+9]  += alpha10*(*v);
2160dbdd7285SBarry Smith       y[11*(*idx)+10] += alpha11*(*v);
2161dbdd7285SBarry Smith       idx++; v++;
2162dbdd7285SBarry Smith     }
2163dbdd7285SBarry Smith   }
2164dbdd7285SBarry Smith   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
21653649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2166dbdd7285SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2167dbdd7285SBarry Smith   PetscFunctionReturn(0);
2168dbdd7285SBarry Smith }
2169dbdd7285SBarry Smith 
2170dbdd7285SBarry Smith 
2171dbdd7285SBarry Smith /*--------------------------------------------------------------------------------------------*/
2172dbdd7285SBarry Smith #undef __FUNCT__
21732f7816d4SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_16"
2174dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
21752f7816d4SBarry Smith {
21762f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
21772f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2178d2840e09SBarry Smith   const PetscScalar *x,*v;
2179d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
21802f7816d4SBarry Smith   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2181dfbe8321SBarry Smith   PetscErrorCode    ierr;
2182d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2183d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
21842f7816d4SBarry Smith 
21852f7816d4SBarry Smith   PetscFunctionBegin;
21863649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
21871ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
21882f7816d4SBarry Smith   idx  = a->j;
21892f7816d4SBarry Smith   v    = a->a;
21902f7816d4SBarry Smith   ii   = a->i;
21912f7816d4SBarry Smith 
21922f7816d4SBarry Smith   for (i=0; i<m; i++) {
21932f7816d4SBarry Smith     jrow  = ii[i];
21942f7816d4SBarry Smith     n     = ii[i+1] - jrow;
21952f7816d4SBarry Smith     sum1  = 0.0;
21962f7816d4SBarry Smith     sum2  = 0.0;
21972f7816d4SBarry Smith     sum3  = 0.0;
21982f7816d4SBarry Smith     sum4  = 0.0;
21992f7816d4SBarry Smith     sum5  = 0.0;
22002f7816d4SBarry Smith     sum6  = 0.0;
22012f7816d4SBarry Smith     sum7  = 0.0;
22022f7816d4SBarry Smith     sum8  = 0.0;
22032f7816d4SBarry Smith     sum9  = 0.0;
22042f7816d4SBarry Smith     sum10 = 0.0;
22052f7816d4SBarry Smith     sum11 = 0.0;
22062f7816d4SBarry Smith     sum12 = 0.0;
22072f7816d4SBarry Smith     sum13 = 0.0;
22082f7816d4SBarry Smith     sum14 = 0.0;
22092f7816d4SBarry Smith     sum15 = 0.0;
22102f7816d4SBarry Smith     sum16 = 0.0;
221126fbe8dcSKarl Rupp 
2212b3c51c66SMatthew Knepley     nonzerorow += (n>0);
22132f7816d4SBarry Smith     for (j=0; j<n; j++) {
22142f7816d4SBarry Smith       sum1  += v[jrow]*x[16*idx[jrow]];
22152f7816d4SBarry Smith       sum2  += v[jrow]*x[16*idx[jrow]+1];
22162f7816d4SBarry Smith       sum3  += v[jrow]*x[16*idx[jrow]+2];
22172f7816d4SBarry Smith       sum4  += v[jrow]*x[16*idx[jrow]+3];
22182f7816d4SBarry Smith       sum5  += v[jrow]*x[16*idx[jrow]+4];
22192f7816d4SBarry Smith       sum6  += v[jrow]*x[16*idx[jrow]+5];
22202f7816d4SBarry Smith       sum7  += v[jrow]*x[16*idx[jrow]+6];
22212f7816d4SBarry Smith       sum8  += v[jrow]*x[16*idx[jrow]+7];
22222f7816d4SBarry Smith       sum9  += v[jrow]*x[16*idx[jrow]+8];
22232f7816d4SBarry Smith       sum10 += v[jrow]*x[16*idx[jrow]+9];
22242f7816d4SBarry Smith       sum11 += v[jrow]*x[16*idx[jrow]+10];
22252f7816d4SBarry Smith       sum12 += v[jrow]*x[16*idx[jrow]+11];
22262f7816d4SBarry Smith       sum13 += v[jrow]*x[16*idx[jrow]+12];
22272f7816d4SBarry Smith       sum14 += v[jrow]*x[16*idx[jrow]+13];
22282f7816d4SBarry Smith       sum15 += v[jrow]*x[16*idx[jrow]+14];
22292f7816d4SBarry Smith       sum16 += v[jrow]*x[16*idx[jrow]+15];
22302f7816d4SBarry Smith       jrow++;
22312f7816d4SBarry Smith     }
22322f7816d4SBarry Smith     y[16*i]    = sum1;
22332f7816d4SBarry Smith     y[16*i+1]  = sum2;
22342f7816d4SBarry Smith     y[16*i+2]  = sum3;
22352f7816d4SBarry Smith     y[16*i+3]  = sum4;
22362f7816d4SBarry Smith     y[16*i+4]  = sum5;
22372f7816d4SBarry Smith     y[16*i+5]  = sum6;
22382f7816d4SBarry Smith     y[16*i+6]  = sum7;
22392f7816d4SBarry Smith     y[16*i+7]  = sum8;
22402f7816d4SBarry Smith     y[16*i+8]  = sum9;
22412f7816d4SBarry Smith     y[16*i+9]  = sum10;
22422f7816d4SBarry Smith     y[16*i+10] = sum11;
22432f7816d4SBarry Smith     y[16*i+11] = sum12;
22442f7816d4SBarry Smith     y[16*i+12] = sum13;
22452f7816d4SBarry Smith     y[16*i+13] = sum14;
22462f7816d4SBarry Smith     y[16*i+14] = sum15;
22472f7816d4SBarry Smith     y[16*i+15] = sum16;
22482f7816d4SBarry Smith   }
22492f7816d4SBarry Smith 
2250dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr);
22513649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
22521ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
22532f7816d4SBarry Smith   PetscFunctionReturn(0);
22542f7816d4SBarry Smith }
22552f7816d4SBarry Smith 
22562f7816d4SBarry Smith #undef __FUNCT__
22572f7816d4SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16"
2258dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
22592f7816d4SBarry Smith {
22602f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
22612f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2262d2840e09SBarry Smith   const PetscScalar *x,*v;
2263d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
22642f7816d4SBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2265dfbe8321SBarry Smith   PetscErrorCode    ierr;
2266d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2267d2840e09SBarry Smith   PetscInt          n,i;
22682f7816d4SBarry Smith 
22692f7816d4SBarry Smith   PetscFunctionBegin;
2270d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
22713649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
22721ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2273bfec09a0SHong Zhang 
22742f7816d4SBarry Smith   for (i=0; i<m; i++) {
2275bfec09a0SHong Zhang     idx     = a->j + a->i[i];
2276bfec09a0SHong Zhang     v       = a->a + a->i[i];
22772f7816d4SBarry Smith     n       = a->i[i+1] - a->i[i];
22782f7816d4SBarry Smith     alpha1  = x[16*i];
22792f7816d4SBarry Smith     alpha2  = x[16*i+1];
22802f7816d4SBarry Smith     alpha3  = x[16*i+2];
22812f7816d4SBarry Smith     alpha4  = x[16*i+3];
22822f7816d4SBarry Smith     alpha5  = x[16*i+4];
22832f7816d4SBarry Smith     alpha6  = x[16*i+5];
22842f7816d4SBarry Smith     alpha7  = x[16*i+6];
22852f7816d4SBarry Smith     alpha8  = x[16*i+7];
22862f7816d4SBarry Smith     alpha9  = x[16*i+8];
22872f7816d4SBarry Smith     alpha10 = x[16*i+9];
22882f7816d4SBarry Smith     alpha11 = x[16*i+10];
22892f7816d4SBarry Smith     alpha12 = x[16*i+11];
22902f7816d4SBarry Smith     alpha13 = x[16*i+12];
22912f7816d4SBarry Smith     alpha14 = x[16*i+13];
22922f7816d4SBarry Smith     alpha15 = x[16*i+14];
22932f7816d4SBarry Smith     alpha16 = x[16*i+15];
22942f7816d4SBarry Smith     while (n-->0) {
22952f7816d4SBarry Smith       y[16*(*idx)]    += alpha1*(*v);
22962f7816d4SBarry Smith       y[16*(*idx)+1]  += alpha2*(*v);
22972f7816d4SBarry Smith       y[16*(*idx)+2]  += alpha3*(*v);
22982f7816d4SBarry Smith       y[16*(*idx)+3]  += alpha4*(*v);
22992f7816d4SBarry Smith       y[16*(*idx)+4]  += alpha5*(*v);
23002f7816d4SBarry Smith       y[16*(*idx)+5]  += alpha6*(*v);
23012f7816d4SBarry Smith       y[16*(*idx)+6]  += alpha7*(*v);
23022f7816d4SBarry Smith       y[16*(*idx)+7]  += alpha8*(*v);
23032f7816d4SBarry Smith       y[16*(*idx)+8]  += alpha9*(*v);
23042f7816d4SBarry Smith       y[16*(*idx)+9]  += alpha10*(*v);
23052f7816d4SBarry Smith       y[16*(*idx)+10] += alpha11*(*v);
23062f7816d4SBarry Smith       y[16*(*idx)+11] += alpha12*(*v);
23072f7816d4SBarry Smith       y[16*(*idx)+12] += alpha13*(*v);
23082f7816d4SBarry Smith       y[16*(*idx)+13] += alpha14*(*v);
23092f7816d4SBarry Smith       y[16*(*idx)+14] += alpha15*(*v);
23102f7816d4SBarry Smith       y[16*(*idx)+15] += alpha16*(*v);
23112f7816d4SBarry Smith       idx++; v++;
23122f7816d4SBarry Smith     }
23132f7816d4SBarry Smith   }
2314dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
23153649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
23161ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
23172f7816d4SBarry Smith   PetscFunctionReturn(0);
23182f7816d4SBarry Smith }
23192f7816d4SBarry Smith 
23202f7816d4SBarry Smith #undef __FUNCT__
23212f7816d4SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_16"
2322dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
23232f7816d4SBarry Smith {
23242f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
23252f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2326d2840e09SBarry Smith   const PetscScalar *x,*v;
2327d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
23282f7816d4SBarry Smith   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2329dfbe8321SBarry Smith   PetscErrorCode    ierr;
2330d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2331b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
23322f7816d4SBarry Smith 
23332f7816d4SBarry Smith   PetscFunctionBegin;
23342f7816d4SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
23353649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
23361ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
23372f7816d4SBarry Smith   idx  = a->j;
23382f7816d4SBarry Smith   v    = a->a;
23392f7816d4SBarry Smith   ii   = a->i;
23402f7816d4SBarry Smith 
23412f7816d4SBarry Smith   for (i=0; i<m; i++) {
23422f7816d4SBarry Smith     jrow  = ii[i];
23432f7816d4SBarry Smith     n     = ii[i+1] - jrow;
23442f7816d4SBarry Smith     sum1  = 0.0;
23452f7816d4SBarry Smith     sum2  = 0.0;
23462f7816d4SBarry Smith     sum3  = 0.0;
23472f7816d4SBarry Smith     sum4  = 0.0;
23482f7816d4SBarry Smith     sum5  = 0.0;
23492f7816d4SBarry Smith     sum6  = 0.0;
23502f7816d4SBarry Smith     sum7  = 0.0;
23512f7816d4SBarry Smith     sum8  = 0.0;
23522f7816d4SBarry Smith     sum9  = 0.0;
23532f7816d4SBarry Smith     sum10 = 0.0;
23542f7816d4SBarry Smith     sum11 = 0.0;
23552f7816d4SBarry Smith     sum12 = 0.0;
23562f7816d4SBarry Smith     sum13 = 0.0;
23572f7816d4SBarry Smith     sum14 = 0.0;
23582f7816d4SBarry Smith     sum15 = 0.0;
23592f7816d4SBarry Smith     sum16 = 0.0;
23602f7816d4SBarry Smith     for (j=0; j<n; j++) {
23612f7816d4SBarry Smith       sum1  += v[jrow]*x[16*idx[jrow]];
23622f7816d4SBarry Smith       sum2  += v[jrow]*x[16*idx[jrow]+1];
23632f7816d4SBarry Smith       sum3  += v[jrow]*x[16*idx[jrow]+2];
23642f7816d4SBarry Smith       sum4  += v[jrow]*x[16*idx[jrow]+3];
23652f7816d4SBarry Smith       sum5  += v[jrow]*x[16*idx[jrow]+4];
23662f7816d4SBarry Smith       sum6  += v[jrow]*x[16*idx[jrow]+5];
23672f7816d4SBarry Smith       sum7  += v[jrow]*x[16*idx[jrow]+6];
23682f7816d4SBarry Smith       sum8  += v[jrow]*x[16*idx[jrow]+7];
23692f7816d4SBarry Smith       sum9  += v[jrow]*x[16*idx[jrow]+8];
23702f7816d4SBarry Smith       sum10 += v[jrow]*x[16*idx[jrow]+9];
23712f7816d4SBarry Smith       sum11 += v[jrow]*x[16*idx[jrow]+10];
23722f7816d4SBarry Smith       sum12 += v[jrow]*x[16*idx[jrow]+11];
23732f7816d4SBarry Smith       sum13 += v[jrow]*x[16*idx[jrow]+12];
23742f7816d4SBarry Smith       sum14 += v[jrow]*x[16*idx[jrow]+13];
23752f7816d4SBarry Smith       sum15 += v[jrow]*x[16*idx[jrow]+14];
23762f7816d4SBarry Smith       sum16 += v[jrow]*x[16*idx[jrow]+15];
23772f7816d4SBarry Smith       jrow++;
23782f7816d4SBarry Smith     }
23792f7816d4SBarry Smith     y[16*i]    += sum1;
23802f7816d4SBarry Smith     y[16*i+1]  += sum2;
23812f7816d4SBarry Smith     y[16*i+2]  += sum3;
23822f7816d4SBarry Smith     y[16*i+3]  += sum4;
23832f7816d4SBarry Smith     y[16*i+4]  += sum5;
23842f7816d4SBarry Smith     y[16*i+5]  += sum6;
23852f7816d4SBarry Smith     y[16*i+6]  += sum7;
23862f7816d4SBarry Smith     y[16*i+7]  += sum8;
23872f7816d4SBarry Smith     y[16*i+8]  += sum9;
23882f7816d4SBarry Smith     y[16*i+9]  += sum10;
23892f7816d4SBarry Smith     y[16*i+10] += sum11;
23902f7816d4SBarry Smith     y[16*i+11] += sum12;
23912f7816d4SBarry Smith     y[16*i+12] += sum13;
23922f7816d4SBarry Smith     y[16*i+13] += sum14;
23932f7816d4SBarry Smith     y[16*i+14] += sum15;
23942f7816d4SBarry Smith     y[16*i+15] += sum16;
23952f7816d4SBarry Smith   }
23962f7816d4SBarry Smith 
2397dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
23983649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
23991ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
24002f7816d4SBarry Smith   PetscFunctionReturn(0);
24012f7816d4SBarry Smith }
24022f7816d4SBarry Smith 
24032f7816d4SBarry Smith #undef __FUNCT__
24042f7816d4SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16"
2405dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
24062f7816d4SBarry Smith {
24072f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
24082f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2409d2840e09SBarry Smith   const PetscScalar *x,*v;
2410d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
24112f7816d4SBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2412dfbe8321SBarry Smith   PetscErrorCode    ierr;
2413d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2414d2840e09SBarry Smith   PetscInt          n,i;
24152f7816d4SBarry Smith 
24162f7816d4SBarry Smith   PetscFunctionBegin;
24172f7816d4SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
24183649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
24191ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
24202f7816d4SBarry Smith   for (i=0; i<m; i++) {
2421bfec09a0SHong Zhang     idx     = a->j + a->i[i];
2422bfec09a0SHong Zhang     v       = a->a + a->i[i];
24232f7816d4SBarry Smith     n       = a->i[i+1] - a->i[i];
24242f7816d4SBarry Smith     alpha1  = x[16*i];
24252f7816d4SBarry Smith     alpha2  = x[16*i+1];
24262f7816d4SBarry Smith     alpha3  = x[16*i+2];
24272f7816d4SBarry Smith     alpha4  = x[16*i+3];
24282f7816d4SBarry Smith     alpha5  = x[16*i+4];
24292f7816d4SBarry Smith     alpha6  = x[16*i+5];
24302f7816d4SBarry Smith     alpha7  = x[16*i+6];
24312f7816d4SBarry Smith     alpha8  = x[16*i+7];
24322f7816d4SBarry Smith     alpha9  = x[16*i+8];
24332f7816d4SBarry Smith     alpha10 = x[16*i+9];
24342f7816d4SBarry Smith     alpha11 = x[16*i+10];
24352f7816d4SBarry Smith     alpha12 = x[16*i+11];
24362f7816d4SBarry Smith     alpha13 = x[16*i+12];
24372f7816d4SBarry Smith     alpha14 = x[16*i+13];
24382f7816d4SBarry Smith     alpha15 = x[16*i+14];
24392f7816d4SBarry Smith     alpha16 = x[16*i+15];
24402f7816d4SBarry Smith     while (n-->0) {
24412f7816d4SBarry Smith       y[16*(*idx)]    += alpha1*(*v);
24422f7816d4SBarry Smith       y[16*(*idx)+1]  += alpha2*(*v);
24432f7816d4SBarry Smith       y[16*(*idx)+2]  += alpha3*(*v);
24442f7816d4SBarry Smith       y[16*(*idx)+3]  += alpha4*(*v);
24452f7816d4SBarry Smith       y[16*(*idx)+4]  += alpha5*(*v);
24462f7816d4SBarry Smith       y[16*(*idx)+5]  += alpha6*(*v);
24472f7816d4SBarry Smith       y[16*(*idx)+6]  += alpha7*(*v);
24482f7816d4SBarry Smith       y[16*(*idx)+7]  += alpha8*(*v);
24492f7816d4SBarry Smith       y[16*(*idx)+8]  += alpha9*(*v);
24502f7816d4SBarry Smith       y[16*(*idx)+9]  += alpha10*(*v);
24512f7816d4SBarry Smith       y[16*(*idx)+10] += alpha11*(*v);
24522f7816d4SBarry Smith       y[16*(*idx)+11] += alpha12*(*v);
24532f7816d4SBarry Smith       y[16*(*idx)+12] += alpha13*(*v);
24542f7816d4SBarry Smith       y[16*(*idx)+13] += alpha14*(*v);
24552f7816d4SBarry Smith       y[16*(*idx)+14] += alpha15*(*v);
24562f7816d4SBarry Smith       y[16*(*idx)+15] += alpha16*(*v);
24572f7816d4SBarry Smith       idx++; v++;
24582f7816d4SBarry Smith     }
24592f7816d4SBarry Smith   }
2460dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
24613649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
24621ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
246366ed3db0SBarry Smith   PetscFunctionReturn(0);
246466ed3db0SBarry Smith }
246566ed3db0SBarry Smith 
2466ed1c418cSBarry Smith /*--------------------------------------------------------------------------------------------*/
2467ed1c418cSBarry Smith #undef __FUNCT__
2468ed1c418cSBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_18"
2469ed1c418cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2470ed1c418cSBarry Smith {
2471ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2472ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2473d2840e09SBarry Smith   const PetscScalar *x,*v;
2474d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2475ed1c418cSBarry Smith   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2476ed1c418cSBarry Smith   PetscErrorCode    ierr;
2477d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2478d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
2479ed1c418cSBarry Smith 
2480ed1c418cSBarry Smith   PetscFunctionBegin;
24813649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2482ed1c418cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2483ed1c418cSBarry Smith   idx  = a->j;
2484ed1c418cSBarry Smith   v    = a->a;
2485ed1c418cSBarry Smith   ii   = a->i;
2486ed1c418cSBarry Smith 
2487ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2488ed1c418cSBarry Smith     jrow  = ii[i];
2489ed1c418cSBarry Smith     n     = ii[i+1] - jrow;
2490ed1c418cSBarry Smith     sum1  = 0.0;
2491ed1c418cSBarry Smith     sum2  = 0.0;
2492ed1c418cSBarry Smith     sum3  = 0.0;
2493ed1c418cSBarry Smith     sum4  = 0.0;
2494ed1c418cSBarry Smith     sum5  = 0.0;
2495ed1c418cSBarry Smith     sum6  = 0.0;
2496ed1c418cSBarry Smith     sum7  = 0.0;
2497ed1c418cSBarry Smith     sum8  = 0.0;
2498ed1c418cSBarry Smith     sum9  = 0.0;
2499ed1c418cSBarry Smith     sum10 = 0.0;
2500ed1c418cSBarry Smith     sum11 = 0.0;
2501ed1c418cSBarry Smith     sum12 = 0.0;
2502ed1c418cSBarry Smith     sum13 = 0.0;
2503ed1c418cSBarry Smith     sum14 = 0.0;
2504ed1c418cSBarry Smith     sum15 = 0.0;
2505ed1c418cSBarry Smith     sum16 = 0.0;
2506ed1c418cSBarry Smith     sum17 = 0.0;
2507ed1c418cSBarry Smith     sum18 = 0.0;
250826fbe8dcSKarl Rupp 
2509ed1c418cSBarry Smith     nonzerorow += (n>0);
2510ed1c418cSBarry Smith     for (j=0; j<n; j++) {
2511ed1c418cSBarry Smith       sum1  += v[jrow]*x[18*idx[jrow]];
2512ed1c418cSBarry Smith       sum2  += v[jrow]*x[18*idx[jrow]+1];
2513ed1c418cSBarry Smith       sum3  += v[jrow]*x[18*idx[jrow]+2];
2514ed1c418cSBarry Smith       sum4  += v[jrow]*x[18*idx[jrow]+3];
2515ed1c418cSBarry Smith       sum5  += v[jrow]*x[18*idx[jrow]+4];
2516ed1c418cSBarry Smith       sum6  += v[jrow]*x[18*idx[jrow]+5];
2517ed1c418cSBarry Smith       sum7  += v[jrow]*x[18*idx[jrow]+6];
2518ed1c418cSBarry Smith       sum8  += v[jrow]*x[18*idx[jrow]+7];
2519ed1c418cSBarry Smith       sum9  += v[jrow]*x[18*idx[jrow]+8];
2520ed1c418cSBarry Smith       sum10 += v[jrow]*x[18*idx[jrow]+9];
2521ed1c418cSBarry Smith       sum11 += v[jrow]*x[18*idx[jrow]+10];
2522ed1c418cSBarry Smith       sum12 += v[jrow]*x[18*idx[jrow]+11];
2523ed1c418cSBarry Smith       sum13 += v[jrow]*x[18*idx[jrow]+12];
2524ed1c418cSBarry Smith       sum14 += v[jrow]*x[18*idx[jrow]+13];
2525ed1c418cSBarry Smith       sum15 += v[jrow]*x[18*idx[jrow]+14];
2526ed1c418cSBarry Smith       sum16 += v[jrow]*x[18*idx[jrow]+15];
2527ed1c418cSBarry Smith       sum17 += v[jrow]*x[18*idx[jrow]+16];
2528ed1c418cSBarry Smith       sum18 += v[jrow]*x[18*idx[jrow]+17];
2529ed1c418cSBarry Smith       jrow++;
2530ed1c418cSBarry Smith     }
2531ed1c418cSBarry Smith     y[18*i]    = sum1;
2532ed1c418cSBarry Smith     y[18*i+1]  = sum2;
2533ed1c418cSBarry Smith     y[18*i+2]  = sum3;
2534ed1c418cSBarry Smith     y[18*i+3]  = sum4;
2535ed1c418cSBarry Smith     y[18*i+4]  = sum5;
2536ed1c418cSBarry Smith     y[18*i+5]  = sum6;
2537ed1c418cSBarry Smith     y[18*i+6]  = sum7;
2538ed1c418cSBarry Smith     y[18*i+7]  = sum8;
2539ed1c418cSBarry Smith     y[18*i+8]  = sum9;
2540ed1c418cSBarry Smith     y[18*i+9]  = sum10;
2541ed1c418cSBarry Smith     y[18*i+10] = sum11;
2542ed1c418cSBarry Smith     y[18*i+11] = sum12;
2543ed1c418cSBarry Smith     y[18*i+12] = sum13;
2544ed1c418cSBarry Smith     y[18*i+13] = sum14;
2545ed1c418cSBarry Smith     y[18*i+14] = sum15;
2546ed1c418cSBarry Smith     y[18*i+15] = sum16;
2547ed1c418cSBarry Smith     y[18*i+16] = sum17;
2548ed1c418cSBarry Smith     y[18*i+17] = sum18;
2549ed1c418cSBarry Smith   }
2550ed1c418cSBarry Smith 
2551dc0b31edSSatish Balay   ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr);
25523649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2553ed1c418cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2554ed1c418cSBarry Smith   PetscFunctionReturn(0);
2555ed1c418cSBarry Smith }
2556ed1c418cSBarry Smith 
2557ed1c418cSBarry Smith #undef __FUNCT__
2558ed1c418cSBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_18"
2559ed1c418cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2560ed1c418cSBarry Smith {
2561ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2562ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2563d2840e09SBarry Smith   const PetscScalar *x,*v;
2564d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2565ed1c418cSBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2566ed1c418cSBarry Smith   PetscErrorCode    ierr;
2567d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2568d2840e09SBarry Smith   PetscInt          n,i;
2569ed1c418cSBarry Smith 
2570ed1c418cSBarry Smith   PetscFunctionBegin;
2571d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
25723649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2573ed1c418cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2574ed1c418cSBarry Smith 
2575ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2576ed1c418cSBarry Smith     idx     = a->j + a->i[i];
2577ed1c418cSBarry Smith     v       = a->a + a->i[i];
2578ed1c418cSBarry Smith     n       = a->i[i+1] - a->i[i];
2579ed1c418cSBarry Smith     alpha1  = x[18*i];
2580ed1c418cSBarry Smith     alpha2  = x[18*i+1];
2581ed1c418cSBarry Smith     alpha3  = x[18*i+2];
2582ed1c418cSBarry Smith     alpha4  = x[18*i+3];
2583ed1c418cSBarry Smith     alpha5  = x[18*i+4];
2584ed1c418cSBarry Smith     alpha6  = x[18*i+5];
2585ed1c418cSBarry Smith     alpha7  = x[18*i+6];
2586ed1c418cSBarry Smith     alpha8  = x[18*i+7];
2587ed1c418cSBarry Smith     alpha9  = x[18*i+8];
2588ed1c418cSBarry Smith     alpha10 = x[18*i+9];
2589ed1c418cSBarry Smith     alpha11 = x[18*i+10];
2590ed1c418cSBarry Smith     alpha12 = x[18*i+11];
2591ed1c418cSBarry Smith     alpha13 = x[18*i+12];
2592ed1c418cSBarry Smith     alpha14 = x[18*i+13];
2593ed1c418cSBarry Smith     alpha15 = x[18*i+14];
2594ed1c418cSBarry Smith     alpha16 = x[18*i+15];
2595ed1c418cSBarry Smith     alpha17 = x[18*i+16];
2596ed1c418cSBarry Smith     alpha18 = x[18*i+17];
2597ed1c418cSBarry Smith     while (n-->0) {
2598ed1c418cSBarry Smith       y[18*(*idx)]    += alpha1*(*v);
2599ed1c418cSBarry Smith       y[18*(*idx)+1]  += alpha2*(*v);
2600ed1c418cSBarry Smith       y[18*(*idx)+2]  += alpha3*(*v);
2601ed1c418cSBarry Smith       y[18*(*idx)+3]  += alpha4*(*v);
2602ed1c418cSBarry Smith       y[18*(*idx)+4]  += alpha5*(*v);
2603ed1c418cSBarry Smith       y[18*(*idx)+5]  += alpha6*(*v);
2604ed1c418cSBarry Smith       y[18*(*idx)+6]  += alpha7*(*v);
2605ed1c418cSBarry Smith       y[18*(*idx)+7]  += alpha8*(*v);
2606ed1c418cSBarry Smith       y[18*(*idx)+8]  += alpha9*(*v);
2607ed1c418cSBarry Smith       y[18*(*idx)+9]  += alpha10*(*v);
2608ed1c418cSBarry Smith       y[18*(*idx)+10] += alpha11*(*v);
2609ed1c418cSBarry Smith       y[18*(*idx)+11] += alpha12*(*v);
2610ed1c418cSBarry Smith       y[18*(*idx)+12] += alpha13*(*v);
2611ed1c418cSBarry Smith       y[18*(*idx)+13] += alpha14*(*v);
2612ed1c418cSBarry Smith       y[18*(*idx)+14] += alpha15*(*v);
2613ed1c418cSBarry Smith       y[18*(*idx)+15] += alpha16*(*v);
2614ed1c418cSBarry Smith       y[18*(*idx)+16] += alpha17*(*v);
2615ed1c418cSBarry Smith       y[18*(*idx)+17] += alpha18*(*v);
2616ed1c418cSBarry Smith       idx++; v++;
2617ed1c418cSBarry Smith     }
2618ed1c418cSBarry Smith   }
2619dc0b31edSSatish Balay   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
26203649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2621ed1c418cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2622ed1c418cSBarry Smith   PetscFunctionReturn(0);
2623ed1c418cSBarry Smith }
2624ed1c418cSBarry Smith 
2625ed1c418cSBarry Smith #undef __FUNCT__
2626ed1c418cSBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_18"
2627ed1c418cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2628ed1c418cSBarry Smith {
2629ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2630ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2631d2840e09SBarry Smith   const PetscScalar *x,*v;
2632d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2633ed1c418cSBarry Smith   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2634ed1c418cSBarry Smith   PetscErrorCode    ierr;
2635d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2636ed1c418cSBarry Smith   PetscInt          n,i,jrow,j;
2637ed1c418cSBarry Smith 
2638ed1c418cSBarry Smith   PetscFunctionBegin;
2639ed1c418cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
26403649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2641ed1c418cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2642ed1c418cSBarry Smith   idx  = a->j;
2643ed1c418cSBarry Smith   v    = a->a;
2644ed1c418cSBarry Smith   ii   = a->i;
2645ed1c418cSBarry Smith 
2646ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2647ed1c418cSBarry Smith     jrow  = ii[i];
2648ed1c418cSBarry Smith     n     = ii[i+1] - jrow;
2649ed1c418cSBarry Smith     sum1  = 0.0;
2650ed1c418cSBarry Smith     sum2  = 0.0;
2651ed1c418cSBarry Smith     sum3  = 0.0;
2652ed1c418cSBarry Smith     sum4  = 0.0;
2653ed1c418cSBarry Smith     sum5  = 0.0;
2654ed1c418cSBarry Smith     sum6  = 0.0;
2655ed1c418cSBarry Smith     sum7  = 0.0;
2656ed1c418cSBarry Smith     sum8  = 0.0;
2657ed1c418cSBarry Smith     sum9  = 0.0;
2658ed1c418cSBarry Smith     sum10 = 0.0;
2659ed1c418cSBarry Smith     sum11 = 0.0;
2660ed1c418cSBarry Smith     sum12 = 0.0;
2661ed1c418cSBarry Smith     sum13 = 0.0;
2662ed1c418cSBarry Smith     sum14 = 0.0;
2663ed1c418cSBarry Smith     sum15 = 0.0;
2664ed1c418cSBarry Smith     sum16 = 0.0;
2665ed1c418cSBarry Smith     sum17 = 0.0;
2666ed1c418cSBarry Smith     sum18 = 0.0;
2667ed1c418cSBarry Smith     for (j=0; j<n; j++) {
2668ed1c418cSBarry Smith       sum1  += v[jrow]*x[18*idx[jrow]];
2669ed1c418cSBarry Smith       sum2  += v[jrow]*x[18*idx[jrow]+1];
2670ed1c418cSBarry Smith       sum3  += v[jrow]*x[18*idx[jrow]+2];
2671ed1c418cSBarry Smith       sum4  += v[jrow]*x[18*idx[jrow]+3];
2672ed1c418cSBarry Smith       sum5  += v[jrow]*x[18*idx[jrow]+4];
2673ed1c418cSBarry Smith       sum6  += v[jrow]*x[18*idx[jrow]+5];
2674ed1c418cSBarry Smith       sum7  += v[jrow]*x[18*idx[jrow]+6];
2675ed1c418cSBarry Smith       sum8  += v[jrow]*x[18*idx[jrow]+7];
2676ed1c418cSBarry Smith       sum9  += v[jrow]*x[18*idx[jrow]+8];
2677ed1c418cSBarry Smith       sum10 += v[jrow]*x[18*idx[jrow]+9];
2678ed1c418cSBarry Smith       sum11 += v[jrow]*x[18*idx[jrow]+10];
2679ed1c418cSBarry Smith       sum12 += v[jrow]*x[18*idx[jrow]+11];
2680ed1c418cSBarry Smith       sum13 += v[jrow]*x[18*idx[jrow]+12];
2681ed1c418cSBarry Smith       sum14 += v[jrow]*x[18*idx[jrow]+13];
2682ed1c418cSBarry Smith       sum15 += v[jrow]*x[18*idx[jrow]+14];
2683ed1c418cSBarry Smith       sum16 += v[jrow]*x[18*idx[jrow]+15];
2684ed1c418cSBarry Smith       sum17 += v[jrow]*x[18*idx[jrow]+16];
2685ed1c418cSBarry Smith       sum18 += v[jrow]*x[18*idx[jrow]+17];
2686ed1c418cSBarry Smith       jrow++;
2687ed1c418cSBarry Smith     }
2688ed1c418cSBarry Smith     y[18*i]    += sum1;
2689ed1c418cSBarry Smith     y[18*i+1]  += sum2;
2690ed1c418cSBarry Smith     y[18*i+2]  += sum3;
2691ed1c418cSBarry Smith     y[18*i+3]  += sum4;
2692ed1c418cSBarry Smith     y[18*i+4]  += sum5;
2693ed1c418cSBarry Smith     y[18*i+5]  += sum6;
2694ed1c418cSBarry Smith     y[18*i+6]  += sum7;
2695ed1c418cSBarry Smith     y[18*i+7]  += sum8;
2696ed1c418cSBarry Smith     y[18*i+8]  += sum9;
2697ed1c418cSBarry Smith     y[18*i+9]  += sum10;
2698ed1c418cSBarry Smith     y[18*i+10] += sum11;
2699ed1c418cSBarry Smith     y[18*i+11] += sum12;
2700ed1c418cSBarry Smith     y[18*i+12] += sum13;
2701ed1c418cSBarry Smith     y[18*i+13] += sum14;
2702ed1c418cSBarry Smith     y[18*i+14] += sum15;
2703ed1c418cSBarry Smith     y[18*i+15] += sum16;
2704ed1c418cSBarry Smith     y[18*i+16] += sum17;
2705ed1c418cSBarry Smith     y[18*i+17] += sum18;
2706ed1c418cSBarry Smith   }
2707ed1c418cSBarry Smith 
2708dc0b31edSSatish Balay   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
27093649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2710ed1c418cSBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2711ed1c418cSBarry Smith   PetscFunctionReturn(0);
2712ed1c418cSBarry Smith }
2713ed1c418cSBarry Smith 
2714ed1c418cSBarry Smith #undef __FUNCT__
2715ed1c418cSBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_18"
2716ed1c418cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2717ed1c418cSBarry Smith {
2718ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2719ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2720d2840e09SBarry Smith   const PetscScalar *x,*v;
2721d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2722ed1c418cSBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2723ed1c418cSBarry Smith   PetscErrorCode    ierr;
2724d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2725d2840e09SBarry Smith   PetscInt          n,i;
2726ed1c418cSBarry Smith 
2727ed1c418cSBarry Smith   PetscFunctionBegin;
2728ed1c418cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
27293649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2730ed1c418cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2731ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2732ed1c418cSBarry Smith     idx     = a->j + a->i[i];
2733ed1c418cSBarry Smith     v       = a->a + a->i[i];
2734ed1c418cSBarry Smith     n       = a->i[i+1] - a->i[i];
2735ed1c418cSBarry Smith     alpha1  = x[18*i];
2736ed1c418cSBarry Smith     alpha2  = x[18*i+1];
2737ed1c418cSBarry Smith     alpha3  = x[18*i+2];
2738ed1c418cSBarry Smith     alpha4  = x[18*i+3];
2739ed1c418cSBarry Smith     alpha5  = x[18*i+4];
2740ed1c418cSBarry Smith     alpha6  = x[18*i+5];
2741ed1c418cSBarry Smith     alpha7  = x[18*i+6];
2742ed1c418cSBarry Smith     alpha8  = x[18*i+7];
2743ed1c418cSBarry Smith     alpha9  = x[18*i+8];
2744ed1c418cSBarry Smith     alpha10 = x[18*i+9];
2745ed1c418cSBarry Smith     alpha11 = x[18*i+10];
2746ed1c418cSBarry Smith     alpha12 = x[18*i+11];
2747ed1c418cSBarry Smith     alpha13 = x[18*i+12];
2748ed1c418cSBarry Smith     alpha14 = x[18*i+13];
2749ed1c418cSBarry Smith     alpha15 = x[18*i+14];
2750ed1c418cSBarry Smith     alpha16 = x[18*i+15];
2751ed1c418cSBarry Smith     alpha17 = x[18*i+16];
2752ed1c418cSBarry Smith     alpha18 = x[18*i+17];
2753ed1c418cSBarry Smith     while (n-->0) {
2754ed1c418cSBarry Smith       y[18*(*idx)]    += alpha1*(*v);
2755ed1c418cSBarry Smith       y[18*(*idx)+1]  += alpha2*(*v);
2756ed1c418cSBarry Smith       y[18*(*idx)+2]  += alpha3*(*v);
2757ed1c418cSBarry Smith       y[18*(*idx)+3]  += alpha4*(*v);
2758ed1c418cSBarry Smith       y[18*(*idx)+4]  += alpha5*(*v);
2759ed1c418cSBarry Smith       y[18*(*idx)+5]  += alpha6*(*v);
2760ed1c418cSBarry Smith       y[18*(*idx)+6]  += alpha7*(*v);
2761ed1c418cSBarry Smith       y[18*(*idx)+7]  += alpha8*(*v);
2762ed1c418cSBarry Smith       y[18*(*idx)+8]  += alpha9*(*v);
2763ed1c418cSBarry Smith       y[18*(*idx)+9]  += alpha10*(*v);
2764ed1c418cSBarry Smith       y[18*(*idx)+10] += alpha11*(*v);
2765ed1c418cSBarry Smith       y[18*(*idx)+11] += alpha12*(*v);
2766ed1c418cSBarry Smith       y[18*(*idx)+12] += alpha13*(*v);
2767ed1c418cSBarry Smith       y[18*(*idx)+13] += alpha14*(*v);
2768ed1c418cSBarry Smith       y[18*(*idx)+14] += alpha15*(*v);
2769ed1c418cSBarry Smith       y[18*(*idx)+15] += alpha16*(*v);
2770ed1c418cSBarry Smith       y[18*(*idx)+16] += alpha17*(*v);
2771ed1c418cSBarry Smith       y[18*(*idx)+17] += alpha18*(*v);
2772ed1c418cSBarry Smith       idx++; v++;
2773ed1c418cSBarry Smith     }
2774ed1c418cSBarry Smith   }
2775dc0b31edSSatish Balay   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
27763649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2777ed1c418cSBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2778ed1c418cSBarry Smith   PetscFunctionReturn(0);
2779ed1c418cSBarry Smith }
2780ed1c418cSBarry Smith 
27816861f193SBarry Smith #undef __FUNCT__
27826861f193SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_N"
27836861f193SBarry Smith PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
27846861f193SBarry Smith {
27856861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
27866861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
27876861f193SBarry Smith   const PetscScalar *x,*v;
27886861f193SBarry Smith   PetscScalar       *y,*sums;
27896861f193SBarry Smith   PetscErrorCode    ierr;
27906861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
27916861f193SBarry Smith   PetscInt          n,i,jrow,j,dof = b->dof,k;
27926861f193SBarry Smith 
27936861f193SBarry Smith   PetscFunctionBegin;
27943649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
27956861f193SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
27966861f193SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
27976861f193SBarry Smith   idx  = a->j;
27986861f193SBarry Smith   v    = a->a;
27996861f193SBarry Smith   ii   = a->i;
28006861f193SBarry Smith 
28016861f193SBarry Smith   for (i=0; i<m; i++) {
28026861f193SBarry Smith     jrow = ii[i];
28036861f193SBarry Smith     n    = ii[i+1] - jrow;
28046861f193SBarry Smith     sums = y + dof*i;
28056861f193SBarry Smith     for (j=0; j<n; j++) {
28066861f193SBarry Smith       for (k=0; k<dof; k++) {
28076861f193SBarry Smith         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
28086861f193SBarry Smith       }
28096861f193SBarry Smith       jrow++;
28106861f193SBarry Smith     }
28116861f193SBarry Smith   }
28126861f193SBarry Smith 
28136861f193SBarry Smith   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
28143649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
28156861f193SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
28166861f193SBarry Smith   PetscFunctionReturn(0);
28176861f193SBarry Smith }
28186861f193SBarry Smith 
28196861f193SBarry Smith #undef __FUNCT__
28206861f193SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_N"
28216861f193SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
28226861f193SBarry Smith {
28236861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
28246861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
28256861f193SBarry Smith   const PetscScalar *x,*v;
28266861f193SBarry Smith   PetscScalar       *y,*sums;
28276861f193SBarry Smith   PetscErrorCode    ierr;
28286861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
28296861f193SBarry Smith   PetscInt          n,i,jrow,j,dof = b->dof,k;
28306861f193SBarry Smith 
28316861f193SBarry Smith   PetscFunctionBegin;
28326861f193SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
28333649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
28346861f193SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
28356861f193SBarry Smith   idx  = a->j;
28366861f193SBarry Smith   v    = a->a;
28376861f193SBarry Smith   ii   = a->i;
28386861f193SBarry Smith 
28396861f193SBarry Smith   for (i=0; i<m; i++) {
28406861f193SBarry Smith     jrow = ii[i];
28416861f193SBarry Smith     n    = ii[i+1] - jrow;
28426861f193SBarry Smith     sums = y + dof*i;
28436861f193SBarry Smith     for (j=0; j<n; j++) {
28446861f193SBarry Smith       for (k=0; k<dof; k++) {
28456861f193SBarry Smith         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
28466861f193SBarry Smith       }
28476861f193SBarry Smith       jrow++;
28486861f193SBarry Smith     }
28496861f193SBarry Smith   }
28506861f193SBarry Smith 
28516861f193SBarry Smith   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
28523649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
28536861f193SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
28546861f193SBarry Smith   PetscFunctionReturn(0);
28556861f193SBarry Smith }
28566861f193SBarry Smith 
28576861f193SBarry Smith #undef __FUNCT__
28586861f193SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_N"
28596861f193SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
28606861f193SBarry Smith {
28616861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
28626861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
28636861f193SBarry Smith   const PetscScalar *x,*v,*alpha;
28646861f193SBarry Smith   PetscScalar       *y;
28656861f193SBarry Smith   PetscErrorCode    ierr;
28666861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
28676861f193SBarry Smith   PetscInt          n,i,k;
28686861f193SBarry Smith 
28696861f193SBarry Smith   PetscFunctionBegin;
28703649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
28716861f193SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
28726861f193SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
28736861f193SBarry Smith   for (i=0; i<m; i++) {
28746861f193SBarry Smith     idx   = a->j + a->i[i];
28756861f193SBarry Smith     v     = a->a + a->i[i];
28766861f193SBarry Smith     n     = a->i[i+1] - a->i[i];
28776861f193SBarry Smith     alpha = x + dof*i;
28786861f193SBarry Smith     while (n-->0) {
28796861f193SBarry Smith       for (k=0; k<dof; k++) {
28806861f193SBarry Smith         y[dof*(*idx)+k] += alpha[k]*(*v);
28816861f193SBarry Smith       }
288283ce7366SBarry Smith       idx++; v++;
28836861f193SBarry Smith     }
28846861f193SBarry Smith   }
28856861f193SBarry Smith   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
28863649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
28876861f193SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
28886861f193SBarry Smith   PetscFunctionReturn(0);
28896861f193SBarry Smith }
28906861f193SBarry Smith 
28916861f193SBarry Smith #undef __FUNCT__
28926861f193SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_N"
28936861f193SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
28946861f193SBarry Smith {
28956861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
28966861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
28976861f193SBarry Smith   const PetscScalar *x,*v,*alpha;
28986861f193SBarry Smith   PetscScalar       *y;
28996861f193SBarry Smith   PetscErrorCode    ierr;
29006861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
29016861f193SBarry Smith   PetscInt          n,i,k;
29026861f193SBarry Smith 
29036861f193SBarry Smith   PetscFunctionBegin;
29046861f193SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
29053649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
29066861f193SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
29076861f193SBarry Smith   for (i=0; i<m; i++) {
29086861f193SBarry Smith     idx   = a->j + a->i[i];
29096861f193SBarry Smith     v     = a->a + a->i[i];
29106861f193SBarry Smith     n     = a->i[i+1] - a->i[i];
29116861f193SBarry Smith     alpha = x + dof*i;
29126861f193SBarry Smith     while (n-->0) {
29136861f193SBarry Smith       for (k=0; k<dof; k++) {
29146861f193SBarry Smith         y[dof*(*idx)+k] += alpha[k]*(*v);
29156861f193SBarry Smith       }
291683ce7366SBarry Smith       idx++; v++;
29176861f193SBarry Smith     }
29186861f193SBarry Smith   }
29196861f193SBarry Smith   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
29203649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
29216861f193SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
29226861f193SBarry Smith   PetscFunctionReturn(0);
29236861f193SBarry Smith }
29246861f193SBarry Smith 
2925f4a53059SBarry Smith /*===================================================================================*/
29264a2ae208SSatish Balay #undef __FUNCT__
29274a2ae208SSatish Balay #define __FUNCT__ "MatMult_MPIMAIJ_dof"
2928dfbe8321SBarry Smith PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2929f4a53059SBarry Smith {
29304cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2931dfbe8321SBarry Smith   PetscErrorCode ierr;
2932f4a53059SBarry Smith 
2933b24ad042SBarry Smith   PetscFunctionBegin;
2934f4a53059SBarry Smith   /* start the scatter */
2935ca9f406cSSatish Balay   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
29364cb9d9b8SBarry Smith   ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr);
2937ca9f406cSSatish Balay   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
29384cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr);
2939f4a53059SBarry Smith   PetscFunctionReturn(0);
2940f4a53059SBarry Smith }
2941f4a53059SBarry Smith 
29424a2ae208SSatish Balay #undef __FUNCT__
29434a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof"
2944dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
29454cb9d9b8SBarry Smith {
29464cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2947dfbe8321SBarry Smith   PetscErrorCode ierr;
2948b24ad042SBarry Smith 
29494cb9d9b8SBarry Smith   PetscFunctionBegin;
29504cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
29514cb9d9b8SBarry Smith   ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr);
2952ca9f406cSSatish Balay   ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2953ca9f406cSSatish Balay   ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
29544cb9d9b8SBarry Smith   PetscFunctionReturn(0);
29554cb9d9b8SBarry Smith }
29564cb9d9b8SBarry Smith 
29574a2ae208SSatish Balay #undef __FUNCT__
29584a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof"
2959dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
29604cb9d9b8SBarry Smith {
29614cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2962dfbe8321SBarry Smith   PetscErrorCode ierr;
29634cb9d9b8SBarry Smith 
2964b24ad042SBarry Smith   PetscFunctionBegin;
29654cb9d9b8SBarry Smith   /* start the scatter */
2966ca9f406cSSatish Balay   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2967d72c5c08SBarry Smith   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2968ca9f406cSSatish Balay   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2969717f2ec8SHong Zhang   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
29704cb9d9b8SBarry Smith   PetscFunctionReturn(0);
29714cb9d9b8SBarry Smith }
29724cb9d9b8SBarry Smith 
29734a2ae208SSatish Balay #undef __FUNCT__
29744a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof"
2975dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
29764cb9d9b8SBarry Smith {
29774cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2978dfbe8321SBarry Smith   PetscErrorCode ierr;
2979b24ad042SBarry Smith 
29804cb9d9b8SBarry Smith   PetscFunctionBegin;
29814cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2982ca9f406cSSatish Balay   ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2983d72c5c08SBarry Smith   ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2984ca9f406cSSatish Balay   ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
29854cb9d9b8SBarry Smith   PetscFunctionReturn(0);
29864cb9d9b8SBarry Smith }
29874cb9d9b8SBarry Smith 
298895ddefa2SHong Zhang /* ----------------------------------------------------------------*/
29899c4fc4c7SBarry Smith #undef __FUNCT__
29907ba1a0bfSKris Buschelman #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
29917ba1a0bfSKris Buschelman PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
29927ba1a0bfSKris Buschelman {
29937ba1a0bfSKris Buschelman   PetscErrorCode     ierr;
29940298fd71SBarry Smith   PetscFreeSpaceList free_space=NULL,current_space=NULL;
29957ba1a0bfSKris Buschelman   Mat_SeqMAIJ        *pp       =(Mat_SeqMAIJ*)PP->data;
29967ba1a0bfSKris Buschelman   Mat                P         =pp->AIJ;
29977ba1a0bfSKris Buschelman   Mat_SeqAIJ         *a        =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2998d2840e09SBarry Smith   PetscInt           *pti,*ptj,*ptJ;
29997ba1a0bfSKris Buschelman   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
3000d2840e09SBarry Smith   const PetscInt     an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof;
3001d2840e09SBarry Smith   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn;
30027ba1a0bfSKris Buschelman   MatScalar          *ca;
3003d2840e09SBarry Smith   const PetscInt     *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj;
30047ba1a0bfSKris Buschelman 
30057ba1a0bfSKris Buschelman   PetscFunctionBegin;
30067ba1a0bfSKris Buschelman   /* Get ij structure of P^T */
30077ba1a0bfSKris Buschelman   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
30087ba1a0bfSKris Buschelman 
30097ba1a0bfSKris Buschelman   cn = pn*ppdof;
30107ba1a0bfSKris Buschelman   /* Allocate ci array, arrays for fill computation and */
30117ba1a0bfSKris Buschelman   /* free space for accumulating nonzero column info */
30127ba1a0bfSKris Buschelman   ierr  = PetscMalloc((cn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
30137ba1a0bfSKris Buschelman   ci[0] = 0;
30147ba1a0bfSKris Buschelman 
30157ba1a0bfSKris Buschelman   /* Work arrays for rows of P^T*A */
301674ed9c26SBarry Smith   ierr = PetscMalloc4(an,PetscInt,&ptadenserow,an,PetscInt,&ptasparserow,cn,PetscInt,&denserow,cn,PetscInt,&sparserow);CHKERRQ(ierr);
3017c43dabc9SBarry Smith   ierr = PetscMemzero(ptadenserow,an*sizeof(PetscInt));CHKERRQ(ierr);
3018c43dabc9SBarry Smith   ierr = PetscMemzero(denserow,cn*sizeof(PetscInt));CHKERRQ(ierr);
30197ba1a0bfSKris Buschelman 
30207ba1a0bfSKris Buschelman   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
30217ba1a0bfSKris Buschelman   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
30227ba1a0bfSKris Buschelman   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
3023a2ea699eSBarry Smith   ierr          = PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space);CHKERRQ(ierr);
30247ba1a0bfSKris Buschelman   current_space = free_space;
30257ba1a0bfSKris Buschelman 
30267ba1a0bfSKris Buschelman   /* Determine symbolic info for each row of C: */
30277ba1a0bfSKris Buschelman   for (i=0; i<pn; i++) {
30287ba1a0bfSKris Buschelman     ptnzi = pti[i+1] - pti[i];
30297ba1a0bfSKris Buschelman     ptJ   = ptj + pti[i];
30307ba1a0bfSKris Buschelman     for (dof=0; dof<ppdof; dof++) {
30317ba1a0bfSKris Buschelman       ptanzi = 0;
30327ba1a0bfSKris Buschelman       /* Determine symbolic row of PtA: */
30337ba1a0bfSKris Buschelman       for (j=0; j<ptnzi; j++) {
30347ba1a0bfSKris Buschelman         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
30357ba1a0bfSKris Buschelman         arow = ptJ[j]*ppdof + dof;
30367ba1a0bfSKris Buschelman         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
30377ba1a0bfSKris Buschelman         anzj = ai[arow+1] - ai[arow];
30387ba1a0bfSKris Buschelman         ajj  = aj + ai[arow];
30397ba1a0bfSKris Buschelman         for (k=0; k<anzj; k++) {
30407ba1a0bfSKris Buschelman           if (!ptadenserow[ajj[k]]) {
30417ba1a0bfSKris Buschelman             ptadenserow[ajj[k]]    = -1;
30427ba1a0bfSKris Buschelman             ptasparserow[ptanzi++] = ajj[k];
30437ba1a0bfSKris Buschelman           }
30447ba1a0bfSKris Buschelman         }
30457ba1a0bfSKris Buschelman       }
30467ba1a0bfSKris Buschelman       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
30477ba1a0bfSKris Buschelman       ptaj = ptasparserow;
30487ba1a0bfSKris Buschelman       cnzi = 0;
30497ba1a0bfSKris Buschelman       for (j=0; j<ptanzi; j++) {
30507ba1a0bfSKris Buschelman         /* Get offset within block of P */
30517ba1a0bfSKris Buschelman         pshift = *ptaj%ppdof;
30527ba1a0bfSKris Buschelman         /* Get block row of P */
30537ba1a0bfSKris Buschelman         prow = (*ptaj++)/ppdof; /* integer division */
30547ba1a0bfSKris Buschelman         /* P has same number of nonzeros per row as the compressed form */
30557ba1a0bfSKris Buschelman         pnzj = pi[prow+1] - pi[prow];
30567ba1a0bfSKris Buschelman         pjj  = pj + pi[prow];
30577ba1a0bfSKris Buschelman         for (k=0;k<pnzj;k++) {
30587ba1a0bfSKris Buschelman           /* Locations in C are shifted by the offset within the block */
30597ba1a0bfSKris Buschelman           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
30607ba1a0bfSKris Buschelman           if (!denserow[pjj[k]*ppdof+pshift]) {
30617ba1a0bfSKris Buschelman             denserow[pjj[k]*ppdof+pshift] = -1;
30627ba1a0bfSKris Buschelman             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
30637ba1a0bfSKris Buschelman           }
30647ba1a0bfSKris Buschelman         }
30657ba1a0bfSKris Buschelman       }
30667ba1a0bfSKris Buschelman 
30677ba1a0bfSKris Buschelman       /* sort sparserow */
30687ba1a0bfSKris Buschelman       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
30697ba1a0bfSKris Buschelman 
30707ba1a0bfSKris Buschelman       /* If free space is not available, make more free space */
30717ba1a0bfSKris Buschelman       /* Double the amount of total space in the list */
30727ba1a0bfSKris Buschelman       if (current_space->local_remaining<cnzi) {
30734238b7adSHong Zhang         ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
30747ba1a0bfSKris Buschelman       }
30757ba1a0bfSKris Buschelman 
30767ba1a0bfSKris Buschelman       /* Copy data into free space, and zero out denserows */
30777ba1a0bfSKris Buschelman       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
307826fbe8dcSKarl Rupp 
30797ba1a0bfSKris Buschelman       current_space->array           += cnzi;
30807ba1a0bfSKris Buschelman       current_space->local_used      += cnzi;
30817ba1a0bfSKris Buschelman       current_space->local_remaining -= cnzi;
30827ba1a0bfSKris Buschelman 
308326fbe8dcSKarl Rupp       for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
308426fbe8dcSKarl Rupp       for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0;
308526fbe8dcSKarl Rupp 
30867ba1a0bfSKris Buschelman       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
30877ba1a0bfSKris Buschelman       /*        For now, we will recompute what is needed. */
30887ba1a0bfSKris Buschelman       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
30897ba1a0bfSKris Buschelman     }
30907ba1a0bfSKris Buschelman   }
30917ba1a0bfSKris Buschelman   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
30927ba1a0bfSKris Buschelman   /* Allocate space for cj, initialize cj, and */
30937ba1a0bfSKris Buschelman   /* destroy list of free space and other temporary array(s) */
30947ba1a0bfSKris Buschelman   ierr = PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3095a1a86e44SBarry Smith   ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
309674ed9c26SBarry Smith   ierr = PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);CHKERRQ(ierr);
30977ba1a0bfSKris Buschelman 
30987ba1a0bfSKris Buschelman   /* Allocate space for ca */
30997ba1a0bfSKris Buschelman   ierr = PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
31007ba1a0bfSKris Buschelman   ierr = PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
31017ba1a0bfSKris Buschelman 
31027ba1a0bfSKris Buschelman   /* put together the new matrix */
3103ce94432eSBarry Smith   ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,C);CHKERRQ(ierr);
31047ba1a0bfSKris Buschelman 
31057ba1a0bfSKris Buschelman   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
31067ba1a0bfSKris Buschelman   /* Since these are PETSc arrays, change flags to free them as necessary. */
31077ba1a0bfSKris Buschelman   c          = (Mat_SeqAIJ*)((*C)->data);
3108e6b907acSBarry Smith   c->free_a  = PETSC_TRUE;
3109e6b907acSBarry Smith   c->free_ij = PETSC_TRUE;
31107ba1a0bfSKris Buschelman   c->nonew   = 0;
311126fbe8dcSKarl Rupp 
3112d76021d8SHong Zhang   (*C)->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
31137ba1a0bfSKris Buschelman 
31147ba1a0bfSKris Buschelman   /* Clean up. */
31157ba1a0bfSKris Buschelman   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
31167ba1a0bfSKris Buschelman   PetscFunctionReturn(0);
31177ba1a0bfSKris Buschelman }
31187ba1a0bfSKris Buschelman 
31197ba1a0bfSKris Buschelman #undef __FUNCT__
31207ba1a0bfSKris Buschelman #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ"
31217ba1a0bfSKris Buschelman PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
31227ba1a0bfSKris Buschelman {
31237ba1a0bfSKris Buschelman   /* This routine requires testing -- first draft only */
31247ba1a0bfSKris Buschelman   PetscErrorCode  ierr;
31257ba1a0bfSKris Buschelman   Mat_SeqMAIJ     *pp=(Mat_SeqMAIJ*)PP->data;
31267ba1a0bfSKris Buschelman   Mat             P  =pp->AIJ;
31277ba1a0bfSKris Buschelman   Mat_SeqAIJ      *a = (Mat_SeqAIJ*) A->data;
31287ba1a0bfSKris Buschelman   Mat_SeqAIJ      *p = (Mat_SeqAIJ*) P->data;
31297ba1a0bfSKris Buschelman   Mat_SeqAIJ      *c = (Mat_SeqAIJ*) C->data;
3130a2ea699eSBarry Smith   const PetscInt  *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj;
3131d2840e09SBarry Smith   const PetscInt  *ci=c->i,*cj=c->j,*cjj;
3132d2840e09SBarry Smith   const PetscInt  am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
3133d2840e09SBarry Smith   PetscInt        i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense;
3134a2ea699eSBarry Smith   const MatScalar *aa=a->a,*pa=p->a,*pA,*paj;
3135d2840e09SBarry Smith   MatScalar       *ca=c->a,*caj,*apa;
31367ba1a0bfSKris Buschelman 
31377ba1a0bfSKris Buschelman   PetscFunctionBegin;
31387ba1a0bfSKris Buschelman   /* Allocate temporary array for storage of one row of A*P */
313974ed9c26SBarry Smith   ierr = PetscMalloc3(cn,MatScalar,&apa,cn,PetscInt,&apj,cn,PetscInt,&apjdense);CHKERRQ(ierr);
314074ed9c26SBarry Smith   ierr = PetscMemzero(apa,cn*sizeof(MatScalar));CHKERRQ(ierr);
314174ed9c26SBarry Smith   ierr = PetscMemzero(apj,cn*sizeof(PetscInt));CHKERRQ(ierr);
314274ed9c26SBarry Smith   ierr = PetscMemzero(apjdense,cn*sizeof(PetscInt));CHKERRQ(ierr);
31437ba1a0bfSKris Buschelman 
31447ba1a0bfSKris Buschelman   /* Clear old values in C */
31457ba1a0bfSKris Buschelman   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
31467ba1a0bfSKris Buschelman 
31477ba1a0bfSKris Buschelman   for (i=0; i<am; i++) {
31487ba1a0bfSKris Buschelman     /* Form sparse row of A*P */
31497ba1a0bfSKris Buschelman     anzi  = ai[i+1] - ai[i];
31507ba1a0bfSKris Buschelman     apnzj = 0;
31517ba1a0bfSKris Buschelman     for (j=0; j<anzi; j++) {
31527ba1a0bfSKris Buschelman       /* Get offset within block of P */
31537ba1a0bfSKris Buschelman       pshift = *aj%ppdof;
31547ba1a0bfSKris Buschelman       /* Get block row of P */
31557ba1a0bfSKris Buschelman       prow = *aj++/ppdof;   /* integer division */
31567ba1a0bfSKris Buschelman       pnzj = pi[prow+1] - pi[prow];
31577ba1a0bfSKris Buschelman       pjj  = pj + pi[prow];
31587ba1a0bfSKris Buschelman       paj  = pa + pi[prow];
31597ba1a0bfSKris Buschelman       for (k=0; k<pnzj; k++) {
31607ba1a0bfSKris Buschelman         poffset = pjj[k]*ppdof+pshift;
31617ba1a0bfSKris Buschelman         if (!apjdense[poffset]) {
31627ba1a0bfSKris Buschelman           apjdense[poffset] = -1;
31637ba1a0bfSKris Buschelman           apj[apnzj++]      = poffset;
31647ba1a0bfSKris Buschelman         }
31657ba1a0bfSKris Buschelman         apa[poffset] += (*aa)*paj[k];
31667ba1a0bfSKris Buschelman       }
3167dc0b31edSSatish Balay       ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr);
31687ba1a0bfSKris Buschelman       aa++;
31697ba1a0bfSKris Buschelman     }
31707ba1a0bfSKris Buschelman 
31717ba1a0bfSKris Buschelman     /* Sort the j index array for quick sparse axpy. */
31727ba1a0bfSKris Buschelman     /* Note: a array does not need sorting as it is in dense storage locations. */
31737ba1a0bfSKris Buschelman     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
31747ba1a0bfSKris Buschelman 
31757ba1a0bfSKris Buschelman     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
31767ba1a0bfSKris Buschelman     prow    = i/ppdof; /* integer division */
31777ba1a0bfSKris Buschelman     pshift  = i%ppdof;
31787ba1a0bfSKris Buschelman     poffset = pi[prow];
31797ba1a0bfSKris Buschelman     pnzi    = pi[prow+1] - poffset;
31807ba1a0bfSKris Buschelman     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
31817ba1a0bfSKris Buschelman     pJ = pj+poffset;
31827ba1a0bfSKris Buschelman     pA = pa+poffset;
31837ba1a0bfSKris Buschelman     for (j=0; j<pnzi; j++) {
31847ba1a0bfSKris Buschelman       crow = (*pJ)*ppdof+pshift;
31857ba1a0bfSKris Buschelman       cjj  = cj + ci[crow];
31867ba1a0bfSKris Buschelman       caj  = ca + ci[crow];
31877ba1a0bfSKris Buschelman       pJ++;
31887ba1a0bfSKris Buschelman       /* Perform sparse axpy operation.  Note cjj includes apj. */
31897ba1a0bfSKris Buschelman       for (k=0,nextap=0; nextap<apnzj; k++) {
319026fbe8dcSKarl Rupp         if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]];
31917ba1a0bfSKris Buschelman       }
3192dc0b31edSSatish Balay       ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr);
31937ba1a0bfSKris Buschelman       pA++;
31947ba1a0bfSKris Buschelman     }
31957ba1a0bfSKris Buschelman 
31967ba1a0bfSKris Buschelman     /* Zero the current row info for A*P */
31977ba1a0bfSKris Buschelman     for (j=0; j<apnzj; j++) {
31987ba1a0bfSKris Buschelman       apa[apj[j]]      = 0.;
31997ba1a0bfSKris Buschelman       apjdense[apj[j]] = 0;
32007ba1a0bfSKris Buschelman     }
32017ba1a0bfSKris Buschelman   }
32027ba1a0bfSKris Buschelman 
32037ba1a0bfSKris Buschelman   /* Assemble the final matrix and clean up */
32047ba1a0bfSKris Buschelman   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
32057ba1a0bfSKris Buschelman   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
320674ed9c26SBarry Smith   ierr = PetscFree3(apa,apj,apjdense);CHKERRQ(ierr);
320795ddefa2SHong Zhang   PetscFunctionReturn(0);
320895ddefa2SHong Zhang }
32097ba1a0bfSKris Buschelman 
32102121bac1SHong Zhang #undef __FUNCT__
32112121bac1SHong Zhang #define __FUNCT__ "MatPtAP_SeqAIJ_SeqMAIJ"
3212f7a08781SBarry Smith PetscErrorCode MatPtAP_SeqAIJ_SeqMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
32132121bac1SHong Zhang {
32142121bac1SHong Zhang   PetscErrorCode ierr;
32152121bac1SHong Zhang 
32162121bac1SHong Zhang   PetscFunctionBegin;
32172121bac1SHong Zhang   if (scall == MAT_INITIAL_MATRIX) {
32182121bac1SHong Zhang     ierr = MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,fill,C);CHKERRQ(ierr);
32192121bac1SHong Zhang   }
32202121bac1SHong Zhang   ierr = MatPtAPNumeric_SeqAIJ_SeqMAIJ(A,P,*C);CHKERRQ(ierr);
32212121bac1SHong Zhang   PetscFunctionReturn(0);
32222121bac1SHong Zhang }
32232121bac1SHong Zhang 
322495ddefa2SHong Zhang #undef __FUNCT__
322595ddefa2SHong Zhang #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIMAIJ"
3226f7a08781SBarry Smith PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
322795ddefa2SHong Zhang {
322895ddefa2SHong Zhang   PetscErrorCode ierr;
322995ddefa2SHong Zhang 
323095ddefa2SHong Zhang   PetscFunctionBegin;
323195ddefa2SHong Zhang   /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */
323295ddefa2SHong Zhang   ierr = MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);CHKERRQ(ierr);
323395ddefa2SHong Zhang   ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);CHKERRQ(ierr);
323495ddefa2SHong Zhang   PetscFunctionReturn(0);
323595ddefa2SHong Zhang }
323695ddefa2SHong Zhang 
323795ddefa2SHong Zhang #undef __FUNCT__
323895ddefa2SHong Zhang #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIMAIJ"
3239f7a08781SBarry Smith PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
324095ddefa2SHong Zhang {
324195ddefa2SHong Zhang   PetscFunctionBegin;
3242e32f2f54SBarry Smith   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
32437ba1a0bfSKris Buschelman   PetscFunctionReturn(0);
32447ba1a0bfSKris Buschelman }
32457ba1a0bfSKris Buschelman 
32467ba1a0bfSKris Buschelman #undef __FUNCT__
32479e03dfbbSHong Zhang #define __FUNCT__ "MatPtAP_MPIAIJ_MPIMAIJ"
3248f7a08781SBarry Smith PetscErrorCode MatPtAP_MPIAIJ_MPIMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
32499e03dfbbSHong Zhang {
32509e03dfbbSHong Zhang   PetscErrorCode ierr;
32519e03dfbbSHong Zhang 
32529e03dfbbSHong Zhang   PetscFunctionBegin;
32539e03dfbbSHong Zhang   if (scall == MAT_INITIAL_MATRIX) {
32549e03dfbbSHong Zhang     ierr = MatPtAPSymbolic_MPIAIJ_MPIMAIJ(A,P,fill,C);CHKERRQ(ierr);
32559e03dfbbSHong Zhang   }
32569e03dfbbSHong Zhang   ierr = ((*C)->ops->ptapnumeric)(A,P,*C);CHKERRQ(ierr);
32579e03dfbbSHong Zhang   PetscFunctionReturn(0);
32589e03dfbbSHong Zhang }
32599e03dfbbSHong Zhang 
32609e03dfbbSHong Zhang #undef __FUNCT__
32610fd73130SBarry Smith #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ"
3262*8cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
32639c4fc4c7SBarry Smith {
32649c4fc4c7SBarry Smith   Mat_SeqMAIJ    *b   = (Mat_SeqMAIJ*)A->data;
3265ceb03754SKris Buschelman   Mat            a    = b->AIJ,B;
32669c4fc4c7SBarry Smith   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)a->data;
32679c4fc4c7SBarry Smith   PetscErrorCode ierr;
32680fd73130SBarry Smith   PetscInt       m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
3269ba8c8a56SBarry Smith   PetscInt       *cols;
3270ba8c8a56SBarry Smith   PetscScalar    *vals;
32719c4fc4c7SBarry Smith 
32729c4fc4c7SBarry Smith   PetscFunctionBegin;
32739c4fc4c7SBarry Smith   ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr);
32747c307921SBarry Smith   ierr = PetscMalloc(dof*m*sizeof(PetscInt),&ilen);CHKERRQ(ierr);
32759c4fc4c7SBarry Smith   for (i=0; i<m; i++) {
32769c4fc4c7SBarry Smith     nmax = PetscMax(nmax,aij->ilen[i]);
327726fbe8dcSKarl Rupp     for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i];
32789c4fc4c7SBarry Smith   }
3279ceb03754SKris Buschelman   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr);
32809c4fc4c7SBarry Smith   ierr = PetscFree(ilen);CHKERRQ(ierr);
32819c4fc4c7SBarry Smith   ierr = PetscMalloc(nmax*sizeof(PetscInt),&icols);CHKERRQ(ierr);
32829c4fc4c7SBarry Smith   ii   = 0;
32839c4fc4c7SBarry Smith   for (i=0; i<m; i++) {
3284ba8c8a56SBarry Smith     ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
32850fd73130SBarry Smith     for (j=0; j<dof; j++) {
328626fbe8dcSKarl Rupp       for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j;
3287ceb03754SKris Buschelman       ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
32889c4fc4c7SBarry Smith       ii++;
32899c4fc4c7SBarry Smith     }
3290ba8c8a56SBarry Smith     ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
32919c4fc4c7SBarry Smith   }
32929c4fc4c7SBarry Smith   ierr = PetscFree(icols);CHKERRQ(ierr);
3293ceb03754SKris Buschelman   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3294ceb03754SKris Buschelman   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3295ceb03754SKris Buschelman 
3296ceb03754SKris Buschelman   if (reuse == MAT_REUSE_MATRIX) {
32978ab5b326SKris Buschelman     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
3298c3d102feSKris Buschelman   } else {
3299ceb03754SKris Buschelman     *newmat = B;
3300c3d102feSKris Buschelman   }
33019c4fc4c7SBarry Smith   PetscFunctionReturn(0);
33029c4fc4c7SBarry Smith }
33039c4fc4c7SBarry Smith 
3304c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h>
3305be1d678aSKris Buschelman 
33060fd73130SBarry Smith #undef __FUNCT__
33070fd73130SBarry Smith #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ"
3308*8cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
33090fd73130SBarry Smith {
33100fd73130SBarry Smith   Mat_MPIMAIJ    *maij   = (Mat_MPIMAIJ*)A->data;
3311ceb03754SKris Buschelman   Mat            MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
33120fd73130SBarry Smith   Mat            MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
33130fd73130SBarry Smith   Mat_SeqAIJ     *AIJ    = (Mat_SeqAIJ*) MatAIJ->data;
33140fd73130SBarry Smith   Mat_SeqAIJ     *OAIJ   =(Mat_SeqAIJ*) MatOAIJ->data;
33150fd73130SBarry Smith   Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*) maij->A->data;
33160298fd71SBarry Smith   PetscInt       dof     = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0;
33170298fd71SBarry Smith   PetscInt       *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL;
33180fd73130SBarry Smith   PetscInt       rstart,cstart,*garray,ii,k;
33190fd73130SBarry Smith   PetscErrorCode ierr;
33200fd73130SBarry Smith   PetscScalar    *vals,*ovals;
33210fd73130SBarry Smith 
33220fd73130SBarry Smith   PetscFunctionBegin;
3323d0f46423SBarry Smith   ierr = PetscMalloc2(A->rmap->n,PetscInt,&dnz,A->rmap->n,PetscInt,&onz);CHKERRQ(ierr);
3324d0f46423SBarry Smith   for (i=0; i<A->rmap->n/dof; i++) {
33250fd73130SBarry Smith     nmax  = PetscMax(nmax,AIJ->ilen[i]);
33260fd73130SBarry Smith     onmax = PetscMax(onmax,OAIJ->ilen[i]);
33270fd73130SBarry Smith     for (j=0; j<dof; j++) {
33280fd73130SBarry Smith       dnz[dof*i+j] = AIJ->ilen[i];
33290fd73130SBarry Smith       onz[dof*i+j] = OAIJ->ilen[i];
33300fd73130SBarry Smith     }
33310fd73130SBarry Smith   }
3332ce94432eSBarry Smith   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);CHKERRQ(ierr);
33330fd73130SBarry Smith   ierr = PetscFree2(dnz,onz);CHKERRQ(ierr);
33340fd73130SBarry Smith 
33357a1a7badSBarry Smith   ierr   = PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);CHKERRQ(ierr);
3336d0f46423SBarry Smith   rstart = dof*maij->A->rmap->rstart;
3337d0f46423SBarry Smith   cstart = dof*maij->A->cmap->rstart;
33380fd73130SBarry Smith   garray = mpiaij->garray;
33390fd73130SBarry Smith 
33400fd73130SBarry Smith   ii = rstart;
3341d0f46423SBarry Smith   for (i=0; i<A->rmap->n/dof; i++) {
33420fd73130SBarry Smith     ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
33430fd73130SBarry Smith     ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
33440fd73130SBarry Smith     for (j=0; j<dof; j++) {
33450fd73130SBarry Smith       for (k=0; k<ncols; k++) {
33460fd73130SBarry Smith         icols[k] = cstart + dof*cols[k]+j;
33470fd73130SBarry Smith       }
33480fd73130SBarry Smith       for (k=0; k<oncols; k++) {
33490fd73130SBarry Smith         oicols[k] = dof*garray[ocols[k]]+j;
33500fd73130SBarry Smith       }
3351ceb03754SKris Buschelman       ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
3352ceb03754SKris Buschelman       ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr);
33530fd73130SBarry Smith       ii++;
33540fd73130SBarry Smith     }
33550fd73130SBarry Smith     ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
33560fd73130SBarry Smith     ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
33570fd73130SBarry Smith   }
33580fd73130SBarry Smith   ierr = PetscFree2(icols,oicols);CHKERRQ(ierr);
33590fd73130SBarry Smith 
3360ceb03754SKris Buschelman   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3361ceb03754SKris Buschelman   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3362ceb03754SKris Buschelman 
3363ceb03754SKris Buschelman   if (reuse == MAT_REUSE_MATRIX) {
33647adad957SLisandro Dalcin     PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
33657adad957SLisandro Dalcin     ((PetscObject)A)->refct = 1;
336626fbe8dcSKarl Rupp 
33678ab5b326SKris Buschelman     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
336826fbe8dcSKarl Rupp 
33697adad957SLisandro Dalcin     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3370c3d102feSKris Buschelman   } else {
3371ceb03754SKris Buschelman     *newmat = B;
3372c3d102feSKris Buschelman   }
33730fd73130SBarry Smith   PetscFunctionReturn(0);
33740fd73130SBarry Smith }
33750fd73130SBarry Smith 
33769e516c8fSBarry Smith #undef __FUNCT__
33779e516c8fSBarry Smith #define __FUNCT__ "MatGetSubMatrix_MAIJ"
33789e516c8fSBarry Smith PetscErrorCode  MatGetSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
33799e516c8fSBarry Smith {
33809e516c8fSBarry Smith   PetscErrorCode ierr;
33819e516c8fSBarry Smith   Mat            A;
33829e516c8fSBarry Smith 
33839e516c8fSBarry Smith   PetscFunctionBegin;
33849e516c8fSBarry Smith   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
33859e516c8fSBarry Smith   ierr = MatGetSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
33869e516c8fSBarry Smith   ierr = MatDestroy(&A);CHKERRQ(ierr);
33879e516c8fSBarry Smith   PetscFunctionReturn(0);
33889e516c8fSBarry Smith }
33890fd73130SBarry Smith 
3390bcc973b7SBarry Smith /* ---------------------------------------------------------------------------------- */
3391ff585edeSJed Brown #undef __FUNCT__
3392ff585edeSJed Brown #define __FUNCT__ "MatCreateMAIJ"
3393ff585edeSJed Brown /*@C
33940bad9183SKris Buschelman   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
33950bad9183SKris Buschelman   operations for multicomponent problems.  It interpolates each component the same
33960bad9183SKris Buschelman   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
33970bad9183SKris Buschelman   and MATMPIAIJ for distributed matrices.
33980bad9183SKris Buschelman 
3399ff585edeSJed Brown   Collective
3400ff585edeSJed Brown 
3401ff585edeSJed Brown   Input Parameters:
3402ff585edeSJed Brown + A - the AIJ matrix describing the action on blocks
3403ff585edeSJed Brown - dof - the block size (number of components per node)
3404ff585edeSJed Brown 
3405ff585edeSJed Brown   Output Parameter:
3406ff585edeSJed Brown . maij - the new MAIJ matrix
3407ff585edeSJed Brown 
34080bad9183SKris Buschelman   Operations provided:
34090fd73130SBarry Smith + MatMult
34100bad9183SKris Buschelman . MatMultTranspose
34110bad9183SKris Buschelman . MatMultAdd
34120bad9183SKris Buschelman . MatMultTransposeAdd
34130fd73130SBarry Smith - MatView
34140bad9183SKris Buschelman 
34150bad9183SKris Buschelman   Level: advanced
34160bad9183SKris Buschelman 
3417b409243cSBarry Smith .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ
3418ff585edeSJed Brown @*/
34197087cfbeSBarry Smith PetscErrorCode  MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
342082b1193eSBarry Smith {
3421dfbe8321SBarry Smith   PetscErrorCode ierr;
3422b24ad042SBarry Smith   PetscMPIInt    size;
3423b24ad042SBarry Smith   PetscInt       n;
342482b1193eSBarry Smith   Mat            B;
342582b1193eSBarry Smith 
342682b1193eSBarry Smith   PetscFunctionBegin;
3427d72c5c08SBarry Smith   ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3428d72c5c08SBarry Smith 
342926fbe8dcSKarl Rupp   if (dof == 1) *maij = A;
343026fbe8dcSKarl Rupp   else {
3431ce94432eSBarry Smith     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
3432d0f46423SBarry Smith     ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr);
3433bccba955SJed Brown     ierr = PetscLayoutSetBlockSize(B->rmap,dof);CHKERRQ(ierr);
3434bccba955SJed Brown     ierr = PetscLayoutSetBlockSize(B->cmap,dof);CHKERRQ(ierr);
3435bccba955SJed Brown     ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3436bccba955SJed Brown     ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
343726fbe8dcSKarl Rupp 
3438cd3bbe55SBarry Smith     B->assembled = PETSC_TRUE;
3439d72c5c08SBarry Smith 
3440ce94432eSBarry Smith     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
3441f4a53059SBarry Smith     if (size == 1) {
3442feb9fe23SJed Brown       Mat_SeqMAIJ *b;
3443feb9fe23SJed Brown 
3444b9b97703SBarry Smith       ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr);
344526fbe8dcSKarl Rupp 
34460298fd71SBarry Smith       B->ops->setup   = NULL;
34474cb9d9b8SBarry Smith       B->ops->destroy = MatDestroy_SeqMAIJ;
34480fd73130SBarry Smith       B->ops->view    = MatView_SeqMAIJ;
3449feb9fe23SJed Brown       b               = (Mat_SeqMAIJ*)B->data;
3450b9b97703SBarry Smith       b->dof          = dof;
34514cb9d9b8SBarry Smith       b->AIJ          = A;
345226fbe8dcSKarl Rupp 
3453d72c5c08SBarry Smith       if (dof == 2) {
3454cd3bbe55SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_2;
3455cd3bbe55SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
3456cd3bbe55SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
3457cd3bbe55SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3458bcc973b7SBarry Smith       } else if (dof == 3) {
3459bcc973b7SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_3;
3460bcc973b7SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
3461bcc973b7SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
3462bcc973b7SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3463bcc973b7SBarry Smith       } else if (dof == 4) {
3464bcc973b7SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_4;
3465bcc973b7SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
3466bcc973b7SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
3467bcc973b7SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3468f9fae5adSBarry Smith       } else if (dof == 5) {
3469f9fae5adSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_5;
3470f9fae5adSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
3471f9fae5adSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
3472f9fae5adSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
34736cd98798SBarry Smith       } else if (dof == 6) {
34746cd98798SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_6;
34756cd98798SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
34766cd98798SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
34776cd98798SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3478ed8eea03SBarry Smith       } else if (dof == 7) {
3479ed8eea03SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_7;
3480ed8eea03SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
3481ed8eea03SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
3482ed8eea03SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
348366ed3db0SBarry Smith       } else if (dof == 8) {
348466ed3db0SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_8;
348566ed3db0SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
348666ed3db0SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
348766ed3db0SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
34882b692628SMatthew Knepley       } else if (dof == 9) {
34892b692628SMatthew Knepley         B->ops->mult             = MatMult_SeqMAIJ_9;
34902b692628SMatthew Knepley         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
34912b692628SMatthew Knepley         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
34922b692628SMatthew Knepley         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3493b9cda22cSBarry Smith       } else if (dof == 10) {
3494b9cda22cSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_10;
3495b9cda22cSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
3496b9cda22cSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
3497b9cda22cSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3498dbdd7285SBarry Smith       } else if (dof == 11) {
3499dbdd7285SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_11;
3500dbdd7285SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
3501dbdd7285SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
3502dbdd7285SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
35032f7816d4SBarry Smith       } else if (dof == 16) {
35042f7816d4SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_16;
35052f7816d4SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
35062f7816d4SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
35072f7816d4SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3508ed1c418cSBarry Smith       } else if (dof == 18) {
3509ed1c418cSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_18;
3510ed1c418cSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
3511ed1c418cSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
3512ed1c418cSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
351382b1193eSBarry Smith       } else {
35146861f193SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_N;
35156861f193SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
35166861f193SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
35176861f193SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
351882b1193eSBarry Smith       }
351900de8ff0SBarry Smith       ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr);
352000de8ff0SBarry Smith       ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaij_seqmaij_C","MatPtAP_SeqAIJ_SeqMAIJ",MatPtAP_SeqAIJ_SeqMAIJ);CHKERRQ(ierr);
3521f4a53059SBarry Smith     } else {
3522f4a53059SBarry Smith       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ*)A->data;
3523feb9fe23SJed Brown       Mat_MPIMAIJ *b;
3524f4a53059SBarry Smith       IS          from,to;
3525f4a53059SBarry Smith       Vec         gvec;
3526f4a53059SBarry Smith 
3527b9b97703SBarry Smith       ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr);
352826fbe8dcSKarl Rupp 
35290298fd71SBarry Smith       B->ops->setup   = NULL;
3530d72c5c08SBarry Smith       B->ops->destroy = MatDestroy_MPIMAIJ;
35310fd73130SBarry Smith       B->ops->view    = MatView_MPIMAIJ;
353226fbe8dcSKarl Rupp 
3533b9b97703SBarry Smith       b      = (Mat_MPIMAIJ*)B->data;
3534b9b97703SBarry Smith       b->dof = dof;
3535b9b97703SBarry Smith       b->A   = A;
353626fbe8dcSKarl Rupp 
35374cb9d9b8SBarry Smith       ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr);
35384cb9d9b8SBarry Smith       ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr);
35394cb9d9b8SBarry Smith 
3540f4a53059SBarry Smith       ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
3541a34bdc0bSBarry Smith       ierr = VecCreate(PETSC_COMM_SELF,&b->w);CHKERRQ(ierr);
3542a34bdc0bSBarry Smith       ierr = VecSetSizes(b->w,n*dof,n*dof);CHKERRQ(ierr);
354313288a74SBarry Smith       ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr);
3544a34bdc0bSBarry Smith       ierr = VecSetType(b->w,VECSEQ);CHKERRQ(ierr);
3545f4a53059SBarry Smith 
3546f4a53059SBarry Smith       /* create two temporary Index sets for build scatter gather */
3547ce94432eSBarry Smith       ierr = ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
3548f4a53059SBarry Smith       ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);
3549f4a53059SBarry Smith 
3550f4a53059SBarry Smith       /* create temporary global vector to generate scatter context */
3551ce94432eSBarry Smith       ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec);CHKERRQ(ierr);
3552f4a53059SBarry Smith 
3553f4a53059SBarry Smith       /* generate the scatter context */
3554f4a53059SBarry Smith       ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
3555f4a53059SBarry Smith 
35566bf464f9SBarry Smith       ierr = ISDestroy(&from);CHKERRQ(ierr);
35576bf464f9SBarry Smith       ierr = ISDestroy(&to);CHKERRQ(ierr);
35586bf464f9SBarry Smith       ierr = VecDestroy(&gvec);CHKERRQ(ierr);
3559f4a53059SBarry Smith 
3560f4a53059SBarry Smith       B->ops->mult             = MatMult_MPIMAIJ_dof;
35614cb9d9b8SBarry Smith       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
35624cb9d9b8SBarry Smith       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
35634cb9d9b8SBarry Smith       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
356426fbe8dcSKarl Rupp 
356500de8ff0SBarry Smith       ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr);
356600de8ff0SBarry Smith       ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_mpiaij_mpimaij_C","MatPtAP_MPIAIJ_MPIMAIJ",MatPtAP_MPIAIJ_MPIMAIJ);CHKERRQ(ierr);
3567f4a53059SBarry Smith     }
35689e516c8fSBarry Smith     B->ops->getsubmatrix = MatGetSubMatrix_MAIJ;
35694994cf47SJed Brown     ierr  = MatSetUp(B);CHKERRQ(ierr);
3570cd3bbe55SBarry Smith     *maij = B;
35710d2bece7SBarry Smith     ierr  = MatViewFromOptions(B,"-mat_view");CHKERRQ(ierr);
3572d72c5c08SBarry Smith   }
357382b1193eSBarry Smith   PetscFunctionReturn(0);
357482b1193eSBarry Smith }
3575