xref: /petsc/src/mat/impls/maij/maij.c (revision 0d2bece73096e7ea2d1439c7d0758e018fac5b96)
1be1d678aSKris Buschelman 
282b1193eSBarry Smith /*
3cd3bbe55SBarry Smith     Defines the basic matrix operations for the MAIJ  matrix storage format.
4cd3bbe55SBarry Smith   This format is used for restriction and interpolation operations for
5cd3bbe55SBarry Smith   multicomponent problems. It interpolates each component the same way
6cd3bbe55SBarry Smith   independently.
7cd3bbe55SBarry Smith 
8cd3bbe55SBarry Smith      We provide:
9cd3bbe55SBarry Smith          MatMult()
10cd3bbe55SBarry Smith          MatMultTranspose()
11cd3bbe55SBarry Smith          MatMultTransposeAdd()
12cd3bbe55SBarry Smith          MatMultAdd()
13cd3bbe55SBarry Smith           and
14cd3bbe55SBarry Smith          MatCreateMAIJ(Mat,dof,Mat*)
15f4a53059SBarry Smith 
16f4a53059SBarry Smith      This single directory handles both the sequential and parallel codes
1782b1193eSBarry Smith */
1882b1193eSBarry Smith 
19c6db04a5SJed Brown #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/
20c6db04a5SJed Brown #include <../src/mat/utils/freespace.h>
21b45d2f2cSJed Brown #include <petsc-private/vecimpl.h>
2282b1193eSBarry Smith 
234a2ae208SSatish Balay #undef __FUNCT__
244a2ae208SSatish Balay #define __FUNCT__ "MatMAIJGetAIJ"
25ff585edeSJed Brown /*@C
26ff585edeSJed Brown    MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix
27ff585edeSJed Brown 
28ff585edeSJed Brown    Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel
29ff585edeSJed Brown 
30ff585edeSJed Brown    Input Parameter:
31ff585edeSJed Brown .  A - the MAIJ matrix
32ff585edeSJed Brown 
33ff585edeSJed Brown    Output Parameter:
34ff585edeSJed Brown .  B - the AIJ matrix
35ff585edeSJed Brown 
36ff585edeSJed Brown    Level: advanced
37ff585edeSJed Brown 
38ff585edeSJed Brown    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
39ff585edeSJed Brown 
40ff585edeSJed Brown .seealso: MatCreateMAIJ()
41ff585edeSJed Brown @*/
427087cfbeSBarry Smith PetscErrorCode  MatMAIJGetAIJ(Mat A,Mat *B)
43b9b97703SBarry Smith {
44dfbe8321SBarry Smith   PetscErrorCode ierr;
45ace3abfcSBarry Smith   PetscBool      ismpimaij,isseqmaij;
46b9b97703SBarry Smith 
47b9b97703SBarry Smith   PetscFunctionBegin;
48251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr);
49251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr);
50b9b97703SBarry Smith   if (ismpimaij) {
51b9b97703SBarry Smith     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
52b9b97703SBarry Smith 
53b9b97703SBarry Smith     *B = b->A;
54b9b97703SBarry Smith   } else if (isseqmaij) {
55b9b97703SBarry Smith     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
56b9b97703SBarry Smith 
57b9b97703SBarry Smith     *B = b->AIJ;
58b9b97703SBarry Smith   } else {
59b9b97703SBarry Smith     *B = A;
60b9b97703SBarry Smith   }
61b9b97703SBarry Smith   PetscFunctionReturn(0);
62b9b97703SBarry Smith }
63b9b97703SBarry Smith 
644a2ae208SSatish Balay #undef __FUNCT__
654a2ae208SSatish Balay #define __FUNCT__ "MatMAIJRedimension"
66ff585edeSJed Brown /*@C
67ff585edeSJed Brown    MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size
68ff585edeSJed Brown 
693f9fe445SBarry Smith    Logically Collective
70ff585edeSJed Brown 
71ff585edeSJed Brown    Input Parameter:
72ff585edeSJed Brown +  A - the MAIJ matrix
73ff585edeSJed Brown -  dof - the block size for the new matrix
74ff585edeSJed Brown 
75ff585edeSJed Brown    Output Parameter:
76ff585edeSJed Brown .  B - the new MAIJ matrix
77ff585edeSJed Brown 
78ff585edeSJed Brown    Level: advanced
79ff585edeSJed Brown 
80ff585edeSJed Brown .seealso: MatCreateMAIJ()
81ff585edeSJed Brown @*/
827087cfbeSBarry Smith PetscErrorCode  MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
83b9b97703SBarry Smith {
84dfbe8321SBarry Smith   PetscErrorCode ierr;
853b98c0a2SBarry Smith   Mat            Aij = PETSC_NULL;
86b9b97703SBarry Smith 
87b9b97703SBarry Smith   PetscFunctionBegin;
88c5eb9154SBarry Smith   PetscValidLogicalCollectiveInt(A,dof,2);
89b9b97703SBarry Smith   ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr);
90b9b97703SBarry Smith   ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr);
91b9b97703SBarry Smith   PetscFunctionReturn(0);
92b9b97703SBarry Smith }
93b9b97703SBarry Smith 
944a2ae208SSatish Balay #undef __FUNCT__
954a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_SeqMAIJ"
96dfbe8321SBarry Smith PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
9782b1193eSBarry Smith {
98dfbe8321SBarry Smith   PetscErrorCode ierr;
994cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
10082b1193eSBarry Smith 
10182b1193eSBarry Smith   PetscFunctionBegin;
1026bf464f9SBarry Smith   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
103bf0cc555SLisandro Dalcin   ierr = PetscFree(A->data);CHKERRQ(ierr);
1042121bac1SHong Zhang   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatConvert_seqmaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1052121bac1SHong Zhang   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatPtAP_seqaij_seqmaij_C","",PETSC_NULL);CHKERRQ(ierr);
1064cb9d9b8SBarry Smith   PetscFunctionReturn(0);
1074cb9d9b8SBarry Smith }
1084cb9d9b8SBarry Smith 
1094a2ae208SSatish Balay #undef __FUNCT__
110356c569eSBarry Smith #define __FUNCT__ "MatSetUp_MAIJ"
111356c569eSBarry Smith PetscErrorCode MatSetUp_MAIJ(Mat A)
112356c569eSBarry Smith {
113356c569eSBarry Smith   PetscFunctionBegin;
114356c569eSBarry Smith   SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices");
115356c569eSBarry Smith   PetscFunctionReturn(0);
116356c569eSBarry Smith }
117356c569eSBarry Smith 
118356c569eSBarry Smith #undef __FUNCT__
1190fd73130SBarry Smith #define __FUNCT__ "MatView_SeqMAIJ"
1200fd73130SBarry Smith PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
1210fd73130SBarry Smith {
1220fd73130SBarry Smith   PetscErrorCode ierr;
1230fd73130SBarry Smith   Mat            B;
1240fd73130SBarry Smith 
1250fd73130SBarry Smith   PetscFunctionBegin;
126ceb03754SKris Buschelman   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
1270fd73130SBarry Smith   ierr = MatView(B,viewer);CHKERRQ(ierr);
1286bf464f9SBarry Smith   ierr = MatDestroy(&B);CHKERRQ(ierr);
1290fd73130SBarry Smith   PetscFunctionReturn(0);
1300fd73130SBarry Smith }
1310fd73130SBarry Smith 
1320fd73130SBarry Smith #undef __FUNCT__
1330fd73130SBarry Smith #define __FUNCT__ "MatView_MPIMAIJ"
1340fd73130SBarry Smith PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
1350fd73130SBarry Smith {
1360fd73130SBarry Smith   PetscErrorCode ierr;
1370fd73130SBarry Smith   Mat            B;
1380fd73130SBarry Smith 
1390fd73130SBarry Smith   PetscFunctionBegin;
140ceb03754SKris Buschelman   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
1410fd73130SBarry Smith   ierr = MatView(B,viewer);CHKERRQ(ierr);
1426bf464f9SBarry Smith   ierr = MatDestroy(&B);CHKERRQ(ierr);
1430fd73130SBarry Smith   PetscFunctionReturn(0);
1440fd73130SBarry Smith }
1450fd73130SBarry Smith 
1460fd73130SBarry Smith #undef __FUNCT__
1474a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_MPIMAIJ"
148dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
1494cb9d9b8SBarry Smith {
150dfbe8321SBarry Smith   PetscErrorCode ierr;
1514cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
1524cb9d9b8SBarry Smith 
1534cb9d9b8SBarry Smith   PetscFunctionBegin;
1546bf464f9SBarry Smith   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
1556bf464f9SBarry Smith   ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
1566bf464f9SBarry Smith   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
1576bf464f9SBarry Smith   ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
1586bf464f9SBarry Smith   ierr = VecDestroy(&b->w);CHKERRQ(ierr);
159bf0cc555SLisandro Dalcin   ierr = PetscFree(A->data);CHKERRQ(ierr);
1609e03dfbbSHong Zhang   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatConvert_mpimaij_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr);
1619e03dfbbSHong Zhang   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatPtAP_mpiaij_mpimaij_C","",PETSC_NULL);CHKERRQ(ierr);
162dbd8c25aSHong Zhang   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
163b9b97703SBarry Smith   PetscFunctionReturn(0);
164b9b97703SBarry Smith }
165b9b97703SBarry Smith 
1660bad9183SKris Buschelman /*MC
167fafad747SKris Buschelman   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
1680bad9183SKris Buschelman   multicomponent problems, interpolating or restricting each component the same way independently.
1690bad9183SKris Buschelman   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
1700bad9183SKris Buschelman 
1710bad9183SKris Buschelman   Operations provided:
1720bad9183SKris Buschelman . MatMult
1730bad9183SKris Buschelman . MatMultTranspose
1740bad9183SKris Buschelman . MatMultAdd
1750bad9183SKris Buschelman . MatMultTransposeAdd
1760bad9183SKris Buschelman 
1770bad9183SKris Buschelman   Level: advanced
1780bad9183SKris Buschelman 
179b409243cSBarry Smith .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ()
1800bad9183SKris Buschelman M*/
1810bad9183SKris Buschelman 
1824a2ae208SSatish Balay #undef __FUNCT__
1834a2ae208SSatish Balay #define __FUNCT__ "MatCreate_MAIJ"
18415221df2SJed Brown PETSC_EXTERN_C PetscErrorCode MatCreate_MAIJ(Mat A)
18582b1193eSBarry Smith {
186dfbe8321SBarry Smith   PetscErrorCode ierr;
1874cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b;
18864b52464SHong Zhang   PetscMPIInt    size;
18982b1193eSBarry Smith 
19082b1193eSBarry Smith   PetscFunctionBegin;
19138f2d2fdSLisandro Dalcin   ierr     = PetscNewLog(A,Mat_MPIMAIJ,&b);CHKERRQ(ierr);
192b0a32e0cSBarry Smith   A->data  = (void*)b;
193cd3bbe55SBarry Smith   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
194356c569eSBarry Smith   A->ops->setup = MatSetUp_MAIJ;
195f4a53059SBarry Smith 
196cd3bbe55SBarry Smith   b->AIJ  = 0;
197cd3bbe55SBarry Smith   b->dof  = 0;
198f4a53059SBarry Smith   b->OAIJ = 0;
199f4a53059SBarry Smith   b->ctx  = 0;
200f4a53059SBarry Smith   b->w    = 0;
2017adad957SLisandro Dalcin   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
20264b52464SHong Zhang   if (size == 1){
20364b52464SHong Zhang     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr);
20464b52464SHong Zhang   } else {
20564b52464SHong Zhang     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr);
20664b52464SHong Zhang   }
20782b1193eSBarry Smith   PetscFunctionReturn(0);
20882b1193eSBarry Smith }
20982b1193eSBarry Smith 
210cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/
2114a2ae208SSatish Balay #undef __FUNCT__
212b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_2"
213dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
21482b1193eSBarry Smith {
2154cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
216bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
217d2840e09SBarry Smith   const PetscScalar *x,*v;
218d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2;
219dfbe8321SBarry Smith   PetscErrorCode    ierr;
220d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
221d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
22282b1193eSBarry Smith 
223bcc973b7SBarry Smith   PetscFunctionBegin;
2243649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2251ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
226bcc973b7SBarry Smith   idx  = a->j;
227bcc973b7SBarry Smith   v    = a->a;
228bcc973b7SBarry Smith   ii   = a->i;
229bcc973b7SBarry Smith 
230bcc973b7SBarry Smith   for (i=0; i<m; i++) {
231bcc973b7SBarry Smith     jrow = ii[i];
232bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
233bcc973b7SBarry Smith     sum1  = 0.0;
234bcc973b7SBarry Smith     sum2  = 0.0;
235b3c51c66SMatthew Knepley     nonzerorow += (n>0);
236bcc973b7SBarry Smith     for (j=0; j<n; j++) {
237bcc973b7SBarry Smith       sum1 += v[jrow]*x[2*idx[jrow]];
238bcc973b7SBarry Smith       sum2 += v[jrow]*x[2*idx[jrow]+1];
239bcc973b7SBarry Smith       jrow++;
240bcc973b7SBarry Smith      }
241bcc973b7SBarry Smith     y[2*i]   = sum1;
242bcc973b7SBarry Smith     y[2*i+1] = sum2;
243bcc973b7SBarry Smith   }
244bcc973b7SBarry Smith 
245dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr);
2463649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2471ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
24882b1193eSBarry Smith   PetscFunctionReturn(0);
24982b1193eSBarry Smith }
250bcc973b7SBarry Smith 
2514a2ae208SSatish Balay #undef __FUNCT__
252b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2"
253dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
25482b1193eSBarry Smith {
2554cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
256bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
257d2840e09SBarry Smith   const PetscScalar *x,*v;
258d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2;
259dfbe8321SBarry Smith   PetscErrorCode    ierr;
260d2840e09SBarry Smith   PetscInt          n,i;
261d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
26282b1193eSBarry Smith 
263bcc973b7SBarry Smith   PetscFunctionBegin;
264d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2653649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2661ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
267bfec09a0SHong Zhang 
268bcc973b7SBarry Smith   for (i=0; i<m; i++) {
269bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
270bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
271bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
272bcc973b7SBarry Smith     alpha1 = x[2*i];
273bcc973b7SBarry Smith     alpha2 = x[2*i+1];
274bcc973b7SBarry Smith     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
275bcc973b7SBarry Smith   }
276dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
2773649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2781ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
27982b1193eSBarry Smith   PetscFunctionReturn(0);
28082b1193eSBarry Smith }
281bcc973b7SBarry Smith 
2824a2ae208SSatish Balay #undef __FUNCT__
283b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_2"
284dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
28582b1193eSBarry Smith {
2864cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
287bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
288d2840e09SBarry Smith   const PetscScalar *x,*v;
289d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2;
290dfbe8321SBarry Smith   PetscErrorCode    ierr;
291b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
292d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
29382b1193eSBarry Smith 
294bcc973b7SBarry Smith   PetscFunctionBegin;
295f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2963649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2971ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
298bcc973b7SBarry Smith   idx  = a->j;
299bcc973b7SBarry Smith   v    = a->a;
300bcc973b7SBarry Smith   ii   = a->i;
301bcc973b7SBarry Smith 
302bcc973b7SBarry Smith   for (i=0; i<m; i++) {
303bcc973b7SBarry Smith     jrow = ii[i];
304bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
305bcc973b7SBarry Smith     sum1  = 0.0;
306bcc973b7SBarry Smith     sum2  = 0.0;
307bcc973b7SBarry Smith     for (j=0; j<n; j++) {
308bcc973b7SBarry Smith       sum1 += v[jrow]*x[2*idx[jrow]];
309bcc973b7SBarry Smith       sum2 += v[jrow]*x[2*idx[jrow]+1];
310bcc973b7SBarry Smith       jrow++;
311bcc973b7SBarry Smith      }
312bcc973b7SBarry Smith     y[2*i]   += sum1;
313bcc973b7SBarry Smith     y[2*i+1] += sum2;
314bcc973b7SBarry Smith   }
315bcc973b7SBarry Smith 
316dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
3173649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
3181ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
319bcc973b7SBarry Smith   PetscFunctionReturn(0);
32082b1193eSBarry Smith }
3214a2ae208SSatish Balay #undef __FUNCT__
322b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2"
323dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
32482b1193eSBarry Smith {
3254cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
326bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
327d2840e09SBarry Smith   const PetscScalar *x,*v;
328d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2;
329dfbe8321SBarry Smith   PetscErrorCode    ierr;
330d2840e09SBarry Smith   PetscInt          n,i;
331d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
33282b1193eSBarry Smith 
333bcc973b7SBarry Smith   PetscFunctionBegin;
334f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
3353649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
3361ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
337bfec09a0SHong Zhang 
338bcc973b7SBarry Smith   for (i=0; i<m; i++) {
339bfec09a0SHong Zhang     idx   = a->j + a->i[i] ;
340bfec09a0SHong Zhang     v     = a->a + a->i[i] ;
341bcc973b7SBarry Smith     n     = a->i[i+1] - a->i[i];
342bcc973b7SBarry Smith     alpha1 = x[2*i];
343bcc973b7SBarry Smith     alpha2 = x[2*i+1];
344bcc973b7SBarry Smith     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
345bcc973b7SBarry Smith   }
346dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
3473649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
3481ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
349bcc973b7SBarry Smith   PetscFunctionReturn(0);
35082b1193eSBarry Smith }
351cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/
3524a2ae208SSatish Balay #undef __FUNCT__
353b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_3"
354dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
355bcc973b7SBarry Smith {
3564cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
357bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
358d2840e09SBarry Smith   const PetscScalar *x,*v;
359d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3;
360dfbe8321SBarry Smith   PetscErrorCode    ierr;
361d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
362d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
36382b1193eSBarry Smith 
364bcc973b7SBarry Smith   PetscFunctionBegin;
3653649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
3661ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
367bcc973b7SBarry Smith   idx  = a->j;
368bcc973b7SBarry Smith   v    = a->a;
369bcc973b7SBarry Smith   ii   = a->i;
370bcc973b7SBarry Smith 
371bcc973b7SBarry Smith   for (i=0; i<m; i++) {
372bcc973b7SBarry Smith     jrow = ii[i];
373bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
374bcc973b7SBarry Smith     sum1  = 0.0;
375bcc973b7SBarry Smith     sum2  = 0.0;
376bcc973b7SBarry Smith     sum3  = 0.0;
377b3c51c66SMatthew Knepley     nonzerorow += (n>0);
378bcc973b7SBarry Smith     for (j=0; j<n; j++) {
379bcc973b7SBarry Smith       sum1 += v[jrow]*x[3*idx[jrow]];
380bcc973b7SBarry Smith       sum2 += v[jrow]*x[3*idx[jrow]+1];
381bcc973b7SBarry Smith       sum3 += v[jrow]*x[3*idx[jrow]+2];
382bcc973b7SBarry Smith       jrow++;
383bcc973b7SBarry Smith      }
384bcc973b7SBarry Smith     y[3*i]   = sum1;
385bcc973b7SBarry Smith     y[3*i+1] = sum2;
386bcc973b7SBarry Smith     y[3*i+2] = sum3;
387bcc973b7SBarry Smith   }
388bcc973b7SBarry Smith 
389dc0b31edSSatish Balay   ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr);
3903649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
3911ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
392bcc973b7SBarry Smith   PetscFunctionReturn(0);
393bcc973b7SBarry Smith }
394bcc973b7SBarry Smith 
3954a2ae208SSatish Balay #undef __FUNCT__
396b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3"
397dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
398bcc973b7SBarry Smith {
3994cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
400bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
401d2840e09SBarry Smith   const PetscScalar *x,*v;
402d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3;
403dfbe8321SBarry Smith   PetscErrorCode    ierr;
404d2840e09SBarry Smith   PetscInt          n,i;
405d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
406bcc973b7SBarry Smith 
407bcc973b7SBarry Smith   PetscFunctionBegin;
408d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
4093649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
4101ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
411bfec09a0SHong Zhang 
412bcc973b7SBarry Smith   for (i=0; i<m; i++) {
413bfec09a0SHong Zhang     idx    = a->j + a->i[i];
414bfec09a0SHong Zhang     v      = a->a + a->i[i];
415bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
416bcc973b7SBarry Smith     alpha1 = x[3*i];
417bcc973b7SBarry Smith     alpha2 = x[3*i+1];
418bcc973b7SBarry Smith     alpha3 = x[3*i+2];
419bcc973b7SBarry Smith     while (n-->0) {
420bcc973b7SBarry Smith       y[3*(*idx)]   += alpha1*(*v);
421bcc973b7SBarry Smith       y[3*(*idx)+1] += alpha2*(*v);
422bcc973b7SBarry Smith       y[3*(*idx)+2] += alpha3*(*v);
423bcc973b7SBarry Smith       idx++; v++;
424bcc973b7SBarry Smith     }
425bcc973b7SBarry Smith   }
426dc0b31edSSatish Balay   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
4273649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
4281ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
429bcc973b7SBarry Smith   PetscFunctionReturn(0);
430bcc973b7SBarry Smith }
431bcc973b7SBarry Smith 
4324a2ae208SSatish Balay #undef __FUNCT__
433b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_3"
434dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
435bcc973b7SBarry Smith {
4364cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
437bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
438d2840e09SBarry Smith   const PetscScalar *x,*v;
439d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3;
440dfbe8321SBarry Smith   PetscErrorCode    ierr;
441b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
442d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
443bcc973b7SBarry Smith 
444bcc973b7SBarry Smith   PetscFunctionBegin;
445f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
4463649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
4471ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
448bcc973b7SBarry Smith   idx  = a->j;
449bcc973b7SBarry Smith   v    = a->a;
450bcc973b7SBarry Smith   ii   = a->i;
451bcc973b7SBarry Smith 
452bcc973b7SBarry Smith   for (i=0; i<m; i++) {
453bcc973b7SBarry Smith     jrow = ii[i];
454bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
455bcc973b7SBarry Smith     sum1  = 0.0;
456bcc973b7SBarry Smith     sum2  = 0.0;
457bcc973b7SBarry Smith     sum3  = 0.0;
458bcc973b7SBarry Smith     for (j=0; j<n; j++) {
459bcc973b7SBarry Smith       sum1 += v[jrow]*x[3*idx[jrow]];
460bcc973b7SBarry Smith       sum2 += v[jrow]*x[3*idx[jrow]+1];
461bcc973b7SBarry Smith       sum3 += v[jrow]*x[3*idx[jrow]+2];
462bcc973b7SBarry Smith       jrow++;
463bcc973b7SBarry Smith      }
464bcc973b7SBarry Smith     y[3*i]   += sum1;
465bcc973b7SBarry Smith     y[3*i+1] += sum2;
466bcc973b7SBarry Smith     y[3*i+2] += sum3;
467bcc973b7SBarry Smith   }
468bcc973b7SBarry Smith 
469dc0b31edSSatish Balay   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
4703649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
4711ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
472bcc973b7SBarry Smith   PetscFunctionReturn(0);
473bcc973b7SBarry Smith }
4744a2ae208SSatish Balay #undef __FUNCT__
475b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3"
476dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
477bcc973b7SBarry Smith {
4784cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
479bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
480d2840e09SBarry Smith   const PetscScalar *x,*v;
481d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3;
482dfbe8321SBarry Smith   PetscErrorCode    ierr;
483d2840e09SBarry Smith   PetscInt          n,i;
484d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
485bcc973b7SBarry Smith 
486bcc973b7SBarry Smith   PetscFunctionBegin;
487f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
4883649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
4891ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
490bcc973b7SBarry Smith   for (i=0; i<m; i++) {
491bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
492bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
493bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
494bcc973b7SBarry Smith     alpha1 = x[3*i];
495bcc973b7SBarry Smith     alpha2 = x[3*i+1];
496bcc973b7SBarry Smith     alpha3 = x[3*i+2];
497bcc973b7SBarry Smith     while (n-->0) {
498bcc973b7SBarry Smith       y[3*(*idx)]   += alpha1*(*v);
499bcc973b7SBarry Smith       y[3*(*idx)+1] += alpha2*(*v);
500bcc973b7SBarry Smith       y[3*(*idx)+2] += alpha3*(*v);
501bcc973b7SBarry Smith       idx++; v++;
502bcc973b7SBarry Smith     }
503bcc973b7SBarry Smith   }
504dc0b31edSSatish Balay   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
5053649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
5061ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
507bcc973b7SBarry Smith   PetscFunctionReturn(0);
508bcc973b7SBarry Smith }
509bcc973b7SBarry Smith 
510bcc973b7SBarry Smith /* ------------------------------------------------------------------------------*/
5114a2ae208SSatish Balay #undef __FUNCT__
512b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_4"
513dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
514bcc973b7SBarry Smith {
5154cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
516bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
517d2840e09SBarry Smith   const PetscScalar *x,*v;
518d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4;
519dfbe8321SBarry Smith   PetscErrorCode    ierr;
520d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
521d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
522bcc973b7SBarry Smith 
523bcc973b7SBarry Smith   PetscFunctionBegin;
5243649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
5251ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
526bcc973b7SBarry Smith   idx  = a->j;
527bcc973b7SBarry Smith   v    = a->a;
528bcc973b7SBarry Smith   ii   = a->i;
529bcc973b7SBarry Smith 
530bcc973b7SBarry Smith   for (i=0; i<m; i++) {
531bcc973b7SBarry Smith     jrow = ii[i];
532bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
533bcc973b7SBarry Smith     sum1  = 0.0;
534bcc973b7SBarry Smith     sum2  = 0.0;
535bcc973b7SBarry Smith     sum3  = 0.0;
536bcc973b7SBarry Smith     sum4  = 0.0;
537b3c51c66SMatthew Knepley     nonzerorow += (n>0);
538bcc973b7SBarry Smith     for (j=0; j<n; j++) {
539bcc973b7SBarry Smith       sum1 += v[jrow]*x[4*idx[jrow]];
540bcc973b7SBarry Smith       sum2 += v[jrow]*x[4*idx[jrow]+1];
541bcc973b7SBarry Smith       sum3 += v[jrow]*x[4*idx[jrow]+2];
542bcc973b7SBarry Smith       sum4 += v[jrow]*x[4*idx[jrow]+3];
543bcc973b7SBarry Smith       jrow++;
544bcc973b7SBarry Smith      }
545bcc973b7SBarry Smith     y[4*i]   = sum1;
546bcc973b7SBarry Smith     y[4*i+1] = sum2;
547bcc973b7SBarry Smith     y[4*i+2] = sum3;
548bcc973b7SBarry Smith     y[4*i+3] = sum4;
549bcc973b7SBarry Smith   }
550bcc973b7SBarry Smith 
551dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr);
5523649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
5531ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
554bcc973b7SBarry Smith   PetscFunctionReturn(0);
555bcc973b7SBarry Smith }
556bcc973b7SBarry Smith 
5574a2ae208SSatish Balay #undef __FUNCT__
558b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4"
559dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
560bcc973b7SBarry Smith {
5614cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
562bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
563d2840e09SBarry Smith   const PetscScalar *x,*v;
564d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
565dfbe8321SBarry Smith   PetscErrorCode    ierr;
566d2840e09SBarry Smith   PetscInt          n,i;
567d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
568bcc973b7SBarry Smith 
569bcc973b7SBarry Smith   PetscFunctionBegin;
570d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
5713649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
5721ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
573bcc973b7SBarry Smith   for (i=0; i<m; i++) {
574bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
575bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
576bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
577bcc973b7SBarry Smith     alpha1 = x[4*i];
578bcc973b7SBarry Smith     alpha2 = x[4*i+1];
579bcc973b7SBarry Smith     alpha3 = x[4*i+2];
580bcc973b7SBarry Smith     alpha4 = x[4*i+3];
581bcc973b7SBarry Smith     while (n-->0) {
582bcc973b7SBarry Smith       y[4*(*idx)]   += alpha1*(*v);
583bcc973b7SBarry Smith       y[4*(*idx)+1] += alpha2*(*v);
584bcc973b7SBarry Smith       y[4*(*idx)+2] += alpha3*(*v);
585bcc973b7SBarry Smith       y[4*(*idx)+3] += alpha4*(*v);
586bcc973b7SBarry Smith       idx++; v++;
587bcc973b7SBarry Smith     }
588bcc973b7SBarry Smith   }
589dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
5903649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
5911ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
592bcc973b7SBarry Smith   PetscFunctionReturn(0);
593bcc973b7SBarry Smith }
594bcc973b7SBarry Smith 
5954a2ae208SSatish Balay #undef __FUNCT__
596b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_4"
597dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
598bcc973b7SBarry Smith {
5994cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
600f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
601d2840e09SBarry Smith   const PetscScalar *x,*v;
602d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4;
603dfbe8321SBarry Smith   PetscErrorCode    ierr;
604b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
605d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
606f9fae5adSBarry Smith 
607f9fae5adSBarry Smith   PetscFunctionBegin;
608f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
6093649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
6101ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
611f9fae5adSBarry Smith   idx  = a->j;
612f9fae5adSBarry Smith   v    = a->a;
613f9fae5adSBarry Smith   ii   = a->i;
614f9fae5adSBarry Smith 
615f9fae5adSBarry Smith   for (i=0; i<m; i++) {
616f9fae5adSBarry Smith     jrow = ii[i];
617f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
618f9fae5adSBarry Smith     sum1  = 0.0;
619f9fae5adSBarry Smith     sum2  = 0.0;
620f9fae5adSBarry Smith     sum3  = 0.0;
621f9fae5adSBarry Smith     sum4  = 0.0;
622f9fae5adSBarry Smith     for (j=0; j<n; j++) {
623f9fae5adSBarry Smith       sum1 += v[jrow]*x[4*idx[jrow]];
624f9fae5adSBarry Smith       sum2 += v[jrow]*x[4*idx[jrow]+1];
625f9fae5adSBarry Smith       sum3 += v[jrow]*x[4*idx[jrow]+2];
626f9fae5adSBarry Smith       sum4 += v[jrow]*x[4*idx[jrow]+3];
627f9fae5adSBarry Smith       jrow++;
628f9fae5adSBarry Smith      }
629f9fae5adSBarry Smith     y[4*i]   += sum1;
630f9fae5adSBarry Smith     y[4*i+1] += sum2;
631f9fae5adSBarry Smith     y[4*i+2] += sum3;
632f9fae5adSBarry Smith     y[4*i+3] += sum4;
633f9fae5adSBarry Smith   }
634f9fae5adSBarry Smith 
635dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
6363649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
6371ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
638f9fae5adSBarry Smith   PetscFunctionReturn(0);
639bcc973b7SBarry Smith }
6404a2ae208SSatish Balay #undef __FUNCT__
641b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4"
642dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
643bcc973b7SBarry Smith {
6444cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
645f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
646d2840e09SBarry Smith   const PetscScalar *x,*v;
647d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
648dfbe8321SBarry Smith   PetscErrorCode    ierr;
649d2840e09SBarry Smith   PetscInt          n,i;
650d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
651f9fae5adSBarry Smith 
652f9fae5adSBarry Smith   PetscFunctionBegin;
653f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
6543649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
6551ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
656bfec09a0SHong Zhang 
657f9fae5adSBarry Smith   for (i=0; i<m; i++) {
658bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
659bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
660f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
661f9fae5adSBarry Smith     alpha1 = x[4*i];
662f9fae5adSBarry Smith     alpha2 = x[4*i+1];
663f9fae5adSBarry Smith     alpha3 = x[4*i+2];
664f9fae5adSBarry Smith     alpha4 = x[4*i+3];
665f9fae5adSBarry Smith     while (n-->0) {
666f9fae5adSBarry Smith       y[4*(*idx)]   += alpha1*(*v);
667f9fae5adSBarry Smith       y[4*(*idx)+1] += alpha2*(*v);
668f9fae5adSBarry Smith       y[4*(*idx)+2] += alpha3*(*v);
669f9fae5adSBarry Smith       y[4*(*idx)+3] += alpha4*(*v);
670f9fae5adSBarry Smith       idx++; v++;
671f9fae5adSBarry Smith     }
672f9fae5adSBarry Smith   }
673dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
6743649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
6751ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
676f9fae5adSBarry Smith   PetscFunctionReturn(0);
677f9fae5adSBarry Smith }
678f9fae5adSBarry Smith /* ------------------------------------------------------------------------------*/
6796cd98798SBarry Smith 
6804a2ae208SSatish Balay #undef __FUNCT__
681b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_5"
682dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
683f9fae5adSBarry Smith {
6844cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
685f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
686d2840e09SBarry Smith   const PetscScalar *x,*v;
687d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
688dfbe8321SBarry Smith   PetscErrorCode    ierr;
689d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
690d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
691f9fae5adSBarry Smith 
692f9fae5adSBarry Smith   PetscFunctionBegin;
6933649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
6941ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
695f9fae5adSBarry Smith   idx  = a->j;
696f9fae5adSBarry Smith   v    = a->a;
697f9fae5adSBarry Smith   ii   = a->i;
698f9fae5adSBarry Smith 
699f9fae5adSBarry Smith   for (i=0; i<m; i++) {
700f9fae5adSBarry Smith     jrow = ii[i];
701f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
702f9fae5adSBarry Smith     sum1  = 0.0;
703f9fae5adSBarry Smith     sum2  = 0.0;
704f9fae5adSBarry Smith     sum3  = 0.0;
705f9fae5adSBarry Smith     sum4  = 0.0;
706f9fae5adSBarry Smith     sum5  = 0.0;
707b3c51c66SMatthew Knepley     nonzerorow += (n>0);
708f9fae5adSBarry Smith     for (j=0; j<n; j++) {
709f9fae5adSBarry Smith       sum1 += v[jrow]*x[5*idx[jrow]];
710f9fae5adSBarry Smith       sum2 += v[jrow]*x[5*idx[jrow]+1];
711f9fae5adSBarry Smith       sum3 += v[jrow]*x[5*idx[jrow]+2];
712f9fae5adSBarry Smith       sum4 += v[jrow]*x[5*idx[jrow]+3];
713f9fae5adSBarry Smith       sum5 += v[jrow]*x[5*idx[jrow]+4];
714f9fae5adSBarry Smith       jrow++;
715f9fae5adSBarry Smith      }
716f9fae5adSBarry Smith     y[5*i]   = sum1;
717f9fae5adSBarry Smith     y[5*i+1] = sum2;
718f9fae5adSBarry Smith     y[5*i+2] = sum3;
719f9fae5adSBarry Smith     y[5*i+3] = sum4;
720f9fae5adSBarry Smith     y[5*i+4] = sum5;
721f9fae5adSBarry Smith   }
722f9fae5adSBarry Smith 
723dc0b31edSSatish Balay   ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr);
7243649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
7251ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
726f9fae5adSBarry Smith   PetscFunctionReturn(0);
727f9fae5adSBarry Smith }
728f9fae5adSBarry Smith 
7294a2ae208SSatish Balay #undef __FUNCT__
730b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5"
731dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
732f9fae5adSBarry Smith {
7334cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
734f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
735d2840e09SBarry Smith   const PetscScalar *x,*v;
736d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
737dfbe8321SBarry Smith   PetscErrorCode    ierr;
738d2840e09SBarry Smith   PetscInt          n,i;
739d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
740f9fae5adSBarry Smith 
741f9fae5adSBarry Smith   PetscFunctionBegin;
742d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
7433649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
7441ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
745bfec09a0SHong Zhang 
746f9fae5adSBarry Smith   for (i=0; i<m; i++) {
747bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
748bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
749f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
750f9fae5adSBarry Smith     alpha1 = x[5*i];
751f9fae5adSBarry Smith     alpha2 = x[5*i+1];
752f9fae5adSBarry Smith     alpha3 = x[5*i+2];
753f9fae5adSBarry Smith     alpha4 = x[5*i+3];
754f9fae5adSBarry Smith     alpha5 = x[5*i+4];
755f9fae5adSBarry Smith     while (n-->0) {
756f9fae5adSBarry Smith       y[5*(*idx)]   += alpha1*(*v);
757f9fae5adSBarry Smith       y[5*(*idx)+1] += alpha2*(*v);
758f9fae5adSBarry Smith       y[5*(*idx)+2] += alpha3*(*v);
759f9fae5adSBarry Smith       y[5*(*idx)+3] += alpha4*(*v);
760f9fae5adSBarry Smith       y[5*(*idx)+4] += alpha5*(*v);
761f9fae5adSBarry Smith       idx++; v++;
762f9fae5adSBarry Smith     }
763f9fae5adSBarry Smith   }
764dc0b31edSSatish Balay   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
7653649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
7661ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
767f9fae5adSBarry Smith   PetscFunctionReturn(0);
768f9fae5adSBarry Smith }
769f9fae5adSBarry Smith 
7704a2ae208SSatish Balay #undef __FUNCT__
771b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_5"
772dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
773f9fae5adSBarry Smith {
7744cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
775f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
776d2840e09SBarry Smith   const PetscScalar *x,*v;
777d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
778dfbe8321SBarry Smith   PetscErrorCode    ierr;
779b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
780d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
781f9fae5adSBarry Smith 
782f9fae5adSBarry Smith   PetscFunctionBegin;
783f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
7843649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
7851ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
786f9fae5adSBarry Smith   idx  = a->j;
787f9fae5adSBarry Smith   v    = a->a;
788f9fae5adSBarry Smith   ii   = a->i;
789f9fae5adSBarry Smith 
790f9fae5adSBarry Smith   for (i=0; i<m; i++) {
791f9fae5adSBarry Smith     jrow = ii[i];
792f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
793f9fae5adSBarry Smith     sum1  = 0.0;
794f9fae5adSBarry Smith     sum2  = 0.0;
795f9fae5adSBarry Smith     sum3  = 0.0;
796f9fae5adSBarry Smith     sum4  = 0.0;
797f9fae5adSBarry Smith     sum5  = 0.0;
798f9fae5adSBarry Smith     for (j=0; j<n; j++) {
799f9fae5adSBarry Smith       sum1 += v[jrow]*x[5*idx[jrow]];
800f9fae5adSBarry Smith       sum2 += v[jrow]*x[5*idx[jrow]+1];
801f9fae5adSBarry Smith       sum3 += v[jrow]*x[5*idx[jrow]+2];
802f9fae5adSBarry Smith       sum4 += v[jrow]*x[5*idx[jrow]+3];
803f9fae5adSBarry Smith       sum5 += v[jrow]*x[5*idx[jrow]+4];
804f9fae5adSBarry Smith       jrow++;
805f9fae5adSBarry Smith      }
806f9fae5adSBarry Smith     y[5*i]   += sum1;
807f9fae5adSBarry Smith     y[5*i+1] += sum2;
808f9fae5adSBarry Smith     y[5*i+2] += sum3;
809f9fae5adSBarry Smith     y[5*i+3] += sum4;
810f9fae5adSBarry Smith     y[5*i+4] += sum5;
811f9fae5adSBarry Smith   }
812f9fae5adSBarry Smith 
813dc0b31edSSatish Balay   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
8143649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
8151ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
816f9fae5adSBarry Smith   PetscFunctionReturn(0);
817f9fae5adSBarry Smith }
818f9fae5adSBarry Smith 
8194a2ae208SSatish Balay #undef __FUNCT__
820b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5"
821dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
822f9fae5adSBarry Smith {
8234cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
824f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
825d2840e09SBarry Smith   const PetscScalar *x,*v;
826d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
827dfbe8321SBarry Smith   PetscErrorCode    ierr;
828d2840e09SBarry Smith   PetscInt          n,i;
829d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
830f9fae5adSBarry Smith 
831f9fae5adSBarry Smith   PetscFunctionBegin;
832f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
8333649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
8341ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
835bfec09a0SHong Zhang 
836f9fae5adSBarry Smith   for (i=0; i<m; i++) {
837bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
838bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
839f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
840f9fae5adSBarry Smith     alpha1 = x[5*i];
841f9fae5adSBarry Smith     alpha2 = x[5*i+1];
842f9fae5adSBarry Smith     alpha3 = x[5*i+2];
843f9fae5adSBarry Smith     alpha4 = x[5*i+3];
844f9fae5adSBarry Smith     alpha5 = x[5*i+4];
845f9fae5adSBarry Smith     while (n-->0) {
846f9fae5adSBarry Smith       y[5*(*idx)]   += alpha1*(*v);
847f9fae5adSBarry Smith       y[5*(*idx)+1] += alpha2*(*v);
848f9fae5adSBarry Smith       y[5*(*idx)+2] += alpha3*(*v);
849f9fae5adSBarry Smith       y[5*(*idx)+3] += alpha4*(*v);
850f9fae5adSBarry Smith       y[5*(*idx)+4] += alpha5*(*v);
851f9fae5adSBarry Smith       idx++; v++;
852f9fae5adSBarry Smith     }
853f9fae5adSBarry Smith   }
854dc0b31edSSatish Balay   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
8553649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
8561ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
857f9fae5adSBarry Smith   PetscFunctionReturn(0);
858bcc973b7SBarry Smith }
859bcc973b7SBarry Smith 
8606cd98798SBarry Smith /* ------------------------------------------------------------------------------*/
8614a2ae208SSatish Balay #undef __FUNCT__
862b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_6"
863dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
8646cd98798SBarry Smith {
8656cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
8666cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
867d2840e09SBarry Smith   const PetscScalar *x,*v;
868d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
869dfbe8321SBarry Smith   PetscErrorCode    ierr;
870d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
871d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
8726cd98798SBarry Smith 
8736cd98798SBarry Smith   PetscFunctionBegin;
8743649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
8751ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
8766cd98798SBarry Smith   idx  = a->j;
8776cd98798SBarry Smith   v    = a->a;
8786cd98798SBarry Smith   ii   = a->i;
8796cd98798SBarry Smith 
8806cd98798SBarry Smith   for (i=0; i<m; i++) {
8816cd98798SBarry Smith     jrow = ii[i];
8826cd98798SBarry Smith     n    = ii[i+1] - jrow;
8836cd98798SBarry Smith     sum1  = 0.0;
8846cd98798SBarry Smith     sum2  = 0.0;
8856cd98798SBarry Smith     sum3  = 0.0;
8866cd98798SBarry Smith     sum4  = 0.0;
8876cd98798SBarry Smith     sum5  = 0.0;
8886cd98798SBarry Smith     sum6  = 0.0;
889b3c51c66SMatthew Knepley     nonzerorow += (n>0);
8906cd98798SBarry Smith     for (j=0; j<n; j++) {
8916cd98798SBarry Smith       sum1 += v[jrow]*x[6*idx[jrow]];
8926cd98798SBarry Smith       sum2 += v[jrow]*x[6*idx[jrow]+1];
8936cd98798SBarry Smith       sum3 += v[jrow]*x[6*idx[jrow]+2];
8946cd98798SBarry Smith       sum4 += v[jrow]*x[6*idx[jrow]+3];
8956cd98798SBarry Smith       sum5 += v[jrow]*x[6*idx[jrow]+4];
8966cd98798SBarry Smith       sum6 += v[jrow]*x[6*idx[jrow]+5];
8976cd98798SBarry Smith       jrow++;
8986cd98798SBarry Smith      }
8996cd98798SBarry Smith     y[6*i]   = sum1;
9006cd98798SBarry Smith     y[6*i+1] = sum2;
9016cd98798SBarry Smith     y[6*i+2] = sum3;
9026cd98798SBarry Smith     y[6*i+3] = sum4;
9036cd98798SBarry Smith     y[6*i+4] = sum5;
9046cd98798SBarry Smith     y[6*i+5] = sum6;
9056cd98798SBarry Smith   }
9066cd98798SBarry Smith 
907dc0b31edSSatish Balay   ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr);
9083649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
9091ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
9106cd98798SBarry Smith   PetscFunctionReturn(0);
9116cd98798SBarry Smith }
9126cd98798SBarry Smith 
9134a2ae208SSatish Balay #undef __FUNCT__
914b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6"
915dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
9166cd98798SBarry Smith {
9176cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
9186cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
919d2840e09SBarry Smith   const PetscScalar *x,*v;
920d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
921dfbe8321SBarry Smith   PetscErrorCode    ierr;
922d2840e09SBarry Smith   PetscInt          n,i;
923d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
9246cd98798SBarry Smith 
9256cd98798SBarry Smith   PetscFunctionBegin;
926d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
9273649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
9281ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
929bfec09a0SHong Zhang 
9306cd98798SBarry Smith   for (i=0; i<m; i++) {
931bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
932bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
9336cd98798SBarry Smith     n      = a->i[i+1] - a->i[i];
9346cd98798SBarry Smith     alpha1 = x[6*i];
9356cd98798SBarry Smith     alpha2 = x[6*i+1];
9366cd98798SBarry Smith     alpha3 = x[6*i+2];
9376cd98798SBarry Smith     alpha4 = x[6*i+3];
9386cd98798SBarry Smith     alpha5 = x[6*i+4];
9396cd98798SBarry Smith     alpha6 = x[6*i+5];
9406cd98798SBarry Smith     while (n-->0) {
9416cd98798SBarry Smith       y[6*(*idx)]   += alpha1*(*v);
9426cd98798SBarry Smith       y[6*(*idx)+1] += alpha2*(*v);
9436cd98798SBarry Smith       y[6*(*idx)+2] += alpha3*(*v);
9446cd98798SBarry Smith       y[6*(*idx)+3] += alpha4*(*v);
9456cd98798SBarry Smith       y[6*(*idx)+4] += alpha5*(*v);
9466cd98798SBarry Smith       y[6*(*idx)+5] += alpha6*(*v);
9476cd98798SBarry Smith       idx++; v++;
9486cd98798SBarry Smith     }
9496cd98798SBarry Smith   }
950dc0b31edSSatish Balay   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
9513649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
9521ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
9536cd98798SBarry Smith   PetscFunctionReturn(0);
9546cd98798SBarry Smith }
9556cd98798SBarry Smith 
9564a2ae208SSatish Balay #undef __FUNCT__
957b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_6"
958dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
9596cd98798SBarry Smith {
9606cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
9616cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
962d2840e09SBarry Smith   const PetscScalar *x,*v;
963d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
964dfbe8321SBarry Smith   PetscErrorCode    ierr;
965b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
966d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
9676cd98798SBarry Smith 
9686cd98798SBarry Smith   PetscFunctionBegin;
9696cd98798SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
9703649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
9711ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
9726cd98798SBarry Smith   idx  = a->j;
9736cd98798SBarry Smith   v    = a->a;
9746cd98798SBarry Smith   ii   = a->i;
9756cd98798SBarry Smith 
9766cd98798SBarry Smith   for (i=0; i<m; i++) {
9776cd98798SBarry Smith     jrow = ii[i];
9786cd98798SBarry Smith     n    = ii[i+1] - jrow;
9796cd98798SBarry Smith     sum1  = 0.0;
9806cd98798SBarry Smith     sum2  = 0.0;
9816cd98798SBarry Smith     sum3  = 0.0;
9826cd98798SBarry Smith     sum4  = 0.0;
9836cd98798SBarry Smith     sum5  = 0.0;
9846cd98798SBarry Smith     sum6  = 0.0;
9856cd98798SBarry Smith     for (j=0; j<n; j++) {
9866cd98798SBarry Smith       sum1 += v[jrow]*x[6*idx[jrow]];
9876cd98798SBarry Smith       sum2 += v[jrow]*x[6*idx[jrow]+1];
9886cd98798SBarry Smith       sum3 += v[jrow]*x[6*idx[jrow]+2];
9896cd98798SBarry Smith       sum4 += v[jrow]*x[6*idx[jrow]+3];
9906cd98798SBarry Smith       sum5 += v[jrow]*x[6*idx[jrow]+4];
9916cd98798SBarry Smith       sum6 += v[jrow]*x[6*idx[jrow]+5];
9926cd98798SBarry Smith       jrow++;
9936cd98798SBarry Smith      }
9946cd98798SBarry Smith     y[6*i]   += sum1;
9956cd98798SBarry Smith     y[6*i+1] += sum2;
9966cd98798SBarry Smith     y[6*i+2] += sum3;
9976cd98798SBarry Smith     y[6*i+3] += sum4;
9986cd98798SBarry Smith     y[6*i+4] += sum5;
9996cd98798SBarry Smith     y[6*i+5] += sum6;
10006cd98798SBarry Smith   }
10016cd98798SBarry Smith 
1002dc0b31edSSatish Balay   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
10033649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
10041ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
10056cd98798SBarry Smith   PetscFunctionReturn(0);
10066cd98798SBarry Smith }
10076cd98798SBarry Smith 
10084a2ae208SSatish Balay #undef __FUNCT__
1009b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6"
1010dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
10116cd98798SBarry Smith {
10126cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
10136cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1014d2840e09SBarry Smith   const PetscScalar *x,*v;
1015d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
1016dfbe8321SBarry Smith   PetscErrorCode    ierr;
1017d2840e09SBarry Smith   PetscInt          n,i;
1018d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
10196cd98798SBarry Smith 
10206cd98798SBarry Smith   PetscFunctionBegin;
10216cd98798SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
10223649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
10231ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1024bfec09a0SHong Zhang 
10256cd98798SBarry Smith   for (i=0; i<m; i++) {
1026bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
1027bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
10286cd98798SBarry Smith     n      = a->i[i+1] - a->i[i];
10296cd98798SBarry Smith     alpha1 = x[6*i];
10306cd98798SBarry Smith     alpha2 = x[6*i+1];
10316cd98798SBarry Smith     alpha3 = x[6*i+2];
10326cd98798SBarry Smith     alpha4 = x[6*i+3];
10336cd98798SBarry Smith     alpha5 = x[6*i+4];
10346cd98798SBarry Smith     alpha6 = x[6*i+5];
10356cd98798SBarry Smith     while (n-->0) {
10366cd98798SBarry Smith       y[6*(*idx)]   += alpha1*(*v);
10376cd98798SBarry Smith       y[6*(*idx)+1] += alpha2*(*v);
10386cd98798SBarry Smith       y[6*(*idx)+2] += alpha3*(*v);
10396cd98798SBarry Smith       y[6*(*idx)+3] += alpha4*(*v);
10406cd98798SBarry Smith       y[6*(*idx)+4] += alpha5*(*v);
10416cd98798SBarry Smith       y[6*(*idx)+5] += alpha6*(*v);
10426cd98798SBarry Smith       idx++; v++;
10436cd98798SBarry Smith     }
10446cd98798SBarry Smith   }
1045dc0b31edSSatish Balay   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
10463649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
10471ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
10486cd98798SBarry Smith   PetscFunctionReturn(0);
10496cd98798SBarry Smith }
10506cd98798SBarry Smith 
105166ed3db0SBarry Smith /* ------------------------------------------------------------------------------*/
105266ed3db0SBarry Smith #undef __FUNCT__
1053ed8eea03SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_7"
1054ed8eea03SBarry Smith PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1055ed8eea03SBarry Smith {
1056ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1057ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1058d2840e09SBarry Smith   const PetscScalar *x,*v;
1059d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1060ed8eea03SBarry Smith   PetscErrorCode    ierr;
1061d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1062d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
1063ed8eea03SBarry Smith 
1064ed8eea03SBarry Smith   PetscFunctionBegin;
10653649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1066ed8eea03SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1067ed8eea03SBarry Smith   idx  = a->j;
1068ed8eea03SBarry Smith   v    = a->a;
1069ed8eea03SBarry Smith   ii   = a->i;
1070ed8eea03SBarry Smith 
1071ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1072ed8eea03SBarry Smith     jrow = ii[i];
1073ed8eea03SBarry Smith     n    = ii[i+1] - jrow;
1074ed8eea03SBarry Smith     sum1  = 0.0;
1075ed8eea03SBarry Smith     sum2  = 0.0;
1076ed8eea03SBarry Smith     sum3  = 0.0;
1077ed8eea03SBarry Smith     sum4  = 0.0;
1078ed8eea03SBarry Smith     sum5  = 0.0;
1079ed8eea03SBarry Smith     sum6  = 0.0;
1080ed8eea03SBarry Smith     sum7  = 0.0;
1081b3c51c66SMatthew Knepley     nonzerorow += (n>0);
1082ed8eea03SBarry Smith     for (j=0; j<n; j++) {
1083ed8eea03SBarry Smith       sum1 += v[jrow]*x[7*idx[jrow]];
1084ed8eea03SBarry Smith       sum2 += v[jrow]*x[7*idx[jrow]+1];
1085ed8eea03SBarry Smith       sum3 += v[jrow]*x[7*idx[jrow]+2];
1086ed8eea03SBarry Smith       sum4 += v[jrow]*x[7*idx[jrow]+3];
1087ed8eea03SBarry Smith       sum5 += v[jrow]*x[7*idx[jrow]+4];
1088ed8eea03SBarry Smith       sum6 += v[jrow]*x[7*idx[jrow]+5];
1089ed8eea03SBarry Smith       sum7 += v[jrow]*x[7*idx[jrow]+6];
1090ed8eea03SBarry Smith       jrow++;
1091ed8eea03SBarry Smith      }
1092ed8eea03SBarry Smith     y[7*i]   = sum1;
1093ed8eea03SBarry Smith     y[7*i+1] = sum2;
1094ed8eea03SBarry Smith     y[7*i+2] = sum3;
1095ed8eea03SBarry Smith     y[7*i+3] = sum4;
1096ed8eea03SBarry Smith     y[7*i+4] = sum5;
1097ed8eea03SBarry Smith     y[7*i+5] = sum6;
1098ed8eea03SBarry Smith     y[7*i+6] = sum7;
1099ed8eea03SBarry Smith   }
1100ed8eea03SBarry Smith 
1101dc0b31edSSatish Balay   ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr);
11023649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1103ed8eea03SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1104ed8eea03SBarry Smith   PetscFunctionReturn(0);
1105ed8eea03SBarry Smith }
1106ed8eea03SBarry Smith 
1107ed8eea03SBarry Smith #undef __FUNCT__
1108ed8eea03SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_7"
1109ed8eea03SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1110ed8eea03SBarry Smith {
1111ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1112ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1113d2840e09SBarry Smith   const PetscScalar *x,*v;
1114d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1115ed8eea03SBarry Smith   PetscErrorCode    ierr;
1116d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1117d2840e09SBarry Smith   PetscInt          n,i;
1118ed8eea03SBarry Smith 
1119ed8eea03SBarry Smith   PetscFunctionBegin;
1120d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
11213649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1122ed8eea03SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1123ed8eea03SBarry Smith 
1124ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1125ed8eea03SBarry Smith     idx    = a->j + a->i[i] ;
1126ed8eea03SBarry Smith     v      = a->a + a->i[i] ;
1127ed8eea03SBarry Smith     n      = a->i[i+1] - a->i[i];
1128ed8eea03SBarry Smith     alpha1 = x[7*i];
1129ed8eea03SBarry Smith     alpha2 = x[7*i+1];
1130ed8eea03SBarry Smith     alpha3 = x[7*i+2];
1131ed8eea03SBarry Smith     alpha4 = x[7*i+3];
1132ed8eea03SBarry Smith     alpha5 = x[7*i+4];
1133ed8eea03SBarry Smith     alpha6 = x[7*i+5];
1134ed8eea03SBarry Smith     alpha7 = x[7*i+6];
1135ed8eea03SBarry Smith     while (n-->0) {
1136ed8eea03SBarry Smith       y[7*(*idx)]   += alpha1*(*v);
1137ed8eea03SBarry Smith       y[7*(*idx)+1] += alpha2*(*v);
1138ed8eea03SBarry Smith       y[7*(*idx)+2] += alpha3*(*v);
1139ed8eea03SBarry Smith       y[7*(*idx)+3] += alpha4*(*v);
1140ed8eea03SBarry Smith       y[7*(*idx)+4] += alpha5*(*v);
1141ed8eea03SBarry Smith       y[7*(*idx)+5] += alpha6*(*v);
1142ed8eea03SBarry Smith       y[7*(*idx)+6] += alpha7*(*v);
1143ed8eea03SBarry Smith       idx++; v++;
1144ed8eea03SBarry Smith     }
1145ed8eea03SBarry Smith   }
1146dc0b31edSSatish Balay   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
11473649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1148ed8eea03SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1149ed8eea03SBarry Smith   PetscFunctionReturn(0);
1150ed8eea03SBarry Smith }
1151ed8eea03SBarry Smith 
1152ed8eea03SBarry Smith #undef __FUNCT__
1153ed8eea03SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_7"
1154ed8eea03SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1155ed8eea03SBarry Smith {
1156ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1157ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1158d2840e09SBarry Smith   const PetscScalar *x,*v;
1159d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1160ed8eea03SBarry Smith   PetscErrorCode    ierr;
1161d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1162ed8eea03SBarry Smith   PetscInt          n,i,jrow,j;
1163ed8eea03SBarry Smith 
1164ed8eea03SBarry Smith   PetscFunctionBegin;
1165ed8eea03SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
11663649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1167ed8eea03SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1168ed8eea03SBarry Smith   idx  = a->j;
1169ed8eea03SBarry Smith   v    = a->a;
1170ed8eea03SBarry Smith   ii   = a->i;
1171ed8eea03SBarry Smith 
1172ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1173ed8eea03SBarry Smith     jrow = ii[i];
1174ed8eea03SBarry Smith     n    = ii[i+1] - jrow;
1175ed8eea03SBarry Smith     sum1  = 0.0;
1176ed8eea03SBarry Smith     sum2  = 0.0;
1177ed8eea03SBarry Smith     sum3  = 0.0;
1178ed8eea03SBarry Smith     sum4  = 0.0;
1179ed8eea03SBarry Smith     sum5  = 0.0;
1180ed8eea03SBarry Smith     sum6  = 0.0;
1181ed8eea03SBarry Smith     sum7  = 0.0;
1182ed8eea03SBarry Smith     for (j=0; j<n; j++) {
1183ed8eea03SBarry Smith       sum1 += v[jrow]*x[7*idx[jrow]];
1184ed8eea03SBarry Smith       sum2 += v[jrow]*x[7*idx[jrow]+1];
1185ed8eea03SBarry Smith       sum3 += v[jrow]*x[7*idx[jrow]+2];
1186ed8eea03SBarry Smith       sum4 += v[jrow]*x[7*idx[jrow]+3];
1187ed8eea03SBarry Smith       sum5 += v[jrow]*x[7*idx[jrow]+4];
1188ed8eea03SBarry Smith       sum6 += v[jrow]*x[7*idx[jrow]+5];
1189ed8eea03SBarry Smith       sum7 += v[jrow]*x[7*idx[jrow]+6];
1190ed8eea03SBarry Smith       jrow++;
1191ed8eea03SBarry Smith      }
1192ed8eea03SBarry Smith     y[7*i]   += sum1;
1193ed8eea03SBarry Smith     y[7*i+1] += sum2;
1194ed8eea03SBarry Smith     y[7*i+2] += sum3;
1195ed8eea03SBarry Smith     y[7*i+3] += sum4;
1196ed8eea03SBarry Smith     y[7*i+4] += sum5;
1197ed8eea03SBarry Smith     y[7*i+5] += sum6;
1198ed8eea03SBarry Smith     y[7*i+6] += sum7;
1199ed8eea03SBarry Smith   }
1200ed8eea03SBarry Smith 
1201dc0b31edSSatish Balay   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
12023649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1203ed8eea03SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1204ed8eea03SBarry Smith   PetscFunctionReturn(0);
1205ed8eea03SBarry Smith }
1206ed8eea03SBarry Smith 
1207ed8eea03SBarry Smith #undef __FUNCT__
1208ed8eea03SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_7"
1209ed8eea03SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1210ed8eea03SBarry Smith {
1211ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1212ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1213d2840e09SBarry Smith   const PetscScalar *x,*v;
1214d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1215ed8eea03SBarry Smith   PetscErrorCode    ierr;
1216d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1217d2840e09SBarry Smith   PetscInt          n,i;
1218ed8eea03SBarry Smith 
1219ed8eea03SBarry Smith   PetscFunctionBegin;
1220ed8eea03SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
12213649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1222ed8eea03SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1223ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1224ed8eea03SBarry Smith     idx    = a->j + a->i[i] ;
1225ed8eea03SBarry Smith     v      = a->a + a->i[i] ;
1226ed8eea03SBarry Smith     n      = a->i[i+1] - a->i[i];
1227ed8eea03SBarry Smith     alpha1 = x[7*i];
1228ed8eea03SBarry Smith     alpha2 = x[7*i+1];
1229ed8eea03SBarry Smith     alpha3 = x[7*i+2];
1230ed8eea03SBarry Smith     alpha4 = x[7*i+3];
1231ed8eea03SBarry Smith     alpha5 = x[7*i+4];
1232ed8eea03SBarry Smith     alpha6 = x[7*i+5];
1233ed8eea03SBarry Smith     alpha7 = x[7*i+6];
1234ed8eea03SBarry Smith     while (n-->0) {
1235ed8eea03SBarry Smith       y[7*(*idx)]   += alpha1*(*v);
1236ed8eea03SBarry Smith       y[7*(*idx)+1] += alpha2*(*v);
1237ed8eea03SBarry Smith       y[7*(*idx)+2] += alpha3*(*v);
1238ed8eea03SBarry Smith       y[7*(*idx)+3] += alpha4*(*v);
1239ed8eea03SBarry Smith       y[7*(*idx)+4] += alpha5*(*v);
1240ed8eea03SBarry Smith       y[7*(*idx)+5] += alpha6*(*v);
1241ed8eea03SBarry Smith       y[7*(*idx)+6] += alpha7*(*v);
1242ed8eea03SBarry Smith       idx++; v++;
1243ed8eea03SBarry Smith     }
1244ed8eea03SBarry Smith   }
1245dc0b31edSSatish Balay   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
12463649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1247ed8eea03SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1248ed8eea03SBarry Smith   PetscFunctionReturn(0);
1249ed8eea03SBarry Smith }
1250ed8eea03SBarry Smith 
1251ed8eea03SBarry Smith #undef __FUNCT__
125266ed3db0SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_8"
1253dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
125466ed3db0SBarry Smith {
125566ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
125666ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1257d2840e09SBarry Smith   const PetscScalar *x,*v;
1258d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1259dfbe8321SBarry Smith   PetscErrorCode    ierr;
1260d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1261d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
126266ed3db0SBarry Smith 
126366ed3db0SBarry Smith   PetscFunctionBegin;
12643649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
12651ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
126666ed3db0SBarry Smith   idx  = a->j;
126766ed3db0SBarry Smith   v    = a->a;
126866ed3db0SBarry Smith   ii   = a->i;
126966ed3db0SBarry Smith 
127066ed3db0SBarry Smith   for (i=0; i<m; i++) {
127166ed3db0SBarry Smith     jrow = ii[i];
127266ed3db0SBarry Smith     n    = ii[i+1] - jrow;
127366ed3db0SBarry Smith     sum1  = 0.0;
127466ed3db0SBarry Smith     sum2  = 0.0;
127566ed3db0SBarry Smith     sum3  = 0.0;
127666ed3db0SBarry Smith     sum4  = 0.0;
127766ed3db0SBarry Smith     sum5  = 0.0;
127866ed3db0SBarry Smith     sum6  = 0.0;
127966ed3db0SBarry Smith     sum7  = 0.0;
128066ed3db0SBarry Smith     sum8  = 0.0;
1281b3c51c66SMatthew Knepley     nonzerorow += (n>0);
128266ed3db0SBarry Smith     for (j=0; j<n; j++) {
128366ed3db0SBarry Smith       sum1 += v[jrow]*x[8*idx[jrow]];
128466ed3db0SBarry Smith       sum2 += v[jrow]*x[8*idx[jrow]+1];
128566ed3db0SBarry Smith       sum3 += v[jrow]*x[8*idx[jrow]+2];
128666ed3db0SBarry Smith       sum4 += v[jrow]*x[8*idx[jrow]+3];
128766ed3db0SBarry Smith       sum5 += v[jrow]*x[8*idx[jrow]+4];
128866ed3db0SBarry Smith       sum6 += v[jrow]*x[8*idx[jrow]+5];
128966ed3db0SBarry Smith       sum7 += v[jrow]*x[8*idx[jrow]+6];
129066ed3db0SBarry Smith       sum8 += v[jrow]*x[8*idx[jrow]+7];
129166ed3db0SBarry Smith       jrow++;
129266ed3db0SBarry Smith      }
129366ed3db0SBarry Smith     y[8*i]   = sum1;
129466ed3db0SBarry Smith     y[8*i+1] = sum2;
129566ed3db0SBarry Smith     y[8*i+2] = sum3;
129666ed3db0SBarry Smith     y[8*i+3] = sum4;
129766ed3db0SBarry Smith     y[8*i+4] = sum5;
129866ed3db0SBarry Smith     y[8*i+5] = sum6;
129966ed3db0SBarry Smith     y[8*i+6] = sum7;
130066ed3db0SBarry Smith     y[8*i+7] = sum8;
130166ed3db0SBarry Smith   }
130266ed3db0SBarry Smith 
1303dc0b31edSSatish Balay   ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr);
13043649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
13051ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
130666ed3db0SBarry Smith   PetscFunctionReturn(0);
130766ed3db0SBarry Smith }
130866ed3db0SBarry Smith 
130966ed3db0SBarry Smith #undef __FUNCT__
131066ed3db0SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8"
1311dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
131266ed3db0SBarry Smith {
131366ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
131466ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1315d2840e09SBarry Smith   const PetscScalar *x,*v;
1316d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1317dfbe8321SBarry Smith   PetscErrorCode    ierr;
1318d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1319d2840e09SBarry Smith   PetscInt          n,i;
132066ed3db0SBarry Smith 
132166ed3db0SBarry Smith   PetscFunctionBegin;
1322d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
13233649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
13241ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1325bfec09a0SHong Zhang 
132666ed3db0SBarry Smith   for (i=0; i<m; i++) {
1327bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
1328bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
132966ed3db0SBarry Smith     n      = a->i[i+1] - a->i[i];
133066ed3db0SBarry Smith     alpha1 = x[8*i];
133166ed3db0SBarry Smith     alpha2 = x[8*i+1];
133266ed3db0SBarry Smith     alpha3 = x[8*i+2];
133366ed3db0SBarry Smith     alpha4 = x[8*i+3];
133466ed3db0SBarry Smith     alpha5 = x[8*i+4];
133566ed3db0SBarry Smith     alpha6 = x[8*i+5];
133666ed3db0SBarry Smith     alpha7 = x[8*i+6];
133766ed3db0SBarry Smith     alpha8 = x[8*i+7];
133866ed3db0SBarry Smith     while (n-->0) {
133966ed3db0SBarry Smith       y[8*(*idx)]   += alpha1*(*v);
134066ed3db0SBarry Smith       y[8*(*idx)+1] += alpha2*(*v);
134166ed3db0SBarry Smith       y[8*(*idx)+2] += alpha3*(*v);
134266ed3db0SBarry Smith       y[8*(*idx)+3] += alpha4*(*v);
134366ed3db0SBarry Smith       y[8*(*idx)+4] += alpha5*(*v);
134466ed3db0SBarry Smith       y[8*(*idx)+5] += alpha6*(*v);
134566ed3db0SBarry Smith       y[8*(*idx)+6] += alpha7*(*v);
134666ed3db0SBarry Smith       y[8*(*idx)+7] += alpha8*(*v);
134766ed3db0SBarry Smith       idx++; v++;
134866ed3db0SBarry Smith     }
134966ed3db0SBarry Smith   }
1350dc0b31edSSatish Balay   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
13513649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
13521ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
135366ed3db0SBarry Smith   PetscFunctionReturn(0);
135466ed3db0SBarry Smith }
135566ed3db0SBarry Smith 
135666ed3db0SBarry Smith #undef __FUNCT__
135766ed3db0SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_8"
1358dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
135966ed3db0SBarry Smith {
136066ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
136166ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1362d2840e09SBarry Smith   const PetscScalar *x,*v;
1363d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1364dfbe8321SBarry Smith   PetscErrorCode    ierr;
1365d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1366b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
136766ed3db0SBarry Smith 
136866ed3db0SBarry Smith   PetscFunctionBegin;
136966ed3db0SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
13703649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
13711ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
137266ed3db0SBarry Smith   idx  = a->j;
137366ed3db0SBarry Smith   v    = a->a;
137466ed3db0SBarry Smith   ii   = a->i;
137566ed3db0SBarry Smith 
137666ed3db0SBarry Smith   for (i=0; i<m; i++) {
137766ed3db0SBarry Smith     jrow = ii[i];
137866ed3db0SBarry Smith     n    = ii[i+1] - jrow;
137966ed3db0SBarry Smith     sum1  = 0.0;
138066ed3db0SBarry Smith     sum2  = 0.0;
138166ed3db0SBarry Smith     sum3  = 0.0;
138266ed3db0SBarry Smith     sum4  = 0.0;
138366ed3db0SBarry Smith     sum5  = 0.0;
138466ed3db0SBarry Smith     sum6  = 0.0;
138566ed3db0SBarry Smith     sum7  = 0.0;
138666ed3db0SBarry Smith     sum8  = 0.0;
138766ed3db0SBarry Smith     for (j=0; j<n; j++) {
138866ed3db0SBarry Smith       sum1 += v[jrow]*x[8*idx[jrow]];
138966ed3db0SBarry Smith       sum2 += v[jrow]*x[8*idx[jrow]+1];
139066ed3db0SBarry Smith       sum3 += v[jrow]*x[8*idx[jrow]+2];
139166ed3db0SBarry Smith       sum4 += v[jrow]*x[8*idx[jrow]+3];
139266ed3db0SBarry Smith       sum5 += v[jrow]*x[8*idx[jrow]+4];
139366ed3db0SBarry Smith       sum6 += v[jrow]*x[8*idx[jrow]+5];
139466ed3db0SBarry Smith       sum7 += v[jrow]*x[8*idx[jrow]+6];
139566ed3db0SBarry Smith       sum8 += v[jrow]*x[8*idx[jrow]+7];
139666ed3db0SBarry Smith       jrow++;
139766ed3db0SBarry Smith      }
139866ed3db0SBarry Smith     y[8*i]   += sum1;
139966ed3db0SBarry Smith     y[8*i+1] += sum2;
140066ed3db0SBarry Smith     y[8*i+2] += sum3;
140166ed3db0SBarry Smith     y[8*i+3] += sum4;
140266ed3db0SBarry Smith     y[8*i+4] += sum5;
140366ed3db0SBarry Smith     y[8*i+5] += sum6;
140466ed3db0SBarry Smith     y[8*i+6] += sum7;
140566ed3db0SBarry Smith     y[8*i+7] += sum8;
140666ed3db0SBarry Smith   }
140766ed3db0SBarry Smith 
1408dc0b31edSSatish Balay   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
14093649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
14101ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
141166ed3db0SBarry Smith   PetscFunctionReturn(0);
141266ed3db0SBarry Smith }
141366ed3db0SBarry Smith 
141466ed3db0SBarry Smith #undef __FUNCT__
141566ed3db0SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8"
1416dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
141766ed3db0SBarry Smith {
141866ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
141966ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1420d2840e09SBarry Smith   const PetscScalar *x,*v;
1421d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1422dfbe8321SBarry Smith   PetscErrorCode    ierr;
1423d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1424d2840e09SBarry Smith   PetscInt          n,i;
142566ed3db0SBarry Smith 
142666ed3db0SBarry Smith   PetscFunctionBegin;
142766ed3db0SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
14283649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
14291ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
143066ed3db0SBarry Smith   for (i=0; i<m; i++) {
1431bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
1432bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
143366ed3db0SBarry Smith     n      = a->i[i+1] - a->i[i];
143466ed3db0SBarry Smith     alpha1 = x[8*i];
143566ed3db0SBarry Smith     alpha2 = x[8*i+1];
143666ed3db0SBarry Smith     alpha3 = x[8*i+2];
143766ed3db0SBarry Smith     alpha4 = x[8*i+3];
143866ed3db0SBarry Smith     alpha5 = x[8*i+4];
143966ed3db0SBarry Smith     alpha6 = x[8*i+5];
144066ed3db0SBarry Smith     alpha7 = x[8*i+6];
144166ed3db0SBarry Smith     alpha8 = x[8*i+7];
144266ed3db0SBarry Smith     while (n-->0) {
144366ed3db0SBarry Smith       y[8*(*idx)]   += alpha1*(*v);
144466ed3db0SBarry Smith       y[8*(*idx)+1] += alpha2*(*v);
144566ed3db0SBarry Smith       y[8*(*idx)+2] += alpha3*(*v);
144666ed3db0SBarry Smith       y[8*(*idx)+3] += alpha4*(*v);
144766ed3db0SBarry Smith       y[8*(*idx)+4] += alpha5*(*v);
144866ed3db0SBarry Smith       y[8*(*idx)+5] += alpha6*(*v);
144966ed3db0SBarry Smith       y[8*(*idx)+6] += alpha7*(*v);
145066ed3db0SBarry Smith       y[8*(*idx)+7] += alpha8*(*v);
145166ed3db0SBarry Smith       idx++; v++;
145266ed3db0SBarry Smith     }
145366ed3db0SBarry Smith   }
1454dc0b31edSSatish Balay   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
14553649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
14561ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
14572f7816d4SBarry Smith   PetscFunctionReturn(0);
14582f7816d4SBarry Smith }
14592f7816d4SBarry Smith 
14602b692628SMatthew Knepley /* ------------------------------------------------------------------------------*/
14612b692628SMatthew Knepley #undef __FUNCT__
14622b692628SMatthew Knepley #define __FUNCT__ "MatMult_SeqMAIJ_9"
1463dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
14642b692628SMatthew Knepley {
14652b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
14662b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1467d2840e09SBarry Smith   const PetscScalar *x,*v;
1468d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1469dfbe8321SBarry Smith   PetscErrorCode    ierr;
1470d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1471d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
14722b692628SMatthew Knepley 
14732b692628SMatthew Knepley   PetscFunctionBegin;
14743649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
14751ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
14762b692628SMatthew Knepley   idx  = a->j;
14772b692628SMatthew Knepley   v    = a->a;
14782b692628SMatthew Knepley   ii   = a->i;
14792b692628SMatthew Knepley 
14802b692628SMatthew Knepley   for (i=0; i<m; i++) {
14812b692628SMatthew Knepley     jrow = ii[i];
14822b692628SMatthew Knepley     n    = ii[i+1] - jrow;
14832b692628SMatthew Knepley     sum1  = 0.0;
14842b692628SMatthew Knepley     sum2  = 0.0;
14852b692628SMatthew Knepley     sum3  = 0.0;
14862b692628SMatthew Knepley     sum4  = 0.0;
14872b692628SMatthew Knepley     sum5  = 0.0;
14882b692628SMatthew Knepley     sum6  = 0.0;
14892b692628SMatthew Knepley     sum7  = 0.0;
14902b692628SMatthew Knepley     sum8  = 0.0;
14912b692628SMatthew Knepley     sum9  = 0.0;
1492b3c51c66SMatthew Knepley     nonzerorow += (n>0);
14932b692628SMatthew Knepley     for (j=0; j<n; j++) {
14942b692628SMatthew Knepley       sum1 += v[jrow]*x[9*idx[jrow]];
14952b692628SMatthew Knepley       sum2 += v[jrow]*x[9*idx[jrow]+1];
14962b692628SMatthew Knepley       sum3 += v[jrow]*x[9*idx[jrow]+2];
14972b692628SMatthew Knepley       sum4 += v[jrow]*x[9*idx[jrow]+3];
14982b692628SMatthew Knepley       sum5 += v[jrow]*x[9*idx[jrow]+4];
14992b692628SMatthew Knepley       sum6 += v[jrow]*x[9*idx[jrow]+5];
15002b692628SMatthew Knepley       sum7 += v[jrow]*x[9*idx[jrow]+6];
15012b692628SMatthew Knepley       sum8 += v[jrow]*x[9*idx[jrow]+7];
15022b692628SMatthew Knepley       sum9 += v[jrow]*x[9*idx[jrow]+8];
15032b692628SMatthew Knepley       jrow++;
15042b692628SMatthew Knepley      }
15052b692628SMatthew Knepley     y[9*i]   = sum1;
15062b692628SMatthew Knepley     y[9*i+1] = sum2;
15072b692628SMatthew Knepley     y[9*i+2] = sum3;
15082b692628SMatthew Knepley     y[9*i+3] = sum4;
15092b692628SMatthew Knepley     y[9*i+4] = sum5;
15102b692628SMatthew Knepley     y[9*i+5] = sum6;
15112b692628SMatthew Knepley     y[9*i+6] = sum7;
15122b692628SMatthew Knepley     y[9*i+7] = sum8;
15132b692628SMatthew Knepley     y[9*i+8] = sum9;
15142b692628SMatthew Knepley   }
15152b692628SMatthew Knepley 
1516dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr);
15173649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
15181ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
15192b692628SMatthew Knepley   PetscFunctionReturn(0);
15202b692628SMatthew Knepley }
15212b692628SMatthew Knepley 
1522b9cda22cSBarry Smith /* ------------------------------------------------------------------------------*/
1523b9cda22cSBarry Smith 
15242b692628SMatthew Knepley #undef __FUNCT__
15252b692628SMatthew Knepley #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9"
1526dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
15272b692628SMatthew Knepley {
15282b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
15292b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1530d2840e09SBarry Smith   const PetscScalar *x,*v;
1531d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1532dfbe8321SBarry Smith   PetscErrorCode    ierr;
1533d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1534d2840e09SBarry Smith   PetscInt          n,i;
15352b692628SMatthew Knepley 
15362b692628SMatthew Knepley   PetscFunctionBegin;
1537d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
15383649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
15391ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
15402b692628SMatthew Knepley 
15412b692628SMatthew Knepley   for (i=0; i<m; i++) {
15422b692628SMatthew Knepley     idx    = a->j + a->i[i] ;
15432b692628SMatthew Knepley     v      = a->a + a->i[i] ;
15442b692628SMatthew Knepley     n      = a->i[i+1] - a->i[i];
15452b692628SMatthew Knepley     alpha1 = x[9*i];
15462b692628SMatthew Knepley     alpha2 = x[9*i+1];
15472b692628SMatthew Knepley     alpha3 = x[9*i+2];
15482b692628SMatthew Knepley     alpha4 = x[9*i+3];
15492b692628SMatthew Knepley     alpha5 = x[9*i+4];
15502b692628SMatthew Knepley     alpha6 = x[9*i+5];
15512b692628SMatthew Knepley     alpha7 = x[9*i+6];
15522b692628SMatthew Knepley     alpha8 = x[9*i+7];
15532f6bd0c6SMatthew Knepley     alpha9 = x[9*i+8];
15542b692628SMatthew Knepley     while (n-->0) {
15552b692628SMatthew Knepley       y[9*(*idx)]   += alpha1*(*v);
15562b692628SMatthew Knepley       y[9*(*idx)+1] += alpha2*(*v);
15572b692628SMatthew Knepley       y[9*(*idx)+2] += alpha3*(*v);
15582b692628SMatthew Knepley       y[9*(*idx)+3] += alpha4*(*v);
15592b692628SMatthew Knepley       y[9*(*idx)+4] += alpha5*(*v);
15602b692628SMatthew Knepley       y[9*(*idx)+5] += alpha6*(*v);
15612b692628SMatthew Knepley       y[9*(*idx)+6] += alpha7*(*v);
15622b692628SMatthew Knepley       y[9*(*idx)+7] += alpha8*(*v);
15632b692628SMatthew Knepley       y[9*(*idx)+8] += alpha9*(*v);
15642b692628SMatthew Knepley       idx++; v++;
15652b692628SMatthew Knepley     }
15662b692628SMatthew Knepley   }
1567dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
15683649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
15691ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
15702b692628SMatthew Knepley   PetscFunctionReturn(0);
15712b692628SMatthew Knepley }
15722b692628SMatthew Knepley 
15732b692628SMatthew Knepley #undef __FUNCT__
15742b692628SMatthew Knepley #define __FUNCT__ "MatMultAdd_SeqMAIJ_9"
1575dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
15762b692628SMatthew Knepley {
15772b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
15782b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1579d2840e09SBarry Smith   const PetscScalar *x,*v;
1580d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1581dfbe8321SBarry Smith   PetscErrorCode    ierr;
1582d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1583b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
15842b692628SMatthew Knepley 
15852b692628SMatthew Knepley   PetscFunctionBegin;
15862b692628SMatthew Knepley   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
15873649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
15881ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
15892b692628SMatthew Knepley   idx  = a->j;
15902b692628SMatthew Knepley   v    = a->a;
15912b692628SMatthew Knepley   ii   = a->i;
15922b692628SMatthew Knepley 
15932b692628SMatthew Knepley   for (i=0; i<m; i++) {
15942b692628SMatthew Knepley     jrow = ii[i];
15952b692628SMatthew Knepley     n    = ii[i+1] - jrow;
15962b692628SMatthew Knepley     sum1  = 0.0;
15972b692628SMatthew Knepley     sum2  = 0.0;
15982b692628SMatthew Knepley     sum3  = 0.0;
15992b692628SMatthew Knepley     sum4  = 0.0;
16002b692628SMatthew Knepley     sum5  = 0.0;
16012b692628SMatthew Knepley     sum6  = 0.0;
16022b692628SMatthew Knepley     sum7  = 0.0;
16032b692628SMatthew Knepley     sum8  = 0.0;
16042b692628SMatthew Knepley     sum9  = 0.0;
16052b692628SMatthew Knepley     for (j=0; j<n; j++) {
16062b692628SMatthew Knepley       sum1 += v[jrow]*x[9*idx[jrow]];
16072b692628SMatthew Knepley       sum2 += v[jrow]*x[9*idx[jrow]+1];
16082b692628SMatthew Knepley       sum3 += v[jrow]*x[9*idx[jrow]+2];
16092b692628SMatthew Knepley       sum4 += v[jrow]*x[9*idx[jrow]+3];
16102b692628SMatthew Knepley       sum5 += v[jrow]*x[9*idx[jrow]+4];
16112b692628SMatthew Knepley       sum6 += v[jrow]*x[9*idx[jrow]+5];
16122b692628SMatthew Knepley       sum7 += v[jrow]*x[9*idx[jrow]+6];
16132b692628SMatthew Knepley       sum8 += v[jrow]*x[9*idx[jrow]+7];
16142b692628SMatthew Knepley       sum9 += v[jrow]*x[9*idx[jrow]+8];
16152b692628SMatthew Knepley       jrow++;
16162b692628SMatthew Knepley      }
16172b692628SMatthew Knepley     y[9*i]   += sum1;
16182b692628SMatthew Knepley     y[9*i+1] += sum2;
16192b692628SMatthew Knepley     y[9*i+2] += sum3;
16202b692628SMatthew Knepley     y[9*i+3] += sum4;
16212b692628SMatthew Knepley     y[9*i+4] += sum5;
16222b692628SMatthew Knepley     y[9*i+5] += sum6;
16232b692628SMatthew Knepley     y[9*i+6] += sum7;
16242b692628SMatthew Knepley     y[9*i+7] += sum8;
16252b692628SMatthew Knepley     y[9*i+8] += sum9;
16262b692628SMatthew Knepley   }
16272b692628SMatthew Knepley 
1628efee365bSSatish Balay   ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr);
16293649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
16301ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
16312b692628SMatthew Knepley   PetscFunctionReturn(0);
16322b692628SMatthew Knepley }
16332b692628SMatthew Knepley 
16342b692628SMatthew Knepley #undef __FUNCT__
16352b692628SMatthew Knepley #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9"
1636dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
16372b692628SMatthew Knepley {
16382b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
16392b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1640d2840e09SBarry Smith   const PetscScalar *x,*v;
1641d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1642dfbe8321SBarry Smith   PetscErrorCode    ierr;
1643d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1644d2840e09SBarry Smith   PetscInt          n,i;
16452b692628SMatthew Knepley 
16462b692628SMatthew Knepley   PetscFunctionBegin;
16472b692628SMatthew Knepley   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
16483649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
16491ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
16502b692628SMatthew Knepley   for (i=0; i<m; i++) {
16512b692628SMatthew Knepley     idx    = a->j + a->i[i] ;
16522b692628SMatthew Knepley     v      = a->a + a->i[i] ;
16532b692628SMatthew Knepley     n      = a->i[i+1] - a->i[i];
16542b692628SMatthew Knepley     alpha1 = x[9*i];
16552b692628SMatthew Knepley     alpha2 = x[9*i+1];
16562b692628SMatthew Knepley     alpha3 = x[9*i+2];
16572b692628SMatthew Knepley     alpha4 = x[9*i+3];
16582b692628SMatthew Knepley     alpha5 = x[9*i+4];
16592b692628SMatthew Knepley     alpha6 = x[9*i+5];
16602b692628SMatthew Knepley     alpha7 = x[9*i+6];
16612b692628SMatthew Knepley     alpha8 = x[9*i+7];
16622b692628SMatthew Knepley     alpha9 = x[9*i+8];
16632b692628SMatthew Knepley     while (n-->0) {
16642b692628SMatthew Knepley       y[9*(*idx)]   += alpha1*(*v);
16652b692628SMatthew Knepley       y[9*(*idx)+1] += alpha2*(*v);
16662b692628SMatthew Knepley       y[9*(*idx)+2] += alpha3*(*v);
16672b692628SMatthew Knepley       y[9*(*idx)+3] += alpha4*(*v);
16682b692628SMatthew Knepley       y[9*(*idx)+4] += alpha5*(*v);
16692b692628SMatthew Knepley       y[9*(*idx)+5] += alpha6*(*v);
16702b692628SMatthew Knepley       y[9*(*idx)+6] += alpha7*(*v);
16712b692628SMatthew Knepley       y[9*(*idx)+7] += alpha8*(*v);
16722b692628SMatthew Knepley       y[9*(*idx)+8] += alpha9*(*v);
16732b692628SMatthew Knepley       idx++; v++;
16742b692628SMatthew Knepley     }
16752b692628SMatthew Knepley   }
1676dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
16773649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
16781ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
16792b692628SMatthew Knepley   PetscFunctionReturn(0);
16802b692628SMatthew Knepley }
1681b9cda22cSBarry Smith #undef __FUNCT__
1682b9cda22cSBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_10"
1683b9cda22cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1684b9cda22cSBarry Smith {
1685b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1686b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1687d2840e09SBarry Smith   const PetscScalar *x,*v;
1688d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1689b9cda22cSBarry Smith   PetscErrorCode    ierr;
1690d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1691d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
1692b9cda22cSBarry Smith 
1693b9cda22cSBarry Smith   PetscFunctionBegin;
16943649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1695b9cda22cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1696b9cda22cSBarry Smith   idx  = a->j;
1697b9cda22cSBarry Smith   v    = a->a;
1698b9cda22cSBarry Smith   ii   = a->i;
1699b9cda22cSBarry Smith 
1700b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1701b9cda22cSBarry Smith     jrow = ii[i];
1702b9cda22cSBarry Smith     n    = ii[i+1] - jrow;
1703b9cda22cSBarry Smith     sum1  = 0.0;
1704b9cda22cSBarry Smith     sum2  = 0.0;
1705b9cda22cSBarry Smith     sum3  = 0.0;
1706b9cda22cSBarry Smith     sum4  = 0.0;
1707b9cda22cSBarry Smith     sum5  = 0.0;
1708b9cda22cSBarry Smith     sum6  = 0.0;
1709b9cda22cSBarry Smith     sum7  = 0.0;
1710b9cda22cSBarry Smith     sum8  = 0.0;
1711b9cda22cSBarry Smith     sum9  = 0.0;
1712b9cda22cSBarry Smith     sum10 = 0.0;
1713b3c51c66SMatthew Knepley     nonzerorow += (n>0);
1714b9cda22cSBarry Smith     for (j=0; j<n; j++) {
1715b9cda22cSBarry Smith       sum1  += v[jrow]*x[10*idx[jrow]];
1716b9cda22cSBarry Smith       sum2  += v[jrow]*x[10*idx[jrow]+1];
1717b9cda22cSBarry Smith       sum3  += v[jrow]*x[10*idx[jrow]+2];
1718b9cda22cSBarry Smith       sum4  += v[jrow]*x[10*idx[jrow]+3];
1719b9cda22cSBarry Smith       sum5  += v[jrow]*x[10*idx[jrow]+4];
1720b9cda22cSBarry Smith       sum6  += v[jrow]*x[10*idx[jrow]+5];
1721b9cda22cSBarry Smith       sum7  += v[jrow]*x[10*idx[jrow]+6];
1722b9cda22cSBarry Smith       sum8  += v[jrow]*x[10*idx[jrow]+7];
1723b9cda22cSBarry Smith       sum9  += v[jrow]*x[10*idx[jrow]+8];
1724b9cda22cSBarry Smith       sum10 += v[jrow]*x[10*idx[jrow]+9];
1725b9cda22cSBarry Smith       jrow++;
1726b9cda22cSBarry Smith      }
1727b9cda22cSBarry Smith     y[10*i]   = sum1;
1728b9cda22cSBarry Smith     y[10*i+1] = sum2;
1729b9cda22cSBarry Smith     y[10*i+2] = sum3;
1730b9cda22cSBarry Smith     y[10*i+3] = sum4;
1731b9cda22cSBarry Smith     y[10*i+4] = sum5;
1732b9cda22cSBarry Smith     y[10*i+5] = sum6;
1733b9cda22cSBarry Smith     y[10*i+6] = sum7;
1734b9cda22cSBarry Smith     y[10*i+7] = sum8;
1735b9cda22cSBarry Smith     y[10*i+8] = sum9;
1736b9cda22cSBarry Smith     y[10*i+9] = sum10;
1737b9cda22cSBarry Smith   }
1738b9cda22cSBarry Smith 
1739dc0b31edSSatish Balay   ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr);
17403649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1741b9cda22cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1742b9cda22cSBarry Smith   PetscFunctionReturn(0);
1743b9cda22cSBarry Smith }
1744b9cda22cSBarry Smith 
1745b9cda22cSBarry Smith #undef __FUNCT__
1746dbdd7285SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_10"
1747b9cda22cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1748b9cda22cSBarry Smith {
1749b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1750b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1751d2840e09SBarry Smith   const PetscScalar *x,*v;
1752d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1753b9cda22cSBarry Smith   PetscErrorCode    ierr;
1754d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1755b9cda22cSBarry Smith   PetscInt          n,i,jrow,j;
1756b9cda22cSBarry Smith 
1757b9cda22cSBarry Smith   PetscFunctionBegin;
1758b9cda22cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
17593649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1760b9cda22cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1761b9cda22cSBarry Smith   idx  = a->j;
1762b9cda22cSBarry Smith   v    = a->a;
1763b9cda22cSBarry Smith   ii   = a->i;
1764b9cda22cSBarry Smith 
1765b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1766b9cda22cSBarry Smith     jrow = ii[i];
1767b9cda22cSBarry Smith     n    = ii[i+1] - jrow;
1768b9cda22cSBarry Smith     sum1  = 0.0;
1769b9cda22cSBarry Smith     sum2  = 0.0;
1770b9cda22cSBarry Smith     sum3  = 0.0;
1771b9cda22cSBarry Smith     sum4  = 0.0;
1772b9cda22cSBarry Smith     sum5  = 0.0;
1773b9cda22cSBarry Smith     sum6  = 0.0;
1774b9cda22cSBarry Smith     sum7  = 0.0;
1775b9cda22cSBarry Smith     sum8  = 0.0;
1776b9cda22cSBarry Smith     sum9  = 0.0;
1777b9cda22cSBarry Smith     sum10 = 0.0;
1778b9cda22cSBarry Smith     for (j=0; j<n; j++) {
1779b9cda22cSBarry Smith       sum1  += v[jrow]*x[10*idx[jrow]];
1780b9cda22cSBarry Smith       sum2  += v[jrow]*x[10*idx[jrow]+1];
1781b9cda22cSBarry Smith       sum3  += v[jrow]*x[10*idx[jrow]+2];
1782b9cda22cSBarry Smith       sum4  += v[jrow]*x[10*idx[jrow]+3];
1783b9cda22cSBarry Smith       sum5  += v[jrow]*x[10*idx[jrow]+4];
1784b9cda22cSBarry Smith       sum6  += v[jrow]*x[10*idx[jrow]+5];
1785b9cda22cSBarry Smith       sum7  += v[jrow]*x[10*idx[jrow]+6];
1786b9cda22cSBarry Smith       sum8  += v[jrow]*x[10*idx[jrow]+7];
1787b9cda22cSBarry Smith       sum9  += v[jrow]*x[10*idx[jrow]+8];
1788b9cda22cSBarry Smith       sum10 += v[jrow]*x[10*idx[jrow]+9];
1789b9cda22cSBarry Smith       jrow++;
1790b9cda22cSBarry Smith      }
1791b9cda22cSBarry Smith     y[10*i]   += sum1;
1792b9cda22cSBarry Smith     y[10*i+1] += sum2;
1793b9cda22cSBarry Smith     y[10*i+2] += sum3;
1794b9cda22cSBarry Smith     y[10*i+3] += sum4;
1795b9cda22cSBarry Smith     y[10*i+4] += sum5;
1796b9cda22cSBarry Smith     y[10*i+5] += sum6;
1797b9cda22cSBarry Smith     y[10*i+6] += sum7;
1798b9cda22cSBarry Smith     y[10*i+7] += sum8;
1799b9cda22cSBarry Smith     y[10*i+8] += sum9;
1800b9cda22cSBarry Smith     y[10*i+9] += sum10;
1801b9cda22cSBarry Smith   }
1802b9cda22cSBarry Smith 
1803dc0b31edSSatish Balay   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
18043649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1805b9cda22cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1806b9cda22cSBarry Smith   PetscFunctionReturn(0);
1807b9cda22cSBarry Smith }
1808b9cda22cSBarry Smith 
1809b9cda22cSBarry Smith #undef __FUNCT__
1810b9cda22cSBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_10"
1811b9cda22cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1812b9cda22cSBarry Smith {
1813b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1814b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1815d2840e09SBarry Smith   const PetscScalar *x,*v;
1816d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1817b9cda22cSBarry Smith   PetscErrorCode    ierr;
1818d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1819d2840e09SBarry Smith   PetscInt          n,i;
1820b9cda22cSBarry Smith 
1821b9cda22cSBarry Smith   PetscFunctionBegin;
1822d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
18233649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1824b9cda22cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1825b9cda22cSBarry Smith 
1826b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1827b9cda22cSBarry Smith     idx    = a->j + a->i[i] ;
1828b9cda22cSBarry Smith     v      = a->a + a->i[i] ;
1829b9cda22cSBarry Smith     n      = a->i[i+1] - a->i[i];
1830e29fdc22SBarry Smith     alpha1 = x[10*i];
1831e29fdc22SBarry Smith     alpha2 = x[10*i+1];
1832e29fdc22SBarry Smith     alpha3 = x[10*i+2];
1833e29fdc22SBarry Smith     alpha4 = x[10*i+3];
1834e29fdc22SBarry Smith     alpha5 = x[10*i+4];
1835e29fdc22SBarry Smith     alpha6 = x[10*i+5];
1836e29fdc22SBarry Smith     alpha7 = x[10*i+6];
1837e29fdc22SBarry Smith     alpha8 = x[10*i+7];
1838e29fdc22SBarry Smith     alpha9 = x[10*i+8];
1839b9cda22cSBarry Smith     alpha10 = x[10*i+9];
1840b9cda22cSBarry Smith     while (n-->0) {
1841e29fdc22SBarry Smith       y[10*(*idx)]   += alpha1*(*v);
1842e29fdc22SBarry Smith       y[10*(*idx)+1] += alpha2*(*v);
1843e29fdc22SBarry Smith       y[10*(*idx)+2] += alpha3*(*v);
1844e29fdc22SBarry Smith       y[10*(*idx)+3] += alpha4*(*v);
1845e29fdc22SBarry Smith       y[10*(*idx)+4] += alpha5*(*v);
1846e29fdc22SBarry Smith       y[10*(*idx)+5] += alpha6*(*v);
1847e29fdc22SBarry Smith       y[10*(*idx)+6] += alpha7*(*v);
1848e29fdc22SBarry Smith       y[10*(*idx)+7] += alpha8*(*v);
1849e29fdc22SBarry Smith       y[10*(*idx)+8] += alpha9*(*v);
1850b9cda22cSBarry Smith       y[10*(*idx)+9] += alpha10*(*v);
1851b9cda22cSBarry Smith       idx++; v++;
1852b9cda22cSBarry Smith     }
1853b9cda22cSBarry Smith   }
1854dc0b31edSSatish Balay   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
18553649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1856b9cda22cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1857b9cda22cSBarry Smith   PetscFunctionReturn(0);
1858b9cda22cSBarry Smith }
1859b9cda22cSBarry Smith 
1860b9cda22cSBarry Smith #undef __FUNCT__
1861b9cda22cSBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_10"
1862b9cda22cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1863b9cda22cSBarry Smith {
1864b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1865b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1866d2840e09SBarry Smith   const PetscScalar *x,*v;
1867d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1868b9cda22cSBarry Smith   PetscErrorCode    ierr;
1869d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1870d2840e09SBarry Smith   PetscInt          n,i;
1871b9cda22cSBarry Smith 
1872b9cda22cSBarry Smith   PetscFunctionBegin;
1873b9cda22cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
18743649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1875b9cda22cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1876b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1877b9cda22cSBarry Smith     idx    = a->j + a->i[i] ;
1878b9cda22cSBarry Smith     v      = a->a + a->i[i] ;
1879b9cda22cSBarry Smith     n      = a->i[i+1] - a->i[i];
1880b9cda22cSBarry Smith     alpha1 = x[10*i];
1881b9cda22cSBarry Smith     alpha2 = x[10*i+1];
1882b9cda22cSBarry Smith     alpha3 = x[10*i+2];
1883b9cda22cSBarry Smith     alpha4 = x[10*i+3];
1884b9cda22cSBarry Smith     alpha5 = x[10*i+4];
1885b9cda22cSBarry Smith     alpha6 = x[10*i+5];
1886b9cda22cSBarry Smith     alpha7 = x[10*i+6];
1887b9cda22cSBarry Smith     alpha8 = x[10*i+7];
1888b9cda22cSBarry Smith     alpha9 = x[10*i+8];
1889b9cda22cSBarry Smith     alpha10 = x[10*i+9];
1890b9cda22cSBarry Smith     while (n-->0) {
1891b9cda22cSBarry Smith       y[10*(*idx)]   += alpha1*(*v);
1892b9cda22cSBarry Smith       y[10*(*idx)+1] += alpha2*(*v);
1893b9cda22cSBarry Smith       y[10*(*idx)+2] += alpha3*(*v);
1894b9cda22cSBarry Smith       y[10*(*idx)+3] += alpha4*(*v);
1895b9cda22cSBarry Smith       y[10*(*idx)+4] += alpha5*(*v);
1896b9cda22cSBarry Smith       y[10*(*idx)+5] += alpha6*(*v);
1897b9cda22cSBarry Smith       y[10*(*idx)+6] += alpha7*(*v);
1898b9cda22cSBarry Smith       y[10*(*idx)+7] += alpha8*(*v);
1899b9cda22cSBarry Smith       y[10*(*idx)+8] += alpha9*(*v);
1900b9cda22cSBarry Smith       y[10*(*idx)+9] += alpha10*(*v);
1901b9cda22cSBarry Smith       idx++; v++;
1902b9cda22cSBarry Smith     }
1903b9cda22cSBarry Smith   }
1904dc0b31edSSatish Balay   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
19053649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1906b9cda22cSBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1907b9cda22cSBarry Smith   PetscFunctionReturn(0);
1908b9cda22cSBarry Smith }
1909b9cda22cSBarry Smith 
19102b692628SMatthew Knepley 
19112f7816d4SBarry Smith /*--------------------------------------------------------------------------------------------*/
19122f7816d4SBarry Smith #undef __FUNCT__
1913dbdd7285SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_11"
1914dbdd7285SBarry Smith PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1915dbdd7285SBarry Smith {
1916dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1917dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1918d2840e09SBarry Smith   const PetscScalar *x,*v;
1919d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1920dbdd7285SBarry Smith   PetscErrorCode    ierr;
1921d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1922d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
1923dbdd7285SBarry Smith 
1924dbdd7285SBarry Smith   PetscFunctionBegin;
19253649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1926dbdd7285SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1927dbdd7285SBarry Smith   idx  = a->j;
1928dbdd7285SBarry Smith   v    = a->a;
1929dbdd7285SBarry Smith   ii   = a->i;
1930dbdd7285SBarry Smith 
1931dbdd7285SBarry Smith   for (i=0; i<m; i++) {
1932dbdd7285SBarry Smith     jrow = ii[i];
1933dbdd7285SBarry Smith     n    = ii[i+1] - jrow;
1934dbdd7285SBarry Smith     sum1  = 0.0;
1935dbdd7285SBarry Smith     sum2  = 0.0;
1936dbdd7285SBarry Smith     sum3  = 0.0;
1937dbdd7285SBarry Smith     sum4  = 0.0;
1938dbdd7285SBarry Smith     sum5  = 0.0;
1939dbdd7285SBarry Smith     sum6  = 0.0;
1940dbdd7285SBarry Smith     sum7  = 0.0;
1941dbdd7285SBarry Smith     sum8  = 0.0;
1942dbdd7285SBarry Smith     sum9  = 0.0;
1943dbdd7285SBarry Smith     sum10 = 0.0;
1944dbdd7285SBarry Smith     sum11 = 0.0;
1945dbdd7285SBarry Smith     nonzerorow += (n>0);
1946dbdd7285SBarry Smith     for (j=0; j<n; j++) {
1947dbdd7285SBarry Smith       sum1  += v[jrow]*x[11*idx[jrow]];
1948dbdd7285SBarry Smith       sum2  += v[jrow]*x[11*idx[jrow]+1];
1949dbdd7285SBarry Smith       sum3  += v[jrow]*x[11*idx[jrow]+2];
1950dbdd7285SBarry Smith       sum4  += v[jrow]*x[11*idx[jrow]+3];
1951dbdd7285SBarry Smith       sum5  += v[jrow]*x[11*idx[jrow]+4];
1952dbdd7285SBarry Smith       sum6  += v[jrow]*x[11*idx[jrow]+5];
1953dbdd7285SBarry Smith       sum7  += v[jrow]*x[11*idx[jrow]+6];
1954dbdd7285SBarry Smith       sum8  += v[jrow]*x[11*idx[jrow]+7];
1955dbdd7285SBarry Smith       sum9  += v[jrow]*x[11*idx[jrow]+8];
1956dbdd7285SBarry Smith       sum10 += v[jrow]*x[11*idx[jrow]+9];
1957dbdd7285SBarry Smith       sum11 += v[jrow]*x[11*idx[jrow]+10];
1958dbdd7285SBarry Smith       jrow++;
1959dbdd7285SBarry Smith      }
1960dbdd7285SBarry Smith     y[11*i]   = sum1;
1961dbdd7285SBarry Smith     y[11*i+1] = sum2;
1962dbdd7285SBarry Smith     y[11*i+2] = sum3;
1963dbdd7285SBarry Smith     y[11*i+3] = sum4;
1964dbdd7285SBarry Smith     y[11*i+4] = sum5;
1965dbdd7285SBarry Smith     y[11*i+5] = sum6;
1966dbdd7285SBarry Smith     y[11*i+6] = sum7;
1967dbdd7285SBarry Smith     y[11*i+7] = sum8;
1968dbdd7285SBarry Smith     y[11*i+8] = sum9;
1969dbdd7285SBarry Smith     y[11*i+9] = sum10;
1970dbdd7285SBarry Smith     y[11*i+10] = sum11;
1971dbdd7285SBarry Smith   }
1972dbdd7285SBarry Smith 
1973dbdd7285SBarry Smith   ierr = PetscLogFlops(22*a->nz - 11*nonzerorow);CHKERRQ(ierr);
19743649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1975dbdd7285SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1976dbdd7285SBarry Smith   PetscFunctionReturn(0);
1977dbdd7285SBarry Smith }
1978dbdd7285SBarry Smith 
1979dbdd7285SBarry Smith #undef __FUNCT__
1980dbdd7285SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_11"
1981dbdd7285SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1982dbdd7285SBarry Smith {
1983dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1984dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1985d2840e09SBarry Smith   const PetscScalar *x,*v;
1986d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1987dbdd7285SBarry Smith   PetscErrorCode    ierr;
1988d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1989dbdd7285SBarry Smith   PetscInt          n,i,jrow,j;
1990dbdd7285SBarry Smith 
1991dbdd7285SBarry Smith   PetscFunctionBegin;
1992dbdd7285SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
19933649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1994dbdd7285SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1995dbdd7285SBarry Smith   idx  = a->j;
1996dbdd7285SBarry Smith   v    = a->a;
1997dbdd7285SBarry Smith   ii   = a->i;
1998dbdd7285SBarry Smith 
1999dbdd7285SBarry Smith   for (i=0; i<m; i++) {
2000dbdd7285SBarry Smith     jrow = ii[i];
2001dbdd7285SBarry Smith     n    = ii[i+1] - jrow;
2002dbdd7285SBarry Smith     sum1  = 0.0;
2003dbdd7285SBarry Smith     sum2  = 0.0;
2004dbdd7285SBarry Smith     sum3  = 0.0;
2005dbdd7285SBarry Smith     sum4  = 0.0;
2006dbdd7285SBarry Smith     sum5  = 0.0;
2007dbdd7285SBarry Smith     sum6  = 0.0;
2008dbdd7285SBarry Smith     sum7  = 0.0;
2009dbdd7285SBarry Smith     sum8  = 0.0;
2010dbdd7285SBarry Smith     sum9  = 0.0;
2011dbdd7285SBarry Smith     sum10 = 0.0;
2012dbdd7285SBarry Smith     sum11 = 0.0;
2013dbdd7285SBarry Smith     for (j=0; j<n; j++) {
2014dbdd7285SBarry Smith       sum1  += v[jrow]*x[11*idx[jrow]];
2015dbdd7285SBarry Smith       sum2  += v[jrow]*x[11*idx[jrow]+1];
2016dbdd7285SBarry Smith       sum3  += v[jrow]*x[11*idx[jrow]+2];
2017dbdd7285SBarry Smith       sum4  += v[jrow]*x[11*idx[jrow]+3];
2018dbdd7285SBarry Smith       sum5  += v[jrow]*x[11*idx[jrow]+4];
2019dbdd7285SBarry Smith       sum6  += v[jrow]*x[11*idx[jrow]+5];
2020dbdd7285SBarry Smith       sum7  += v[jrow]*x[11*idx[jrow]+6];
2021dbdd7285SBarry Smith       sum8  += v[jrow]*x[11*idx[jrow]+7];
2022dbdd7285SBarry Smith       sum9  += v[jrow]*x[11*idx[jrow]+8];
2023dbdd7285SBarry Smith       sum10 += v[jrow]*x[11*idx[jrow]+9];
2024dbdd7285SBarry Smith       sum11 += v[jrow]*x[11*idx[jrow]+10];
2025dbdd7285SBarry Smith       jrow++;
2026dbdd7285SBarry Smith      }
2027dbdd7285SBarry Smith     y[11*i]   += sum1;
2028dbdd7285SBarry Smith     y[11*i+1] += sum2;
2029dbdd7285SBarry Smith     y[11*i+2] += sum3;
2030dbdd7285SBarry Smith     y[11*i+3] += sum4;
2031dbdd7285SBarry Smith     y[11*i+4] += sum5;
2032dbdd7285SBarry Smith     y[11*i+5] += sum6;
2033dbdd7285SBarry Smith     y[11*i+6] += sum7;
2034dbdd7285SBarry Smith     y[11*i+7] += sum8;
2035dbdd7285SBarry Smith     y[11*i+8] += sum9;
2036dbdd7285SBarry Smith     y[11*i+9] += sum10;
2037dbdd7285SBarry Smith     y[11*i+10] += sum11;
2038dbdd7285SBarry Smith   }
2039dbdd7285SBarry Smith 
2040dbdd7285SBarry Smith   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
20413649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2042dbdd7285SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2043dbdd7285SBarry Smith   PetscFunctionReturn(0);
2044dbdd7285SBarry Smith }
2045dbdd7285SBarry Smith 
2046dbdd7285SBarry Smith #undef __FUNCT__
2047dbdd7285SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_11"
2048dbdd7285SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
2049dbdd7285SBarry Smith {
2050dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2051dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2052d2840e09SBarry Smith   const PetscScalar *x,*v;
2053d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2054dbdd7285SBarry Smith   PetscErrorCode    ierr;
2055d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2056d2840e09SBarry Smith   PetscInt          n,i;
2057dbdd7285SBarry Smith 
2058dbdd7285SBarry Smith   PetscFunctionBegin;
2059d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
20603649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2061dbdd7285SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2062dbdd7285SBarry Smith 
2063dbdd7285SBarry Smith   for (i=0; i<m; i++) {
2064dbdd7285SBarry Smith     idx    = a->j + a->i[i] ;
2065dbdd7285SBarry Smith     v      = a->a + a->i[i] ;
2066dbdd7285SBarry Smith     n      = a->i[i+1] - a->i[i];
2067dbdd7285SBarry Smith     alpha1 = x[11*i];
2068dbdd7285SBarry Smith     alpha2 = x[11*i+1];
2069dbdd7285SBarry Smith     alpha3 = x[11*i+2];
2070dbdd7285SBarry Smith     alpha4 = x[11*i+3];
2071dbdd7285SBarry Smith     alpha5 = x[11*i+4];
2072dbdd7285SBarry Smith     alpha6 = x[11*i+5];
2073dbdd7285SBarry Smith     alpha7 = x[11*i+6];
2074dbdd7285SBarry Smith     alpha8 = x[11*i+7];
2075dbdd7285SBarry Smith     alpha9 = x[11*i+8];
2076dbdd7285SBarry Smith     alpha10 = x[11*i+9];
2077dbdd7285SBarry Smith     alpha11 = x[11*i+10];
2078dbdd7285SBarry Smith     while (n-->0) {
2079dbdd7285SBarry Smith       y[11*(*idx)]   += alpha1*(*v);
2080dbdd7285SBarry Smith       y[11*(*idx)+1] += alpha2*(*v);
2081dbdd7285SBarry Smith       y[11*(*idx)+2] += alpha3*(*v);
2082dbdd7285SBarry Smith       y[11*(*idx)+3] += alpha4*(*v);
2083dbdd7285SBarry Smith       y[11*(*idx)+4] += alpha5*(*v);
2084dbdd7285SBarry Smith       y[11*(*idx)+5] += alpha6*(*v);
2085dbdd7285SBarry Smith       y[11*(*idx)+6] += alpha7*(*v);
2086dbdd7285SBarry Smith       y[11*(*idx)+7] += alpha8*(*v);
2087dbdd7285SBarry Smith       y[11*(*idx)+8] += alpha9*(*v);
2088dbdd7285SBarry Smith       y[11*(*idx)+9] += alpha10*(*v);
2089dbdd7285SBarry Smith       y[11*(*idx)+10] += alpha11*(*v);
2090dbdd7285SBarry Smith       idx++; v++;
2091dbdd7285SBarry Smith     }
2092dbdd7285SBarry Smith   }
2093dbdd7285SBarry Smith   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
20943649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2095dbdd7285SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2096dbdd7285SBarry Smith   PetscFunctionReturn(0);
2097dbdd7285SBarry Smith }
2098dbdd7285SBarry Smith 
2099dbdd7285SBarry Smith #undef __FUNCT__
2100dbdd7285SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_11"
2101dbdd7285SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2102dbdd7285SBarry Smith {
2103dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2104dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2105d2840e09SBarry Smith   const PetscScalar *x,*v;
2106d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2107dbdd7285SBarry Smith   PetscErrorCode    ierr;
2108d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2109d2840e09SBarry Smith   PetscInt          n,i;
2110dbdd7285SBarry Smith 
2111dbdd7285SBarry Smith   PetscFunctionBegin;
2112dbdd7285SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
21133649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2114dbdd7285SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2115dbdd7285SBarry Smith   for (i=0; i<m; i++) {
2116dbdd7285SBarry Smith     idx    = a->j + a->i[i] ;
2117dbdd7285SBarry Smith     v      = a->a + a->i[i] ;
2118dbdd7285SBarry Smith     n      = a->i[i+1] - a->i[i];
2119dbdd7285SBarry Smith     alpha1 = x[11*i];
2120dbdd7285SBarry Smith     alpha2 = x[11*i+1];
2121dbdd7285SBarry Smith     alpha3 = x[11*i+2];
2122dbdd7285SBarry Smith     alpha4 = x[11*i+3];
2123dbdd7285SBarry Smith     alpha5 = x[11*i+4];
2124dbdd7285SBarry Smith     alpha6 = x[11*i+5];
2125dbdd7285SBarry Smith     alpha7 = x[11*i+6];
2126dbdd7285SBarry Smith     alpha8 = x[11*i+7];
2127dbdd7285SBarry Smith     alpha9 = x[11*i+8];
2128dbdd7285SBarry Smith     alpha10 = x[11*i+9];
2129dbdd7285SBarry Smith     alpha11 = x[11*i+10];
2130dbdd7285SBarry Smith     while (n-->0) {
2131dbdd7285SBarry Smith       y[11*(*idx)]   += alpha1*(*v);
2132dbdd7285SBarry Smith       y[11*(*idx)+1] += alpha2*(*v);
2133dbdd7285SBarry Smith       y[11*(*idx)+2] += alpha3*(*v);
2134dbdd7285SBarry Smith       y[11*(*idx)+3] += alpha4*(*v);
2135dbdd7285SBarry Smith       y[11*(*idx)+4] += alpha5*(*v);
2136dbdd7285SBarry Smith       y[11*(*idx)+5] += alpha6*(*v);
2137dbdd7285SBarry Smith       y[11*(*idx)+6] += alpha7*(*v);
2138dbdd7285SBarry Smith       y[11*(*idx)+7] += alpha8*(*v);
2139dbdd7285SBarry Smith       y[11*(*idx)+8] += alpha9*(*v);
2140dbdd7285SBarry Smith       y[11*(*idx)+9] += alpha10*(*v);
2141dbdd7285SBarry Smith       y[11*(*idx)+10] += alpha11*(*v);
2142dbdd7285SBarry Smith       idx++; v++;
2143dbdd7285SBarry Smith     }
2144dbdd7285SBarry Smith   }
2145dbdd7285SBarry Smith   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
21463649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2147dbdd7285SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2148dbdd7285SBarry Smith   PetscFunctionReturn(0);
2149dbdd7285SBarry Smith }
2150dbdd7285SBarry Smith 
2151dbdd7285SBarry Smith 
2152dbdd7285SBarry Smith /*--------------------------------------------------------------------------------------------*/
2153dbdd7285SBarry Smith #undef __FUNCT__
21542f7816d4SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_16"
2155dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
21562f7816d4SBarry Smith {
21572f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
21582f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2159d2840e09SBarry Smith   const PetscScalar *x,*v;
2160d2840e09SBarry Smith   PetscScalar        *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
21612f7816d4SBarry Smith   PetscScalar        sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2162dfbe8321SBarry Smith   PetscErrorCode     ierr;
2163d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n,*idx,*ii;
2164d2840e09SBarry Smith   PetscInt           nonzerorow=0,n,i,jrow,j;
21652f7816d4SBarry Smith 
21662f7816d4SBarry Smith   PetscFunctionBegin;
21673649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
21681ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
21692f7816d4SBarry Smith   idx  = a->j;
21702f7816d4SBarry Smith   v    = a->a;
21712f7816d4SBarry Smith   ii   = a->i;
21722f7816d4SBarry Smith 
21732f7816d4SBarry Smith   for (i=0; i<m; i++) {
21742f7816d4SBarry Smith     jrow = ii[i];
21752f7816d4SBarry Smith     n    = ii[i+1] - jrow;
21762f7816d4SBarry Smith     sum1  = 0.0;
21772f7816d4SBarry Smith     sum2  = 0.0;
21782f7816d4SBarry Smith     sum3  = 0.0;
21792f7816d4SBarry Smith     sum4  = 0.0;
21802f7816d4SBarry Smith     sum5  = 0.0;
21812f7816d4SBarry Smith     sum6  = 0.0;
21822f7816d4SBarry Smith     sum7  = 0.0;
21832f7816d4SBarry Smith     sum8  = 0.0;
21842f7816d4SBarry Smith     sum9  = 0.0;
21852f7816d4SBarry Smith     sum10 = 0.0;
21862f7816d4SBarry Smith     sum11 = 0.0;
21872f7816d4SBarry Smith     sum12 = 0.0;
21882f7816d4SBarry Smith     sum13 = 0.0;
21892f7816d4SBarry Smith     sum14 = 0.0;
21902f7816d4SBarry Smith     sum15 = 0.0;
21912f7816d4SBarry Smith     sum16 = 0.0;
2192b3c51c66SMatthew Knepley     nonzerorow += (n>0);
21932f7816d4SBarry Smith     for (j=0; j<n; j++) {
21942f7816d4SBarry Smith       sum1  += v[jrow]*x[16*idx[jrow]];
21952f7816d4SBarry Smith       sum2  += v[jrow]*x[16*idx[jrow]+1];
21962f7816d4SBarry Smith       sum3  += v[jrow]*x[16*idx[jrow]+2];
21972f7816d4SBarry Smith       sum4  += v[jrow]*x[16*idx[jrow]+3];
21982f7816d4SBarry Smith       sum5  += v[jrow]*x[16*idx[jrow]+4];
21992f7816d4SBarry Smith       sum6  += v[jrow]*x[16*idx[jrow]+5];
22002f7816d4SBarry Smith       sum7  += v[jrow]*x[16*idx[jrow]+6];
22012f7816d4SBarry Smith       sum8  += v[jrow]*x[16*idx[jrow]+7];
22022f7816d4SBarry Smith       sum9  += v[jrow]*x[16*idx[jrow]+8];
22032f7816d4SBarry Smith       sum10 += v[jrow]*x[16*idx[jrow]+9];
22042f7816d4SBarry Smith       sum11 += v[jrow]*x[16*idx[jrow]+10];
22052f7816d4SBarry Smith       sum12 += v[jrow]*x[16*idx[jrow]+11];
22062f7816d4SBarry Smith       sum13 += v[jrow]*x[16*idx[jrow]+12];
22072f7816d4SBarry Smith       sum14 += v[jrow]*x[16*idx[jrow]+13];
22082f7816d4SBarry Smith       sum15 += v[jrow]*x[16*idx[jrow]+14];
22092f7816d4SBarry Smith       sum16 += v[jrow]*x[16*idx[jrow]+15];
22102f7816d4SBarry Smith       jrow++;
22112f7816d4SBarry Smith      }
22122f7816d4SBarry Smith     y[16*i]    = sum1;
22132f7816d4SBarry Smith     y[16*i+1]  = sum2;
22142f7816d4SBarry Smith     y[16*i+2]  = sum3;
22152f7816d4SBarry Smith     y[16*i+3]  = sum4;
22162f7816d4SBarry Smith     y[16*i+4]  = sum5;
22172f7816d4SBarry Smith     y[16*i+5]  = sum6;
22182f7816d4SBarry Smith     y[16*i+6]  = sum7;
22192f7816d4SBarry Smith     y[16*i+7]  = sum8;
22202f7816d4SBarry Smith     y[16*i+8]  = sum9;
22212f7816d4SBarry Smith     y[16*i+9]  = sum10;
22222f7816d4SBarry Smith     y[16*i+10] = sum11;
22232f7816d4SBarry Smith     y[16*i+11] = sum12;
22242f7816d4SBarry Smith     y[16*i+12] = sum13;
22252f7816d4SBarry Smith     y[16*i+13] = sum14;
22262f7816d4SBarry Smith     y[16*i+14] = sum15;
22272f7816d4SBarry Smith     y[16*i+15] = sum16;
22282f7816d4SBarry Smith   }
22292f7816d4SBarry Smith 
2230dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr);
22313649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
22321ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
22332f7816d4SBarry Smith   PetscFunctionReturn(0);
22342f7816d4SBarry Smith }
22352f7816d4SBarry Smith 
22362f7816d4SBarry Smith #undef __FUNCT__
22372f7816d4SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16"
2238dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
22392f7816d4SBarry Smith {
22402f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
22412f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2242d2840e09SBarry Smith   const PetscScalar *x,*v;
2243d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
22442f7816d4SBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2245dfbe8321SBarry Smith   PetscErrorCode    ierr;
2246d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2247d2840e09SBarry Smith   PetscInt          n,i;
22482f7816d4SBarry Smith 
22492f7816d4SBarry Smith   PetscFunctionBegin;
2250d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
22513649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
22521ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2253bfec09a0SHong Zhang 
22542f7816d4SBarry Smith   for (i=0; i<m; i++) {
2255bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
2256bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
22572f7816d4SBarry Smith     n      = a->i[i+1] - a->i[i];
22582f7816d4SBarry Smith     alpha1  = x[16*i];
22592f7816d4SBarry Smith     alpha2  = x[16*i+1];
22602f7816d4SBarry Smith     alpha3  = x[16*i+2];
22612f7816d4SBarry Smith     alpha4  = x[16*i+3];
22622f7816d4SBarry Smith     alpha5  = x[16*i+4];
22632f7816d4SBarry Smith     alpha6  = x[16*i+5];
22642f7816d4SBarry Smith     alpha7  = x[16*i+6];
22652f7816d4SBarry Smith     alpha8  = x[16*i+7];
22662f7816d4SBarry Smith     alpha9  = x[16*i+8];
22672f7816d4SBarry Smith     alpha10 = x[16*i+9];
22682f7816d4SBarry Smith     alpha11 = x[16*i+10];
22692f7816d4SBarry Smith     alpha12 = x[16*i+11];
22702f7816d4SBarry Smith     alpha13 = x[16*i+12];
22712f7816d4SBarry Smith     alpha14 = x[16*i+13];
22722f7816d4SBarry Smith     alpha15 = x[16*i+14];
22732f7816d4SBarry Smith     alpha16 = x[16*i+15];
22742f7816d4SBarry Smith     while (n-->0) {
22752f7816d4SBarry Smith       y[16*(*idx)]    += alpha1*(*v);
22762f7816d4SBarry Smith       y[16*(*idx)+1]  += alpha2*(*v);
22772f7816d4SBarry Smith       y[16*(*idx)+2]  += alpha3*(*v);
22782f7816d4SBarry Smith       y[16*(*idx)+3]  += alpha4*(*v);
22792f7816d4SBarry Smith       y[16*(*idx)+4]  += alpha5*(*v);
22802f7816d4SBarry Smith       y[16*(*idx)+5]  += alpha6*(*v);
22812f7816d4SBarry Smith       y[16*(*idx)+6]  += alpha7*(*v);
22822f7816d4SBarry Smith       y[16*(*idx)+7]  += alpha8*(*v);
22832f7816d4SBarry Smith       y[16*(*idx)+8]  += alpha9*(*v);
22842f7816d4SBarry Smith       y[16*(*idx)+9]  += alpha10*(*v);
22852f7816d4SBarry Smith       y[16*(*idx)+10] += alpha11*(*v);
22862f7816d4SBarry Smith       y[16*(*idx)+11] += alpha12*(*v);
22872f7816d4SBarry Smith       y[16*(*idx)+12] += alpha13*(*v);
22882f7816d4SBarry Smith       y[16*(*idx)+13] += alpha14*(*v);
22892f7816d4SBarry Smith       y[16*(*idx)+14] += alpha15*(*v);
22902f7816d4SBarry Smith       y[16*(*idx)+15] += alpha16*(*v);
22912f7816d4SBarry Smith       idx++; v++;
22922f7816d4SBarry Smith     }
22932f7816d4SBarry Smith   }
2294dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
22953649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
22961ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
22972f7816d4SBarry Smith   PetscFunctionReturn(0);
22982f7816d4SBarry Smith }
22992f7816d4SBarry Smith 
23002f7816d4SBarry Smith #undef __FUNCT__
23012f7816d4SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_16"
2302dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
23032f7816d4SBarry Smith {
23042f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
23052f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2306d2840e09SBarry Smith   const PetscScalar *x,*v;
2307d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
23082f7816d4SBarry Smith   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2309dfbe8321SBarry Smith   PetscErrorCode    ierr;
2310d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2311b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
23122f7816d4SBarry Smith 
23132f7816d4SBarry Smith   PetscFunctionBegin;
23142f7816d4SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
23153649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
23161ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
23172f7816d4SBarry Smith   idx  = a->j;
23182f7816d4SBarry Smith   v    = a->a;
23192f7816d4SBarry Smith   ii   = a->i;
23202f7816d4SBarry Smith 
23212f7816d4SBarry Smith   for (i=0; i<m; i++) {
23222f7816d4SBarry Smith     jrow = ii[i];
23232f7816d4SBarry Smith     n    = ii[i+1] - jrow;
23242f7816d4SBarry Smith     sum1  = 0.0;
23252f7816d4SBarry Smith     sum2  = 0.0;
23262f7816d4SBarry Smith     sum3  = 0.0;
23272f7816d4SBarry Smith     sum4  = 0.0;
23282f7816d4SBarry Smith     sum5  = 0.0;
23292f7816d4SBarry Smith     sum6  = 0.0;
23302f7816d4SBarry Smith     sum7  = 0.0;
23312f7816d4SBarry Smith     sum8  = 0.0;
23322f7816d4SBarry Smith     sum9  = 0.0;
23332f7816d4SBarry Smith     sum10 = 0.0;
23342f7816d4SBarry Smith     sum11 = 0.0;
23352f7816d4SBarry Smith     sum12 = 0.0;
23362f7816d4SBarry Smith     sum13 = 0.0;
23372f7816d4SBarry Smith     sum14 = 0.0;
23382f7816d4SBarry Smith     sum15 = 0.0;
23392f7816d4SBarry Smith     sum16 = 0.0;
23402f7816d4SBarry Smith     for (j=0; j<n; j++) {
23412f7816d4SBarry Smith       sum1  += v[jrow]*x[16*idx[jrow]];
23422f7816d4SBarry Smith       sum2  += v[jrow]*x[16*idx[jrow]+1];
23432f7816d4SBarry Smith       sum3  += v[jrow]*x[16*idx[jrow]+2];
23442f7816d4SBarry Smith       sum4  += v[jrow]*x[16*idx[jrow]+3];
23452f7816d4SBarry Smith       sum5  += v[jrow]*x[16*idx[jrow]+4];
23462f7816d4SBarry Smith       sum6  += v[jrow]*x[16*idx[jrow]+5];
23472f7816d4SBarry Smith       sum7  += v[jrow]*x[16*idx[jrow]+6];
23482f7816d4SBarry Smith       sum8  += v[jrow]*x[16*idx[jrow]+7];
23492f7816d4SBarry Smith       sum9  += v[jrow]*x[16*idx[jrow]+8];
23502f7816d4SBarry Smith       sum10 += v[jrow]*x[16*idx[jrow]+9];
23512f7816d4SBarry Smith       sum11 += v[jrow]*x[16*idx[jrow]+10];
23522f7816d4SBarry Smith       sum12 += v[jrow]*x[16*idx[jrow]+11];
23532f7816d4SBarry Smith       sum13 += v[jrow]*x[16*idx[jrow]+12];
23542f7816d4SBarry Smith       sum14 += v[jrow]*x[16*idx[jrow]+13];
23552f7816d4SBarry Smith       sum15 += v[jrow]*x[16*idx[jrow]+14];
23562f7816d4SBarry Smith       sum16 += v[jrow]*x[16*idx[jrow]+15];
23572f7816d4SBarry Smith       jrow++;
23582f7816d4SBarry Smith      }
23592f7816d4SBarry Smith     y[16*i]    += sum1;
23602f7816d4SBarry Smith     y[16*i+1]  += sum2;
23612f7816d4SBarry Smith     y[16*i+2]  += sum3;
23622f7816d4SBarry Smith     y[16*i+3]  += sum4;
23632f7816d4SBarry Smith     y[16*i+4]  += sum5;
23642f7816d4SBarry Smith     y[16*i+5]  += sum6;
23652f7816d4SBarry Smith     y[16*i+6]  += sum7;
23662f7816d4SBarry Smith     y[16*i+7]  += sum8;
23672f7816d4SBarry Smith     y[16*i+8]  += sum9;
23682f7816d4SBarry Smith     y[16*i+9]  += sum10;
23692f7816d4SBarry Smith     y[16*i+10] += sum11;
23702f7816d4SBarry Smith     y[16*i+11] += sum12;
23712f7816d4SBarry Smith     y[16*i+12] += sum13;
23722f7816d4SBarry Smith     y[16*i+13] += sum14;
23732f7816d4SBarry Smith     y[16*i+14] += sum15;
23742f7816d4SBarry Smith     y[16*i+15] += sum16;
23752f7816d4SBarry Smith   }
23762f7816d4SBarry Smith 
2377dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
23783649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
23791ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
23802f7816d4SBarry Smith   PetscFunctionReturn(0);
23812f7816d4SBarry Smith }
23822f7816d4SBarry Smith 
23832f7816d4SBarry Smith #undef __FUNCT__
23842f7816d4SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16"
2385dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
23862f7816d4SBarry Smith {
23872f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
23882f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2389d2840e09SBarry Smith   const PetscScalar *x,*v;
2390d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
23912f7816d4SBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2392dfbe8321SBarry Smith   PetscErrorCode    ierr;
2393d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2394d2840e09SBarry Smith   PetscInt          n,i;
23952f7816d4SBarry Smith 
23962f7816d4SBarry Smith   PetscFunctionBegin;
23972f7816d4SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
23983649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
23991ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
24002f7816d4SBarry Smith   for (i=0; i<m; i++) {
2401bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
2402bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
24032f7816d4SBarry Smith     n      = a->i[i+1] - a->i[i];
24042f7816d4SBarry Smith     alpha1 = x[16*i];
24052f7816d4SBarry Smith     alpha2 = x[16*i+1];
24062f7816d4SBarry Smith     alpha3 = x[16*i+2];
24072f7816d4SBarry Smith     alpha4 = x[16*i+3];
24082f7816d4SBarry Smith     alpha5 = x[16*i+4];
24092f7816d4SBarry Smith     alpha6 = x[16*i+5];
24102f7816d4SBarry Smith     alpha7 = x[16*i+6];
24112f7816d4SBarry Smith     alpha8 = x[16*i+7];
24122f7816d4SBarry Smith     alpha9  = x[16*i+8];
24132f7816d4SBarry Smith     alpha10 = x[16*i+9];
24142f7816d4SBarry Smith     alpha11 = x[16*i+10];
24152f7816d4SBarry Smith     alpha12 = x[16*i+11];
24162f7816d4SBarry Smith     alpha13 = x[16*i+12];
24172f7816d4SBarry Smith     alpha14 = x[16*i+13];
24182f7816d4SBarry Smith     alpha15 = x[16*i+14];
24192f7816d4SBarry Smith     alpha16 = x[16*i+15];
24202f7816d4SBarry Smith     while (n-->0) {
24212f7816d4SBarry Smith       y[16*(*idx)]   += alpha1*(*v);
24222f7816d4SBarry Smith       y[16*(*idx)+1] += alpha2*(*v);
24232f7816d4SBarry Smith       y[16*(*idx)+2] += alpha3*(*v);
24242f7816d4SBarry Smith       y[16*(*idx)+3] += alpha4*(*v);
24252f7816d4SBarry Smith       y[16*(*idx)+4] += alpha5*(*v);
24262f7816d4SBarry Smith       y[16*(*idx)+5] += alpha6*(*v);
24272f7816d4SBarry Smith       y[16*(*idx)+6] += alpha7*(*v);
24282f7816d4SBarry Smith       y[16*(*idx)+7] += alpha8*(*v);
24292f7816d4SBarry Smith       y[16*(*idx)+8]  += alpha9*(*v);
24302f7816d4SBarry Smith       y[16*(*idx)+9]  += alpha10*(*v);
24312f7816d4SBarry Smith       y[16*(*idx)+10] += alpha11*(*v);
24322f7816d4SBarry Smith       y[16*(*idx)+11] += alpha12*(*v);
24332f7816d4SBarry Smith       y[16*(*idx)+12] += alpha13*(*v);
24342f7816d4SBarry Smith       y[16*(*idx)+13] += alpha14*(*v);
24352f7816d4SBarry Smith       y[16*(*idx)+14] += alpha15*(*v);
24362f7816d4SBarry Smith       y[16*(*idx)+15] += alpha16*(*v);
24372f7816d4SBarry Smith       idx++; v++;
24382f7816d4SBarry Smith     }
24392f7816d4SBarry Smith   }
2440dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
24413649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
24421ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
244366ed3db0SBarry Smith   PetscFunctionReturn(0);
244466ed3db0SBarry Smith }
244566ed3db0SBarry Smith 
2446ed1c418cSBarry Smith /*--------------------------------------------------------------------------------------------*/
2447ed1c418cSBarry Smith #undef __FUNCT__
2448ed1c418cSBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_18"
2449ed1c418cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2450ed1c418cSBarry Smith {
2451ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2452ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2453d2840e09SBarry Smith   const PetscScalar *x,*v;
2454d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2455ed1c418cSBarry Smith   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2456ed1c418cSBarry Smith   PetscErrorCode    ierr;
2457d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2458d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
2459ed1c418cSBarry Smith 
2460ed1c418cSBarry Smith   PetscFunctionBegin;
24613649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2462ed1c418cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2463ed1c418cSBarry Smith   idx  = a->j;
2464ed1c418cSBarry Smith   v    = a->a;
2465ed1c418cSBarry Smith   ii   = a->i;
2466ed1c418cSBarry Smith 
2467ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2468ed1c418cSBarry Smith     jrow = ii[i];
2469ed1c418cSBarry Smith     n    = ii[i+1] - jrow;
2470ed1c418cSBarry Smith     sum1  = 0.0;
2471ed1c418cSBarry Smith     sum2  = 0.0;
2472ed1c418cSBarry Smith     sum3  = 0.0;
2473ed1c418cSBarry Smith     sum4  = 0.0;
2474ed1c418cSBarry Smith     sum5  = 0.0;
2475ed1c418cSBarry Smith     sum6  = 0.0;
2476ed1c418cSBarry Smith     sum7  = 0.0;
2477ed1c418cSBarry Smith     sum8  = 0.0;
2478ed1c418cSBarry Smith     sum9  = 0.0;
2479ed1c418cSBarry Smith     sum10 = 0.0;
2480ed1c418cSBarry Smith     sum11 = 0.0;
2481ed1c418cSBarry Smith     sum12 = 0.0;
2482ed1c418cSBarry Smith     sum13 = 0.0;
2483ed1c418cSBarry Smith     sum14 = 0.0;
2484ed1c418cSBarry Smith     sum15 = 0.0;
2485ed1c418cSBarry Smith     sum16 = 0.0;
2486ed1c418cSBarry Smith     sum17 = 0.0;
2487ed1c418cSBarry Smith     sum18 = 0.0;
2488ed1c418cSBarry Smith     nonzerorow += (n>0);
2489ed1c418cSBarry Smith     for (j=0; j<n; j++) {
2490ed1c418cSBarry Smith       sum1  += v[jrow]*x[18*idx[jrow]];
2491ed1c418cSBarry Smith       sum2  += v[jrow]*x[18*idx[jrow]+1];
2492ed1c418cSBarry Smith       sum3  += v[jrow]*x[18*idx[jrow]+2];
2493ed1c418cSBarry Smith       sum4  += v[jrow]*x[18*idx[jrow]+3];
2494ed1c418cSBarry Smith       sum5  += v[jrow]*x[18*idx[jrow]+4];
2495ed1c418cSBarry Smith       sum6  += v[jrow]*x[18*idx[jrow]+5];
2496ed1c418cSBarry Smith       sum7  += v[jrow]*x[18*idx[jrow]+6];
2497ed1c418cSBarry Smith       sum8  += v[jrow]*x[18*idx[jrow]+7];
2498ed1c418cSBarry Smith       sum9  += v[jrow]*x[18*idx[jrow]+8];
2499ed1c418cSBarry Smith       sum10 += v[jrow]*x[18*idx[jrow]+9];
2500ed1c418cSBarry Smith       sum11 += v[jrow]*x[18*idx[jrow]+10];
2501ed1c418cSBarry Smith       sum12 += v[jrow]*x[18*idx[jrow]+11];
2502ed1c418cSBarry Smith       sum13 += v[jrow]*x[18*idx[jrow]+12];
2503ed1c418cSBarry Smith       sum14 += v[jrow]*x[18*idx[jrow]+13];
2504ed1c418cSBarry Smith       sum15 += v[jrow]*x[18*idx[jrow]+14];
2505ed1c418cSBarry Smith       sum16 += v[jrow]*x[18*idx[jrow]+15];
2506ed1c418cSBarry Smith       sum17 += v[jrow]*x[18*idx[jrow]+16];
2507ed1c418cSBarry Smith       sum18 += v[jrow]*x[18*idx[jrow]+17];
2508ed1c418cSBarry Smith       jrow++;
2509ed1c418cSBarry Smith      }
2510ed1c418cSBarry Smith     y[18*i]    = sum1;
2511ed1c418cSBarry Smith     y[18*i+1]  = sum2;
2512ed1c418cSBarry Smith     y[18*i+2]  = sum3;
2513ed1c418cSBarry Smith     y[18*i+3]  = sum4;
2514ed1c418cSBarry Smith     y[18*i+4]  = sum5;
2515ed1c418cSBarry Smith     y[18*i+5]  = sum6;
2516ed1c418cSBarry Smith     y[18*i+6]  = sum7;
2517ed1c418cSBarry Smith     y[18*i+7]  = sum8;
2518ed1c418cSBarry Smith     y[18*i+8]  = sum9;
2519ed1c418cSBarry Smith     y[18*i+9]  = sum10;
2520ed1c418cSBarry Smith     y[18*i+10] = sum11;
2521ed1c418cSBarry Smith     y[18*i+11] = sum12;
2522ed1c418cSBarry Smith     y[18*i+12] = sum13;
2523ed1c418cSBarry Smith     y[18*i+13] = sum14;
2524ed1c418cSBarry Smith     y[18*i+14] = sum15;
2525ed1c418cSBarry Smith     y[18*i+15] = sum16;
2526ed1c418cSBarry Smith     y[18*i+16] = sum17;
2527ed1c418cSBarry Smith     y[18*i+17] = sum18;
2528ed1c418cSBarry Smith   }
2529ed1c418cSBarry Smith 
2530dc0b31edSSatish Balay   ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr);
25313649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2532ed1c418cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2533ed1c418cSBarry Smith   PetscFunctionReturn(0);
2534ed1c418cSBarry Smith }
2535ed1c418cSBarry Smith 
2536ed1c418cSBarry Smith #undef __FUNCT__
2537ed1c418cSBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_18"
2538ed1c418cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2539ed1c418cSBarry Smith {
2540ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2541ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2542d2840e09SBarry Smith   const PetscScalar *x,*v;
2543d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2544ed1c418cSBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2545ed1c418cSBarry Smith   PetscErrorCode    ierr;
2546d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2547d2840e09SBarry Smith   PetscInt          n,i;
2548ed1c418cSBarry Smith 
2549ed1c418cSBarry Smith   PetscFunctionBegin;
2550d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
25513649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2552ed1c418cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2553ed1c418cSBarry Smith 
2554ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2555ed1c418cSBarry Smith     idx    = a->j + a->i[i] ;
2556ed1c418cSBarry Smith     v      = a->a + a->i[i] ;
2557ed1c418cSBarry Smith     n      = a->i[i+1] - a->i[i];
2558ed1c418cSBarry Smith     alpha1  = x[18*i];
2559ed1c418cSBarry Smith     alpha2  = x[18*i+1];
2560ed1c418cSBarry Smith     alpha3  = x[18*i+2];
2561ed1c418cSBarry Smith     alpha4  = x[18*i+3];
2562ed1c418cSBarry Smith     alpha5  = x[18*i+4];
2563ed1c418cSBarry Smith     alpha6  = x[18*i+5];
2564ed1c418cSBarry Smith     alpha7  = x[18*i+6];
2565ed1c418cSBarry Smith     alpha8  = x[18*i+7];
2566ed1c418cSBarry Smith     alpha9  = x[18*i+8];
2567ed1c418cSBarry Smith     alpha10 = x[18*i+9];
2568ed1c418cSBarry Smith     alpha11 = x[18*i+10];
2569ed1c418cSBarry Smith     alpha12 = x[18*i+11];
2570ed1c418cSBarry Smith     alpha13 = x[18*i+12];
2571ed1c418cSBarry Smith     alpha14 = x[18*i+13];
2572ed1c418cSBarry Smith     alpha15 = x[18*i+14];
2573ed1c418cSBarry Smith     alpha16 = x[18*i+15];
2574ed1c418cSBarry Smith     alpha17 = x[18*i+16];
2575ed1c418cSBarry Smith     alpha18 = x[18*i+17];
2576ed1c418cSBarry Smith     while (n-->0) {
2577ed1c418cSBarry Smith       y[18*(*idx)]    += alpha1*(*v);
2578ed1c418cSBarry Smith       y[18*(*idx)+1]  += alpha2*(*v);
2579ed1c418cSBarry Smith       y[18*(*idx)+2]  += alpha3*(*v);
2580ed1c418cSBarry Smith       y[18*(*idx)+3]  += alpha4*(*v);
2581ed1c418cSBarry Smith       y[18*(*idx)+4]  += alpha5*(*v);
2582ed1c418cSBarry Smith       y[18*(*idx)+5]  += alpha6*(*v);
2583ed1c418cSBarry Smith       y[18*(*idx)+6]  += alpha7*(*v);
2584ed1c418cSBarry Smith       y[18*(*idx)+7]  += alpha8*(*v);
2585ed1c418cSBarry Smith       y[18*(*idx)+8]  += alpha9*(*v);
2586ed1c418cSBarry Smith       y[18*(*idx)+9]  += alpha10*(*v);
2587ed1c418cSBarry Smith       y[18*(*idx)+10] += alpha11*(*v);
2588ed1c418cSBarry Smith       y[18*(*idx)+11] += alpha12*(*v);
2589ed1c418cSBarry Smith       y[18*(*idx)+12] += alpha13*(*v);
2590ed1c418cSBarry Smith       y[18*(*idx)+13] += alpha14*(*v);
2591ed1c418cSBarry Smith       y[18*(*idx)+14] += alpha15*(*v);
2592ed1c418cSBarry Smith       y[18*(*idx)+15] += alpha16*(*v);
2593ed1c418cSBarry Smith       y[18*(*idx)+16] += alpha17*(*v);
2594ed1c418cSBarry Smith       y[18*(*idx)+17] += alpha18*(*v);
2595ed1c418cSBarry Smith       idx++; v++;
2596ed1c418cSBarry Smith     }
2597ed1c418cSBarry Smith   }
2598dc0b31edSSatish Balay   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
25993649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2600ed1c418cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2601ed1c418cSBarry Smith   PetscFunctionReturn(0);
2602ed1c418cSBarry Smith }
2603ed1c418cSBarry Smith 
2604ed1c418cSBarry Smith #undef __FUNCT__
2605ed1c418cSBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_18"
2606ed1c418cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2607ed1c418cSBarry Smith {
2608ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2609ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2610d2840e09SBarry Smith   const PetscScalar *x,*v;
2611d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2612ed1c418cSBarry Smith   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2613ed1c418cSBarry Smith   PetscErrorCode    ierr;
2614d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2615ed1c418cSBarry Smith   PetscInt          n,i,jrow,j;
2616ed1c418cSBarry Smith 
2617ed1c418cSBarry Smith   PetscFunctionBegin;
2618ed1c418cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
26193649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2620ed1c418cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2621ed1c418cSBarry Smith   idx  = a->j;
2622ed1c418cSBarry Smith   v    = a->a;
2623ed1c418cSBarry Smith   ii   = a->i;
2624ed1c418cSBarry Smith 
2625ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2626ed1c418cSBarry Smith     jrow = ii[i];
2627ed1c418cSBarry Smith     n    = ii[i+1] - jrow;
2628ed1c418cSBarry Smith     sum1  = 0.0;
2629ed1c418cSBarry Smith     sum2  = 0.0;
2630ed1c418cSBarry Smith     sum3  = 0.0;
2631ed1c418cSBarry Smith     sum4  = 0.0;
2632ed1c418cSBarry Smith     sum5  = 0.0;
2633ed1c418cSBarry Smith     sum6  = 0.0;
2634ed1c418cSBarry Smith     sum7  = 0.0;
2635ed1c418cSBarry Smith     sum8  = 0.0;
2636ed1c418cSBarry Smith     sum9  = 0.0;
2637ed1c418cSBarry Smith     sum10 = 0.0;
2638ed1c418cSBarry Smith     sum11 = 0.0;
2639ed1c418cSBarry Smith     sum12 = 0.0;
2640ed1c418cSBarry Smith     sum13 = 0.0;
2641ed1c418cSBarry Smith     sum14 = 0.0;
2642ed1c418cSBarry Smith     sum15 = 0.0;
2643ed1c418cSBarry Smith     sum16 = 0.0;
2644ed1c418cSBarry Smith     sum17 = 0.0;
2645ed1c418cSBarry Smith     sum18 = 0.0;
2646ed1c418cSBarry Smith     for (j=0; j<n; j++) {
2647ed1c418cSBarry Smith       sum1  += v[jrow]*x[18*idx[jrow]];
2648ed1c418cSBarry Smith       sum2  += v[jrow]*x[18*idx[jrow]+1];
2649ed1c418cSBarry Smith       sum3  += v[jrow]*x[18*idx[jrow]+2];
2650ed1c418cSBarry Smith       sum4  += v[jrow]*x[18*idx[jrow]+3];
2651ed1c418cSBarry Smith       sum5  += v[jrow]*x[18*idx[jrow]+4];
2652ed1c418cSBarry Smith       sum6  += v[jrow]*x[18*idx[jrow]+5];
2653ed1c418cSBarry Smith       sum7  += v[jrow]*x[18*idx[jrow]+6];
2654ed1c418cSBarry Smith       sum8  += v[jrow]*x[18*idx[jrow]+7];
2655ed1c418cSBarry Smith       sum9  += v[jrow]*x[18*idx[jrow]+8];
2656ed1c418cSBarry Smith       sum10 += v[jrow]*x[18*idx[jrow]+9];
2657ed1c418cSBarry Smith       sum11 += v[jrow]*x[18*idx[jrow]+10];
2658ed1c418cSBarry Smith       sum12 += v[jrow]*x[18*idx[jrow]+11];
2659ed1c418cSBarry Smith       sum13 += v[jrow]*x[18*idx[jrow]+12];
2660ed1c418cSBarry Smith       sum14 += v[jrow]*x[18*idx[jrow]+13];
2661ed1c418cSBarry Smith       sum15 += v[jrow]*x[18*idx[jrow]+14];
2662ed1c418cSBarry Smith       sum16 += v[jrow]*x[18*idx[jrow]+15];
2663ed1c418cSBarry Smith       sum17 += v[jrow]*x[18*idx[jrow]+16];
2664ed1c418cSBarry Smith       sum18 += v[jrow]*x[18*idx[jrow]+17];
2665ed1c418cSBarry Smith       jrow++;
2666ed1c418cSBarry Smith      }
2667ed1c418cSBarry Smith     y[18*i]    += sum1;
2668ed1c418cSBarry Smith     y[18*i+1]  += sum2;
2669ed1c418cSBarry Smith     y[18*i+2]  += sum3;
2670ed1c418cSBarry Smith     y[18*i+3]  += sum4;
2671ed1c418cSBarry Smith     y[18*i+4]  += sum5;
2672ed1c418cSBarry Smith     y[18*i+5]  += sum6;
2673ed1c418cSBarry Smith     y[18*i+6]  += sum7;
2674ed1c418cSBarry Smith     y[18*i+7]  += sum8;
2675ed1c418cSBarry Smith     y[18*i+8]  += sum9;
2676ed1c418cSBarry Smith     y[18*i+9]  += sum10;
2677ed1c418cSBarry Smith     y[18*i+10] += sum11;
2678ed1c418cSBarry Smith     y[18*i+11] += sum12;
2679ed1c418cSBarry Smith     y[18*i+12] += sum13;
2680ed1c418cSBarry Smith     y[18*i+13] += sum14;
2681ed1c418cSBarry Smith     y[18*i+14] += sum15;
2682ed1c418cSBarry Smith     y[18*i+15] += sum16;
2683ed1c418cSBarry Smith     y[18*i+16] += sum17;
2684ed1c418cSBarry Smith     y[18*i+17] += sum18;
2685ed1c418cSBarry Smith   }
2686ed1c418cSBarry Smith 
2687dc0b31edSSatish Balay   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
26883649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2689ed1c418cSBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2690ed1c418cSBarry Smith   PetscFunctionReturn(0);
2691ed1c418cSBarry Smith }
2692ed1c418cSBarry Smith 
2693ed1c418cSBarry Smith #undef __FUNCT__
2694ed1c418cSBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_18"
2695ed1c418cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2696ed1c418cSBarry Smith {
2697ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2698ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2699d2840e09SBarry Smith   const PetscScalar *x,*v;
2700d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2701ed1c418cSBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2702ed1c418cSBarry Smith   PetscErrorCode    ierr;
2703d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2704d2840e09SBarry Smith   PetscInt          n,i;
2705ed1c418cSBarry Smith 
2706ed1c418cSBarry Smith   PetscFunctionBegin;
2707ed1c418cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
27083649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2709ed1c418cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2710ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2711ed1c418cSBarry Smith     idx    = a->j + a->i[i] ;
2712ed1c418cSBarry Smith     v      = a->a + a->i[i] ;
2713ed1c418cSBarry Smith     n      = a->i[i+1] - a->i[i];
2714ed1c418cSBarry Smith     alpha1 = x[18*i];
2715ed1c418cSBarry Smith     alpha2 = x[18*i+1];
2716ed1c418cSBarry Smith     alpha3 = x[18*i+2];
2717ed1c418cSBarry Smith     alpha4 = x[18*i+3];
2718ed1c418cSBarry Smith     alpha5 = x[18*i+4];
2719ed1c418cSBarry Smith     alpha6 = x[18*i+5];
2720ed1c418cSBarry Smith     alpha7 = x[18*i+6];
2721ed1c418cSBarry Smith     alpha8 = x[18*i+7];
2722ed1c418cSBarry Smith     alpha9  = x[18*i+8];
2723ed1c418cSBarry Smith     alpha10 = x[18*i+9];
2724ed1c418cSBarry Smith     alpha11 = x[18*i+10];
2725ed1c418cSBarry Smith     alpha12 = x[18*i+11];
2726ed1c418cSBarry Smith     alpha13 = x[18*i+12];
2727ed1c418cSBarry Smith     alpha14 = x[18*i+13];
2728ed1c418cSBarry Smith     alpha15 = x[18*i+14];
2729ed1c418cSBarry Smith     alpha16 = x[18*i+15];
2730ed1c418cSBarry Smith     alpha17 = x[18*i+16];
2731ed1c418cSBarry Smith     alpha18 = x[18*i+17];
2732ed1c418cSBarry Smith     while (n-->0) {
2733ed1c418cSBarry Smith       y[18*(*idx)]   += alpha1*(*v);
2734ed1c418cSBarry Smith       y[18*(*idx)+1] += alpha2*(*v);
2735ed1c418cSBarry Smith       y[18*(*idx)+2] += alpha3*(*v);
2736ed1c418cSBarry Smith       y[18*(*idx)+3] += alpha4*(*v);
2737ed1c418cSBarry Smith       y[18*(*idx)+4] += alpha5*(*v);
2738ed1c418cSBarry Smith       y[18*(*idx)+5] += alpha6*(*v);
2739ed1c418cSBarry Smith       y[18*(*idx)+6] += alpha7*(*v);
2740ed1c418cSBarry Smith       y[18*(*idx)+7] += alpha8*(*v);
2741ed1c418cSBarry Smith       y[18*(*idx)+8]  += alpha9*(*v);
2742ed1c418cSBarry Smith       y[18*(*idx)+9]  += alpha10*(*v);
2743ed1c418cSBarry Smith       y[18*(*idx)+10] += alpha11*(*v);
2744ed1c418cSBarry Smith       y[18*(*idx)+11] += alpha12*(*v);
2745ed1c418cSBarry Smith       y[18*(*idx)+12] += alpha13*(*v);
2746ed1c418cSBarry Smith       y[18*(*idx)+13] += alpha14*(*v);
2747ed1c418cSBarry Smith       y[18*(*idx)+14] += alpha15*(*v);
2748ed1c418cSBarry Smith       y[18*(*idx)+15] += alpha16*(*v);
2749ed1c418cSBarry Smith       y[18*(*idx)+16] += alpha17*(*v);
2750ed1c418cSBarry Smith       y[18*(*idx)+17] += alpha18*(*v);
2751ed1c418cSBarry Smith       idx++; v++;
2752ed1c418cSBarry Smith     }
2753ed1c418cSBarry Smith   }
2754dc0b31edSSatish Balay   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
27553649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2756ed1c418cSBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2757ed1c418cSBarry Smith   PetscFunctionReturn(0);
2758ed1c418cSBarry Smith }
2759ed1c418cSBarry Smith 
27606861f193SBarry Smith #undef __FUNCT__
27616861f193SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_N"
27626861f193SBarry Smith PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
27636861f193SBarry Smith {
27646861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
27656861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
27666861f193SBarry Smith   const PetscScalar *x,*v;
27676861f193SBarry Smith   PetscScalar       *y,*sums;
27686861f193SBarry Smith   PetscErrorCode    ierr;
27696861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
27706861f193SBarry Smith   PetscInt          n,i,jrow,j,dof = b->dof,k;
27716861f193SBarry Smith 
27726861f193SBarry Smith   PetscFunctionBegin;
27733649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
27746861f193SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
27756861f193SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
27766861f193SBarry Smith   idx  = a->j;
27776861f193SBarry Smith   v    = a->a;
27786861f193SBarry Smith   ii   = a->i;
27796861f193SBarry Smith 
27806861f193SBarry Smith   for (i=0; i<m; i++) {
27816861f193SBarry Smith     jrow = ii[i];
27826861f193SBarry Smith     n    = ii[i+1] - jrow;
27836861f193SBarry Smith     sums = y + dof*i;
27846861f193SBarry Smith     for (j=0; j<n; j++) {
27856861f193SBarry Smith       for (k=0; k<dof; k++) {
27866861f193SBarry Smith         sums[k]  += v[jrow]*x[dof*idx[jrow]+k];
27876861f193SBarry Smith       }
27886861f193SBarry Smith       jrow++;
27896861f193SBarry Smith     }
27906861f193SBarry Smith   }
27916861f193SBarry Smith 
27926861f193SBarry Smith   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
27933649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
27946861f193SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
27956861f193SBarry Smith   PetscFunctionReturn(0);
27966861f193SBarry Smith }
27976861f193SBarry Smith 
27986861f193SBarry Smith #undef __FUNCT__
27996861f193SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_N"
28006861f193SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
28016861f193SBarry Smith {
28026861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
28036861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
28046861f193SBarry Smith   const PetscScalar *x,*v;
28056861f193SBarry Smith   PetscScalar       *y,*sums;
28066861f193SBarry Smith   PetscErrorCode    ierr;
28076861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
28086861f193SBarry Smith   PetscInt          n,i,jrow,j,dof = b->dof,k;
28096861f193SBarry Smith 
28106861f193SBarry Smith   PetscFunctionBegin;
28116861f193SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
28123649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
28136861f193SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
28146861f193SBarry Smith   idx  = a->j;
28156861f193SBarry Smith   v    = a->a;
28166861f193SBarry Smith   ii   = a->i;
28176861f193SBarry Smith 
28186861f193SBarry Smith   for (i=0; i<m; i++) {
28196861f193SBarry Smith     jrow = ii[i];
28206861f193SBarry Smith     n    = ii[i+1] - jrow;
28216861f193SBarry Smith     sums = y + dof*i;
28226861f193SBarry Smith     for (j=0; j<n; j++) {
28236861f193SBarry Smith       for (k=0; k<dof; k++) {
28246861f193SBarry Smith         sums[k]  += v[jrow]*x[dof*idx[jrow]+k];
28256861f193SBarry Smith       }
28266861f193SBarry Smith       jrow++;
28276861f193SBarry Smith     }
28286861f193SBarry Smith   }
28296861f193SBarry Smith 
28306861f193SBarry Smith   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
28313649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
28326861f193SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
28336861f193SBarry Smith   PetscFunctionReturn(0);
28346861f193SBarry Smith }
28356861f193SBarry Smith 
28366861f193SBarry Smith #undef __FUNCT__
28376861f193SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_N"
28386861f193SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
28396861f193SBarry Smith {
28406861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
28416861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
28426861f193SBarry Smith   const PetscScalar *x,*v,*alpha;
28436861f193SBarry Smith   PetscScalar       *y;
28446861f193SBarry Smith   PetscErrorCode    ierr;
28456861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
28466861f193SBarry Smith   PetscInt          n,i,k;
28476861f193SBarry Smith 
28486861f193SBarry Smith   PetscFunctionBegin;
28493649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
28506861f193SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
28516861f193SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
28526861f193SBarry Smith   for (i=0; i<m; i++) {
28536861f193SBarry Smith     idx    = a->j + a->i[i] ;
28546861f193SBarry Smith     v      = a->a + a->i[i] ;
28556861f193SBarry Smith     n      = a->i[i+1] - a->i[i];
28566861f193SBarry Smith     alpha  = x + dof*i;
28576861f193SBarry Smith     while (n-->0) {
28586861f193SBarry Smith       for (k=0; k<dof; k++) {
28596861f193SBarry Smith         y[dof*(*idx)+k] += alpha[k]*(*v);
28606861f193SBarry Smith       }
286183ce7366SBarry Smith       idx++; v++;
28626861f193SBarry Smith     }
28636861f193SBarry Smith   }
28646861f193SBarry Smith   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
28653649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
28666861f193SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
28676861f193SBarry Smith   PetscFunctionReturn(0);
28686861f193SBarry Smith }
28696861f193SBarry Smith 
28706861f193SBarry Smith #undef __FUNCT__
28716861f193SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_N"
28726861f193SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
28736861f193SBarry Smith {
28746861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
28756861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
28766861f193SBarry Smith   const PetscScalar *x,*v,*alpha;
28776861f193SBarry Smith   PetscScalar       *y;
28786861f193SBarry Smith   PetscErrorCode    ierr;
28796861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
28806861f193SBarry Smith   PetscInt          n,i,k;
28816861f193SBarry Smith 
28826861f193SBarry Smith   PetscFunctionBegin;
28836861f193SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
28843649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
28856861f193SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
28866861f193SBarry Smith   for (i=0; i<m; i++) {
28876861f193SBarry Smith     idx    = a->j + a->i[i] ;
28886861f193SBarry Smith     v      = a->a + a->i[i] ;
28896861f193SBarry Smith     n      = a->i[i+1] - a->i[i];
28906861f193SBarry Smith     alpha  = x + dof*i;
28916861f193SBarry Smith     while (n-->0) {
28926861f193SBarry Smith       for (k=0; k<dof; k++) {
28936861f193SBarry Smith         y[dof*(*idx)+k] += alpha[k]*(*v);
28946861f193SBarry Smith       }
289583ce7366SBarry Smith       idx++; v++;
28966861f193SBarry Smith     }
28976861f193SBarry Smith   }
28986861f193SBarry Smith   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
28993649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
29006861f193SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
29016861f193SBarry Smith   PetscFunctionReturn(0);
29026861f193SBarry Smith }
29036861f193SBarry Smith 
2904f4a53059SBarry Smith /*===================================================================================*/
29054a2ae208SSatish Balay #undef __FUNCT__
29064a2ae208SSatish Balay #define __FUNCT__ "MatMult_MPIMAIJ_dof"
2907dfbe8321SBarry Smith PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2908f4a53059SBarry Smith {
29094cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2910dfbe8321SBarry Smith   PetscErrorCode ierr;
2911f4a53059SBarry Smith 
2912b24ad042SBarry Smith   PetscFunctionBegin;
2913f4a53059SBarry Smith   /* start the scatter */
2914ca9f406cSSatish Balay   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
29154cb9d9b8SBarry Smith   ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr);
2916ca9f406cSSatish Balay   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
29174cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr);
2918f4a53059SBarry Smith   PetscFunctionReturn(0);
2919f4a53059SBarry Smith }
2920f4a53059SBarry Smith 
29214a2ae208SSatish Balay #undef __FUNCT__
29224a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof"
2923dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
29244cb9d9b8SBarry Smith {
29254cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2926dfbe8321SBarry Smith   PetscErrorCode ierr;
2927b24ad042SBarry Smith 
29284cb9d9b8SBarry Smith   PetscFunctionBegin;
29294cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
29304cb9d9b8SBarry Smith   ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr);
2931ca9f406cSSatish Balay   ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2932ca9f406cSSatish Balay   ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
29334cb9d9b8SBarry Smith   PetscFunctionReturn(0);
29344cb9d9b8SBarry Smith }
29354cb9d9b8SBarry Smith 
29364a2ae208SSatish Balay #undef __FUNCT__
29374a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof"
2938dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
29394cb9d9b8SBarry Smith {
29404cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2941dfbe8321SBarry Smith   PetscErrorCode ierr;
29424cb9d9b8SBarry Smith 
2943b24ad042SBarry Smith   PetscFunctionBegin;
29444cb9d9b8SBarry Smith   /* start the scatter */
2945ca9f406cSSatish Balay   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2946d72c5c08SBarry Smith   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2947ca9f406cSSatish Balay   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2948717f2ec8SHong Zhang   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
29494cb9d9b8SBarry Smith   PetscFunctionReturn(0);
29504cb9d9b8SBarry Smith }
29514cb9d9b8SBarry Smith 
29524a2ae208SSatish Balay #undef __FUNCT__
29534a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof"
2954dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
29554cb9d9b8SBarry Smith {
29564cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2957dfbe8321SBarry Smith   PetscErrorCode ierr;
2958b24ad042SBarry Smith 
29594cb9d9b8SBarry Smith   PetscFunctionBegin;
29604cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2961ca9f406cSSatish Balay   ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2962d72c5c08SBarry Smith   ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2963ca9f406cSSatish Balay   ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
29644cb9d9b8SBarry Smith   PetscFunctionReturn(0);
29654cb9d9b8SBarry Smith }
29664cb9d9b8SBarry Smith 
296795ddefa2SHong Zhang /* ----------------------------------------------------------------*/
29689c4fc4c7SBarry Smith #undef __FUNCT__
29697ba1a0bfSKris Buschelman #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
29707ba1a0bfSKris Buschelman PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
29717ba1a0bfSKris Buschelman {
29727ba1a0bfSKris Buschelman   PetscErrorCode     ierr;
2973a1a86e44SBarry Smith   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
29747ba1a0bfSKris Buschelman   Mat_SeqMAIJ        *pp=(Mat_SeqMAIJ*)PP->data;
29757ba1a0bfSKris Buschelman   Mat                P=pp->AIJ;
29767ba1a0bfSKris Buschelman   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2977d2840e09SBarry Smith   PetscInt           *pti,*ptj,*ptJ;
29787ba1a0bfSKris Buschelman   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2979d2840e09SBarry Smith   const PetscInt     an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof;
2980d2840e09SBarry Smith   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn;
29817ba1a0bfSKris Buschelman   MatScalar          *ca;
2982d2840e09SBarry Smith   const PetscInt     *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj;
29837ba1a0bfSKris Buschelman 
29847ba1a0bfSKris Buschelman   PetscFunctionBegin;
29857ba1a0bfSKris Buschelman   /* Get ij structure of P^T */
29867ba1a0bfSKris Buschelman   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
29877ba1a0bfSKris Buschelman 
29887ba1a0bfSKris Buschelman   cn = pn*ppdof;
29897ba1a0bfSKris Buschelman   /* Allocate ci array, arrays for fill computation and */
29907ba1a0bfSKris Buschelman   /* free space for accumulating nonzero column info */
29917ba1a0bfSKris Buschelman   ierr = PetscMalloc((cn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
29927ba1a0bfSKris Buschelman   ci[0] = 0;
29937ba1a0bfSKris Buschelman 
29947ba1a0bfSKris Buschelman   /* Work arrays for rows of P^T*A */
299574ed9c26SBarry Smith   ierr = PetscMalloc4(an,PetscInt,&ptadenserow,an,PetscInt,&ptasparserow,cn,PetscInt,&denserow,cn,PetscInt,&sparserow);CHKERRQ(ierr);
2996c43dabc9SBarry Smith   ierr = PetscMemzero(ptadenserow,an*sizeof(PetscInt));CHKERRQ(ierr);
2997c43dabc9SBarry Smith   ierr = PetscMemzero(denserow,cn*sizeof(PetscInt));CHKERRQ(ierr);
29987ba1a0bfSKris Buschelman 
29997ba1a0bfSKris Buschelman   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
30007ba1a0bfSKris Buschelman   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
30017ba1a0bfSKris Buschelman   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
3002a1a86e44SBarry Smith   ierr          = PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space);
30037ba1a0bfSKris Buschelman   current_space = free_space;
30047ba1a0bfSKris Buschelman 
30057ba1a0bfSKris Buschelman   /* Determine symbolic info for each row of C: */
30067ba1a0bfSKris Buschelman   for (i=0;i<pn;i++) {
30077ba1a0bfSKris Buschelman     ptnzi  = pti[i+1] - pti[i];
30087ba1a0bfSKris Buschelman     ptJ    = ptj + pti[i];
30097ba1a0bfSKris Buschelman     for (dof=0;dof<ppdof;dof++) {
30107ba1a0bfSKris Buschelman       ptanzi = 0;
30117ba1a0bfSKris Buschelman       /* Determine symbolic row of PtA: */
30127ba1a0bfSKris Buschelman       for (j=0;j<ptnzi;j++) {
30137ba1a0bfSKris Buschelman         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
30147ba1a0bfSKris Buschelman         arow = ptJ[j]*ppdof + dof;
30157ba1a0bfSKris Buschelman         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
30167ba1a0bfSKris Buschelman         anzj = ai[arow+1] - ai[arow];
30177ba1a0bfSKris Buschelman         ajj  = aj + ai[arow];
30187ba1a0bfSKris Buschelman         for (k=0;k<anzj;k++) {
30197ba1a0bfSKris Buschelman           if (!ptadenserow[ajj[k]]) {
30207ba1a0bfSKris Buschelman             ptadenserow[ajj[k]]    = -1;
30217ba1a0bfSKris Buschelman             ptasparserow[ptanzi++] = ajj[k];
30227ba1a0bfSKris Buschelman           }
30237ba1a0bfSKris Buschelman         }
30247ba1a0bfSKris Buschelman       }
30257ba1a0bfSKris Buschelman       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
30267ba1a0bfSKris Buschelman       ptaj = ptasparserow;
30277ba1a0bfSKris Buschelman       cnzi   = 0;
30287ba1a0bfSKris Buschelman       for (j=0;j<ptanzi;j++) {
30297ba1a0bfSKris Buschelman         /* Get offset within block of P */
30307ba1a0bfSKris Buschelman         pshift = *ptaj%ppdof;
30317ba1a0bfSKris Buschelman         /* Get block row of P */
30327ba1a0bfSKris Buschelman         prow = (*ptaj++)/ppdof; /* integer division */
30337ba1a0bfSKris Buschelman         /* P has same number of nonzeros per row as the compressed form */
30347ba1a0bfSKris Buschelman         pnzj = pi[prow+1] - pi[prow];
30357ba1a0bfSKris Buschelman         pjj  = pj + pi[prow];
30367ba1a0bfSKris Buschelman         for (k=0;k<pnzj;k++) {
30377ba1a0bfSKris Buschelman           /* Locations in C are shifted by the offset within the block */
30387ba1a0bfSKris Buschelman           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
30397ba1a0bfSKris Buschelman           if (!denserow[pjj[k]*ppdof+pshift]) {
30407ba1a0bfSKris Buschelman             denserow[pjj[k]*ppdof+pshift] = -1;
30417ba1a0bfSKris Buschelman             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
30427ba1a0bfSKris Buschelman           }
30437ba1a0bfSKris Buschelman         }
30447ba1a0bfSKris Buschelman       }
30457ba1a0bfSKris Buschelman 
30467ba1a0bfSKris Buschelman       /* sort sparserow */
30477ba1a0bfSKris Buschelman       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
30487ba1a0bfSKris Buschelman 
30497ba1a0bfSKris Buschelman       /* If free space is not available, make more free space */
30507ba1a0bfSKris Buschelman       /* Double the amount of total space in the list */
30517ba1a0bfSKris Buschelman       if (current_space->local_remaining<cnzi) {
30524238b7adSHong Zhang         ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
30537ba1a0bfSKris Buschelman       }
30547ba1a0bfSKris Buschelman 
30557ba1a0bfSKris Buschelman       /* Copy data into free space, and zero out denserows */
30567ba1a0bfSKris Buschelman       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
30577ba1a0bfSKris Buschelman       current_space->array           += cnzi;
30587ba1a0bfSKris Buschelman       current_space->local_used      += cnzi;
30597ba1a0bfSKris Buschelman       current_space->local_remaining -= cnzi;
30607ba1a0bfSKris Buschelman 
30617ba1a0bfSKris Buschelman       for (j=0;j<ptanzi;j++) {
30627ba1a0bfSKris Buschelman         ptadenserow[ptasparserow[j]] = 0;
30637ba1a0bfSKris Buschelman       }
30647ba1a0bfSKris Buschelman       for (j=0;j<cnzi;j++) {
30657ba1a0bfSKris Buschelman         denserow[sparserow[j]] = 0;
30667ba1a0bfSKris Buschelman       }
30677ba1a0bfSKris Buschelman       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
30687ba1a0bfSKris Buschelman       /*        For now, we will recompute what is needed. */
30697ba1a0bfSKris Buschelman       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
30707ba1a0bfSKris Buschelman     }
30717ba1a0bfSKris Buschelman   }
30727ba1a0bfSKris Buschelman   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
30737ba1a0bfSKris Buschelman   /* Allocate space for cj, initialize cj, and */
30747ba1a0bfSKris Buschelman   /* destroy list of free space and other temporary array(s) */
30757ba1a0bfSKris Buschelman   ierr = PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3076a1a86e44SBarry Smith   ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
307774ed9c26SBarry Smith   ierr = PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);CHKERRQ(ierr);
30787ba1a0bfSKris Buschelman 
30797ba1a0bfSKris Buschelman   /* Allocate space for ca */
30807ba1a0bfSKris Buschelman   ierr = PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
30817ba1a0bfSKris Buschelman   ierr = PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
30827ba1a0bfSKris Buschelman 
30837ba1a0bfSKris Buschelman   /* put together the new matrix */
30847adad957SLisandro Dalcin   ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,cn,cn,ci,cj,ca,C);CHKERRQ(ierr);
30857ba1a0bfSKris Buschelman 
30867ba1a0bfSKris Buschelman   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
30877ba1a0bfSKris Buschelman   /* Since these are PETSc arrays, change flags to free them as necessary. */
30887ba1a0bfSKris Buschelman   c          = (Mat_SeqAIJ *)((*C)->data);
3089e6b907acSBarry Smith   c->free_a  = PETSC_TRUE;
3090e6b907acSBarry Smith   c->free_ij = PETSC_TRUE;
30917ba1a0bfSKris Buschelman   c->nonew   = 0;
30922121bac1SHong Zhang   (*C)->ops->ptap = MatPtAP_SeqAIJ_SeqMAIJ;
30937ba1a0bfSKris Buschelman 
30947ba1a0bfSKris Buschelman   /* Clean up. */
30957ba1a0bfSKris Buschelman   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
30967ba1a0bfSKris Buschelman   PetscFunctionReturn(0);
30977ba1a0bfSKris Buschelman }
30987ba1a0bfSKris Buschelman 
30997ba1a0bfSKris Buschelman #undef __FUNCT__
31007ba1a0bfSKris Buschelman #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ"
31017ba1a0bfSKris Buschelman PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
31027ba1a0bfSKris Buschelman {
31037ba1a0bfSKris Buschelman   /* This routine requires testing -- first draft only */
31047ba1a0bfSKris Buschelman   PetscErrorCode  ierr;
31057ba1a0bfSKris Buschelman   Mat_SeqMAIJ     *pp=(Mat_SeqMAIJ*)PP->data;
31067ba1a0bfSKris Buschelman   Mat             P=pp->AIJ;
31077ba1a0bfSKris Buschelman   Mat_SeqAIJ      *a  = (Mat_SeqAIJ *) A->data;
31087ba1a0bfSKris Buschelman   Mat_SeqAIJ      *p  = (Mat_SeqAIJ *) P->data;
31097ba1a0bfSKris Buschelman   Mat_SeqAIJ      *c  = (Mat_SeqAIJ *) C->data;
3110d2840e09SBarry Smith   const PetscInt  *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
3111d2840e09SBarry Smith   const PetscInt  *ci=c->i,*cj=c->j,*cjj;
3112d2840e09SBarry Smith   const PetscInt  am=A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
3113d2840e09SBarry Smith   PetscInt        i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense;
3114d2840e09SBarry Smith   const MatScalar *aa=a->a,*pa=p->a,*pA=p->a,*paj;
3115d2840e09SBarry Smith   MatScalar       *ca=c->a,*caj,*apa;
31167ba1a0bfSKris Buschelman 
31177ba1a0bfSKris Buschelman   PetscFunctionBegin;
31187ba1a0bfSKris Buschelman   /* Allocate temporary array for storage of one row of A*P */
311974ed9c26SBarry Smith   ierr = PetscMalloc3(cn,MatScalar,&apa,cn,PetscInt,&apj,cn,PetscInt,&apjdense);CHKERRQ(ierr);
312074ed9c26SBarry Smith   ierr = PetscMemzero(apa,cn*sizeof(MatScalar));CHKERRQ(ierr);
312174ed9c26SBarry Smith   ierr = PetscMemzero(apj,cn*sizeof(PetscInt));CHKERRQ(ierr);
312274ed9c26SBarry Smith   ierr = PetscMemzero(apjdense,cn*sizeof(PetscInt));CHKERRQ(ierr);
31237ba1a0bfSKris Buschelman 
31247ba1a0bfSKris Buschelman   /* Clear old values in C */
31257ba1a0bfSKris Buschelman   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
31267ba1a0bfSKris Buschelman 
31277ba1a0bfSKris Buschelman   for (i=0;i<am;i++) {
31287ba1a0bfSKris Buschelman     /* Form sparse row of A*P */
31297ba1a0bfSKris Buschelman     anzi  = ai[i+1] - ai[i];
31307ba1a0bfSKris Buschelman     apnzj = 0;
31317ba1a0bfSKris Buschelman     for (j=0;j<anzi;j++) {
31327ba1a0bfSKris Buschelman       /* Get offset within block of P */
31337ba1a0bfSKris Buschelman       pshift = *aj%ppdof;
31347ba1a0bfSKris Buschelman       /* Get block row of P */
31357ba1a0bfSKris Buschelman       prow   = *aj++/ppdof; /* integer division */
31367ba1a0bfSKris Buschelman       pnzj = pi[prow+1] - pi[prow];
31377ba1a0bfSKris Buschelman       pjj  = pj + pi[prow];
31387ba1a0bfSKris Buschelman       paj  = pa + pi[prow];
31397ba1a0bfSKris Buschelman       for (k=0;k<pnzj;k++) {
31407ba1a0bfSKris Buschelman         poffset = pjj[k]*ppdof+pshift;
31417ba1a0bfSKris Buschelman         if (!apjdense[poffset]) {
31427ba1a0bfSKris Buschelman           apjdense[poffset] = -1;
31437ba1a0bfSKris Buschelman           apj[apnzj++]      = poffset;
31447ba1a0bfSKris Buschelman         }
31457ba1a0bfSKris Buschelman         apa[poffset] += (*aa)*paj[k];
31467ba1a0bfSKris Buschelman       }
3147dc0b31edSSatish Balay       ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr);
31487ba1a0bfSKris Buschelman       aa++;
31497ba1a0bfSKris Buschelman     }
31507ba1a0bfSKris Buschelman 
31517ba1a0bfSKris Buschelman     /* Sort the j index array for quick sparse axpy. */
31527ba1a0bfSKris Buschelman     /* Note: a array does not need sorting as it is in dense storage locations. */
31537ba1a0bfSKris Buschelman     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
31547ba1a0bfSKris Buschelman 
31557ba1a0bfSKris Buschelman     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
31567ba1a0bfSKris Buschelman     prow    = i/ppdof; /* integer division */
31577ba1a0bfSKris Buschelman     pshift  = i%ppdof;
31587ba1a0bfSKris Buschelman     poffset = pi[prow];
31597ba1a0bfSKris Buschelman     pnzi = pi[prow+1] - poffset;
31607ba1a0bfSKris Buschelman     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
31617ba1a0bfSKris Buschelman     pJ   = pj+poffset;
31627ba1a0bfSKris Buschelman     pA   = pa+poffset;
31637ba1a0bfSKris Buschelman     for (j=0;j<pnzi;j++) {
31647ba1a0bfSKris Buschelman       crow   = (*pJ)*ppdof+pshift;
31657ba1a0bfSKris Buschelman       cjj    = cj + ci[crow];
31667ba1a0bfSKris Buschelman       caj    = ca + ci[crow];
31677ba1a0bfSKris Buschelman       pJ++;
31687ba1a0bfSKris Buschelman       /* Perform sparse axpy operation.  Note cjj includes apj. */
31697ba1a0bfSKris Buschelman       for (k=0,nextap=0;nextap<apnzj;k++) {
31707ba1a0bfSKris Buschelman         if (cjj[k]==apj[nextap]) {
31717ba1a0bfSKris Buschelman           caj[k] += (*pA)*apa[apj[nextap++]];
31727ba1a0bfSKris Buschelman         }
31737ba1a0bfSKris Buschelman       }
3174dc0b31edSSatish Balay       ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr);
31757ba1a0bfSKris Buschelman       pA++;
31767ba1a0bfSKris Buschelman     }
31777ba1a0bfSKris Buschelman 
31787ba1a0bfSKris Buschelman     /* Zero the current row info for A*P */
31797ba1a0bfSKris Buschelman     for (j=0;j<apnzj;j++) {
31807ba1a0bfSKris Buschelman       apa[apj[j]]      = 0.;
31817ba1a0bfSKris Buschelman       apjdense[apj[j]] = 0;
31827ba1a0bfSKris Buschelman     }
31837ba1a0bfSKris Buschelman   }
31847ba1a0bfSKris Buschelman 
31857ba1a0bfSKris Buschelman   /* Assemble the final matrix and clean up */
31867ba1a0bfSKris Buschelman   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
31877ba1a0bfSKris Buschelman   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
318874ed9c26SBarry Smith   ierr = PetscFree3(apa,apj,apjdense);CHKERRQ(ierr);
318995ddefa2SHong Zhang   PetscFunctionReturn(0);
319095ddefa2SHong Zhang }
31917ba1a0bfSKris Buschelman 
31922121bac1SHong Zhang #undef __FUNCT__
31932121bac1SHong Zhang #define __FUNCT__ "MatPtAP_SeqAIJ_SeqMAIJ"
319415221df2SJed Brown PETSC_EXTERN_C PetscErrorCode MatPtAP_SeqAIJ_SeqMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
31952121bac1SHong Zhang {
31962121bac1SHong Zhang   PetscErrorCode ierr;
31972121bac1SHong Zhang 
31982121bac1SHong Zhang   PetscFunctionBegin;
31992121bac1SHong Zhang   if (scall == MAT_INITIAL_MATRIX){
32002121bac1SHong Zhang     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
32012121bac1SHong Zhang     ierr = MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,fill,C);CHKERRQ(ierr);
32022121bac1SHong Zhang     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
32032121bac1SHong Zhang   }
32042121bac1SHong Zhang   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
32052121bac1SHong Zhang   ierr = MatPtAPNumeric_SeqAIJ_SeqMAIJ(A,P,*C);CHKERRQ(ierr);
32062121bac1SHong Zhang   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
32072121bac1SHong Zhang   PetscFunctionReturn(0);
32082121bac1SHong Zhang }
32092121bac1SHong Zhang 
321095ddefa2SHong Zhang #undef __FUNCT__
321195ddefa2SHong Zhang #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIMAIJ"
321215221df2SJed Brown PETSC_EXTERN_C PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
321395ddefa2SHong Zhang {
321495ddefa2SHong Zhang   PetscErrorCode    ierr;
321595ddefa2SHong Zhang 
321695ddefa2SHong Zhang   PetscFunctionBegin;
321795ddefa2SHong Zhang   /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */
321895ddefa2SHong Zhang   ierr = MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);CHKERRQ(ierr);
321995ddefa2SHong Zhang   ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);CHKERRQ(ierr);
322095ddefa2SHong Zhang   PetscFunctionReturn(0);
322195ddefa2SHong Zhang }
322295ddefa2SHong Zhang 
322395ddefa2SHong Zhang #undef __FUNCT__
322495ddefa2SHong Zhang #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIMAIJ"
322515221df2SJed Brown PETSC_EXTERN_C PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
322695ddefa2SHong Zhang {
322795ddefa2SHong Zhang   PetscFunctionBegin;
3228e32f2f54SBarry Smith   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
32297ba1a0bfSKris Buschelman   PetscFunctionReturn(0);
32307ba1a0bfSKris Buschelman }
32317ba1a0bfSKris Buschelman 
32327ba1a0bfSKris Buschelman #undef __FUNCT__
32339e03dfbbSHong Zhang #define __FUNCT__ "MatPtAP_MPIAIJ_MPIMAIJ"
32349e03dfbbSHong Zhang PETSC_EXTERN_C PetscErrorCode MatPtAP_MPIAIJ_MPIMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
32359e03dfbbSHong Zhang {
32369e03dfbbSHong Zhang   PetscErrorCode ierr;
32379e03dfbbSHong Zhang 
32389e03dfbbSHong Zhang   PetscFunctionBegin;
32399e03dfbbSHong Zhang   if (scall == MAT_INITIAL_MATRIX){
32409e03dfbbSHong Zhang     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
32419e03dfbbSHong Zhang     ierr = MatPtAPSymbolic_MPIAIJ_MPIMAIJ(A,P,fill,C);CHKERRQ(ierr);
32429e03dfbbSHong Zhang     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
32439e03dfbbSHong Zhang   }
32449e03dfbbSHong Zhang   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
32459e03dfbbSHong Zhang   ierr = ((*C)->ops->ptapnumeric)(A,P,*C);CHKERRQ(ierr);
32469e03dfbbSHong Zhang   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
32479e03dfbbSHong Zhang   PetscFunctionReturn(0);
32489e03dfbbSHong Zhang }
32499e03dfbbSHong Zhang 
32509e03dfbbSHong Zhang #undef __FUNCT__
32510fd73130SBarry Smith #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ"
325215221df2SJed Brown PETSC_EXTERN_C PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
32539c4fc4c7SBarry Smith {
32549c4fc4c7SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
3255ceb03754SKris Buschelman   Mat               a = b->AIJ,B;
32569c4fc4c7SBarry Smith   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)a->data;
32579c4fc4c7SBarry Smith   PetscErrorCode    ierr;
32580fd73130SBarry Smith   PetscInt          m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
3259ba8c8a56SBarry Smith   PetscInt          *cols;
3260ba8c8a56SBarry Smith   PetscScalar       *vals;
32619c4fc4c7SBarry Smith 
32629c4fc4c7SBarry Smith   PetscFunctionBegin;
32639c4fc4c7SBarry Smith   ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr);
32647c307921SBarry Smith   ierr = PetscMalloc(dof*m*sizeof(PetscInt),&ilen);CHKERRQ(ierr);
32659c4fc4c7SBarry Smith   for (i=0; i<m; i++) {
32669c4fc4c7SBarry Smith     nmax = PetscMax(nmax,aij->ilen[i]);
32670fd73130SBarry Smith     for (j=0; j<dof; j++) {
32680fd73130SBarry Smith       ilen[dof*i+j] = aij->ilen[i];
32699c4fc4c7SBarry Smith     }
32709c4fc4c7SBarry Smith   }
3271ceb03754SKris Buschelman   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr);
32729c4fc4c7SBarry Smith   ierr = PetscFree(ilen);CHKERRQ(ierr);
32739c4fc4c7SBarry Smith   ierr = PetscMalloc(nmax*sizeof(PetscInt),&icols);CHKERRQ(ierr);
32749c4fc4c7SBarry Smith   ii   = 0;
32759c4fc4c7SBarry Smith   for (i=0; i<m; i++) {
3276ba8c8a56SBarry Smith     ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
32770fd73130SBarry Smith     for (j=0; j<dof; j++) {
32789c4fc4c7SBarry Smith       for (k=0; k<ncols; k++) {
32790fd73130SBarry Smith         icols[k] = dof*cols[k]+j;
32809c4fc4c7SBarry Smith       }
3281ceb03754SKris Buschelman       ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
32829c4fc4c7SBarry Smith       ii++;
32839c4fc4c7SBarry Smith     }
3284ba8c8a56SBarry Smith     ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
32859c4fc4c7SBarry Smith   }
32869c4fc4c7SBarry Smith   ierr = PetscFree(icols);CHKERRQ(ierr);
3287ceb03754SKris Buschelman   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3288ceb03754SKris Buschelman   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3289ceb03754SKris Buschelman 
3290ceb03754SKris Buschelman   if (reuse == MAT_REUSE_MATRIX) {
32918ab5b326SKris Buschelman     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
3292c3d102feSKris Buschelman   } else {
3293ceb03754SKris Buschelman     *newmat = B;
3294c3d102feSKris Buschelman   }
32959c4fc4c7SBarry Smith   PetscFunctionReturn(0);
32969c4fc4c7SBarry Smith }
32979c4fc4c7SBarry Smith 
3298c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h>
3299be1d678aSKris Buschelman 
33000fd73130SBarry Smith #undef __FUNCT__
33010fd73130SBarry Smith #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ"
330215221df2SJed Brown PETSC_EXTERN_C PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
33030fd73130SBarry Smith {
33040fd73130SBarry Smith   Mat_MPIMAIJ       *maij = (Mat_MPIMAIJ*)A->data;
3305ceb03754SKris Buschelman   Mat               MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
33060fd73130SBarry Smith   Mat               MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
33070fd73130SBarry Smith   Mat_SeqAIJ        *AIJ = (Mat_SeqAIJ*) MatAIJ->data;
33080fd73130SBarry Smith   Mat_SeqAIJ        *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data;
33090fd73130SBarry Smith   Mat_MPIAIJ        *mpiaij = (Mat_MPIAIJ*) maij->A->data;
3310910ba992SMatthew Knepley   PetscInt          dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0;
3311910ba992SMatthew Knepley   PetscInt          *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL;
33120fd73130SBarry Smith   PetscInt          rstart,cstart,*garray,ii,k;
33130fd73130SBarry Smith   PetscErrorCode    ierr;
33140fd73130SBarry Smith   PetscScalar       *vals,*ovals;
33150fd73130SBarry Smith 
33160fd73130SBarry Smith   PetscFunctionBegin;
3317d0f46423SBarry Smith   ierr = PetscMalloc2(A->rmap->n,PetscInt,&dnz,A->rmap->n,PetscInt,&onz);CHKERRQ(ierr);
3318d0f46423SBarry Smith   for (i=0; i<A->rmap->n/dof; i++) {
33190fd73130SBarry Smith     nmax  = PetscMax(nmax,AIJ->ilen[i]);
33200fd73130SBarry Smith     onmax = PetscMax(onmax,OAIJ->ilen[i]);
33210fd73130SBarry Smith     for (j=0; j<dof; j++) {
33220fd73130SBarry Smith       dnz[dof*i+j] = AIJ->ilen[i];
33230fd73130SBarry Smith       onz[dof*i+j] = OAIJ->ilen[i];
33240fd73130SBarry Smith     }
33250fd73130SBarry Smith   }
332669b1f4b7SBarry Smith   ierr = MatCreateAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);CHKERRQ(ierr);
33270fd73130SBarry Smith   ierr = PetscFree2(dnz,onz);CHKERRQ(ierr);
33280fd73130SBarry Smith 
33297a1a7badSBarry Smith   ierr   = PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);CHKERRQ(ierr);
3330d0f46423SBarry Smith   rstart = dof*maij->A->rmap->rstart;
3331d0f46423SBarry Smith   cstart = dof*maij->A->cmap->rstart;
33320fd73130SBarry Smith   garray = mpiaij->garray;
33330fd73130SBarry Smith 
33340fd73130SBarry Smith   ii = rstart;
3335d0f46423SBarry Smith   for (i=0; i<A->rmap->n/dof; i++) {
33360fd73130SBarry Smith     ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
33370fd73130SBarry Smith     ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
33380fd73130SBarry Smith     for (j=0; j<dof; j++) {
33390fd73130SBarry Smith       for (k=0; k<ncols; k++) {
33400fd73130SBarry Smith         icols[k] = cstart + dof*cols[k]+j;
33410fd73130SBarry Smith       }
33420fd73130SBarry Smith       for (k=0; k<oncols; k++) {
33430fd73130SBarry Smith         oicols[k] = dof*garray[ocols[k]]+j;
33440fd73130SBarry Smith       }
3345ceb03754SKris Buschelman       ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
3346ceb03754SKris Buschelman       ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr);
33470fd73130SBarry Smith       ii++;
33480fd73130SBarry Smith     }
33490fd73130SBarry Smith     ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
33500fd73130SBarry Smith     ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
33510fd73130SBarry Smith   }
33520fd73130SBarry Smith   ierr = PetscFree2(icols,oicols);CHKERRQ(ierr);
33530fd73130SBarry Smith 
3354ceb03754SKris Buschelman   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3355ceb03754SKris Buschelman   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3356ceb03754SKris Buschelman 
3357ceb03754SKris Buschelman   if (reuse == MAT_REUSE_MATRIX) {
33587adad957SLisandro Dalcin     PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
33597adad957SLisandro Dalcin     ((PetscObject)A)->refct = 1;
33608ab5b326SKris Buschelman     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
33617adad957SLisandro Dalcin     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3362c3d102feSKris Buschelman   } else {
3363ceb03754SKris Buschelman     *newmat = B;
3364c3d102feSKris Buschelman   }
33650fd73130SBarry Smith   PetscFunctionReturn(0);
33660fd73130SBarry Smith }
33670fd73130SBarry Smith 
33689e516c8fSBarry Smith #undef __FUNCT__
33699e516c8fSBarry Smith #define __FUNCT__ "MatGetSubMatrix_MAIJ"
33709e516c8fSBarry Smith PetscErrorCode  MatGetSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
33719e516c8fSBarry Smith {
33729e516c8fSBarry Smith   PetscErrorCode ierr;
33739e516c8fSBarry Smith   Mat            A;
33749e516c8fSBarry Smith 
33759e516c8fSBarry Smith   PetscFunctionBegin;
33769e516c8fSBarry Smith   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
33779e516c8fSBarry Smith   ierr = MatGetSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
33789e516c8fSBarry Smith   ierr = MatDestroy(&A);CHKERRQ(ierr);
33799e516c8fSBarry Smith   PetscFunctionReturn(0);
33809e516c8fSBarry Smith }
33810fd73130SBarry Smith 
3382bcc973b7SBarry Smith /* ---------------------------------------------------------------------------------- */
3383ff585edeSJed Brown #undef __FUNCT__
3384ff585edeSJed Brown #define __FUNCT__ "MatCreateMAIJ"
3385ff585edeSJed Brown /*@C
33860bad9183SKris Buschelman   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
33870bad9183SKris Buschelman   operations for multicomponent problems.  It interpolates each component the same
33880bad9183SKris Buschelman   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
33890bad9183SKris Buschelman   and MATMPIAIJ for distributed matrices.
33900bad9183SKris Buschelman 
3391ff585edeSJed Brown   Collective
3392ff585edeSJed Brown 
3393ff585edeSJed Brown   Input Parameters:
3394ff585edeSJed Brown + A - the AIJ matrix describing the action on blocks
3395ff585edeSJed Brown - dof - the block size (number of components per node)
3396ff585edeSJed Brown 
3397ff585edeSJed Brown   Output Parameter:
3398ff585edeSJed Brown . maij - the new MAIJ matrix
3399ff585edeSJed Brown 
34000bad9183SKris Buschelman   Operations provided:
34010fd73130SBarry Smith + MatMult
34020bad9183SKris Buschelman . MatMultTranspose
34030bad9183SKris Buschelman . MatMultAdd
34040bad9183SKris Buschelman . MatMultTransposeAdd
34050fd73130SBarry Smith - MatView
34060bad9183SKris Buschelman 
34070bad9183SKris Buschelman   Level: advanced
34080bad9183SKris Buschelman 
3409b409243cSBarry Smith .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ
3410ff585edeSJed Brown @*/
34117087cfbeSBarry Smith PetscErrorCode  MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
341282b1193eSBarry Smith {
3413dfbe8321SBarry Smith   PetscErrorCode ierr;
3414b24ad042SBarry Smith   PetscMPIInt    size;
3415b24ad042SBarry Smith   PetscInt       n;
341682b1193eSBarry Smith   Mat            B;
341782b1193eSBarry Smith 
341882b1193eSBarry Smith   PetscFunctionBegin;
3419d72c5c08SBarry Smith   ierr   = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3420d72c5c08SBarry Smith 
3421d72c5c08SBarry Smith   if (dof == 1) {
3422d72c5c08SBarry Smith     *maij = A;
3423d72c5c08SBarry Smith   } else {
34247adad957SLisandro Dalcin     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
3425d0f46423SBarry Smith     ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr);
3426bccba955SJed Brown     ierr = PetscLayoutSetBlockSize(B->rmap,dof);CHKERRQ(ierr);
3427bccba955SJed Brown     ierr = PetscLayoutSetBlockSize(B->cmap,dof);CHKERRQ(ierr);
3428bccba955SJed Brown     ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3429bccba955SJed Brown     ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3430cd3bbe55SBarry Smith     B->assembled    = PETSC_TRUE;
3431d72c5c08SBarry Smith 
34327adad957SLisandro Dalcin     ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
3433f4a53059SBarry Smith     if (size == 1) {
3434feb9fe23SJed Brown       Mat_SeqMAIJ    *b;
3435feb9fe23SJed Brown 
3436b9b97703SBarry Smith       ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr);
3437356c569eSBarry Smith       B->ops->setup   = PETSC_NULL;
34384cb9d9b8SBarry Smith       B->ops->destroy = MatDestroy_SeqMAIJ;
34390fd73130SBarry Smith       B->ops->view    = MatView_SeqMAIJ;
3440feb9fe23SJed Brown       b      = (Mat_SeqMAIJ*)B->data;
3441b9b97703SBarry Smith       b->dof = dof;
34424cb9d9b8SBarry Smith       b->AIJ = A;
3443d72c5c08SBarry Smith       if (dof == 2) {
3444cd3bbe55SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_2;
3445cd3bbe55SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
3446cd3bbe55SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
3447cd3bbe55SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3448bcc973b7SBarry Smith       } else if (dof == 3) {
3449bcc973b7SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_3;
3450bcc973b7SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
3451bcc973b7SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
3452bcc973b7SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3453bcc973b7SBarry Smith       } else if (dof == 4) {
3454bcc973b7SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_4;
3455bcc973b7SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
3456bcc973b7SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
3457bcc973b7SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3458f9fae5adSBarry Smith       } else if (dof == 5) {
3459f9fae5adSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_5;
3460f9fae5adSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
3461f9fae5adSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
3462f9fae5adSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
34636cd98798SBarry Smith       } else if (dof == 6) {
34646cd98798SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_6;
34656cd98798SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
34666cd98798SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
34676cd98798SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3468ed8eea03SBarry Smith       } else if (dof == 7) {
3469ed8eea03SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_7;
3470ed8eea03SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
3471ed8eea03SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
3472ed8eea03SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
347366ed3db0SBarry Smith       } else if (dof == 8) {
347466ed3db0SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_8;
347566ed3db0SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
347666ed3db0SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
347766ed3db0SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
34782b692628SMatthew Knepley       } else if (dof == 9) {
34792b692628SMatthew Knepley         B->ops->mult             = MatMult_SeqMAIJ_9;
34802b692628SMatthew Knepley         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
34812b692628SMatthew Knepley         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
34822b692628SMatthew Knepley         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3483b9cda22cSBarry Smith       } else if (dof == 10) {
3484b9cda22cSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_10;
3485b9cda22cSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
3486b9cda22cSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
3487b9cda22cSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3488dbdd7285SBarry Smith       } else if (dof == 11) {
3489dbdd7285SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_11;
3490dbdd7285SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
3491dbdd7285SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
3492dbdd7285SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
34932f7816d4SBarry Smith       } else if (dof == 16) {
34942f7816d4SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_16;
34952f7816d4SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
34962f7816d4SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
34972f7816d4SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3498ed1c418cSBarry Smith       } else if (dof == 18) {
3499ed1c418cSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_18;
3500ed1c418cSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
3501ed1c418cSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
3502ed1c418cSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
350382b1193eSBarry Smith       } else {
35046861f193SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_N;
35056861f193SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
35066861f193SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
35076861f193SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
350882b1193eSBarry Smith       }
35099c4fc4c7SBarry Smith       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr);
35102121bac1SHong Zhang       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatPtAP_seqaij_seqmaij_C","MatPtAP_SeqAIJ_SeqMAIJ",MatPtAP_SeqAIJ_SeqMAIJ);CHKERRQ(ierr);
3511f4a53059SBarry Smith     } else {
3512f4a53059SBarry Smith       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ *)A->data;
3513feb9fe23SJed Brown       Mat_MPIMAIJ *b;
3514f4a53059SBarry Smith       IS          from,to;
3515f4a53059SBarry Smith       Vec         gvec;
3516f4a53059SBarry Smith 
3517b9b97703SBarry Smith       ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr);
3518356c569eSBarry Smith       B->ops->setup   = PETSC_NULL;
3519d72c5c08SBarry Smith       B->ops->destroy = MatDestroy_MPIMAIJ;
35200fd73130SBarry Smith       B->ops->view    = MatView_MPIMAIJ;
3521b9b97703SBarry Smith       b      = (Mat_MPIMAIJ*)B->data;
3522b9b97703SBarry Smith       b->dof = dof;
3523b9b97703SBarry Smith       b->A   = A;
35244cb9d9b8SBarry Smith       ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr);
35254cb9d9b8SBarry Smith       ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr);
35264cb9d9b8SBarry Smith 
3527f4a53059SBarry Smith       ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
3528a34bdc0bSBarry Smith       ierr = VecCreate(PETSC_COMM_SELF,&b->w);CHKERRQ(ierr);
3529a34bdc0bSBarry Smith       ierr = VecSetSizes(b->w,n*dof,n*dof);CHKERRQ(ierr);
353013288a74SBarry Smith       ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr);
3531a34bdc0bSBarry Smith       ierr = VecSetType(b->w,VECSEQ);CHKERRQ(ierr);
3532f4a53059SBarry Smith 
3533f4a53059SBarry Smith       /* create two temporary Index sets for build scatter gather */
3534deff0451SBarry Smith       ierr = ISCreateBlock(((PetscObject)A)->comm,dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
3535f4a53059SBarry Smith       ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);
3536f4a53059SBarry Smith 
3537f4a53059SBarry Smith       /* create temporary global vector to generate scatter context */
3538778a2246SBarry Smith       ierr = VecCreateMPIWithArray(((PetscObject)A)->comm,dof,dof*A->cmap->n,dof*A->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr);
3539f4a53059SBarry Smith 
3540f4a53059SBarry Smith       /* generate the scatter context */
3541f4a53059SBarry Smith       ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
3542f4a53059SBarry Smith 
35436bf464f9SBarry Smith       ierr = ISDestroy(&from);CHKERRQ(ierr);
35446bf464f9SBarry Smith       ierr = ISDestroy(&to);CHKERRQ(ierr);
35456bf464f9SBarry Smith       ierr = VecDestroy(&gvec);CHKERRQ(ierr);
3546f4a53059SBarry Smith 
3547f4a53059SBarry Smith       B->ops->mult                = MatMult_MPIMAIJ_dof;
35484cb9d9b8SBarry Smith       B->ops->multtranspose       = MatMultTranspose_MPIMAIJ_dof;
35494cb9d9b8SBarry Smith       B->ops->multadd             = MatMultAdd_MPIMAIJ_dof;
35504cb9d9b8SBarry Smith       B->ops->multtransposeadd    = MatMultTransposeAdd_MPIMAIJ_dof;
35510fd73130SBarry Smith       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr);
35529e03dfbbSHong Zhang       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatPtAP_mpiaij_mpimaij_C","MatPtAP_MPIAIJ_MPIMAIJ",MatPtAP_MPIAIJ_MPIMAIJ);CHKERRQ(ierr);
3553f4a53059SBarry Smith     }
35549e516c8fSBarry Smith     B->ops->getsubmatrix        = MatGetSubMatrix_MAIJ;
35554994cf47SJed Brown     ierr = MatSetUp(B);CHKERRQ(ierr);
3556cd3bbe55SBarry Smith     *maij = B;
3557*0d2bece7SBarry Smith     ierr = MatViewFromOptions(B,"-mat_view");CHKERRQ(ierr);
3558d72c5c08SBarry Smith   }
355982b1193eSBarry Smith   PetscFunctionReturn(0);
356082b1193eSBarry Smith }
3561