xref: /petsc/src/mat/impls/maij/maij.c (revision 15221df2c238ff242abe25b8bb1de3a593c7f7d5)
1be1d678aSKris Buschelman 
282b1193eSBarry Smith /*
3cd3bbe55SBarry Smith     Defines the basic matrix operations for the MAIJ  matrix storage format.
4cd3bbe55SBarry Smith   This format is used for restriction and interpolation operations for
5cd3bbe55SBarry Smith   multicomponent problems. It interpolates each component the same way
6cd3bbe55SBarry Smith   independently.
7cd3bbe55SBarry Smith 
8cd3bbe55SBarry Smith      We provide:
9cd3bbe55SBarry Smith          MatMult()
10cd3bbe55SBarry Smith          MatMultTranspose()
11cd3bbe55SBarry Smith          MatMultTransposeAdd()
12cd3bbe55SBarry Smith          MatMultAdd()
13cd3bbe55SBarry Smith           and
14cd3bbe55SBarry Smith          MatCreateMAIJ(Mat,dof,Mat*)
15f4a53059SBarry Smith 
16f4a53059SBarry Smith      This single directory handles both the sequential and parallel codes
1782b1193eSBarry Smith */
1882b1193eSBarry Smith 
19c6db04a5SJed Brown #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/
20c6db04a5SJed Brown #include <../src/mat/utils/freespace.h>
21b45d2f2cSJed Brown #include <petsc-private/vecimpl.h>
2282b1193eSBarry Smith 
234a2ae208SSatish Balay #undef __FUNCT__
244a2ae208SSatish Balay #define __FUNCT__ "MatMAIJGetAIJ"
25ff585edeSJed Brown /*@C
26ff585edeSJed Brown    MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix
27ff585edeSJed Brown 
28ff585edeSJed Brown    Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel
29ff585edeSJed Brown 
30ff585edeSJed Brown    Input Parameter:
31ff585edeSJed Brown .  A - the MAIJ matrix
32ff585edeSJed Brown 
33ff585edeSJed Brown    Output Parameter:
34ff585edeSJed Brown .  B - the AIJ matrix
35ff585edeSJed Brown 
36ff585edeSJed Brown    Level: advanced
37ff585edeSJed Brown 
38ff585edeSJed Brown    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
39ff585edeSJed Brown 
40ff585edeSJed Brown .seealso: MatCreateMAIJ()
41ff585edeSJed Brown @*/
427087cfbeSBarry Smith PetscErrorCode  MatMAIJGetAIJ(Mat A,Mat *B)
43b9b97703SBarry Smith {
44dfbe8321SBarry Smith   PetscErrorCode ierr;
45ace3abfcSBarry Smith   PetscBool      ismpimaij,isseqmaij;
46b9b97703SBarry Smith 
47b9b97703SBarry Smith   PetscFunctionBegin;
48251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr);
49251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr);
50b9b97703SBarry Smith   if (ismpimaij) {
51b9b97703SBarry Smith     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
52b9b97703SBarry Smith 
53b9b97703SBarry Smith     *B = b->A;
54b9b97703SBarry Smith   } else if (isseqmaij) {
55b9b97703SBarry Smith     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
56b9b97703SBarry Smith 
57b9b97703SBarry Smith     *B = b->AIJ;
58b9b97703SBarry Smith   } else {
59b9b97703SBarry Smith     *B = A;
60b9b97703SBarry Smith   }
61b9b97703SBarry Smith   PetscFunctionReturn(0);
62b9b97703SBarry Smith }
63b9b97703SBarry Smith 
644a2ae208SSatish Balay #undef __FUNCT__
654a2ae208SSatish Balay #define __FUNCT__ "MatMAIJRedimension"
66ff585edeSJed Brown /*@C
67ff585edeSJed Brown    MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size
68ff585edeSJed Brown 
693f9fe445SBarry Smith    Logically Collective
70ff585edeSJed Brown 
71ff585edeSJed Brown    Input Parameter:
72ff585edeSJed Brown +  A - the MAIJ matrix
73ff585edeSJed Brown -  dof - the block size for the new matrix
74ff585edeSJed Brown 
75ff585edeSJed Brown    Output Parameter:
76ff585edeSJed Brown .  B - the new MAIJ matrix
77ff585edeSJed Brown 
78ff585edeSJed Brown    Level: advanced
79ff585edeSJed Brown 
80ff585edeSJed Brown .seealso: MatCreateMAIJ()
81ff585edeSJed Brown @*/
827087cfbeSBarry Smith PetscErrorCode  MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
83b9b97703SBarry Smith {
84dfbe8321SBarry Smith   PetscErrorCode ierr;
853b98c0a2SBarry Smith   Mat            Aij = PETSC_NULL;
86b9b97703SBarry Smith 
87b9b97703SBarry Smith   PetscFunctionBegin;
88c5eb9154SBarry Smith   PetscValidLogicalCollectiveInt(A,dof,2);
89b9b97703SBarry Smith   ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr);
90b9b97703SBarry Smith   ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr);
91b9b97703SBarry Smith   PetscFunctionReturn(0);
92b9b97703SBarry Smith }
93b9b97703SBarry Smith 
944a2ae208SSatish Balay #undef __FUNCT__
954a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_SeqMAIJ"
96dfbe8321SBarry Smith PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
9782b1193eSBarry Smith {
98dfbe8321SBarry Smith   PetscErrorCode ierr;
994cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
10082b1193eSBarry Smith 
10182b1193eSBarry Smith   PetscFunctionBegin;
1026bf464f9SBarry Smith   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
103bf0cc555SLisandro Dalcin   ierr = PetscFree(A->data);CHKERRQ(ierr);
1042121bac1SHong Zhang   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatConvert_seqmaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1052121bac1SHong Zhang   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatPtAP_seqaij_seqmaij_C","",PETSC_NULL);CHKERRQ(ierr);
1064cb9d9b8SBarry Smith   PetscFunctionReturn(0);
1074cb9d9b8SBarry Smith }
1084cb9d9b8SBarry Smith 
1094a2ae208SSatish Balay #undef __FUNCT__
110356c569eSBarry Smith #define __FUNCT__ "MatSetUp_MAIJ"
111356c569eSBarry Smith PetscErrorCode MatSetUp_MAIJ(Mat A)
112356c569eSBarry Smith {
113356c569eSBarry Smith   PetscFunctionBegin;
114356c569eSBarry Smith   SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices");
115356c569eSBarry Smith   PetscFunctionReturn(0);
116356c569eSBarry Smith }
117356c569eSBarry Smith 
118356c569eSBarry Smith #undef __FUNCT__
1190fd73130SBarry Smith #define __FUNCT__ "MatView_SeqMAIJ"
1200fd73130SBarry Smith PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
1210fd73130SBarry Smith {
1220fd73130SBarry Smith   PetscErrorCode ierr;
1230fd73130SBarry Smith   Mat            B;
1240fd73130SBarry Smith 
1250fd73130SBarry Smith   PetscFunctionBegin;
126ceb03754SKris Buschelman   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
1270fd73130SBarry Smith   ierr = MatView(B,viewer);CHKERRQ(ierr);
1286bf464f9SBarry Smith   ierr = MatDestroy(&B);CHKERRQ(ierr);
1290fd73130SBarry Smith   PetscFunctionReturn(0);
1300fd73130SBarry Smith }
1310fd73130SBarry Smith 
1320fd73130SBarry Smith #undef __FUNCT__
1330fd73130SBarry Smith #define __FUNCT__ "MatView_MPIMAIJ"
1340fd73130SBarry Smith PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
1350fd73130SBarry Smith {
1360fd73130SBarry Smith   PetscErrorCode ierr;
1370fd73130SBarry Smith   Mat            B;
1380fd73130SBarry Smith 
1390fd73130SBarry Smith   PetscFunctionBegin;
140ceb03754SKris Buschelman   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
1410fd73130SBarry Smith   ierr = MatView(B,viewer);CHKERRQ(ierr);
1426bf464f9SBarry Smith   ierr = MatDestroy(&B);CHKERRQ(ierr);
1430fd73130SBarry Smith   PetscFunctionReturn(0);
1440fd73130SBarry Smith }
1450fd73130SBarry Smith 
1460fd73130SBarry Smith #undef __FUNCT__
1474a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_MPIMAIJ"
148dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
1494cb9d9b8SBarry Smith {
150dfbe8321SBarry Smith   PetscErrorCode ierr;
1514cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
1524cb9d9b8SBarry Smith 
1534cb9d9b8SBarry Smith   PetscFunctionBegin;
1546bf464f9SBarry Smith   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
1556bf464f9SBarry Smith   ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
1566bf464f9SBarry Smith   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
1576bf464f9SBarry Smith   ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
1586bf464f9SBarry Smith   ierr = VecDestroy(&b->w);CHKERRQ(ierr);
159bf0cc555SLisandro Dalcin   ierr = PetscFree(A->data);CHKERRQ(ierr);
160dbd8c25aSHong Zhang   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
161b9b97703SBarry Smith   PetscFunctionReturn(0);
162b9b97703SBarry Smith }
163b9b97703SBarry Smith 
1640bad9183SKris Buschelman /*MC
165fafad747SKris Buschelman   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
1660bad9183SKris Buschelman   multicomponent problems, interpolating or restricting each component the same way independently.
1670bad9183SKris Buschelman   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
1680bad9183SKris Buschelman 
1690bad9183SKris Buschelman   Operations provided:
1700bad9183SKris Buschelman . MatMult
1710bad9183SKris Buschelman . MatMultTranspose
1720bad9183SKris Buschelman . MatMultAdd
1730bad9183SKris Buschelman . MatMultTransposeAdd
1740bad9183SKris Buschelman 
1750bad9183SKris Buschelman   Level: advanced
1760bad9183SKris Buschelman 
177b409243cSBarry Smith .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ()
1780bad9183SKris Buschelman M*/
1790bad9183SKris Buschelman 
1804a2ae208SSatish Balay #undef __FUNCT__
1814a2ae208SSatish Balay #define __FUNCT__ "MatCreate_MAIJ"
182*15221df2SJed Brown PETSC_EXTERN_C PetscErrorCode MatCreate_MAIJ(Mat A)
18382b1193eSBarry Smith {
184dfbe8321SBarry Smith   PetscErrorCode ierr;
1854cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b;
18664b52464SHong Zhang   PetscMPIInt    size;
18782b1193eSBarry Smith 
18882b1193eSBarry Smith   PetscFunctionBegin;
18938f2d2fdSLisandro Dalcin   ierr     = PetscNewLog(A,Mat_MPIMAIJ,&b);CHKERRQ(ierr);
190b0a32e0cSBarry Smith   A->data  = (void*)b;
191cd3bbe55SBarry Smith   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
192356c569eSBarry Smith   A->ops->setup = MatSetUp_MAIJ;
193f4a53059SBarry Smith 
194cd3bbe55SBarry Smith   b->AIJ  = 0;
195cd3bbe55SBarry Smith   b->dof  = 0;
196f4a53059SBarry Smith   b->OAIJ = 0;
197f4a53059SBarry Smith   b->ctx  = 0;
198f4a53059SBarry Smith   b->w    = 0;
1997adad957SLisandro Dalcin   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
20064b52464SHong Zhang   if (size == 1){
20164b52464SHong Zhang     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr);
20264b52464SHong Zhang   } else {
20364b52464SHong Zhang     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr);
20464b52464SHong Zhang   }
20582b1193eSBarry Smith   PetscFunctionReturn(0);
20682b1193eSBarry Smith }
20782b1193eSBarry Smith 
208cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/
2094a2ae208SSatish Balay #undef __FUNCT__
210b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_2"
211dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
21282b1193eSBarry Smith {
2134cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
214bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
215d2840e09SBarry Smith   const PetscScalar *x,*v;
216d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2;
217dfbe8321SBarry Smith   PetscErrorCode    ierr;
218d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
219d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
22082b1193eSBarry Smith 
221bcc973b7SBarry Smith   PetscFunctionBegin;
2223649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2231ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
224bcc973b7SBarry Smith   idx  = a->j;
225bcc973b7SBarry Smith   v    = a->a;
226bcc973b7SBarry Smith   ii   = a->i;
227bcc973b7SBarry Smith 
228bcc973b7SBarry Smith   for (i=0; i<m; i++) {
229bcc973b7SBarry Smith     jrow = ii[i];
230bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
231bcc973b7SBarry Smith     sum1  = 0.0;
232bcc973b7SBarry Smith     sum2  = 0.0;
233b3c51c66SMatthew Knepley     nonzerorow += (n>0);
234bcc973b7SBarry Smith     for (j=0; j<n; j++) {
235bcc973b7SBarry Smith       sum1 += v[jrow]*x[2*idx[jrow]];
236bcc973b7SBarry Smith       sum2 += v[jrow]*x[2*idx[jrow]+1];
237bcc973b7SBarry Smith       jrow++;
238bcc973b7SBarry Smith      }
239bcc973b7SBarry Smith     y[2*i]   = sum1;
240bcc973b7SBarry Smith     y[2*i+1] = sum2;
241bcc973b7SBarry Smith   }
242bcc973b7SBarry Smith 
243dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr);
2443649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2451ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
24682b1193eSBarry Smith   PetscFunctionReturn(0);
24782b1193eSBarry Smith }
248bcc973b7SBarry Smith 
2494a2ae208SSatish Balay #undef __FUNCT__
250b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2"
251dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
25282b1193eSBarry Smith {
2534cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
254bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
255d2840e09SBarry Smith   const PetscScalar *x,*v;
256d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2;
257dfbe8321SBarry Smith   PetscErrorCode    ierr;
258d2840e09SBarry Smith   PetscInt          n,i;
259d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
26082b1193eSBarry Smith 
261bcc973b7SBarry Smith   PetscFunctionBegin;
262d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2633649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2641ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
265bfec09a0SHong Zhang 
266bcc973b7SBarry Smith   for (i=0; i<m; i++) {
267bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
268bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
269bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
270bcc973b7SBarry Smith     alpha1 = x[2*i];
271bcc973b7SBarry Smith     alpha2 = x[2*i+1];
272bcc973b7SBarry Smith     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
273bcc973b7SBarry Smith   }
274dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
2753649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2761ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
27782b1193eSBarry Smith   PetscFunctionReturn(0);
27882b1193eSBarry Smith }
279bcc973b7SBarry Smith 
2804a2ae208SSatish Balay #undef __FUNCT__
281b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_2"
282dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
28382b1193eSBarry Smith {
2844cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
285bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
286d2840e09SBarry Smith   const PetscScalar *x,*v;
287d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2;
288dfbe8321SBarry Smith   PetscErrorCode    ierr;
289b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
290d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
29182b1193eSBarry Smith 
292bcc973b7SBarry Smith   PetscFunctionBegin;
293f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2943649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2951ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
296bcc973b7SBarry Smith   idx  = a->j;
297bcc973b7SBarry Smith   v    = a->a;
298bcc973b7SBarry Smith   ii   = a->i;
299bcc973b7SBarry Smith 
300bcc973b7SBarry Smith   for (i=0; i<m; i++) {
301bcc973b7SBarry Smith     jrow = ii[i];
302bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
303bcc973b7SBarry Smith     sum1  = 0.0;
304bcc973b7SBarry Smith     sum2  = 0.0;
305bcc973b7SBarry Smith     for (j=0; j<n; j++) {
306bcc973b7SBarry Smith       sum1 += v[jrow]*x[2*idx[jrow]];
307bcc973b7SBarry Smith       sum2 += v[jrow]*x[2*idx[jrow]+1];
308bcc973b7SBarry Smith       jrow++;
309bcc973b7SBarry Smith      }
310bcc973b7SBarry Smith     y[2*i]   += sum1;
311bcc973b7SBarry Smith     y[2*i+1] += sum2;
312bcc973b7SBarry Smith   }
313bcc973b7SBarry Smith 
314dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
3153649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
3161ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
317bcc973b7SBarry Smith   PetscFunctionReturn(0);
31882b1193eSBarry Smith }
3194a2ae208SSatish Balay #undef __FUNCT__
320b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2"
321dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
32282b1193eSBarry Smith {
3234cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
324bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
325d2840e09SBarry Smith   const PetscScalar *x,*v;
326d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2;
327dfbe8321SBarry Smith   PetscErrorCode    ierr;
328d2840e09SBarry Smith   PetscInt          n,i;
329d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
33082b1193eSBarry Smith 
331bcc973b7SBarry Smith   PetscFunctionBegin;
332f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
3333649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
3341ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
335bfec09a0SHong Zhang 
336bcc973b7SBarry Smith   for (i=0; i<m; i++) {
337bfec09a0SHong Zhang     idx   = a->j + a->i[i] ;
338bfec09a0SHong Zhang     v     = a->a + a->i[i] ;
339bcc973b7SBarry Smith     n     = a->i[i+1] - a->i[i];
340bcc973b7SBarry Smith     alpha1 = x[2*i];
341bcc973b7SBarry Smith     alpha2 = x[2*i+1];
342bcc973b7SBarry Smith     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
343bcc973b7SBarry Smith   }
344dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
3453649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
3461ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
347bcc973b7SBarry Smith   PetscFunctionReturn(0);
34882b1193eSBarry Smith }
349cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/
3504a2ae208SSatish Balay #undef __FUNCT__
351b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_3"
352dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
353bcc973b7SBarry Smith {
3544cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
355bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
356d2840e09SBarry Smith   const PetscScalar *x,*v;
357d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3;
358dfbe8321SBarry Smith   PetscErrorCode    ierr;
359d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
360d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
36182b1193eSBarry Smith 
362bcc973b7SBarry Smith   PetscFunctionBegin;
3633649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
3641ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
365bcc973b7SBarry Smith   idx  = a->j;
366bcc973b7SBarry Smith   v    = a->a;
367bcc973b7SBarry Smith   ii   = a->i;
368bcc973b7SBarry Smith 
369bcc973b7SBarry Smith   for (i=0; i<m; i++) {
370bcc973b7SBarry Smith     jrow = ii[i];
371bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
372bcc973b7SBarry Smith     sum1  = 0.0;
373bcc973b7SBarry Smith     sum2  = 0.0;
374bcc973b7SBarry Smith     sum3  = 0.0;
375b3c51c66SMatthew Knepley     nonzerorow += (n>0);
376bcc973b7SBarry Smith     for (j=0; j<n; j++) {
377bcc973b7SBarry Smith       sum1 += v[jrow]*x[3*idx[jrow]];
378bcc973b7SBarry Smith       sum2 += v[jrow]*x[3*idx[jrow]+1];
379bcc973b7SBarry Smith       sum3 += v[jrow]*x[3*idx[jrow]+2];
380bcc973b7SBarry Smith       jrow++;
381bcc973b7SBarry Smith      }
382bcc973b7SBarry Smith     y[3*i]   = sum1;
383bcc973b7SBarry Smith     y[3*i+1] = sum2;
384bcc973b7SBarry Smith     y[3*i+2] = sum3;
385bcc973b7SBarry Smith   }
386bcc973b7SBarry Smith 
387dc0b31edSSatish Balay   ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr);
3883649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
3891ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
390bcc973b7SBarry Smith   PetscFunctionReturn(0);
391bcc973b7SBarry Smith }
392bcc973b7SBarry Smith 
3934a2ae208SSatish Balay #undef __FUNCT__
394b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3"
395dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
396bcc973b7SBarry Smith {
3974cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
398bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
399d2840e09SBarry Smith   const PetscScalar *x,*v;
400d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3;
401dfbe8321SBarry Smith   PetscErrorCode    ierr;
402d2840e09SBarry Smith   PetscInt          n,i;
403d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
404bcc973b7SBarry Smith 
405bcc973b7SBarry Smith   PetscFunctionBegin;
406d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
4073649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
4081ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
409bfec09a0SHong Zhang 
410bcc973b7SBarry Smith   for (i=0; i<m; i++) {
411bfec09a0SHong Zhang     idx    = a->j + a->i[i];
412bfec09a0SHong Zhang     v      = a->a + a->i[i];
413bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
414bcc973b7SBarry Smith     alpha1 = x[3*i];
415bcc973b7SBarry Smith     alpha2 = x[3*i+1];
416bcc973b7SBarry Smith     alpha3 = x[3*i+2];
417bcc973b7SBarry Smith     while (n-->0) {
418bcc973b7SBarry Smith       y[3*(*idx)]   += alpha1*(*v);
419bcc973b7SBarry Smith       y[3*(*idx)+1] += alpha2*(*v);
420bcc973b7SBarry Smith       y[3*(*idx)+2] += alpha3*(*v);
421bcc973b7SBarry Smith       idx++; v++;
422bcc973b7SBarry Smith     }
423bcc973b7SBarry Smith   }
424dc0b31edSSatish Balay   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
4253649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
4261ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
427bcc973b7SBarry Smith   PetscFunctionReturn(0);
428bcc973b7SBarry Smith }
429bcc973b7SBarry Smith 
4304a2ae208SSatish Balay #undef __FUNCT__
431b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_3"
432dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
433bcc973b7SBarry Smith {
4344cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
435bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
436d2840e09SBarry Smith   const PetscScalar *x,*v;
437d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3;
438dfbe8321SBarry Smith   PetscErrorCode    ierr;
439b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
440d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
441bcc973b7SBarry Smith 
442bcc973b7SBarry Smith   PetscFunctionBegin;
443f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
4443649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
4451ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
446bcc973b7SBarry Smith   idx  = a->j;
447bcc973b7SBarry Smith   v    = a->a;
448bcc973b7SBarry Smith   ii   = a->i;
449bcc973b7SBarry Smith 
450bcc973b7SBarry Smith   for (i=0; i<m; i++) {
451bcc973b7SBarry Smith     jrow = ii[i];
452bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
453bcc973b7SBarry Smith     sum1  = 0.0;
454bcc973b7SBarry Smith     sum2  = 0.0;
455bcc973b7SBarry Smith     sum3  = 0.0;
456bcc973b7SBarry Smith     for (j=0; j<n; j++) {
457bcc973b7SBarry Smith       sum1 += v[jrow]*x[3*idx[jrow]];
458bcc973b7SBarry Smith       sum2 += v[jrow]*x[3*idx[jrow]+1];
459bcc973b7SBarry Smith       sum3 += v[jrow]*x[3*idx[jrow]+2];
460bcc973b7SBarry Smith       jrow++;
461bcc973b7SBarry Smith      }
462bcc973b7SBarry Smith     y[3*i]   += sum1;
463bcc973b7SBarry Smith     y[3*i+1] += sum2;
464bcc973b7SBarry Smith     y[3*i+2] += sum3;
465bcc973b7SBarry Smith   }
466bcc973b7SBarry Smith 
467dc0b31edSSatish Balay   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
4683649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
4691ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
470bcc973b7SBarry Smith   PetscFunctionReturn(0);
471bcc973b7SBarry Smith }
4724a2ae208SSatish Balay #undef __FUNCT__
473b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3"
474dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
475bcc973b7SBarry Smith {
4764cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
477bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
478d2840e09SBarry Smith   const PetscScalar *x,*v;
479d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3;
480dfbe8321SBarry Smith   PetscErrorCode    ierr;
481d2840e09SBarry Smith   PetscInt          n,i;
482d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
483bcc973b7SBarry Smith 
484bcc973b7SBarry Smith   PetscFunctionBegin;
485f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
4863649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
4871ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
488bcc973b7SBarry Smith   for (i=0; i<m; i++) {
489bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
490bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
491bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
492bcc973b7SBarry Smith     alpha1 = x[3*i];
493bcc973b7SBarry Smith     alpha2 = x[3*i+1];
494bcc973b7SBarry Smith     alpha3 = x[3*i+2];
495bcc973b7SBarry Smith     while (n-->0) {
496bcc973b7SBarry Smith       y[3*(*idx)]   += alpha1*(*v);
497bcc973b7SBarry Smith       y[3*(*idx)+1] += alpha2*(*v);
498bcc973b7SBarry Smith       y[3*(*idx)+2] += alpha3*(*v);
499bcc973b7SBarry Smith       idx++; v++;
500bcc973b7SBarry Smith     }
501bcc973b7SBarry Smith   }
502dc0b31edSSatish Balay   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
5033649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
5041ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
505bcc973b7SBarry Smith   PetscFunctionReturn(0);
506bcc973b7SBarry Smith }
507bcc973b7SBarry Smith 
508bcc973b7SBarry Smith /* ------------------------------------------------------------------------------*/
5094a2ae208SSatish Balay #undef __FUNCT__
510b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_4"
511dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
512bcc973b7SBarry Smith {
5134cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
514bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
515d2840e09SBarry Smith   const PetscScalar *x,*v;
516d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4;
517dfbe8321SBarry Smith   PetscErrorCode    ierr;
518d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
519d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
520bcc973b7SBarry Smith 
521bcc973b7SBarry Smith   PetscFunctionBegin;
5223649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
5231ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
524bcc973b7SBarry Smith   idx  = a->j;
525bcc973b7SBarry Smith   v    = a->a;
526bcc973b7SBarry Smith   ii   = a->i;
527bcc973b7SBarry Smith 
528bcc973b7SBarry Smith   for (i=0; i<m; i++) {
529bcc973b7SBarry Smith     jrow = ii[i];
530bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
531bcc973b7SBarry Smith     sum1  = 0.0;
532bcc973b7SBarry Smith     sum2  = 0.0;
533bcc973b7SBarry Smith     sum3  = 0.0;
534bcc973b7SBarry Smith     sum4  = 0.0;
535b3c51c66SMatthew Knepley     nonzerorow += (n>0);
536bcc973b7SBarry Smith     for (j=0; j<n; j++) {
537bcc973b7SBarry Smith       sum1 += v[jrow]*x[4*idx[jrow]];
538bcc973b7SBarry Smith       sum2 += v[jrow]*x[4*idx[jrow]+1];
539bcc973b7SBarry Smith       sum3 += v[jrow]*x[4*idx[jrow]+2];
540bcc973b7SBarry Smith       sum4 += v[jrow]*x[4*idx[jrow]+3];
541bcc973b7SBarry Smith       jrow++;
542bcc973b7SBarry Smith      }
543bcc973b7SBarry Smith     y[4*i]   = sum1;
544bcc973b7SBarry Smith     y[4*i+1] = sum2;
545bcc973b7SBarry Smith     y[4*i+2] = sum3;
546bcc973b7SBarry Smith     y[4*i+3] = sum4;
547bcc973b7SBarry Smith   }
548bcc973b7SBarry Smith 
549dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr);
5503649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
5511ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
552bcc973b7SBarry Smith   PetscFunctionReturn(0);
553bcc973b7SBarry Smith }
554bcc973b7SBarry Smith 
5554a2ae208SSatish Balay #undef __FUNCT__
556b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4"
557dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
558bcc973b7SBarry Smith {
5594cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
560bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
561d2840e09SBarry Smith   const PetscScalar *x,*v;
562d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
563dfbe8321SBarry Smith   PetscErrorCode    ierr;
564d2840e09SBarry Smith   PetscInt          n,i;
565d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
566bcc973b7SBarry Smith 
567bcc973b7SBarry Smith   PetscFunctionBegin;
568d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
5693649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
5701ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
571bcc973b7SBarry Smith   for (i=0; i<m; i++) {
572bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
573bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
574bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
575bcc973b7SBarry Smith     alpha1 = x[4*i];
576bcc973b7SBarry Smith     alpha2 = x[4*i+1];
577bcc973b7SBarry Smith     alpha3 = x[4*i+2];
578bcc973b7SBarry Smith     alpha4 = x[4*i+3];
579bcc973b7SBarry Smith     while (n-->0) {
580bcc973b7SBarry Smith       y[4*(*idx)]   += alpha1*(*v);
581bcc973b7SBarry Smith       y[4*(*idx)+1] += alpha2*(*v);
582bcc973b7SBarry Smith       y[4*(*idx)+2] += alpha3*(*v);
583bcc973b7SBarry Smith       y[4*(*idx)+3] += alpha4*(*v);
584bcc973b7SBarry Smith       idx++; v++;
585bcc973b7SBarry Smith     }
586bcc973b7SBarry Smith   }
587dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
5883649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
5891ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
590bcc973b7SBarry Smith   PetscFunctionReturn(0);
591bcc973b7SBarry Smith }
592bcc973b7SBarry Smith 
5934a2ae208SSatish Balay #undef __FUNCT__
594b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_4"
595dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
596bcc973b7SBarry Smith {
5974cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
598f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
599d2840e09SBarry Smith   const PetscScalar *x,*v;
600d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4;
601dfbe8321SBarry Smith   PetscErrorCode    ierr;
602b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
603d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
604f9fae5adSBarry Smith 
605f9fae5adSBarry Smith   PetscFunctionBegin;
606f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
6073649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
6081ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
609f9fae5adSBarry Smith   idx  = a->j;
610f9fae5adSBarry Smith   v    = a->a;
611f9fae5adSBarry Smith   ii   = a->i;
612f9fae5adSBarry Smith 
613f9fae5adSBarry Smith   for (i=0; i<m; i++) {
614f9fae5adSBarry Smith     jrow = ii[i];
615f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
616f9fae5adSBarry Smith     sum1  = 0.0;
617f9fae5adSBarry Smith     sum2  = 0.0;
618f9fae5adSBarry Smith     sum3  = 0.0;
619f9fae5adSBarry Smith     sum4  = 0.0;
620f9fae5adSBarry Smith     for (j=0; j<n; j++) {
621f9fae5adSBarry Smith       sum1 += v[jrow]*x[4*idx[jrow]];
622f9fae5adSBarry Smith       sum2 += v[jrow]*x[4*idx[jrow]+1];
623f9fae5adSBarry Smith       sum3 += v[jrow]*x[4*idx[jrow]+2];
624f9fae5adSBarry Smith       sum4 += v[jrow]*x[4*idx[jrow]+3];
625f9fae5adSBarry Smith       jrow++;
626f9fae5adSBarry Smith      }
627f9fae5adSBarry Smith     y[4*i]   += sum1;
628f9fae5adSBarry Smith     y[4*i+1] += sum2;
629f9fae5adSBarry Smith     y[4*i+2] += sum3;
630f9fae5adSBarry Smith     y[4*i+3] += sum4;
631f9fae5adSBarry Smith   }
632f9fae5adSBarry Smith 
633dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
6343649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
6351ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
636f9fae5adSBarry Smith   PetscFunctionReturn(0);
637bcc973b7SBarry Smith }
6384a2ae208SSatish Balay #undef __FUNCT__
639b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4"
640dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
641bcc973b7SBarry Smith {
6424cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
643f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
644d2840e09SBarry Smith   const PetscScalar *x,*v;
645d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
646dfbe8321SBarry Smith   PetscErrorCode    ierr;
647d2840e09SBarry Smith   PetscInt          n,i;
648d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
649f9fae5adSBarry Smith 
650f9fae5adSBarry Smith   PetscFunctionBegin;
651f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
6523649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
6531ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
654bfec09a0SHong Zhang 
655f9fae5adSBarry Smith   for (i=0; i<m; i++) {
656bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
657bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
658f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
659f9fae5adSBarry Smith     alpha1 = x[4*i];
660f9fae5adSBarry Smith     alpha2 = x[4*i+1];
661f9fae5adSBarry Smith     alpha3 = x[4*i+2];
662f9fae5adSBarry Smith     alpha4 = x[4*i+3];
663f9fae5adSBarry Smith     while (n-->0) {
664f9fae5adSBarry Smith       y[4*(*idx)]   += alpha1*(*v);
665f9fae5adSBarry Smith       y[4*(*idx)+1] += alpha2*(*v);
666f9fae5adSBarry Smith       y[4*(*idx)+2] += alpha3*(*v);
667f9fae5adSBarry Smith       y[4*(*idx)+3] += alpha4*(*v);
668f9fae5adSBarry Smith       idx++; v++;
669f9fae5adSBarry Smith     }
670f9fae5adSBarry Smith   }
671dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
6723649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
6731ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
674f9fae5adSBarry Smith   PetscFunctionReturn(0);
675f9fae5adSBarry Smith }
676f9fae5adSBarry Smith /* ------------------------------------------------------------------------------*/
6776cd98798SBarry Smith 
6784a2ae208SSatish Balay #undef __FUNCT__
679b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_5"
680dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
681f9fae5adSBarry Smith {
6824cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
683f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
684d2840e09SBarry Smith   const PetscScalar *x,*v;
685d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
686dfbe8321SBarry Smith   PetscErrorCode    ierr;
687d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
688d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
689f9fae5adSBarry Smith 
690f9fae5adSBarry Smith   PetscFunctionBegin;
6913649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
6921ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
693f9fae5adSBarry Smith   idx  = a->j;
694f9fae5adSBarry Smith   v    = a->a;
695f9fae5adSBarry Smith   ii   = a->i;
696f9fae5adSBarry Smith 
697f9fae5adSBarry Smith   for (i=0; i<m; i++) {
698f9fae5adSBarry Smith     jrow = ii[i];
699f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
700f9fae5adSBarry Smith     sum1  = 0.0;
701f9fae5adSBarry Smith     sum2  = 0.0;
702f9fae5adSBarry Smith     sum3  = 0.0;
703f9fae5adSBarry Smith     sum4  = 0.0;
704f9fae5adSBarry Smith     sum5  = 0.0;
705b3c51c66SMatthew Knepley     nonzerorow += (n>0);
706f9fae5adSBarry Smith     for (j=0; j<n; j++) {
707f9fae5adSBarry Smith       sum1 += v[jrow]*x[5*idx[jrow]];
708f9fae5adSBarry Smith       sum2 += v[jrow]*x[5*idx[jrow]+1];
709f9fae5adSBarry Smith       sum3 += v[jrow]*x[5*idx[jrow]+2];
710f9fae5adSBarry Smith       sum4 += v[jrow]*x[5*idx[jrow]+3];
711f9fae5adSBarry Smith       sum5 += v[jrow]*x[5*idx[jrow]+4];
712f9fae5adSBarry Smith       jrow++;
713f9fae5adSBarry Smith      }
714f9fae5adSBarry Smith     y[5*i]   = sum1;
715f9fae5adSBarry Smith     y[5*i+1] = sum2;
716f9fae5adSBarry Smith     y[5*i+2] = sum3;
717f9fae5adSBarry Smith     y[5*i+3] = sum4;
718f9fae5adSBarry Smith     y[5*i+4] = sum5;
719f9fae5adSBarry Smith   }
720f9fae5adSBarry Smith 
721dc0b31edSSatish Balay   ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr);
7223649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
7231ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
724f9fae5adSBarry Smith   PetscFunctionReturn(0);
725f9fae5adSBarry Smith }
726f9fae5adSBarry Smith 
7274a2ae208SSatish Balay #undef __FUNCT__
728b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5"
729dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
730f9fae5adSBarry Smith {
7314cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
732f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
733d2840e09SBarry Smith   const PetscScalar *x,*v;
734d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
735dfbe8321SBarry Smith   PetscErrorCode    ierr;
736d2840e09SBarry Smith   PetscInt          n,i;
737d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
738f9fae5adSBarry Smith 
739f9fae5adSBarry Smith   PetscFunctionBegin;
740d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
7413649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
7421ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
743bfec09a0SHong Zhang 
744f9fae5adSBarry Smith   for (i=0; i<m; i++) {
745bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
746bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
747f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
748f9fae5adSBarry Smith     alpha1 = x[5*i];
749f9fae5adSBarry Smith     alpha2 = x[5*i+1];
750f9fae5adSBarry Smith     alpha3 = x[5*i+2];
751f9fae5adSBarry Smith     alpha4 = x[5*i+3];
752f9fae5adSBarry Smith     alpha5 = x[5*i+4];
753f9fae5adSBarry Smith     while (n-->0) {
754f9fae5adSBarry Smith       y[5*(*idx)]   += alpha1*(*v);
755f9fae5adSBarry Smith       y[5*(*idx)+1] += alpha2*(*v);
756f9fae5adSBarry Smith       y[5*(*idx)+2] += alpha3*(*v);
757f9fae5adSBarry Smith       y[5*(*idx)+3] += alpha4*(*v);
758f9fae5adSBarry Smith       y[5*(*idx)+4] += alpha5*(*v);
759f9fae5adSBarry Smith       idx++; v++;
760f9fae5adSBarry Smith     }
761f9fae5adSBarry Smith   }
762dc0b31edSSatish Balay   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
7633649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
7641ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
765f9fae5adSBarry Smith   PetscFunctionReturn(0);
766f9fae5adSBarry Smith }
767f9fae5adSBarry Smith 
7684a2ae208SSatish Balay #undef __FUNCT__
769b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_5"
770dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
771f9fae5adSBarry Smith {
7724cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
773f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
774d2840e09SBarry Smith   const PetscScalar *x,*v;
775d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
776dfbe8321SBarry Smith   PetscErrorCode    ierr;
777b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
778d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
779f9fae5adSBarry Smith 
780f9fae5adSBarry Smith   PetscFunctionBegin;
781f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
7823649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
7831ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
784f9fae5adSBarry Smith   idx  = a->j;
785f9fae5adSBarry Smith   v    = a->a;
786f9fae5adSBarry Smith   ii   = a->i;
787f9fae5adSBarry Smith 
788f9fae5adSBarry Smith   for (i=0; i<m; i++) {
789f9fae5adSBarry Smith     jrow = ii[i];
790f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
791f9fae5adSBarry Smith     sum1  = 0.0;
792f9fae5adSBarry Smith     sum2  = 0.0;
793f9fae5adSBarry Smith     sum3  = 0.0;
794f9fae5adSBarry Smith     sum4  = 0.0;
795f9fae5adSBarry Smith     sum5  = 0.0;
796f9fae5adSBarry Smith     for (j=0; j<n; j++) {
797f9fae5adSBarry Smith       sum1 += v[jrow]*x[5*idx[jrow]];
798f9fae5adSBarry Smith       sum2 += v[jrow]*x[5*idx[jrow]+1];
799f9fae5adSBarry Smith       sum3 += v[jrow]*x[5*idx[jrow]+2];
800f9fae5adSBarry Smith       sum4 += v[jrow]*x[5*idx[jrow]+3];
801f9fae5adSBarry Smith       sum5 += v[jrow]*x[5*idx[jrow]+4];
802f9fae5adSBarry Smith       jrow++;
803f9fae5adSBarry Smith      }
804f9fae5adSBarry Smith     y[5*i]   += sum1;
805f9fae5adSBarry Smith     y[5*i+1] += sum2;
806f9fae5adSBarry Smith     y[5*i+2] += sum3;
807f9fae5adSBarry Smith     y[5*i+3] += sum4;
808f9fae5adSBarry Smith     y[5*i+4] += sum5;
809f9fae5adSBarry Smith   }
810f9fae5adSBarry Smith 
811dc0b31edSSatish Balay   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
8123649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
8131ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
814f9fae5adSBarry Smith   PetscFunctionReturn(0);
815f9fae5adSBarry Smith }
816f9fae5adSBarry Smith 
8174a2ae208SSatish Balay #undef __FUNCT__
818b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5"
819dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
820f9fae5adSBarry Smith {
8214cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
822f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
823d2840e09SBarry Smith   const PetscScalar *x,*v;
824d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
825dfbe8321SBarry Smith   PetscErrorCode    ierr;
826d2840e09SBarry Smith   PetscInt          n,i;
827d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
828f9fae5adSBarry Smith 
829f9fae5adSBarry Smith   PetscFunctionBegin;
830f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
8313649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
8321ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
833bfec09a0SHong Zhang 
834f9fae5adSBarry Smith   for (i=0; i<m; i++) {
835bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
836bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
837f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
838f9fae5adSBarry Smith     alpha1 = x[5*i];
839f9fae5adSBarry Smith     alpha2 = x[5*i+1];
840f9fae5adSBarry Smith     alpha3 = x[5*i+2];
841f9fae5adSBarry Smith     alpha4 = x[5*i+3];
842f9fae5adSBarry Smith     alpha5 = x[5*i+4];
843f9fae5adSBarry Smith     while (n-->0) {
844f9fae5adSBarry Smith       y[5*(*idx)]   += alpha1*(*v);
845f9fae5adSBarry Smith       y[5*(*idx)+1] += alpha2*(*v);
846f9fae5adSBarry Smith       y[5*(*idx)+2] += alpha3*(*v);
847f9fae5adSBarry Smith       y[5*(*idx)+3] += alpha4*(*v);
848f9fae5adSBarry Smith       y[5*(*idx)+4] += alpha5*(*v);
849f9fae5adSBarry Smith       idx++; v++;
850f9fae5adSBarry Smith     }
851f9fae5adSBarry Smith   }
852dc0b31edSSatish Balay   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
8533649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
8541ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
855f9fae5adSBarry Smith   PetscFunctionReturn(0);
856bcc973b7SBarry Smith }
857bcc973b7SBarry Smith 
8586cd98798SBarry Smith /* ------------------------------------------------------------------------------*/
8594a2ae208SSatish Balay #undef __FUNCT__
860b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_6"
861dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
8626cd98798SBarry Smith {
8636cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
8646cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
865d2840e09SBarry Smith   const PetscScalar *x,*v;
866d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
867dfbe8321SBarry Smith   PetscErrorCode    ierr;
868d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
869d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
8706cd98798SBarry Smith 
8716cd98798SBarry Smith   PetscFunctionBegin;
8723649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
8731ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
8746cd98798SBarry Smith   idx  = a->j;
8756cd98798SBarry Smith   v    = a->a;
8766cd98798SBarry Smith   ii   = a->i;
8776cd98798SBarry Smith 
8786cd98798SBarry Smith   for (i=0; i<m; i++) {
8796cd98798SBarry Smith     jrow = ii[i];
8806cd98798SBarry Smith     n    = ii[i+1] - jrow;
8816cd98798SBarry Smith     sum1  = 0.0;
8826cd98798SBarry Smith     sum2  = 0.0;
8836cd98798SBarry Smith     sum3  = 0.0;
8846cd98798SBarry Smith     sum4  = 0.0;
8856cd98798SBarry Smith     sum5  = 0.0;
8866cd98798SBarry Smith     sum6  = 0.0;
887b3c51c66SMatthew Knepley     nonzerorow += (n>0);
8886cd98798SBarry Smith     for (j=0; j<n; j++) {
8896cd98798SBarry Smith       sum1 += v[jrow]*x[6*idx[jrow]];
8906cd98798SBarry Smith       sum2 += v[jrow]*x[6*idx[jrow]+1];
8916cd98798SBarry Smith       sum3 += v[jrow]*x[6*idx[jrow]+2];
8926cd98798SBarry Smith       sum4 += v[jrow]*x[6*idx[jrow]+3];
8936cd98798SBarry Smith       sum5 += v[jrow]*x[6*idx[jrow]+4];
8946cd98798SBarry Smith       sum6 += v[jrow]*x[6*idx[jrow]+5];
8956cd98798SBarry Smith       jrow++;
8966cd98798SBarry Smith      }
8976cd98798SBarry Smith     y[6*i]   = sum1;
8986cd98798SBarry Smith     y[6*i+1] = sum2;
8996cd98798SBarry Smith     y[6*i+2] = sum3;
9006cd98798SBarry Smith     y[6*i+3] = sum4;
9016cd98798SBarry Smith     y[6*i+4] = sum5;
9026cd98798SBarry Smith     y[6*i+5] = sum6;
9036cd98798SBarry Smith   }
9046cd98798SBarry Smith 
905dc0b31edSSatish Balay   ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr);
9063649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
9071ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
9086cd98798SBarry Smith   PetscFunctionReturn(0);
9096cd98798SBarry Smith }
9106cd98798SBarry Smith 
9114a2ae208SSatish Balay #undef __FUNCT__
912b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6"
913dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
9146cd98798SBarry Smith {
9156cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
9166cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
917d2840e09SBarry Smith   const PetscScalar *x,*v;
918d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
919dfbe8321SBarry Smith   PetscErrorCode    ierr;
920d2840e09SBarry Smith   PetscInt          n,i;
921d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
9226cd98798SBarry Smith 
9236cd98798SBarry Smith   PetscFunctionBegin;
924d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
9253649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
9261ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
927bfec09a0SHong Zhang 
9286cd98798SBarry Smith   for (i=0; i<m; i++) {
929bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
930bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
9316cd98798SBarry Smith     n      = a->i[i+1] - a->i[i];
9326cd98798SBarry Smith     alpha1 = x[6*i];
9336cd98798SBarry Smith     alpha2 = x[6*i+1];
9346cd98798SBarry Smith     alpha3 = x[6*i+2];
9356cd98798SBarry Smith     alpha4 = x[6*i+3];
9366cd98798SBarry Smith     alpha5 = x[6*i+4];
9376cd98798SBarry Smith     alpha6 = x[6*i+5];
9386cd98798SBarry Smith     while (n-->0) {
9396cd98798SBarry Smith       y[6*(*idx)]   += alpha1*(*v);
9406cd98798SBarry Smith       y[6*(*idx)+1] += alpha2*(*v);
9416cd98798SBarry Smith       y[6*(*idx)+2] += alpha3*(*v);
9426cd98798SBarry Smith       y[6*(*idx)+3] += alpha4*(*v);
9436cd98798SBarry Smith       y[6*(*idx)+4] += alpha5*(*v);
9446cd98798SBarry Smith       y[6*(*idx)+5] += alpha6*(*v);
9456cd98798SBarry Smith       idx++; v++;
9466cd98798SBarry Smith     }
9476cd98798SBarry Smith   }
948dc0b31edSSatish Balay   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
9493649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
9501ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
9516cd98798SBarry Smith   PetscFunctionReturn(0);
9526cd98798SBarry Smith }
9536cd98798SBarry Smith 
9544a2ae208SSatish Balay #undef __FUNCT__
955b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_6"
956dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
9576cd98798SBarry Smith {
9586cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
9596cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
960d2840e09SBarry Smith   const PetscScalar *x,*v;
961d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
962dfbe8321SBarry Smith   PetscErrorCode    ierr;
963b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
964d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
9656cd98798SBarry Smith 
9666cd98798SBarry Smith   PetscFunctionBegin;
9676cd98798SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
9683649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
9691ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
9706cd98798SBarry Smith   idx  = a->j;
9716cd98798SBarry Smith   v    = a->a;
9726cd98798SBarry Smith   ii   = a->i;
9736cd98798SBarry Smith 
9746cd98798SBarry Smith   for (i=0; i<m; i++) {
9756cd98798SBarry Smith     jrow = ii[i];
9766cd98798SBarry Smith     n    = ii[i+1] - jrow;
9776cd98798SBarry Smith     sum1  = 0.0;
9786cd98798SBarry Smith     sum2  = 0.0;
9796cd98798SBarry Smith     sum3  = 0.0;
9806cd98798SBarry Smith     sum4  = 0.0;
9816cd98798SBarry Smith     sum5  = 0.0;
9826cd98798SBarry Smith     sum6  = 0.0;
9836cd98798SBarry Smith     for (j=0; j<n; j++) {
9846cd98798SBarry Smith       sum1 += v[jrow]*x[6*idx[jrow]];
9856cd98798SBarry Smith       sum2 += v[jrow]*x[6*idx[jrow]+1];
9866cd98798SBarry Smith       sum3 += v[jrow]*x[6*idx[jrow]+2];
9876cd98798SBarry Smith       sum4 += v[jrow]*x[6*idx[jrow]+3];
9886cd98798SBarry Smith       sum5 += v[jrow]*x[6*idx[jrow]+4];
9896cd98798SBarry Smith       sum6 += v[jrow]*x[6*idx[jrow]+5];
9906cd98798SBarry Smith       jrow++;
9916cd98798SBarry Smith      }
9926cd98798SBarry Smith     y[6*i]   += sum1;
9936cd98798SBarry Smith     y[6*i+1] += sum2;
9946cd98798SBarry Smith     y[6*i+2] += sum3;
9956cd98798SBarry Smith     y[6*i+3] += sum4;
9966cd98798SBarry Smith     y[6*i+4] += sum5;
9976cd98798SBarry Smith     y[6*i+5] += sum6;
9986cd98798SBarry Smith   }
9996cd98798SBarry Smith 
1000dc0b31edSSatish Balay   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
10013649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
10021ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
10036cd98798SBarry Smith   PetscFunctionReturn(0);
10046cd98798SBarry Smith }
10056cd98798SBarry Smith 
10064a2ae208SSatish Balay #undef __FUNCT__
1007b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6"
1008dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
10096cd98798SBarry Smith {
10106cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
10116cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1012d2840e09SBarry Smith   const PetscScalar *x,*v;
1013d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
1014dfbe8321SBarry Smith   PetscErrorCode    ierr;
1015d2840e09SBarry Smith   PetscInt          n,i;
1016d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
10176cd98798SBarry Smith 
10186cd98798SBarry Smith   PetscFunctionBegin;
10196cd98798SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
10203649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
10211ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1022bfec09a0SHong Zhang 
10236cd98798SBarry Smith   for (i=0; i<m; i++) {
1024bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
1025bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
10266cd98798SBarry Smith     n      = a->i[i+1] - a->i[i];
10276cd98798SBarry Smith     alpha1 = x[6*i];
10286cd98798SBarry Smith     alpha2 = x[6*i+1];
10296cd98798SBarry Smith     alpha3 = x[6*i+2];
10306cd98798SBarry Smith     alpha4 = x[6*i+3];
10316cd98798SBarry Smith     alpha5 = x[6*i+4];
10326cd98798SBarry Smith     alpha6 = x[6*i+5];
10336cd98798SBarry Smith     while (n-->0) {
10346cd98798SBarry Smith       y[6*(*idx)]   += alpha1*(*v);
10356cd98798SBarry Smith       y[6*(*idx)+1] += alpha2*(*v);
10366cd98798SBarry Smith       y[6*(*idx)+2] += alpha3*(*v);
10376cd98798SBarry Smith       y[6*(*idx)+3] += alpha4*(*v);
10386cd98798SBarry Smith       y[6*(*idx)+4] += alpha5*(*v);
10396cd98798SBarry Smith       y[6*(*idx)+5] += alpha6*(*v);
10406cd98798SBarry Smith       idx++; v++;
10416cd98798SBarry Smith     }
10426cd98798SBarry Smith   }
1043dc0b31edSSatish Balay   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
10443649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
10451ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
10466cd98798SBarry Smith   PetscFunctionReturn(0);
10476cd98798SBarry Smith }
10486cd98798SBarry Smith 
104966ed3db0SBarry Smith /* ------------------------------------------------------------------------------*/
105066ed3db0SBarry Smith #undef __FUNCT__
1051ed8eea03SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_7"
1052ed8eea03SBarry Smith PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1053ed8eea03SBarry Smith {
1054ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1055ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1056d2840e09SBarry Smith   const PetscScalar *x,*v;
1057d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1058ed8eea03SBarry Smith   PetscErrorCode    ierr;
1059d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1060d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
1061ed8eea03SBarry Smith 
1062ed8eea03SBarry Smith   PetscFunctionBegin;
10633649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1064ed8eea03SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1065ed8eea03SBarry Smith   idx  = a->j;
1066ed8eea03SBarry Smith   v    = a->a;
1067ed8eea03SBarry Smith   ii   = a->i;
1068ed8eea03SBarry Smith 
1069ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1070ed8eea03SBarry Smith     jrow = ii[i];
1071ed8eea03SBarry Smith     n    = ii[i+1] - jrow;
1072ed8eea03SBarry Smith     sum1  = 0.0;
1073ed8eea03SBarry Smith     sum2  = 0.0;
1074ed8eea03SBarry Smith     sum3  = 0.0;
1075ed8eea03SBarry Smith     sum4  = 0.0;
1076ed8eea03SBarry Smith     sum5  = 0.0;
1077ed8eea03SBarry Smith     sum6  = 0.0;
1078ed8eea03SBarry Smith     sum7  = 0.0;
1079b3c51c66SMatthew Knepley     nonzerorow += (n>0);
1080ed8eea03SBarry Smith     for (j=0; j<n; j++) {
1081ed8eea03SBarry Smith       sum1 += v[jrow]*x[7*idx[jrow]];
1082ed8eea03SBarry Smith       sum2 += v[jrow]*x[7*idx[jrow]+1];
1083ed8eea03SBarry Smith       sum3 += v[jrow]*x[7*idx[jrow]+2];
1084ed8eea03SBarry Smith       sum4 += v[jrow]*x[7*idx[jrow]+3];
1085ed8eea03SBarry Smith       sum5 += v[jrow]*x[7*idx[jrow]+4];
1086ed8eea03SBarry Smith       sum6 += v[jrow]*x[7*idx[jrow]+5];
1087ed8eea03SBarry Smith       sum7 += v[jrow]*x[7*idx[jrow]+6];
1088ed8eea03SBarry Smith       jrow++;
1089ed8eea03SBarry Smith      }
1090ed8eea03SBarry Smith     y[7*i]   = sum1;
1091ed8eea03SBarry Smith     y[7*i+1] = sum2;
1092ed8eea03SBarry Smith     y[7*i+2] = sum3;
1093ed8eea03SBarry Smith     y[7*i+3] = sum4;
1094ed8eea03SBarry Smith     y[7*i+4] = sum5;
1095ed8eea03SBarry Smith     y[7*i+5] = sum6;
1096ed8eea03SBarry Smith     y[7*i+6] = sum7;
1097ed8eea03SBarry Smith   }
1098ed8eea03SBarry Smith 
1099dc0b31edSSatish Balay   ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr);
11003649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1101ed8eea03SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1102ed8eea03SBarry Smith   PetscFunctionReturn(0);
1103ed8eea03SBarry Smith }
1104ed8eea03SBarry Smith 
1105ed8eea03SBarry Smith #undef __FUNCT__
1106ed8eea03SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_7"
1107ed8eea03SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1108ed8eea03SBarry Smith {
1109ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1110ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1111d2840e09SBarry Smith   const PetscScalar *x,*v;
1112d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1113ed8eea03SBarry Smith   PetscErrorCode    ierr;
1114d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1115d2840e09SBarry Smith   PetscInt          n,i;
1116ed8eea03SBarry Smith 
1117ed8eea03SBarry Smith   PetscFunctionBegin;
1118d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
11193649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1120ed8eea03SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1121ed8eea03SBarry Smith 
1122ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1123ed8eea03SBarry Smith     idx    = a->j + a->i[i] ;
1124ed8eea03SBarry Smith     v      = a->a + a->i[i] ;
1125ed8eea03SBarry Smith     n      = a->i[i+1] - a->i[i];
1126ed8eea03SBarry Smith     alpha1 = x[7*i];
1127ed8eea03SBarry Smith     alpha2 = x[7*i+1];
1128ed8eea03SBarry Smith     alpha3 = x[7*i+2];
1129ed8eea03SBarry Smith     alpha4 = x[7*i+3];
1130ed8eea03SBarry Smith     alpha5 = x[7*i+4];
1131ed8eea03SBarry Smith     alpha6 = x[7*i+5];
1132ed8eea03SBarry Smith     alpha7 = x[7*i+6];
1133ed8eea03SBarry Smith     while (n-->0) {
1134ed8eea03SBarry Smith       y[7*(*idx)]   += alpha1*(*v);
1135ed8eea03SBarry Smith       y[7*(*idx)+1] += alpha2*(*v);
1136ed8eea03SBarry Smith       y[7*(*idx)+2] += alpha3*(*v);
1137ed8eea03SBarry Smith       y[7*(*idx)+3] += alpha4*(*v);
1138ed8eea03SBarry Smith       y[7*(*idx)+4] += alpha5*(*v);
1139ed8eea03SBarry Smith       y[7*(*idx)+5] += alpha6*(*v);
1140ed8eea03SBarry Smith       y[7*(*idx)+6] += alpha7*(*v);
1141ed8eea03SBarry Smith       idx++; v++;
1142ed8eea03SBarry Smith     }
1143ed8eea03SBarry Smith   }
1144dc0b31edSSatish Balay   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
11453649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1146ed8eea03SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1147ed8eea03SBarry Smith   PetscFunctionReturn(0);
1148ed8eea03SBarry Smith }
1149ed8eea03SBarry Smith 
1150ed8eea03SBarry Smith #undef __FUNCT__
1151ed8eea03SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_7"
1152ed8eea03SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1153ed8eea03SBarry Smith {
1154ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1155ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1156d2840e09SBarry Smith   const PetscScalar *x,*v;
1157d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1158ed8eea03SBarry Smith   PetscErrorCode    ierr;
1159d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1160ed8eea03SBarry Smith   PetscInt          n,i,jrow,j;
1161ed8eea03SBarry Smith 
1162ed8eea03SBarry Smith   PetscFunctionBegin;
1163ed8eea03SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
11643649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1165ed8eea03SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1166ed8eea03SBarry Smith   idx  = a->j;
1167ed8eea03SBarry Smith   v    = a->a;
1168ed8eea03SBarry Smith   ii   = a->i;
1169ed8eea03SBarry Smith 
1170ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1171ed8eea03SBarry Smith     jrow = ii[i];
1172ed8eea03SBarry Smith     n    = ii[i+1] - jrow;
1173ed8eea03SBarry Smith     sum1  = 0.0;
1174ed8eea03SBarry Smith     sum2  = 0.0;
1175ed8eea03SBarry Smith     sum3  = 0.0;
1176ed8eea03SBarry Smith     sum4  = 0.0;
1177ed8eea03SBarry Smith     sum5  = 0.0;
1178ed8eea03SBarry Smith     sum6  = 0.0;
1179ed8eea03SBarry Smith     sum7  = 0.0;
1180ed8eea03SBarry Smith     for (j=0; j<n; j++) {
1181ed8eea03SBarry Smith       sum1 += v[jrow]*x[7*idx[jrow]];
1182ed8eea03SBarry Smith       sum2 += v[jrow]*x[7*idx[jrow]+1];
1183ed8eea03SBarry Smith       sum3 += v[jrow]*x[7*idx[jrow]+2];
1184ed8eea03SBarry Smith       sum4 += v[jrow]*x[7*idx[jrow]+3];
1185ed8eea03SBarry Smith       sum5 += v[jrow]*x[7*idx[jrow]+4];
1186ed8eea03SBarry Smith       sum6 += v[jrow]*x[7*idx[jrow]+5];
1187ed8eea03SBarry Smith       sum7 += v[jrow]*x[7*idx[jrow]+6];
1188ed8eea03SBarry Smith       jrow++;
1189ed8eea03SBarry Smith      }
1190ed8eea03SBarry Smith     y[7*i]   += sum1;
1191ed8eea03SBarry Smith     y[7*i+1] += sum2;
1192ed8eea03SBarry Smith     y[7*i+2] += sum3;
1193ed8eea03SBarry Smith     y[7*i+3] += sum4;
1194ed8eea03SBarry Smith     y[7*i+4] += sum5;
1195ed8eea03SBarry Smith     y[7*i+5] += sum6;
1196ed8eea03SBarry Smith     y[7*i+6] += sum7;
1197ed8eea03SBarry Smith   }
1198ed8eea03SBarry Smith 
1199dc0b31edSSatish Balay   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
12003649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1201ed8eea03SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1202ed8eea03SBarry Smith   PetscFunctionReturn(0);
1203ed8eea03SBarry Smith }
1204ed8eea03SBarry Smith 
1205ed8eea03SBarry Smith #undef __FUNCT__
1206ed8eea03SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_7"
1207ed8eea03SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1208ed8eea03SBarry Smith {
1209ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1210ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1211d2840e09SBarry Smith   const PetscScalar *x,*v;
1212d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1213ed8eea03SBarry Smith   PetscErrorCode    ierr;
1214d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1215d2840e09SBarry Smith   PetscInt          n,i;
1216ed8eea03SBarry Smith 
1217ed8eea03SBarry Smith   PetscFunctionBegin;
1218ed8eea03SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
12193649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1220ed8eea03SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1221ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1222ed8eea03SBarry Smith     idx    = a->j + a->i[i] ;
1223ed8eea03SBarry Smith     v      = a->a + a->i[i] ;
1224ed8eea03SBarry Smith     n      = a->i[i+1] - a->i[i];
1225ed8eea03SBarry Smith     alpha1 = x[7*i];
1226ed8eea03SBarry Smith     alpha2 = x[7*i+1];
1227ed8eea03SBarry Smith     alpha3 = x[7*i+2];
1228ed8eea03SBarry Smith     alpha4 = x[7*i+3];
1229ed8eea03SBarry Smith     alpha5 = x[7*i+4];
1230ed8eea03SBarry Smith     alpha6 = x[7*i+5];
1231ed8eea03SBarry Smith     alpha7 = x[7*i+6];
1232ed8eea03SBarry Smith     while (n-->0) {
1233ed8eea03SBarry Smith       y[7*(*idx)]   += alpha1*(*v);
1234ed8eea03SBarry Smith       y[7*(*idx)+1] += alpha2*(*v);
1235ed8eea03SBarry Smith       y[7*(*idx)+2] += alpha3*(*v);
1236ed8eea03SBarry Smith       y[7*(*idx)+3] += alpha4*(*v);
1237ed8eea03SBarry Smith       y[7*(*idx)+4] += alpha5*(*v);
1238ed8eea03SBarry Smith       y[7*(*idx)+5] += alpha6*(*v);
1239ed8eea03SBarry Smith       y[7*(*idx)+6] += alpha7*(*v);
1240ed8eea03SBarry Smith       idx++; v++;
1241ed8eea03SBarry Smith     }
1242ed8eea03SBarry Smith   }
1243dc0b31edSSatish Balay   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
12443649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1245ed8eea03SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1246ed8eea03SBarry Smith   PetscFunctionReturn(0);
1247ed8eea03SBarry Smith }
1248ed8eea03SBarry Smith 
1249ed8eea03SBarry Smith #undef __FUNCT__
125066ed3db0SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_8"
1251dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
125266ed3db0SBarry Smith {
125366ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
125466ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1255d2840e09SBarry Smith   const PetscScalar *x,*v;
1256d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1257dfbe8321SBarry Smith   PetscErrorCode    ierr;
1258d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1259d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
126066ed3db0SBarry Smith 
126166ed3db0SBarry Smith   PetscFunctionBegin;
12623649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
12631ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
126466ed3db0SBarry Smith   idx  = a->j;
126566ed3db0SBarry Smith   v    = a->a;
126666ed3db0SBarry Smith   ii   = a->i;
126766ed3db0SBarry Smith 
126866ed3db0SBarry Smith   for (i=0; i<m; i++) {
126966ed3db0SBarry Smith     jrow = ii[i];
127066ed3db0SBarry Smith     n    = ii[i+1] - jrow;
127166ed3db0SBarry Smith     sum1  = 0.0;
127266ed3db0SBarry Smith     sum2  = 0.0;
127366ed3db0SBarry Smith     sum3  = 0.0;
127466ed3db0SBarry Smith     sum4  = 0.0;
127566ed3db0SBarry Smith     sum5  = 0.0;
127666ed3db0SBarry Smith     sum6  = 0.0;
127766ed3db0SBarry Smith     sum7  = 0.0;
127866ed3db0SBarry Smith     sum8  = 0.0;
1279b3c51c66SMatthew Knepley     nonzerorow += (n>0);
128066ed3db0SBarry Smith     for (j=0; j<n; j++) {
128166ed3db0SBarry Smith       sum1 += v[jrow]*x[8*idx[jrow]];
128266ed3db0SBarry Smith       sum2 += v[jrow]*x[8*idx[jrow]+1];
128366ed3db0SBarry Smith       sum3 += v[jrow]*x[8*idx[jrow]+2];
128466ed3db0SBarry Smith       sum4 += v[jrow]*x[8*idx[jrow]+3];
128566ed3db0SBarry Smith       sum5 += v[jrow]*x[8*idx[jrow]+4];
128666ed3db0SBarry Smith       sum6 += v[jrow]*x[8*idx[jrow]+5];
128766ed3db0SBarry Smith       sum7 += v[jrow]*x[8*idx[jrow]+6];
128866ed3db0SBarry Smith       sum8 += v[jrow]*x[8*idx[jrow]+7];
128966ed3db0SBarry Smith       jrow++;
129066ed3db0SBarry Smith      }
129166ed3db0SBarry Smith     y[8*i]   = sum1;
129266ed3db0SBarry Smith     y[8*i+1] = sum2;
129366ed3db0SBarry Smith     y[8*i+2] = sum3;
129466ed3db0SBarry Smith     y[8*i+3] = sum4;
129566ed3db0SBarry Smith     y[8*i+4] = sum5;
129666ed3db0SBarry Smith     y[8*i+5] = sum6;
129766ed3db0SBarry Smith     y[8*i+6] = sum7;
129866ed3db0SBarry Smith     y[8*i+7] = sum8;
129966ed3db0SBarry Smith   }
130066ed3db0SBarry Smith 
1301dc0b31edSSatish Balay   ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr);
13023649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
13031ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
130466ed3db0SBarry Smith   PetscFunctionReturn(0);
130566ed3db0SBarry Smith }
130666ed3db0SBarry Smith 
130766ed3db0SBarry Smith #undef __FUNCT__
130866ed3db0SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8"
1309dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
131066ed3db0SBarry Smith {
131166ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
131266ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1313d2840e09SBarry Smith   const PetscScalar *x,*v;
1314d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1315dfbe8321SBarry Smith   PetscErrorCode    ierr;
1316d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1317d2840e09SBarry Smith   PetscInt          n,i;
131866ed3db0SBarry Smith 
131966ed3db0SBarry Smith   PetscFunctionBegin;
1320d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
13213649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
13221ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1323bfec09a0SHong Zhang 
132466ed3db0SBarry Smith   for (i=0; i<m; i++) {
1325bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
1326bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
132766ed3db0SBarry Smith     n      = a->i[i+1] - a->i[i];
132866ed3db0SBarry Smith     alpha1 = x[8*i];
132966ed3db0SBarry Smith     alpha2 = x[8*i+1];
133066ed3db0SBarry Smith     alpha3 = x[8*i+2];
133166ed3db0SBarry Smith     alpha4 = x[8*i+3];
133266ed3db0SBarry Smith     alpha5 = x[8*i+4];
133366ed3db0SBarry Smith     alpha6 = x[8*i+5];
133466ed3db0SBarry Smith     alpha7 = x[8*i+6];
133566ed3db0SBarry Smith     alpha8 = x[8*i+7];
133666ed3db0SBarry Smith     while (n-->0) {
133766ed3db0SBarry Smith       y[8*(*idx)]   += alpha1*(*v);
133866ed3db0SBarry Smith       y[8*(*idx)+1] += alpha2*(*v);
133966ed3db0SBarry Smith       y[8*(*idx)+2] += alpha3*(*v);
134066ed3db0SBarry Smith       y[8*(*idx)+3] += alpha4*(*v);
134166ed3db0SBarry Smith       y[8*(*idx)+4] += alpha5*(*v);
134266ed3db0SBarry Smith       y[8*(*idx)+5] += alpha6*(*v);
134366ed3db0SBarry Smith       y[8*(*idx)+6] += alpha7*(*v);
134466ed3db0SBarry Smith       y[8*(*idx)+7] += alpha8*(*v);
134566ed3db0SBarry Smith       idx++; v++;
134666ed3db0SBarry Smith     }
134766ed3db0SBarry Smith   }
1348dc0b31edSSatish Balay   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
13493649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
13501ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
135166ed3db0SBarry Smith   PetscFunctionReturn(0);
135266ed3db0SBarry Smith }
135366ed3db0SBarry Smith 
135466ed3db0SBarry Smith #undef __FUNCT__
135566ed3db0SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_8"
1356dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
135766ed3db0SBarry Smith {
135866ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
135966ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1360d2840e09SBarry Smith   const PetscScalar *x,*v;
1361d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1362dfbe8321SBarry Smith   PetscErrorCode    ierr;
1363d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1364b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
136566ed3db0SBarry Smith 
136666ed3db0SBarry Smith   PetscFunctionBegin;
136766ed3db0SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
13683649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
13691ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
137066ed3db0SBarry Smith   idx  = a->j;
137166ed3db0SBarry Smith   v    = a->a;
137266ed3db0SBarry Smith   ii   = a->i;
137366ed3db0SBarry Smith 
137466ed3db0SBarry Smith   for (i=0; i<m; i++) {
137566ed3db0SBarry Smith     jrow = ii[i];
137666ed3db0SBarry Smith     n    = ii[i+1] - jrow;
137766ed3db0SBarry Smith     sum1  = 0.0;
137866ed3db0SBarry Smith     sum2  = 0.0;
137966ed3db0SBarry Smith     sum3  = 0.0;
138066ed3db0SBarry Smith     sum4  = 0.0;
138166ed3db0SBarry Smith     sum5  = 0.0;
138266ed3db0SBarry Smith     sum6  = 0.0;
138366ed3db0SBarry Smith     sum7  = 0.0;
138466ed3db0SBarry Smith     sum8  = 0.0;
138566ed3db0SBarry Smith     for (j=0; j<n; j++) {
138666ed3db0SBarry Smith       sum1 += v[jrow]*x[8*idx[jrow]];
138766ed3db0SBarry Smith       sum2 += v[jrow]*x[8*idx[jrow]+1];
138866ed3db0SBarry Smith       sum3 += v[jrow]*x[8*idx[jrow]+2];
138966ed3db0SBarry Smith       sum4 += v[jrow]*x[8*idx[jrow]+3];
139066ed3db0SBarry Smith       sum5 += v[jrow]*x[8*idx[jrow]+4];
139166ed3db0SBarry Smith       sum6 += v[jrow]*x[8*idx[jrow]+5];
139266ed3db0SBarry Smith       sum7 += v[jrow]*x[8*idx[jrow]+6];
139366ed3db0SBarry Smith       sum8 += v[jrow]*x[8*idx[jrow]+7];
139466ed3db0SBarry Smith       jrow++;
139566ed3db0SBarry Smith      }
139666ed3db0SBarry Smith     y[8*i]   += sum1;
139766ed3db0SBarry Smith     y[8*i+1] += sum2;
139866ed3db0SBarry Smith     y[8*i+2] += sum3;
139966ed3db0SBarry Smith     y[8*i+3] += sum4;
140066ed3db0SBarry Smith     y[8*i+4] += sum5;
140166ed3db0SBarry Smith     y[8*i+5] += sum6;
140266ed3db0SBarry Smith     y[8*i+6] += sum7;
140366ed3db0SBarry Smith     y[8*i+7] += sum8;
140466ed3db0SBarry Smith   }
140566ed3db0SBarry Smith 
1406dc0b31edSSatish Balay   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
14073649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
14081ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
140966ed3db0SBarry Smith   PetscFunctionReturn(0);
141066ed3db0SBarry Smith }
141166ed3db0SBarry Smith 
141266ed3db0SBarry Smith #undef __FUNCT__
141366ed3db0SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8"
1414dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
141566ed3db0SBarry Smith {
141666ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
141766ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1418d2840e09SBarry Smith   const PetscScalar *x,*v;
1419d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1420dfbe8321SBarry Smith   PetscErrorCode    ierr;
1421d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1422d2840e09SBarry Smith   PetscInt          n,i;
142366ed3db0SBarry Smith 
142466ed3db0SBarry Smith   PetscFunctionBegin;
142566ed3db0SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
14263649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
14271ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
142866ed3db0SBarry Smith   for (i=0; i<m; i++) {
1429bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
1430bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
143166ed3db0SBarry Smith     n      = a->i[i+1] - a->i[i];
143266ed3db0SBarry Smith     alpha1 = x[8*i];
143366ed3db0SBarry Smith     alpha2 = x[8*i+1];
143466ed3db0SBarry Smith     alpha3 = x[8*i+2];
143566ed3db0SBarry Smith     alpha4 = x[8*i+3];
143666ed3db0SBarry Smith     alpha5 = x[8*i+4];
143766ed3db0SBarry Smith     alpha6 = x[8*i+5];
143866ed3db0SBarry Smith     alpha7 = x[8*i+6];
143966ed3db0SBarry Smith     alpha8 = x[8*i+7];
144066ed3db0SBarry Smith     while (n-->0) {
144166ed3db0SBarry Smith       y[8*(*idx)]   += alpha1*(*v);
144266ed3db0SBarry Smith       y[8*(*idx)+1] += alpha2*(*v);
144366ed3db0SBarry Smith       y[8*(*idx)+2] += alpha3*(*v);
144466ed3db0SBarry Smith       y[8*(*idx)+3] += alpha4*(*v);
144566ed3db0SBarry Smith       y[8*(*idx)+4] += alpha5*(*v);
144666ed3db0SBarry Smith       y[8*(*idx)+5] += alpha6*(*v);
144766ed3db0SBarry Smith       y[8*(*idx)+6] += alpha7*(*v);
144866ed3db0SBarry Smith       y[8*(*idx)+7] += alpha8*(*v);
144966ed3db0SBarry Smith       idx++; v++;
145066ed3db0SBarry Smith     }
145166ed3db0SBarry Smith   }
1452dc0b31edSSatish Balay   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
14533649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
14541ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
14552f7816d4SBarry Smith   PetscFunctionReturn(0);
14562f7816d4SBarry Smith }
14572f7816d4SBarry Smith 
14582b692628SMatthew Knepley /* ------------------------------------------------------------------------------*/
14592b692628SMatthew Knepley #undef __FUNCT__
14602b692628SMatthew Knepley #define __FUNCT__ "MatMult_SeqMAIJ_9"
1461dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
14622b692628SMatthew Knepley {
14632b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
14642b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1465d2840e09SBarry Smith   const PetscScalar *x,*v;
1466d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1467dfbe8321SBarry Smith   PetscErrorCode    ierr;
1468d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1469d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
14702b692628SMatthew Knepley 
14712b692628SMatthew Knepley   PetscFunctionBegin;
14723649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
14731ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
14742b692628SMatthew Knepley   idx  = a->j;
14752b692628SMatthew Knepley   v    = a->a;
14762b692628SMatthew Knepley   ii   = a->i;
14772b692628SMatthew Knepley 
14782b692628SMatthew Knepley   for (i=0; i<m; i++) {
14792b692628SMatthew Knepley     jrow = ii[i];
14802b692628SMatthew Knepley     n    = ii[i+1] - jrow;
14812b692628SMatthew Knepley     sum1  = 0.0;
14822b692628SMatthew Knepley     sum2  = 0.0;
14832b692628SMatthew Knepley     sum3  = 0.0;
14842b692628SMatthew Knepley     sum4  = 0.0;
14852b692628SMatthew Knepley     sum5  = 0.0;
14862b692628SMatthew Knepley     sum6  = 0.0;
14872b692628SMatthew Knepley     sum7  = 0.0;
14882b692628SMatthew Knepley     sum8  = 0.0;
14892b692628SMatthew Knepley     sum9  = 0.0;
1490b3c51c66SMatthew Knepley     nonzerorow += (n>0);
14912b692628SMatthew Knepley     for (j=0; j<n; j++) {
14922b692628SMatthew Knepley       sum1 += v[jrow]*x[9*idx[jrow]];
14932b692628SMatthew Knepley       sum2 += v[jrow]*x[9*idx[jrow]+1];
14942b692628SMatthew Knepley       sum3 += v[jrow]*x[9*idx[jrow]+2];
14952b692628SMatthew Knepley       sum4 += v[jrow]*x[9*idx[jrow]+3];
14962b692628SMatthew Knepley       sum5 += v[jrow]*x[9*idx[jrow]+4];
14972b692628SMatthew Knepley       sum6 += v[jrow]*x[9*idx[jrow]+5];
14982b692628SMatthew Knepley       sum7 += v[jrow]*x[9*idx[jrow]+6];
14992b692628SMatthew Knepley       sum8 += v[jrow]*x[9*idx[jrow]+7];
15002b692628SMatthew Knepley       sum9 += v[jrow]*x[9*idx[jrow]+8];
15012b692628SMatthew Knepley       jrow++;
15022b692628SMatthew Knepley      }
15032b692628SMatthew Knepley     y[9*i]   = sum1;
15042b692628SMatthew Knepley     y[9*i+1] = sum2;
15052b692628SMatthew Knepley     y[9*i+2] = sum3;
15062b692628SMatthew Knepley     y[9*i+3] = sum4;
15072b692628SMatthew Knepley     y[9*i+4] = sum5;
15082b692628SMatthew Knepley     y[9*i+5] = sum6;
15092b692628SMatthew Knepley     y[9*i+6] = sum7;
15102b692628SMatthew Knepley     y[9*i+7] = sum8;
15112b692628SMatthew Knepley     y[9*i+8] = sum9;
15122b692628SMatthew Knepley   }
15132b692628SMatthew Knepley 
1514dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr);
15153649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
15161ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
15172b692628SMatthew Knepley   PetscFunctionReturn(0);
15182b692628SMatthew Knepley }
15192b692628SMatthew Knepley 
1520b9cda22cSBarry Smith /* ------------------------------------------------------------------------------*/
1521b9cda22cSBarry Smith 
15222b692628SMatthew Knepley #undef __FUNCT__
15232b692628SMatthew Knepley #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9"
1524dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
15252b692628SMatthew Knepley {
15262b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
15272b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1528d2840e09SBarry Smith   const PetscScalar *x,*v;
1529d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1530dfbe8321SBarry Smith   PetscErrorCode    ierr;
1531d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1532d2840e09SBarry Smith   PetscInt          n,i;
15332b692628SMatthew Knepley 
15342b692628SMatthew Knepley   PetscFunctionBegin;
1535d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
15363649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
15371ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
15382b692628SMatthew Knepley 
15392b692628SMatthew Knepley   for (i=0; i<m; i++) {
15402b692628SMatthew Knepley     idx    = a->j + a->i[i] ;
15412b692628SMatthew Knepley     v      = a->a + a->i[i] ;
15422b692628SMatthew Knepley     n      = a->i[i+1] - a->i[i];
15432b692628SMatthew Knepley     alpha1 = x[9*i];
15442b692628SMatthew Knepley     alpha2 = x[9*i+1];
15452b692628SMatthew Knepley     alpha3 = x[9*i+2];
15462b692628SMatthew Knepley     alpha4 = x[9*i+3];
15472b692628SMatthew Knepley     alpha5 = x[9*i+4];
15482b692628SMatthew Knepley     alpha6 = x[9*i+5];
15492b692628SMatthew Knepley     alpha7 = x[9*i+6];
15502b692628SMatthew Knepley     alpha8 = x[9*i+7];
15512f6bd0c6SMatthew Knepley     alpha9 = x[9*i+8];
15522b692628SMatthew Knepley     while (n-->0) {
15532b692628SMatthew Knepley       y[9*(*idx)]   += alpha1*(*v);
15542b692628SMatthew Knepley       y[9*(*idx)+1] += alpha2*(*v);
15552b692628SMatthew Knepley       y[9*(*idx)+2] += alpha3*(*v);
15562b692628SMatthew Knepley       y[9*(*idx)+3] += alpha4*(*v);
15572b692628SMatthew Knepley       y[9*(*idx)+4] += alpha5*(*v);
15582b692628SMatthew Knepley       y[9*(*idx)+5] += alpha6*(*v);
15592b692628SMatthew Knepley       y[9*(*idx)+6] += alpha7*(*v);
15602b692628SMatthew Knepley       y[9*(*idx)+7] += alpha8*(*v);
15612b692628SMatthew Knepley       y[9*(*idx)+8] += alpha9*(*v);
15622b692628SMatthew Knepley       idx++; v++;
15632b692628SMatthew Knepley     }
15642b692628SMatthew Knepley   }
1565dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
15663649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
15671ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
15682b692628SMatthew Knepley   PetscFunctionReturn(0);
15692b692628SMatthew Knepley }
15702b692628SMatthew Knepley 
15712b692628SMatthew Knepley #undef __FUNCT__
15722b692628SMatthew Knepley #define __FUNCT__ "MatMultAdd_SeqMAIJ_9"
1573dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
15742b692628SMatthew Knepley {
15752b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
15762b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1577d2840e09SBarry Smith   const PetscScalar *x,*v;
1578d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1579dfbe8321SBarry Smith   PetscErrorCode    ierr;
1580d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1581b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
15822b692628SMatthew Knepley 
15832b692628SMatthew Knepley   PetscFunctionBegin;
15842b692628SMatthew Knepley   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
15853649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
15861ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
15872b692628SMatthew Knepley   idx  = a->j;
15882b692628SMatthew Knepley   v    = a->a;
15892b692628SMatthew Knepley   ii   = a->i;
15902b692628SMatthew Knepley 
15912b692628SMatthew Knepley   for (i=0; i<m; i++) {
15922b692628SMatthew Knepley     jrow = ii[i];
15932b692628SMatthew Knepley     n    = ii[i+1] - jrow;
15942b692628SMatthew Knepley     sum1  = 0.0;
15952b692628SMatthew Knepley     sum2  = 0.0;
15962b692628SMatthew Knepley     sum3  = 0.0;
15972b692628SMatthew Knepley     sum4  = 0.0;
15982b692628SMatthew Knepley     sum5  = 0.0;
15992b692628SMatthew Knepley     sum6  = 0.0;
16002b692628SMatthew Knepley     sum7  = 0.0;
16012b692628SMatthew Knepley     sum8  = 0.0;
16022b692628SMatthew Knepley     sum9  = 0.0;
16032b692628SMatthew Knepley     for (j=0; j<n; j++) {
16042b692628SMatthew Knepley       sum1 += v[jrow]*x[9*idx[jrow]];
16052b692628SMatthew Knepley       sum2 += v[jrow]*x[9*idx[jrow]+1];
16062b692628SMatthew Knepley       sum3 += v[jrow]*x[9*idx[jrow]+2];
16072b692628SMatthew Knepley       sum4 += v[jrow]*x[9*idx[jrow]+3];
16082b692628SMatthew Knepley       sum5 += v[jrow]*x[9*idx[jrow]+4];
16092b692628SMatthew Knepley       sum6 += v[jrow]*x[9*idx[jrow]+5];
16102b692628SMatthew Knepley       sum7 += v[jrow]*x[9*idx[jrow]+6];
16112b692628SMatthew Knepley       sum8 += v[jrow]*x[9*idx[jrow]+7];
16122b692628SMatthew Knepley       sum9 += v[jrow]*x[9*idx[jrow]+8];
16132b692628SMatthew Knepley       jrow++;
16142b692628SMatthew Knepley      }
16152b692628SMatthew Knepley     y[9*i]   += sum1;
16162b692628SMatthew Knepley     y[9*i+1] += sum2;
16172b692628SMatthew Knepley     y[9*i+2] += sum3;
16182b692628SMatthew Knepley     y[9*i+3] += sum4;
16192b692628SMatthew Knepley     y[9*i+4] += sum5;
16202b692628SMatthew Knepley     y[9*i+5] += sum6;
16212b692628SMatthew Knepley     y[9*i+6] += sum7;
16222b692628SMatthew Knepley     y[9*i+7] += sum8;
16232b692628SMatthew Knepley     y[9*i+8] += sum9;
16242b692628SMatthew Knepley   }
16252b692628SMatthew Knepley 
1626efee365bSSatish Balay   ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr);
16273649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
16281ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
16292b692628SMatthew Knepley   PetscFunctionReturn(0);
16302b692628SMatthew Knepley }
16312b692628SMatthew Knepley 
16322b692628SMatthew Knepley #undef __FUNCT__
16332b692628SMatthew Knepley #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9"
1634dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
16352b692628SMatthew Knepley {
16362b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
16372b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1638d2840e09SBarry Smith   const PetscScalar *x,*v;
1639d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1640dfbe8321SBarry Smith   PetscErrorCode    ierr;
1641d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1642d2840e09SBarry Smith   PetscInt          n,i;
16432b692628SMatthew Knepley 
16442b692628SMatthew Knepley   PetscFunctionBegin;
16452b692628SMatthew Knepley   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
16463649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
16471ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
16482b692628SMatthew Knepley   for (i=0; i<m; i++) {
16492b692628SMatthew Knepley     idx    = a->j + a->i[i] ;
16502b692628SMatthew Knepley     v      = a->a + a->i[i] ;
16512b692628SMatthew Knepley     n      = a->i[i+1] - a->i[i];
16522b692628SMatthew Knepley     alpha1 = x[9*i];
16532b692628SMatthew Knepley     alpha2 = x[9*i+1];
16542b692628SMatthew Knepley     alpha3 = x[9*i+2];
16552b692628SMatthew Knepley     alpha4 = x[9*i+3];
16562b692628SMatthew Knepley     alpha5 = x[9*i+4];
16572b692628SMatthew Knepley     alpha6 = x[9*i+5];
16582b692628SMatthew Knepley     alpha7 = x[9*i+6];
16592b692628SMatthew Knepley     alpha8 = x[9*i+7];
16602b692628SMatthew Knepley     alpha9 = x[9*i+8];
16612b692628SMatthew Knepley     while (n-->0) {
16622b692628SMatthew Knepley       y[9*(*idx)]   += alpha1*(*v);
16632b692628SMatthew Knepley       y[9*(*idx)+1] += alpha2*(*v);
16642b692628SMatthew Knepley       y[9*(*idx)+2] += alpha3*(*v);
16652b692628SMatthew Knepley       y[9*(*idx)+3] += alpha4*(*v);
16662b692628SMatthew Knepley       y[9*(*idx)+4] += alpha5*(*v);
16672b692628SMatthew Knepley       y[9*(*idx)+5] += alpha6*(*v);
16682b692628SMatthew Knepley       y[9*(*idx)+6] += alpha7*(*v);
16692b692628SMatthew Knepley       y[9*(*idx)+7] += alpha8*(*v);
16702b692628SMatthew Knepley       y[9*(*idx)+8] += alpha9*(*v);
16712b692628SMatthew Knepley       idx++; v++;
16722b692628SMatthew Knepley     }
16732b692628SMatthew Knepley   }
1674dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
16753649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
16761ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
16772b692628SMatthew Knepley   PetscFunctionReturn(0);
16782b692628SMatthew Knepley }
1679b9cda22cSBarry Smith #undef __FUNCT__
1680b9cda22cSBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_10"
1681b9cda22cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1682b9cda22cSBarry Smith {
1683b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1684b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1685d2840e09SBarry Smith   const PetscScalar *x,*v;
1686d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1687b9cda22cSBarry Smith   PetscErrorCode    ierr;
1688d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1689d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
1690b9cda22cSBarry Smith 
1691b9cda22cSBarry Smith   PetscFunctionBegin;
16923649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1693b9cda22cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1694b9cda22cSBarry Smith   idx  = a->j;
1695b9cda22cSBarry Smith   v    = a->a;
1696b9cda22cSBarry Smith   ii   = a->i;
1697b9cda22cSBarry Smith 
1698b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1699b9cda22cSBarry Smith     jrow = ii[i];
1700b9cda22cSBarry Smith     n    = ii[i+1] - jrow;
1701b9cda22cSBarry Smith     sum1  = 0.0;
1702b9cda22cSBarry Smith     sum2  = 0.0;
1703b9cda22cSBarry Smith     sum3  = 0.0;
1704b9cda22cSBarry Smith     sum4  = 0.0;
1705b9cda22cSBarry Smith     sum5  = 0.0;
1706b9cda22cSBarry Smith     sum6  = 0.0;
1707b9cda22cSBarry Smith     sum7  = 0.0;
1708b9cda22cSBarry Smith     sum8  = 0.0;
1709b9cda22cSBarry Smith     sum9  = 0.0;
1710b9cda22cSBarry Smith     sum10 = 0.0;
1711b3c51c66SMatthew Knepley     nonzerorow += (n>0);
1712b9cda22cSBarry Smith     for (j=0; j<n; j++) {
1713b9cda22cSBarry Smith       sum1  += v[jrow]*x[10*idx[jrow]];
1714b9cda22cSBarry Smith       sum2  += v[jrow]*x[10*idx[jrow]+1];
1715b9cda22cSBarry Smith       sum3  += v[jrow]*x[10*idx[jrow]+2];
1716b9cda22cSBarry Smith       sum4  += v[jrow]*x[10*idx[jrow]+3];
1717b9cda22cSBarry Smith       sum5  += v[jrow]*x[10*idx[jrow]+4];
1718b9cda22cSBarry Smith       sum6  += v[jrow]*x[10*idx[jrow]+5];
1719b9cda22cSBarry Smith       sum7  += v[jrow]*x[10*idx[jrow]+6];
1720b9cda22cSBarry Smith       sum8  += v[jrow]*x[10*idx[jrow]+7];
1721b9cda22cSBarry Smith       sum9  += v[jrow]*x[10*idx[jrow]+8];
1722b9cda22cSBarry Smith       sum10 += v[jrow]*x[10*idx[jrow]+9];
1723b9cda22cSBarry Smith       jrow++;
1724b9cda22cSBarry Smith      }
1725b9cda22cSBarry Smith     y[10*i]   = sum1;
1726b9cda22cSBarry Smith     y[10*i+1] = sum2;
1727b9cda22cSBarry Smith     y[10*i+2] = sum3;
1728b9cda22cSBarry Smith     y[10*i+3] = sum4;
1729b9cda22cSBarry Smith     y[10*i+4] = sum5;
1730b9cda22cSBarry Smith     y[10*i+5] = sum6;
1731b9cda22cSBarry Smith     y[10*i+6] = sum7;
1732b9cda22cSBarry Smith     y[10*i+7] = sum8;
1733b9cda22cSBarry Smith     y[10*i+8] = sum9;
1734b9cda22cSBarry Smith     y[10*i+9] = sum10;
1735b9cda22cSBarry Smith   }
1736b9cda22cSBarry Smith 
1737dc0b31edSSatish Balay   ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr);
17383649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1739b9cda22cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1740b9cda22cSBarry Smith   PetscFunctionReturn(0);
1741b9cda22cSBarry Smith }
1742b9cda22cSBarry Smith 
1743b9cda22cSBarry Smith #undef __FUNCT__
1744dbdd7285SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_10"
1745b9cda22cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1746b9cda22cSBarry Smith {
1747b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1748b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1749d2840e09SBarry Smith   const PetscScalar *x,*v;
1750d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1751b9cda22cSBarry Smith   PetscErrorCode    ierr;
1752d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1753b9cda22cSBarry Smith   PetscInt          n,i,jrow,j;
1754b9cda22cSBarry Smith 
1755b9cda22cSBarry Smith   PetscFunctionBegin;
1756b9cda22cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
17573649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1758b9cda22cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1759b9cda22cSBarry Smith   idx  = a->j;
1760b9cda22cSBarry Smith   v    = a->a;
1761b9cda22cSBarry Smith   ii   = a->i;
1762b9cda22cSBarry Smith 
1763b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1764b9cda22cSBarry Smith     jrow = ii[i];
1765b9cda22cSBarry Smith     n    = ii[i+1] - jrow;
1766b9cda22cSBarry Smith     sum1  = 0.0;
1767b9cda22cSBarry Smith     sum2  = 0.0;
1768b9cda22cSBarry Smith     sum3  = 0.0;
1769b9cda22cSBarry Smith     sum4  = 0.0;
1770b9cda22cSBarry Smith     sum5  = 0.0;
1771b9cda22cSBarry Smith     sum6  = 0.0;
1772b9cda22cSBarry Smith     sum7  = 0.0;
1773b9cda22cSBarry Smith     sum8  = 0.0;
1774b9cda22cSBarry Smith     sum9  = 0.0;
1775b9cda22cSBarry Smith     sum10 = 0.0;
1776b9cda22cSBarry Smith     for (j=0; j<n; j++) {
1777b9cda22cSBarry Smith       sum1  += v[jrow]*x[10*idx[jrow]];
1778b9cda22cSBarry Smith       sum2  += v[jrow]*x[10*idx[jrow]+1];
1779b9cda22cSBarry Smith       sum3  += v[jrow]*x[10*idx[jrow]+2];
1780b9cda22cSBarry Smith       sum4  += v[jrow]*x[10*idx[jrow]+3];
1781b9cda22cSBarry Smith       sum5  += v[jrow]*x[10*idx[jrow]+4];
1782b9cda22cSBarry Smith       sum6  += v[jrow]*x[10*idx[jrow]+5];
1783b9cda22cSBarry Smith       sum7  += v[jrow]*x[10*idx[jrow]+6];
1784b9cda22cSBarry Smith       sum8  += v[jrow]*x[10*idx[jrow]+7];
1785b9cda22cSBarry Smith       sum9  += v[jrow]*x[10*idx[jrow]+8];
1786b9cda22cSBarry Smith       sum10 += v[jrow]*x[10*idx[jrow]+9];
1787b9cda22cSBarry Smith       jrow++;
1788b9cda22cSBarry Smith      }
1789b9cda22cSBarry Smith     y[10*i]   += sum1;
1790b9cda22cSBarry Smith     y[10*i+1] += sum2;
1791b9cda22cSBarry Smith     y[10*i+2] += sum3;
1792b9cda22cSBarry Smith     y[10*i+3] += sum4;
1793b9cda22cSBarry Smith     y[10*i+4] += sum5;
1794b9cda22cSBarry Smith     y[10*i+5] += sum6;
1795b9cda22cSBarry Smith     y[10*i+6] += sum7;
1796b9cda22cSBarry Smith     y[10*i+7] += sum8;
1797b9cda22cSBarry Smith     y[10*i+8] += sum9;
1798b9cda22cSBarry Smith     y[10*i+9] += sum10;
1799b9cda22cSBarry Smith   }
1800b9cda22cSBarry Smith 
1801dc0b31edSSatish Balay   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
18023649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1803b9cda22cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1804b9cda22cSBarry Smith   PetscFunctionReturn(0);
1805b9cda22cSBarry Smith }
1806b9cda22cSBarry Smith 
1807b9cda22cSBarry Smith #undef __FUNCT__
1808b9cda22cSBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_10"
1809b9cda22cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1810b9cda22cSBarry Smith {
1811b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1812b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1813d2840e09SBarry Smith   const PetscScalar *x,*v;
1814d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1815b9cda22cSBarry Smith   PetscErrorCode    ierr;
1816d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1817d2840e09SBarry Smith   PetscInt          n,i;
1818b9cda22cSBarry Smith 
1819b9cda22cSBarry Smith   PetscFunctionBegin;
1820d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
18213649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1822b9cda22cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1823b9cda22cSBarry Smith 
1824b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1825b9cda22cSBarry Smith     idx    = a->j + a->i[i] ;
1826b9cda22cSBarry Smith     v      = a->a + a->i[i] ;
1827b9cda22cSBarry Smith     n      = a->i[i+1] - a->i[i];
1828e29fdc22SBarry Smith     alpha1 = x[10*i];
1829e29fdc22SBarry Smith     alpha2 = x[10*i+1];
1830e29fdc22SBarry Smith     alpha3 = x[10*i+2];
1831e29fdc22SBarry Smith     alpha4 = x[10*i+3];
1832e29fdc22SBarry Smith     alpha5 = x[10*i+4];
1833e29fdc22SBarry Smith     alpha6 = x[10*i+5];
1834e29fdc22SBarry Smith     alpha7 = x[10*i+6];
1835e29fdc22SBarry Smith     alpha8 = x[10*i+7];
1836e29fdc22SBarry Smith     alpha9 = x[10*i+8];
1837b9cda22cSBarry Smith     alpha10 = x[10*i+9];
1838b9cda22cSBarry Smith     while (n-->0) {
1839e29fdc22SBarry Smith       y[10*(*idx)]   += alpha1*(*v);
1840e29fdc22SBarry Smith       y[10*(*idx)+1] += alpha2*(*v);
1841e29fdc22SBarry Smith       y[10*(*idx)+2] += alpha3*(*v);
1842e29fdc22SBarry Smith       y[10*(*idx)+3] += alpha4*(*v);
1843e29fdc22SBarry Smith       y[10*(*idx)+4] += alpha5*(*v);
1844e29fdc22SBarry Smith       y[10*(*idx)+5] += alpha6*(*v);
1845e29fdc22SBarry Smith       y[10*(*idx)+6] += alpha7*(*v);
1846e29fdc22SBarry Smith       y[10*(*idx)+7] += alpha8*(*v);
1847e29fdc22SBarry Smith       y[10*(*idx)+8] += alpha9*(*v);
1848b9cda22cSBarry Smith       y[10*(*idx)+9] += alpha10*(*v);
1849b9cda22cSBarry Smith       idx++; v++;
1850b9cda22cSBarry Smith     }
1851b9cda22cSBarry Smith   }
1852dc0b31edSSatish Balay   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
18533649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1854b9cda22cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1855b9cda22cSBarry Smith   PetscFunctionReturn(0);
1856b9cda22cSBarry Smith }
1857b9cda22cSBarry Smith 
1858b9cda22cSBarry Smith #undef __FUNCT__
1859b9cda22cSBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_10"
1860b9cda22cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1861b9cda22cSBarry Smith {
1862b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1863b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1864d2840e09SBarry Smith   const PetscScalar *x,*v;
1865d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1866b9cda22cSBarry Smith   PetscErrorCode    ierr;
1867d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1868d2840e09SBarry Smith   PetscInt          n,i;
1869b9cda22cSBarry Smith 
1870b9cda22cSBarry Smith   PetscFunctionBegin;
1871b9cda22cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
18723649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1873b9cda22cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1874b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1875b9cda22cSBarry Smith     idx    = a->j + a->i[i] ;
1876b9cda22cSBarry Smith     v      = a->a + a->i[i] ;
1877b9cda22cSBarry Smith     n      = a->i[i+1] - a->i[i];
1878b9cda22cSBarry Smith     alpha1 = x[10*i];
1879b9cda22cSBarry Smith     alpha2 = x[10*i+1];
1880b9cda22cSBarry Smith     alpha3 = x[10*i+2];
1881b9cda22cSBarry Smith     alpha4 = x[10*i+3];
1882b9cda22cSBarry Smith     alpha5 = x[10*i+4];
1883b9cda22cSBarry Smith     alpha6 = x[10*i+5];
1884b9cda22cSBarry Smith     alpha7 = x[10*i+6];
1885b9cda22cSBarry Smith     alpha8 = x[10*i+7];
1886b9cda22cSBarry Smith     alpha9 = x[10*i+8];
1887b9cda22cSBarry Smith     alpha10 = x[10*i+9];
1888b9cda22cSBarry Smith     while (n-->0) {
1889b9cda22cSBarry Smith       y[10*(*idx)]   += alpha1*(*v);
1890b9cda22cSBarry Smith       y[10*(*idx)+1] += alpha2*(*v);
1891b9cda22cSBarry Smith       y[10*(*idx)+2] += alpha3*(*v);
1892b9cda22cSBarry Smith       y[10*(*idx)+3] += alpha4*(*v);
1893b9cda22cSBarry Smith       y[10*(*idx)+4] += alpha5*(*v);
1894b9cda22cSBarry Smith       y[10*(*idx)+5] += alpha6*(*v);
1895b9cda22cSBarry Smith       y[10*(*idx)+6] += alpha7*(*v);
1896b9cda22cSBarry Smith       y[10*(*idx)+7] += alpha8*(*v);
1897b9cda22cSBarry Smith       y[10*(*idx)+8] += alpha9*(*v);
1898b9cda22cSBarry Smith       y[10*(*idx)+9] += alpha10*(*v);
1899b9cda22cSBarry Smith       idx++; v++;
1900b9cda22cSBarry Smith     }
1901b9cda22cSBarry Smith   }
1902dc0b31edSSatish Balay   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
19033649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1904b9cda22cSBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1905b9cda22cSBarry Smith   PetscFunctionReturn(0);
1906b9cda22cSBarry Smith }
1907b9cda22cSBarry Smith 
19082b692628SMatthew Knepley 
19092f7816d4SBarry Smith /*--------------------------------------------------------------------------------------------*/
19102f7816d4SBarry Smith #undef __FUNCT__
1911dbdd7285SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_11"
1912dbdd7285SBarry Smith PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1913dbdd7285SBarry Smith {
1914dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1915dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1916d2840e09SBarry Smith   const PetscScalar *x,*v;
1917d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1918dbdd7285SBarry Smith   PetscErrorCode    ierr;
1919d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1920d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
1921dbdd7285SBarry Smith 
1922dbdd7285SBarry Smith   PetscFunctionBegin;
19233649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1924dbdd7285SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1925dbdd7285SBarry Smith   idx  = a->j;
1926dbdd7285SBarry Smith   v    = a->a;
1927dbdd7285SBarry Smith   ii   = a->i;
1928dbdd7285SBarry Smith 
1929dbdd7285SBarry Smith   for (i=0; i<m; i++) {
1930dbdd7285SBarry Smith     jrow = ii[i];
1931dbdd7285SBarry Smith     n    = ii[i+1] - jrow;
1932dbdd7285SBarry Smith     sum1  = 0.0;
1933dbdd7285SBarry Smith     sum2  = 0.0;
1934dbdd7285SBarry Smith     sum3  = 0.0;
1935dbdd7285SBarry Smith     sum4  = 0.0;
1936dbdd7285SBarry Smith     sum5  = 0.0;
1937dbdd7285SBarry Smith     sum6  = 0.0;
1938dbdd7285SBarry Smith     sum7  = 0.0;
1939dbdd7285SBarry Smith     sum8  = 0.0;
1940dbdd7285SBarry Smith     sum9  = 0.0;
1941dbdd7285SBarry Smith     sum10 = 0.0;
1942dbdd7285SBarry Smith     sum11 = 0.0;
1943dbdd7285SBarry Smith     nonzerorow += (n>0);
1944dbdd7285SBarry Smith     for (j=0; j<n; j++) {
1945dbdd7285SBarry Smith       sum1  += v[jrow]*x[11*idx[jrow]];
1946dbdd7285SBarry Smith       sum2  += v[jrow]*x[11*idx[jrow]+1];
1947dbdd7285SBarry Smith       sum3  += v[jrow]*x[11*idx[jrow]+2];
1948dbdd7285SBarry Smith       sum4  += v[jrow]*x[11*idx[jrow]+3];
1949dbdd7285SBarry Smith       sum5  += v[jrow]*x[11*idx[jrow]+4];
1950dbdd7285SBarry Smith       sum6  += v[jrow]*x[11*idx[jrow]+5];
1951dbdd7285SBarry Smith       sum7  += v[jrow]*x[11*idx[jrow]+6];
1952dbdd7285SBarry Smith       sum8  += v[jrow]*x[11*idx[jrow]+7];
1953dbdd7285SBarry Smith       sum9  += v[jrow]*x[11*idx[jrow]+8];
1954dbdd7285SBarry Smith       sum10 += v[jrow]*x[11*idx[jrow]+9];
1955dbdd7285SBarry Smith       sum11 += v[jrow]*x[11*idx[jrow]+10];
1956dbdd7285SBarry Smith       jrow++;
1957dbdd7285SBarry Smith      }
1958dbdd7285SBarry Smith     y[11*i]   = sum1;
1959dbdd7285SBarry Smith     y[11*i+1] = sum2;
1960dbdd7285SBarry Smith     y[11*i+2] = sum3;
1961dbdd7285SBarry Smith     y[11*i+3] = sum4;
1962dbdd7285SBarry Smith     y[11*i+4] = sum5;
1963dbdd7285SBarry Smith     y[11*i+5] = sum6;
1964dbdd7285SBarry Smith     y[11*i+6] = sum7;
1965dbdd7285SBarry Smith     y[11*i+7] = sum8;
1966dbdd7285SBarry Smith     y[11*i+8] = sum9;
1967dbdd7285SBarry Smith     y[11*i+9] = sum10;
1968dbdd7285SBarry Smith     y[11*i+10] = sum11;
1969dbdd7285SBarry Smith   }
1970dbdd7285SBarry Smith 
1971dbdd7285SBarry Smith   ierr = PetscLogFlops(22*a->nz - 11*nonzerorow);CHKERRQ(ierr);
19723649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1973dbdd7285SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1974dbdd7285SBarry Smith   PetscFunctionReturn(0);
1975dbdd7285SBarry Smith }
1976dbdd7285SBarry Smith 
1977dbdd7285SBarry Smith #undef __FUNCT__
1978dbdd7285SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_11"
1979dbdd7285SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1980dbdd7285SBarry Smith {
1981dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1982dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1983d2840e09SBarry Smith   const PetscScalar *x,*v;
1984d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1985dbdd7285SBarry Smith   PetscErrorCode    ierr;
1986d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1987dbdd7285SBarry Smith   PetscInt          n,i,jrow,j;
1988dbdd7285SBarry Smith 
1989dbdd7285SBarry Smith   PetscFunctionBegin;
1990dbdd7285SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
19913649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1992dbdd7285SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1993dbdd7285SBarry Smith   idx  = a->j;
1994dbdd7285SBarry Smith   v    = a->a;
1995dbdd7285SBarry Smith   ii   = a->i;
1996dbdd7285SBarry Smith 
1997dbdd7285SBarry Smith   for (i=0; i<m; i++) {
1998dbdd7285SBarry Smith     jrow = ii[i];
1999dbdd7285SBarry Smith     n    = ii[i+1] - jrow;
2000dbdd7285SBarry Smith     sum1  = 0.0;
2001dbdd7285SBarry Smith     sum2  = 0.0;
2002dbdd7285SBarry Smith     sum3  = 0.0;
2003dbdd7285SBarry Smith     sum4  = 0.0;
2004dbdd7285SBarry Smith     sum5  = 0.0;
2005dbdd7285SBarry Smith     sum6  = 0.0;
2006dbdd7285SBarry Smith     sum7  = 0.0;
2007dbdd7285SBarry Smith     sum8  = 0.0;
2008dbdd7285SBarry Smith     sum9  = 0.0;
2009dbdd7285SBarry Smith     sum10 = 0.0;
2010dbdd7285SBarry Smith     sum11 = 0.0;
2011dbdd7285SBarry Smith     for (j=0; j<n; j++) {
2012dbdd7285SBarry Smith       sum1  += v[jrow]*x[11*idx[jrow]];
2013dbdd7285SBarry Smith       sum2  += v[jrow]*x[11*idx[jrow]+1];
2014dbdd7285SBarry Smith       sum3  += v[jrow]*x[11*idx[jrow]+2];
2015dbdd7285SBarry Smith       sum4  += v[jrow]*x[11*idx[jrow]+3];
2016dbdd7285SBarry Smith       sum5  += v[jrow]*x[11*idx[jrow]+4];
2017dbdd7285SBarry Smith       sum6  += v[jrow]*x[11*idx[jrow]+5];
2018dbdd7285SBarry Smith       sum7  += v[jrow]*x[11*idx[jrow]+6];
2019dbdd7285SBarry Smith       sum8  += v[jrow]*x[11*idx[jrow]+7];
2020dbdd7285SBarry Smith       sum9  += v[jrow]*x[11*idx[jrow]+8];
2021dbdd7285SBarry Smith       sum10 += v[jrow]*x[11*idx[jrow]+9];
2022dbdd7285SBarry Smith       sum11 += v[jrow]*x[11*idx[jrow]+10];
2023dbdd7285SBarry Smith       jrow++;
2024dbdd7285SBarry Smith      }
2025dbdd7285SBarry Smith     y[11*i]   += sum1;
2026dbdd7285SBarry Smith     y[11*i+1] += sum2;
2027dbdd7285SBarry Smith     y[11*i+2] += sum3;
2028dbdd7285SBarry Smith     y[11*i+3] += sum4;
2029dbdd7285SBarry Smith     y[11*i+4] += sum5;
2030dbdd7285SBarry Smith     y[11*i+5] += sum6;
2031dbdd7285SBarry Smith     y[11*i+6] += sum7;
2032dbdd7285SBarry Smith     y[11*i+7] += sum8;
2033dbdd7285SBarry Smith     y[11*i+8] += sum9;
2034dbdd7285SBarry Smith     y[11*i+9] += sum10;
2035dbdd7285SBarry Smith     y[11*i+10] += sum11;
2036dbdd7285SBarry Smith   }
2037dbdd7285SBarry Smith 
2038dbdd7285SBarry Smith   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
20393649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2040dbdd7285SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2041dbdd7285SBarry Smith   PetscFunctionReturn(0);
2042dbdd7285SBarry Smith }
2043dbdd7285SBarry Smith 
2044dbdd7285SBarry Smith #undef __FUNCT__
2045dbdd7285SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_11"
2046dbdd7285SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
2047dbdd7285SBarry Smith {
2048dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2049dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2050d2840e09SBarry Smith   const PetscScalar *x,*v;
2051d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2052dbdd7285SBarry Smith   PetscErrorCode    ierr;
2053d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2054d2840e09SBarry Smith   PetscInt          n,i;
2055dbdd7285SBarry Smith 
2056dbdd7285SBarry Smith   PetscFunctionBegin;
2057d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
20583649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2059dbdd7285SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2060dbdd7285SBarry Smith 
2061dbdd7285SBarry Smith   for (i=0; i<m; i++) {
2062dbdd7285SBarry Smith     idx    = a->j + a->i[i] ;
2063dbdd7285SBarry Smith     v      = a->a + a->i[i] ;
2064dbdd7285SBarry Smith     n      = a->i[i+1] - a->i[i];
2065dbdd7285SBarry Smith     alpha1 = x[11*i];
2066dbdd7285SBarry Smith     alpha2 = x[11*i+1];
2067dbdd7285SBarry Smith     alpha3 = x[11*i+2];
2068dbdd7285SBarry Smith     alpha4 = x[11*i+3];
2069dbdd7285SBarry Smith     alpha5 = x[11*i+4];
2070dbdd7285SBarry Smith     alpha6 = x[11*i+5];
2071dbdd7285SBarry Smith     alpha7 = x[11*i+6];
2072dbdd7285SBarry Smith     alpha8 = x[11*i+7];
2073dbdd7285SBarry Smith     alpha9 = x[11*i+8];
2074dbdd7285SBarry Smith     alpha10 = x[11*i+9];
2075dbdd7285SBarry Smith     alpha11 = x[11*i+10];
2076dbdd7285SBarry Smith     while (n-->0) {
2077dbdd7285SBarry Smith       y[11*(*idx)]   += alpha1*(*v);
2078dbdd7285SBarry Smith       y[11*(*idx)+1] += alpha2*(*v);
2079dbdd7285SBarry Smith       y[11*(*idx)+2] += alpha3*(*v);
2080dbdd7285SBarry Smith       y[11*(*idx)+3] += alpha4*(*v);
2081dbdd7285SBarry Smith       y[11*(*idx)+4] += alpha5*(*v);
2082dbdd7285SBarry Smith       y[11*(*idx)+5] += alpha6*(*v);
2083dbdd7285SBarry Smith       y[11*(*idx)+6] += alpha7*(*v);
2084dbdd7285SBarry Smith       y[11*(*idx)+7] += alpha8*(*v);
2085dbdd7285SBarry Smith       y[11*(*idx)+8] += alpha9*(*v);
2086dbdd7285SBarry Smith       y[11*(*idx)+9] += alpha10*(*v);
2087dbdd7285SBarry Smith       y[11*(*idx)+10] += alpha11*(*v);
2088dbdd7285SBarry Smith       idx++; v++;
2089dbdd7285SBarry Smith     }
2090dbdd7285SBarry Smith   }
2091dbdd7285SBarry Smith   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
20923649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2093dbdd7285SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2094dbdd7285SBarry Smith   PetscFunctionReturn(0);
2095dbdd7285SBarry Smith }
2096dbdd7285SBarry Smith 
2097dbdd7285SBarry Smith #undef __FUNCT__
2098dbdd7285SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_11"
2099dbdd7285SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2100dbdd7285SBarry Smith {
2101dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2102dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2103d2840e09SBarry Smith   const PetscScalar *x,*v;
2104d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2105dbdd7285SBarry Smith   PetscErrorCode    ierr;
2106d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2107d2840e09SBarry Smith   PetscInt          n,i;
2108dbdd7285SBarry Smith 
2109dbdd7285SBarry Smith   PetscFunctionBegin;
2110dbdd7285SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
21113649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2112dbdd7285SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2113dbdd7285SBarry Smith   for (i=0; i<m; i++) {
2114dbdd7285SBarry Smith     idx    = a->j + a->i[i] ;
2115dbdd7285SBarry Smith     v      = a->a + a->i[i] ;
2116dbdd7285SBarry Smith     n      = a->i[i+1] - a->i[i];
2117dbdd7285SBarry Smith     alpha1 = x[11*i];
2118dbdd7285SBarry Smith     alpha2 = x[11*i+1];
2119dbdd7285SBarry Smith     alpha3 = x[11*i+2];
2120dbdd7285SBarry Smith     alpha4 = x[11*i+3];
2121dbdd7285SBarry Smith     alpha5 = x[11*i+4];
2122dbdd7285SBarry Smith     alpha6 = x[11*i+5];
2123dbdd7285SBarry Smith     alpha7 = x[11*i+6];
2124dbdd7285SBarry Smith     alpha8 = x[11*i+7];
2125dbdd7285SBarry Smith     alpha9 = x[11*i+8];
2126dbdd7285SBarry Smith     alpha10 = x[11*i+9];
2127dbdd7285SBarry Smith     alpha11 = x[11*i+10];
2128dbdd7285SBarry Smith     while (n-->0) {
2129dbdd7285SBarry Smith       y[11*(*idx)]   += alpha1*(*v);
2130dbdd7285SBarry Smith       y[11*(*idx)+1] += alpha2*(*v);
2131dbdd7285SBarry Smith       y[11*(*idx)+2] += alpha3*(*v);
2132dbdd7285SBarry Smith       y[11*(*idx)+3] += alpha4*(*v);
2133dbdd7285SBarry Smith       y[11*(*idx)+4] += alpha5*(*v);
2134dbdd7285SBarry Smith       y[11*(*idx)+5] += alpha6*(*v);
2135dbdd7285SBarry Smith       y[11*(*idx)+6] += alpha7*(*v);
2136dbdd7285SBarry Smith       y[11*(*idx)+7] += alpha8*(*v);
2137dbdd7285SBarry Smith       y[11*(*idx)+8] += alpha9*(*v);
2138dbdd7285SBarry Smith       y[11*(*idx)+9] += alpha10*(*v);
2139dbdd7285SBarry Smith       y[11*(*idx)+10] += alpha11*(*v);
2140dbdd7285SBarry Smith       idx++; v++;
2141dbdd7285SBarry Smith     }
2142dbdd7285SBarry Smith   }
2143dbdd7285SBarry Smith   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
21443649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2145dbdd7285SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2146dbdd7285SBarry Smith   PetscFunctionReturn(0);
2147dbdd7285SBarry Smith }
2148dbdd7285SBarry Smith 
2149dbdd7285SBarry Smith 
2150dbdd7285SBarry Smith /*--------------------------------------------------------------------------------------------*/
2151dbdd7285SBarry Smith #undef __FUNCT__
21522f7816d4SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_16"
2153dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
21542f7816d4SBarry Smith {
21552f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
21562f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2157d2840e09SBarry Smith   const PetscScalar *x,*v;
2158d2840e09SBarry Smith   PetscScalar        *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
21592f7816d4SBarry Smith   PetscScalar        sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2160dfbe8321SBarry Smith   PetscErrorCode     ierr;
2161d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n,*idx,*ii;
2162d2840e09SBarry Smith   PetscInt           nonzerorow=0,n,i,jrow,j;
21632f7816d4SBarry Smith 
21642f7816d4SBarry Smith   PetscFunctionBegin;
21653649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
21661ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
21672f7816d4SBarry Smith   idx  = a->j;
21682f7816d4SBarry Smith   v    = a->a;
21692f7816d4SBarry Smith   ii   = a->i;
21702f7816d4SBarry Smith 
21712f7816d4SBarry Smith   for (i=0; i<m; i++) {
21722f7816d4SBarry Smith     jrow = ii[i];
21732f7816d4SBarry Smith     n    = ii[i+1] - jrow;
21742f7816d4SBarry Smith     sum1  = 0.0;
21752f7816d4SBarry Smith     sum2  = 0.0;
21762f7816d4SBarry Smith     sum3  = 0.0;
21772f7816d4SBarry Smith     sum4  = 0.0;
21782f7816d4SBarry Smith     sum5  = 0.0;
21792f7816d4SBarry Smith     sum6  = 0.0;
21802f7816d4SBarry Smith     sum7  = 0.0;
21812f7816d4SBarry Smith     sum8  = 0.0;
21822f7816d4SBarry Smith     sum9  = 0.0;
21832f7816d4SBarry Smith     sum10 = 0.0;
21842f7816d4SBarry Smith     sum11 = 0.0;
21852f7816d4SBarry Smith     sum12 = 0.0;
21862f7816d4SBarry Smith     sum13 = 0.0;
21872f7816d4SBarry Smith     sum14 = 0.0;
21882f7816d4SBarry Smith     sum15 = 0.0;
21892f7816d4SBarry Smith     sum16 = 0.0;
2190b3c51c66SMatthew Knepley     nonzerorow += (n>0);
21912f7816d4SBarry Smith     for (j=0; j<n; j++) {
21922f7816d4SBarry Smith       sum1  += v[jrow]*x[16*idx[jrow]];
21932f7816d4SBarry Smith       sum2  += v[jrow]*x[16*idx[jrow]+1];
21942f7816d4SBarry Smith       sum3  += v[jrow]*x[16*idx[jrow]+2];
21952f7816d4SBarry Smith       sum4  += v[jrow]*x[16*idx[jrow]+3];
21962f7816d4SBarry Smith       sum5  += v[jrow]*x[16*idx[jrow]+4];
21972f7816d4SBarry Smith       sum6  += v[jrow]*x[16*idx[jrow]+5];
21982f7816d4SBarry Smith       sum7  += v[jrow]*x[16*idx[jrow]+6];
21992f7816d4SBarry Smith       sum8  += v[jrow]*x[16*idx[jrow]+7];
22002f7816d4SBarry Smith       sum9  += v[jrow]*x[16*idx[jrow]+8];
22012f7816d4SBarry Smith       sum10 += v[jrow]*x[16*idx[jrow]+9];
22022f7816d4SBarry Smith       sum11 += v[jrow]*x[16*idx[jrow]+10];
22032f7816d4SBarry Smith       sum12 += v[jrow]*x[16*idx[jrow]+11];
22042f7816d4SBarry Smith       sum13 += v[jrow]*x[16*idx[jrow]+12];
22052f7816d4SBarry Smith       sum14 += v[jrow]*x[16*idx[jrow]+13];
22062f7816d4SBarry Smith       sum15 += v[jrow]*x[16*idx[jrow]+14];
22072f7816d4SBarry Smith       sum16 += v[jrow]*x[16*idx[jrow]+15];
22082f7816d4SBarry Smith       jrow++;
22092f7816d4SBarry Smith      }
22102f7816d4SBarry Smith     y[16*i]    = sum1;
22112f7816d4SBarry Smith     y[16*i+1]  = sum2;
22122f7816d4SBarry Smith     y[16*i+2]  = sum3;
22132f7816d4SBarry Smith     y[16*i+3]  = sum4;
22142f7816d4SBarry Smith     y[16*i+4]  = sum5;
22152f7816d4SBarry Smith     y[16*i+5]  = sum6;
22162f7816d4SBarry Smith     y[16*i+6]  = sum7;
22172f7816d4SBarry Smith     y[16*i+7]  = sum8;
22182f7816d4SBarry Smith     y[16*i+8]  = sum9;
22192f7816d4SBarry Smith     y[16*i+9]  = sum10;
22202f7816d4SBarry Smith     y[16*i+10] = sum11;
22212f7816d4SBarry Smith     y[16*i+11] = sum12;
22222f7816d4SBarry Smith     y[16*i+12] = sum13;
22232f7816d4SBarry Smith     y[16*i+13] = sum14;
22242f7816d4SBarry Smith     y[16*i+14] = sum15;
22252f7816d4SBarry Smith     y[16*i+15] = sum16;
22262f7816d4SBarry Smith   }
22272f7816d4SBarry Smith 
2228dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr);
22293649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
22301ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
22312f7816d4SBarry Smith   PetscFunctionReturn(0);
22322f7816d4SBarry Smith }
22332f7816d4SBarry Smith 
22342f7816d4SBarry Smith #undef __FUNCT__
22352f7816d4SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16"
2236dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
22372f7816d4SBarry Smith {
22382f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
22392f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2240d2840e09SBarry Smith   const PetscScalar *x,*v;
2241d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
22422f7816d4SBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2243dfbe8321SBarry Smith   PetscErrorCode    ierr;
2244d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2245d2840e09SBarry Smith   PetscInt          n,i;
22462f7816d4SBarry Smith 
22472f7816d4SBarry Smith   PetscFunctionBegin;
2248d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
22493649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
22501ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2251bfec09a0SHong Zhang 
22522f7816d4SBarry Smith   for (i=0; i<m; i++) {
2253bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
2254bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
22552f7816d4SBarry Smith     n      = a->i[i+1] - a->i[i];
22562f7816d4SBarry Smith     alpha1  = x[16*i];
22572f7816d4SBarry Smith     alpha2  = x[16*i+1];
22582f7816d4SBarry Smith     alpha3  = x[16*i+2];
22592f7816d4SBarry Smith     alpha4  = x[16*i+3];
22602f7816d4SBarry Smith     alpha5  = x[16*i+4];
22612f7816d4SBarry Smith     alpha6  = x[16*i+5];
22622f7816d4SBarry Smith     alpha7  = x[16*i+6];
22632f7816d4SBarry Smith     alpha8  = x[16*i+7];
22642f7816d4SBarry Smith     alpha9  = x[16*i+8];
22652f7816d4SBarry Smith     alpha10 = x[16*i+9];
22662f7816d4SBarry Smith     alpha11 = x[16*i+10];
22672f7816d4SBarry Smith     alpha12 = x[16*i+11];
22682f7816d4SBarry Smith     alpha13 = x[16*i+12];
22692f7816d4SBarry Smith     alpha14 = x[16*i+13];
22702f7816d4SBarry Smith     alpha15 = x[16*i+14];
22712f7816d4SBarry Smith     alpha16 = x[16*i+15];
22722f7816d4SBarry Smith     while (n-->0) {
22732f7816d4SBarry Smith       y[16*(*idx)]    += alpha1*(*v);
22742f7816d4SBarry Smith       y[16*(*idx)+1]  += alpha2*(*v);
22752f7816d4SBarry Smith       y[16*(*idx)+2]  += alpha3*(*v);
22762f7816d4SBarry Smith       y[16*(*idx)+3]  += alpha4*(*v);
22772f7816d4SBarry Smith       y[16*(*idx)+4]  += alpha5*(*v);
22782f7816d4SBarry Smith       y[16*(*idx)+5]  += alpha6*(*v);
22792f7816d4SBarry Smith       y[16*(*idx)+6]  += alpha7*(*v);
22802f7816d4SBarry Smith       y[16*(*idx)+7]  += alpha8*(*v);
22812f7816d4SBarry Smith       y[16*(*idx)+8]  += alpha9*(*v);
22822f7816d4SBarry Smith       y[16*(*idx)+9]  += alpha10*(*v);
22832f7816d4SBarry Smith       y[16*(*idx)+10] += alpha11*(*v);
22842f7816d4SBarry Smith       y[16*(*idx)+11] += alpha12*(*v);
22852f7816d4SBarry Smith       y[16*(*idx)+12] += alpha13*(*v);
22862f7816d4SBarry Smith       y[16*(*idx)+13] += alpha14*(*v);
22872f7816d4SBarry Smith       y[16*(*idx)+14] += alpha15*(*v);
22882f7816d4SBarry Smith       y[16*(*idx)+15] += alpha16*(*v);
22892f7816d4SBarry Smith       idx++; v++;
22902f7816d4SBarry Smith     }
22912f7816d4SBarry Smith   }
2292dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
22933649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
22941ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
22952f7816d4SBarry Smith   PetscFunctionReturn(0);
22962f7816d4SBarry Smith }
22972f7816d4SBarry Smith 
22982f7816d4SBarry Smith #undef __FUNCT__
22992f7816d4SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_16"
2300dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
23012f7816d4SBarry Smith {
23022f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
23032f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2304d2840e09SBarry Smith   const PetscScalar *x,*v;
2305d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
23062f7816d4SBarry Smith   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2307dfbe8321SBarry Smith   PetscErrorCode    ierr;
2308d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2309b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
23102f7816d4SBarry Smith 
23112f7816d4SBarry Smith   PetscFunctionBegin;
23122f7816d4SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
23133649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
23141ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
23152f7816d4SBarry Smith   idx  = a->j;
23162f7816d4SBarry Smith   v    = a->a;
23172f7816d4SBarry Smith   ii   = a->i;
23182f7816d4SBarry Smith 
23192f7816d4SBarry Smith   for (i=0; i<m; i++) {
23202f7816d4SBarry Smith     jrow = ii[i];
23212f7816d4SBarry Smith     n    = ii[i+1] - jrow;
23222f7816d4SBarry Smith     sum1  = 0.0;
23232f7816d4SBarry Smith     sum2  = 0.0;
23242f7816d4SBarry Smith     sum3  = 0.0;
23252f7816d4SBarry Smith     sum4  = 0.0;
23262f7816d4SBarry Smith     sum5  = 0.0;
23272f7816d4SBarry Smith     sum6  = 0.0;
23282f7816d4SBarry Smith     sum7  = 0.0;
23292f7816d4SBarry Smith     sum8  = 0.0;
23302f7816d4SBarry Smith     sum9  = 0.0;
23312f7816d4SBarry Smith     sum10 = 0.0;
23322f7816d4SBarry Smith     sum11 = 0.0;
23332f7816d4SBarry Smith     sum12 = 0.0;
23342f7816d4SBarry Smith     sum13 = 0.0;
23352f7816d4SBarry Smith     sum14 = 0.0;
23362f7816d4SBarry Smith     sum15 = 0.0;
23372f7816d4SBarry Smith     sum16 = 0.0;
23382f7816d4SBarry Smith     for (j=0; j<n; j++) {
23392f7816d4SBarry Smith       sum1  += v[jrow]*x[16*idx[jrow]];
23402f7816d4SBarry Smith       sum2  += v[jrow]*x[16*idx[jrow]+1];
23412f7816d4SBarry Smith       sum3  += v[jrow]*x[16*idx[jrow]+2];
23422f7816d4SBarry Smith       sum4  += v[jrow]*x[16*idx[jrow]+3];
23432f7816d4SBarry Smith       sum5  += v[jrow]*x[16*idx[jrow]+4];
23442f7816d4SBarry Smith       sum6  += v[jrow]*x[16*idx[jrow]+5];
23452f7816d4SBarry Smith       sum7  += v[jrow]*x[16*idx[jrow]+6];
23462f7816d4SBarry Smith       sum8  += v[jrow]*x[16*idx[jrow]+7];
23472f7816d4SBarry Smith       sum9  += v[jrow]*x[16*idx[jrow]+8];
23482f7816d4SBarry Smith       sum10 += v[jrow]*x[16*idx[jrow]+9];
23492f7816d4SBarry Smith       sum11 += v[jrow]*x[16*idx[jrow]+10];
23502f7816d4SBarry Smith       sum12 += v[jrow]*x[16*idx[jrow]+11];
23512f7816d4SBarry Smith       sum13 += v[jrow]*x[16*idx[jrow]+12];
23522f7816d4SBarry Smith       sum14 += v[jrow]*x[16*idx[jrow]+13];
23532f7816d4SBarry Smith       sum15 += v[jrow]*x[16*idx[jrow]+14];
23542f7816d4SBarry Smith       sum16 += v[jrow]*x[16*idx[jrow]+15];
23552f7816d4SBarry Smith       jrow++;
23562f7816d4SBarry Smith      }
23572f7816d4SBarry Smith     y[16*i]    += sum1;
23582f7816d4SBarry Smith     y[16*i+1]  += sum2;
23592f7816d4SBarry Smith     y[16*i+2]  += sum3;
23602f7816d4SBarry Smith     y[16*i+3]  += sum4;
23612f7816d4SBarry Smith     y[16*i+4]  += sum5;
23622f7816d4SBarry Smith     y[16*i+5]  += sum6;
23632f7816d4SBarry Smith     y[16*i+6]  += sum7;
23642f7816d4SBarry Smith     y[16*i+7]  += sum8;
23652f7816d4SBarry Smith     y[16*i+8]  += sum9;
23662f7816d4SBarry Smith     y[16*i+9]  += sum10;
23672f7816d4SBarry Smith     y[16*i+10] += sum11;
23682f7816d4SBarry Smith     y[16*i+11] += sum12;
23692f7816d4SBarry Smith     y[16*i+12] += sum13;
23702f7816d4SBarry Smith     y[16*i+13] += sum14;
23712f7816d4SBarry Smith     y[16*i+14] += sum15;
23722f7816d4SBarry Smith     y[16*i+15] += sum16;
23732f7816d4SBarry Smith   }
23742f7816d4SBarry Smith 
2375dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
23763649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
23771ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
23782f7816d4SBarry Smith   PetscFunctionReturn(0);
23792f7816d4SBarry Smith }
23802f7816d4SBarry Smith 
23812f7816d4SBarry Smith #undef __FUNCT__
23822f7816d4SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16"
2383dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
23842f7816d4SBarry Smith {
23852f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
23862f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2387d2840e09SBarry Smith   const PetscScalar *x,*v;
2388d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
23892f7816d4SBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2390dfbe8321SBarry Smith   PetscErrorCode    ierr;
2391d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2392d2840e09SBarry Smith   PetscInt          n,i;
23932f7816d4SBarry Smith 
23942f7816d4SBarry Smith   PetscFunctionBegin;
23952f7816d4SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
23963649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
23971ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
23982f7816d4SBarry Smith   for (i=0; i<m; i++) {
2399bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
2400bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
24012f7816d4SBarry Smith     n      = a->i[i+1] - a->i[i];
24022f7816d4SBarry Smith     alpha1 = x[16*i];
24032f7816d4SBarry Smith     alpha2 = x[16*i+1];
24042f7816d4SBarry Smith     alpha3 = x[16*i+2];
24052f7816d4SBarry Smith     alpha4 = x[16*i+3];
24062f7816d4SBarry Smith     alpha5 = x[16*i+4];
24072f7816d4SBarry Smith     alpha6 = x[16*i+5];
24082f7816d4SBarry Smith     alpha7 = x[16*i+6];
24092f7816d4SBarry Smith     alpha8 = x[16*i+7];
24102f7816d4SBarry Smith     alpha9  = x[16*i+8];
24112f7816d4SBarry Smith     alpha10 = x[16*i+9];
24122f7816d4SBarry Smith     alpha11 = x[16*i+10];
24132f7816d4SBarry Smith     alpha12 = x[16*i+11];
24142f7816d4SBarry Smith     alpha13 = x[16*i+12];
24152f7816d4SBarry Smith     alpha14 = x[16*i+13];
24162f7816d4SBarry Smith     alpha15 = x[16*i+14];
24172f7816d4SBarry Smith     alpha16 = x[16*i+15];
24182f7816d4SBarry Smith     while (n-->0) {
24192f7816d4SBarry Smith       y[16*(*idx)]   += alpha1*(*v);
24202f7816d4SBarry Smith       y[16*(*idx)+1] += alpha2*(*v);
24212f7816d4SBarry Smith       y[16*(*idx)+2] += alpha3*(*v);
24222f7816d4SBarry Smith       y[16*(*idx)+3] += alpha4*(*v);
24232f7816d4SBarry Smith       y[16*(*idx)+4] += alpha5*(*v);
24242f7816d4SBarry Smith       y[16*(*idx)+5] += alpha6*(*v);
24252f7816d4SBarry Smith       y[16*(*idx)+6] += alpha7*(*v);
24262f7816d4SBarry Smith       y[16*(*idx)+7] += alpha8*(*v);
24272f7816d4SBarry Smith       y[16*(*idx)+8]  += alpha9*(*v);
24282f7816d4SBarry Smith       y[16*(*idx)+9]  += alpha10*(*v);
24292f7816d4SBarry Smith       y[16*(*idx)+10] += alpha11*(*v);
24302f7816d4SBarry Smith       y[16*(*idx)+11] += alpha12*(*v);
24312f7816d4SBarry Smith       y[16*(*idx)+12] += alpha13*(*v);
24322f7816d4SBarry Smith       y[16*(*idx)+13] += alpha14*(*v);
24332f7816d4SBarry Smith       y[16*(*idx)+14] += alpha15*(*v);
24342f7816d4SBarry Smith       y[16*(*idx)+15] += alpha16*(*v);
24352f7816d4SBarry Smith       idx++; v++;
24362f7816d4SBarry Smith     }
24372f7816d4SBarry Smith   }
2438dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
24393649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
24401ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
244166ed3db0SBarry Smith   PetscFunctionReturn(0);
244266ed3db0SBarry Smith }
244366ed3db0SBarry Smith 
2444ed1c418cSBarry Smith /*--------------------------------------------------------------------------------------------*/
2445ed1c418cSBarry Smith #undef __FUNCT__
2446ed1c418cSBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_18"
2447ed1c418cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2448ed1c418cSBarry Smith {
2449ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2450ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2451d2840e09SBarry Smith   const PetscScalar *x,*v;
2452d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2453ed1c418cSBarry Smith   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2454ed1c418cSBarry Smith   PetscErrorCode    ierr;
2455d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2456d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
2457ed1c418cSBarry Smith 
2458ed1c418cSBarry Smith   PetscFunctionBegin;
24593649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2460ed1c418cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2461ed1c418cSBarry Smith   idx  = a->j;
2462ed1c418cSBarry Smith   v    = a->a;
2463ed1c418cSBarry Smith   ii   = a->i;
2464ed1c418cSBarry Smith 
2465ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2466ed1c418cSBarry Smith     jrow = ii[i];
2467ed1c418cSBarry Smith     n    = ii[i+1] - jrow;
2468ed1c418cSBarry Smith     sum1  = 0.0;
2469ed1c418cSBarry Smith     sum2  = 0.0;
2470ed1c418cSBarry Smith     sum3  = 0.0;
2471ed1c418cSBarry Smith     sum4  = 0.0;
2472ed1c418cSBarry Smith     sum5  = 0.0;
2473ed1c418cSBarry Smith     sum6  = 0.0;
2474ed1c418cSBarry Smith     sum7  = 0.0;
2475ed1c418cSBarry Smith     sum8  = 0.0;
2476ed1c418cSBarry Smith     sum9  = 0.0;
2477ed1c418cSBarry Smith     sum10 = 0.0;
2478ed1c418cSBarry Smith     sum11 = 0.0;
2479ed1c418cSBarry Smith     sum12 = 0.0;
2480ed1c418cSBarry Smith     sum13 = 0.0;
2481ed1c418cSBarry Smith     sum14 = 0.0;
2482ed1c418cSBarry Smith     sum15 = 0.0;
2483ed1c418cSBarry Smith     sum16 = 0.0;
2484ed1c418cSBarry Smith     sum17 = 0.0;
2485ed1c418cSBarry Smith     sum18 = 0.0;
2486ed1c418cSBarry Smith     nonzerorow += (n>0);
2487ed1c418cSBarry Smith     for (j=0; j<n; j++) {
2488ed1c418cSBarry Smith       sum1  += v[jrow]*x[18*idx[jrow]];
2489ed1c418cSBarry Smith       sum2  += v[jrow]*x[18*idx[jrow]+1];
2490ed1c418cSBarry Smith       sum3  += v[jrow]*x[18*idx[jrow]+2];
2491ed1c418cSBarry Smith       sum4  += v[jrow]*x[18*idx[jrow]+3];
2492ed1c418cSBarry Smith       sum5  += v[jrow]*x[18*idx[jrow]+4];
2493ed1c418cSBarry Smith       sum6  += v[jrow]*x[18*idx[jrow]+5];
2494ed1c418cSBarry Smith       sum7  += v[jrow]*x[18*idx[jrow]+6];
2495ed1c418cSBarry Smith       sum8  += v[jrow]*x[18*idx[jrow]+7];
2496ed1c418cSBarry Smith       sum9  += v[jrow]*x[18*idx[jrow]+8];
2497ed1c418cSBarry Smith       sum10 += v[jrow]*x[18*idx[jrow]+9];
2498ed1c418cSBarry Smith       sum11 += v[jrow]*x[18*idx[jrow]+10];
2499ed1c418cSBarry Smith       sum12 += v[jrow]*x[18*idx[jrow]+11];
2500ed1c418cSBarry Smith       sum13 += v[jrow]*x[18*idx[jrow]+12];
2501ed1c418cSBarry Smith       sum14 += v[jrow]*x[18*idx[jrow]+13];
2502ed1c418cSBarry Smith       sum15 += v[jrow]*x[18*idx[jrow]+14];
2503ed1c418cSBarry Smith       sum16 += v[jrow]*x[18*idx[jrow]+15];
2504ed1c418cSBarry Smith       sum17 += v[jrow]*x[18*idx[jrow]+16];
2505ed1c418cSBarry Smith       sum18 += v[jrow]*x[18*idx[jrow]+17];
2506ed1c418cSBarry Smith       jrow++;
2507ed1c418cSBarry Smith      }
2508ed1c418cSBarry Smith     y[18*i]    = sum1;
2509ed1c418cSBarry Smith     y[18*i+1]  = sum2;
2510ed1c418cSBarry Smith     y[18*i+2]  = sum3;
2511ed1c418cSBarry Smith     y[18*i+3]  = sum4;
2512ed1c418cSBarry Smith     y[18*i+4]  = sum5;
2513ed1c418cSBarry Smith     y[18*i+5]  = sum6;
2514ed1c418cSBarry Smith     y[18*i+6]  = sum7;
2515ed1c418cSBarry Smith     y[18*i+7]  = sum8;
2516ed1c418cSBarry Smith     y[18*i+8]  = sum9;
2517ed1c418cSBarry Smith     y[18*i+9]  = sum10;
2518ed1c418cSBarry Smith     y[18*i+10] = sum11;
2519ed1c418cSBarry Smith     y[18*i+11] = sum12;
2520ed1c418cSBarry Smith     y[18*i+12] = sum13;
2521ed1c418cSBarry Smith     y[18*i+13] = sum14;
2522ed1c418cSBarry Smith     y[18*i+14] = sum15;
2523ed1c418cSBarry Smith     y[18*i+15] = sum16;
2524ed1c418cSBarry Smith     y[18*i+16] = sum17;
2525ed1c418cSBarry Smith     y[18*i+17] = sum18;
2526ed1c418cSBarry Smith   }
2527ed1c418cSBarry Smith 
2528dc0b31edSSatish Balay   ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr);
25293649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2530ed1c418cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2531ed1c418cSBarry Smith   PetscFunctionReturn(0);
2532ed1c418cSBarry Smith }
2533ed1c418cSBarry Smith 
2534ed1c418cSBarry Smith #undef __FUNCT__
2535ed1c418cSBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_18"
2536ed1c418cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2537ed1c418cSBarry Smith {
2538ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2539ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2540d2840e09SBarry Smith   const PetscScalar *x,*v;
2541d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2542ed1c418cSBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2543ed1c418cSBarry Smith   PetscErrorCode    ierr;
2544d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2545d2840e09SBarry Smith   PetscInt          n,i;
2546ed1c418cSBarry Smith 
2547ed1c418cSBarry Smith   PetscFunctionBegin;
2548d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
25493649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2550ed1c418cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2551ed1c418cSBarry Smith 
2552ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2553ed1c418cSBarry Smith     idx    = a->j + a->i[i] ;
2554ed1c418cSBarry Smith     v      = a->a + a->i[i] ;
2555ed1c418cSBarry Smith     n      = a->i[i+1] - a->i[i];
2556ed1c418cSBarry Smith     alpha1  = x[18*i];
2557ed1c418cSBarry Smith     alpha2  = x[18*i+1];
2558ed1c418cSBarry Smith     alpha3  = x[18*i+2];
2559ed1c418cSBarry Smith     alpha4  = x[18*i+3];
2560ed1c418cSBarry Smith     alpha5  = x[18*i+4];
2561ed1c418cSBarry Smith     alpha6  = x[18*i+5];
2562ed1c418cSBarry Smith     alpha7  = x[18*i+6];
2563ed1c418cSBarry Smith     alpha8  = x[18*i+7];
2564ed1c418cSBarry Smith     alpha9  = x[18*i+8];
2565ed1c418cSBarry Smith     alpha10 = x[18*i+9];
2566ed1c418cSBarry Smith     alpha11 = x[18*i+10];
2567ed1c418cSBarry Smith     alpha12 = x[18*i+11];
2568ed1c418cSBarry Smith     alpha13 = x[18*i+12];
2569ed1c418cSBarry Smith     alpha14 = x[18*i+13];
2570ed1c418cSBarry Smith     alpha15 = x[18*i+14];
2571ed1c418cSBarry Smith     alpha16 = x[18*i+15];
2572ed1c418cSBarry Smith     alpha17 = x[18*i+16];
2573ed1c418cSBarry Smith     alpha18 = x[18*i+17];
2574ed1c418cSBarry Smith     while (n-->0) {
2575ed1c418cSBarry Smith       y[18*(*idx)]    += alpha1*(*v);
2576ed1c418cSBarry Smith       y[18*(*idx)+1]  += alpha2*(*v);
2577ed1c418cSBarry Smith       y[18*(*idx)+2]  += alpha3*(*v);
2578ed1c418cSBarry Smith       y[18*(*idx)+3]  += alpha4*(*v);
2579ed1c418cSBarry Smith       y[18*(*idx)+4]  += alpha5*(*v);
2580ed1c418cSBarry Smith       y[18*(*idx)+5]  += alpha6*(*v);
2581ed1c418cSBarry Smith       y[18*(*idx)+6]  += alpha7*(*v);
2582ed1c418cSBarry Smith       y[18*(*idx)+7]  += alpha8*(*v);
2583ed1c418cSBarry Smith       y[18*(*idx)+8]  += alpha9*(*v);
2584ed1c418cSBarry Smith       y[18*(*idx)+9]  += alpha10*(*v);
2585ed1c418cSBarry Smith       y[18*(*idx)+10] += alpha11*(*v);
2586ed1c418cSBarry Smith       y[18*(*idx)+11] += alpha12*(*v);
2587ed1c418cSBarry Smith       y[18*(*idx)+12] += alpha13*(*v);
2588ed1c418cSBarry Smith       y[18*(*idx)+13] += alpha14*(*v);
2589ed1c418cSBarry Smith       y[18*(*idx)+14] += alpha15*(*v);
2590ed1c418cSBarry Smith       y[18*(*idx)+15] += alpha16*(*v);
2591ed1c418cSBarry Smith       y[18*(*idx)+16] += alpha17*(*v);
2592ed1c418cSBarry Smith       y[18*(*idx)+17] += alpha18*(*v);
2593ed1c418cSBarry Smith       idx++; v++;
2594ed1c418cSBarry Smith     }
2595ed1c418cSBarry Smith   }
2596dc0b31edSSatish Balay   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
25973649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2598ed1c418cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2599ed1c418cSBarry Smith   PetscFunctionReturn(0);
2600ed1c418cSBarry Smith }
2601ed1c418cSBarry Smith 
2602ed1c418cSBarry Smith #undef __FUNCT__
2603ed1c418cSBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_18"
2604ed1c418cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2605ed1c418cSBarry Smith {
2606ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2607ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2608d2840e09SBarry Smith   const PetscScalar *x,*v;
2609d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2610ed1c418cSBarry Smith   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2611ed1c418cSBarry Smith   PetscErrorCode    ierr;
2612d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2613ed1c418cSBarry Smith   PetscInt          n,i,jrow,j;
2614ed1c418cSBarry Smith 
2615ed1c418cSBarry Smith   PetscFunctionBegin;
2616ed1c418cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
26173649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2618ed1c418cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2619ed1c418cSBarry Smith   idx  = a->j;
2620ed1c418cSBarry Smith   v    = a->a;
2621ed1c418cSBarry Smith   ii   = a->i;
2622ed1c418cSBarry Smith 
2623ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2624ed1c418cSBarry Smith     jrow = ii[i];
2625ed1c418cSBarry Smith     n    = ii[i+1] - jrow;
2626ed1c418cSBarry Smith     sum1  = 0.0;
2627ed1c418cSBarry Smith     sum2  = 0.0;
2628ed1c418cSBarry Smith     sum3  = 0.0;
2629ed1c418cSBarry Smith     sum4  = 0.0;
2630ed1c418cSBarry Smith     sum5  = 0.0;
2631ed1c418cSBarry Smith     sum6  = 0.0;
2632ed1c418cSBarry Smith     sum7  = 0.0;
2633ed1c418cSBarry Smith     sum8  = 0.0;
2634ed1c418cSBarry Smith     sum9  = 0.0;
2635ed1c418cSBarry Smith     sum10 = 0.0;
2636ed1c418cSBarry Smith     sum11 = 0.0;
2637ed1c418cSBarry Smith     sum12 = 0.0;
2638ed1c418cSBarry Smith     sum13 = 0.0;
2639ed1c418cSBarry Smith     sum14 = 0.0;
2640ed1c418cSBarry Smith     sum15 = 0.0;
2641ed1c418cSBarry Smith     sum16 = 0.0;
2642ed1c418cSBarry Smith     sum17 = 0.0;
2643ed1c418cSBarry Smith     sum18 = 0.0;
2644ed1c418cSBarry Smith     for (j=0; j<n; j++) {
2645ed1c418cSBarry Smith       sum1  += v[jrow]*x[18*idx[jrow]];
2646ed1c418cSBarry Smith       sum2  += v[jrow]*x[18*idx[jrow]+1];
2647ed1c418cSBarry Smith       sum3  += v[jrow]*x[18*idx[jrow]+2];
2648ed1c418cSBarry Smith       sum4  += v[jrow]*x[18*idx[jrow]+3];
2649ed1c418cSBarry Smith       sum5  += v[jrow]*x[18*idx[jrow]+4];
2650ed1c418cSBarry Smith       sum6  += v[jrow]*x[18*idx[jrow]+5];
2651ed1c418cSBarry Smith       sum7  += v[jrow]*x[18*idx[jrow]+6];
2652ed1c418cSBarry Smith       sum8  += v[jrow]*x[18*idx[jrow]+7];
2653ed1c418cSBarry Smith       sum9  += v[jrow]*x[18*idx[jrow]+8];
2654ed1c418cSBarry Smith       sum10 += v[jrow]*x[18*idx[jrow]+9];
2655ed1c418cSBarry Smith       sum11 += v[jrow]*x[18*idx[jrow]+10];
2656ed1c418cSBarry Smith       sum12 += v[jrow]*x[18*idx[jrow]+11];
2657ed1c418cSBarry Smith       sum13 += v[jrow]*x[18*idx[jrow]+12];
2658ed1c418cSBarry Smith       sum14 += v[jrow]*x[18*idx[jrow]+13];
2659ed1c418cSBarry Smith       sum15 += v[jrow]*x[18*idx[jrow]+14];
2660ed1c418cSBarry Smith       sum16 += v[jrow]*x[18*idx[jrow]+15];
2661ed1c418cSBarry Smith       sum17 += v[jrow]*x[18*idx[jrow]+16];
2662ed1c418cSBarry Smith       sum18 += v[jrow]*x[18*idx[jrow]+17];
2663ed1c418cSBarry Smith       jrow++;
2664ed1c418cSBarry Smith      }
2665ed1c418cSBarry Smith     y[18*i]    += sum1;
2666ed1c418cSBarry Smith     y[18*i+1]  += sum2;
2667ed1c418cSBarry Smith     y[18*i+2]  += sum3;
2668ed1c418cSBarry Smith     y[18*i+3]  += sum4;
2669ed1c418cSBarry Smith     y[18*i+4]  += sum5;
2670ed1c418cSBarry Smith     y[18*i+5]  += sum6;
2671ed1c418cSBarry Smith     y[18*i+6]  += sum7;
2672ed1c418cSBarry Smith     y[18*i+7]  += sum8;
2673ed1c418cSBarry Smith     y[18*i+8]  += sum9;
2674ed1c418cSBarry Smith     y[18*i+9]  += sum10;
2675ed1c418cSBarry Smith     y[18*i+10] += sum11;
2676ed1c418cSBarry Smith     y[18*i+11] += sum12;
2677ed1c418cSBarry Smith     y[18*i+12] += sum13;
2678ed1c418cSBarry Smith     y[18*i+13] += sum14;
2679ed1c418cSBarry Smith     y[18*i+14] += sum15;
2680ed1c418cSBarry Smith     y[18*i+15] += sum16;
2681ed1c418cSBarry Smith     y[18*i+16] += sum17;
2682ed1c418cSBarry Smith     y[18*i+17] += sum18;
2683ed1c418cSBarry Smith   }
2684ed1c418cSBarry Smith 
2685dc0b31edSSatish Balay   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
26863649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2687ed1c418cSBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2688ed1c418cSBarry Smith   PetscFunctionReturn(0);
2689ed1c418cSBarry Smith }
2690ed1c418cSBarry Smith 
2691ed1c418cSBarry Smith #undef __FUNCT__
2692ed1c418cSBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_18"
2693ed1c418cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2694ed1c418cSBarry Smith {
2695ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2696ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2697d2840e09SBarry Smith   const PetscScalar *x,*v;
2698d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2699ed1c418cSBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2700ed1c418cSBarry Smith   PetscErrorCode    ierr;
2701d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2702d2840e09SBarry Smith   PetscInt          n,i;
2703ed1c418cSBarry Smith 
2704ed1c418cSBarry Smith   PetscFunctionBegin;
2705ed1c418cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
27063649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2707ed1c418cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2708ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2709ed1c418cSBarry Smith     idx    = a->j + a->i[i] ;
2710ed1c418cSBarry Smith     v      = a->a + a->i[i] ;
2711ed1c418cSBarry Smith     n      = a->i[i+1] - a->i[i];
2712ed1c418cSBarry Smith     alpha1 = x[18*i];
2713ed1c418cSBarry Smith     alpha2 = x[18*i+1];
2714ed1c418cSBarry Smith     alpha3 = x[18*i+2];
2715ed1c418cSBarry Smith     alpha4 = x[18*i+3];
2716ed1c418cSBarry Smith     alpha5 = x[18*i+4];
2717ed1c418cSBarry Smith     alpha6 = x[18*i+5];
2718ed1c418cSBarry Smith     alpha7 = x[18*i+6];
2719ed1c418cSBarry Smith     alpha8 = x[18*i+7];
2720ed1c418cSBarry Smith     alpha9  = x[18*i+8];
2721ed1c418cSBarry Smith     alpha10 = x[18*i+9];
2722ed1c418cSBarry Smith     alpha11 = x[18*i+10];
2723ed1c418cSBarry Smith     alpha12 = x[18*i+11];
2724ed1c418cSBarry Smith     alpha13 = x[18*i+12];
2725ed1c418cSBarry Smith     alpha14 = x[18*i+13];
2726ed1c418cSBarry Smith     alpha15 = x[18*i+14];
2727ed1c418cSBarry Smith     alpha16 = x[18*i+15];
2728ed1c418cSBarry Smith     alpha17 = x[18*i+16];
2729ed1c418cSBarry Smith     alpha18 = x[18*i+17];
2730ed1c418cSBarry Smith     while (n-->0) {
2731ed1c418cSBarry Smith       y[18*(*idx)]   += alpha1*(*v);
2732ed1c418cSBarry Smith       y[18*(*idx)+1] += alpha2*(*v);
2733ed1c418cSBarry Smith       y[18*(*idx)+2] += alpha3*(*v);
2734ed1c418cSBarry Smith       y[18*(*idx)+3] += alpha4*(*v);
2735ed1c418cSBarry Smith       y[18*(*idx)+4] += alpha5*(*v);
2736ed1c418cSBarry Smith       y[18*(*idx)+5] += alpha6*(*v);
2737ed1c418cSBarry Smith       y[18*(*idx)+6] += alpha7*(*v);
2738ed1c418cSBarry Smith       y[18*(*idx)+7] += alpha8*(*v);
2739ed1c418cSBarry Smith       y[18*(*idx)+8]  += alpha9*(*v);
2740ed1c418cSBarry Smith       y[18*(*idx)+9]  += alpha10*(*v);
2741ed1c418cSBarry Smith       y[18*(*idx)+10] += alpha11*(*v);
2742ed1c418cSBarry Smith       y[18*(*idx)+11] += alpha12*(*v);
2743ed1c418cSBarry Smith       y[18*(*idx)+12] += alpha13*(*v);
2744ed1c418cSBarry Smith       y[18*(*idx)+13] += alpha14*(*v);
2745ed1c418cSBarry Smith       y[18*(*idx)+14] += alpha15*(*v);
2746ed1c418cSBarry Smith       y[18*(*idx)+15] += alpha16*(*v);
2747ed1c418cSBarry Smith       y[18*(*idx)+16] += alpha17*(*v);
2748ed1c418cSBarry Smith       y[18*(*idx)+17] += alpha18*(*v);
2749ed1c418cSBarry Smith       idx++; v++;
2750ed1c418cSBarry Smith     }
2751ed1c418cSBarry Smith   }
2752dc0b31edSSatish Balay   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
27533649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2754ed1c418cSBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2755ed1c418cSBarry Smith   PetscFunctionReturn(0);
2756ed1c418cSBarry Smith }
2757ed1c418cSBarry Smith 
27586861f193SBarry Smith #undef __FUNCT__
27596861f193SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_N"
27606861f193SBarry Smith PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
27616861f193SBarry Smith {
27626861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
27636861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
27646861f193SBarry Smith   const PetscScalar *x,*v;
27656861f193SBarry Smith   PetscScalar       *y,*sums;
27666861f193SBarry Smith   PetscErrorCode    ierr;
27676861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
27686861f193SBarry Smith   PetscInt          n,i,jrow,j,dof = b->dof,k;
27696861f193SBarry Smith 
27706861f193SBarry Smith   PetscFunctionBegin;
27713649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
27726861f193SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
27736861f193SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
27746861f193SBarry Smith   idx  = a->j;
27756861f193SBarry Smith   v    = a->a;
27766861f193SBarry Smith   ii   = a->i;
27776861f193SBarry Smith 
27786861f193SBarry Smith   for (i=0; i<m; i++) {
27796861f193SBarry Smith     jrow = ii[i];
27806861f193SBarry Smith     n    = ii[i+1] - jrow;
27816861f193SBarry Smith     sums = y + dof*i;
27826861f193SBarry Smith     for (j=0; j<n; j++) {
27836861f193SBarry Smith       for (k=0; k<dof; k++) {
27846861f193SBarry Smith         sums[k]  += v[jrow]*x[dof*idx[jrow]+k];
27856861f193SBarry Smith       }
27866861f193SBarry Smith       jrow++;
27876861f193SBarry Smith     }
27886861f193SBarry Smith   }
27896861f193SBarry Smith 
27906861f193SBarry Smith   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
27913649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
27926861f193SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
27936861f193SBarry Smith   PetscFunctionReturn(0);
27946861f193SBarry Smith }
27956861f193SBarry Smith 
27966861f193SBarry Smith #undef __FUNCT__
27976861f193SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_N"
27986861f193SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
27996861f193SBarry Smith {
28006861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
28016861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
28026861f193SBarry Smith   const PetscScalar *x,*v;
28036861f193SBarry Smith   PetscScalar       *y,*sums;
28046861f193SBarry Smith   PetscErrorCode    ierr;
28056861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
28066861f193SBarry Smith   PetscInt          n,i,jrow,j,dof = b->dof,k;
28076861f193SBarry Smith 
28086861f193SBarry Smith   PetscFunctionBegin;
28096861f193SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
28103649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
28116861f193SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
28126861f193SBarry Smith   idx  = a->j;
28136861f193SBarry Smith   v    = a->a;
28146861f193SBarry Smith   ii   = a->i;
28156861f193SBarry Smith 
28166861f193SBarry Smith   for (i=0; i<m; i++) {
28176861f193SBarry Smith     jrow = ii[i];
28186861f193SBarry Smith     n    = ii[i+1] - jrow;
28196861f193SBarry Smith     sums = y + dof*i;
28206861f193SBarry Smith     for (j=0; j<n; j++) {
28216861f193SBarry Smith       for (k=0; k<dof; k++) {
28226861f193SBarry Smith         sums[k]  += v[jrow]*x[dof*idx[jrow]+k];
28236861f193SBarry Smith       }
28246861f193SBarry Smith       jrow++;
28256861f193SBarry Smith     }
28266861f193SBarry Smith   }
28276861f193SBarry Smith 
28286861f193SBarry Smith   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
28293649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
28306861f193SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
28316861f193SBarry Smith   PetscFunctionReturn(0);
28326861f193SBarry Smith }
28336861f193SBarry Smith 
28346861f193SBarry Smith #undef __FUNCT__
28356861f193SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_N"
28366861f193SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
28376861f193SBarry Smith {
28386861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
28396861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
28406861f193SBarry Smith   const PetscScalar *x,*v,*alpha;
28416861f193SBarry Smith   PetscScalar       *y;
28426861f193SBarry Smith   PetscErrorCode    ierr;
28436861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
28446861f193SBarry Smith   PetscInt          n,i,k;
28456861f193SBarry Smith 
28466861f193SBarry Smith   PetscFunctionBegin;
28473649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
28486861f193SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
28496861f193SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
28506861f193SBarry Smith   for (i=0; i<m; i++) {
28516861f193SBarry Smith     idx    = a->j + a->i[i] ;
28526861f193SBarry Smith     v      = a->a + a->i[i] ;
28536861f193SBarry Smith     n      = a->i[i+1] - a->i[i];
28546861f193SBarry Smith     alpha  = x + dof*i;
28556861f193SBarry Smith     while (n-->0) {
28566861f193SBarry Smith       for (k=0; k<dof; k++) {
28576861f193SBarry Smith         y[dof*(*idx)+k] += alpha[k]*(*v);
28586861f193SBarry Smith       }
285983ce7366SBarry Smith       idx++; v++;
28606861f193SBarry Smith     }
28616861f193SBarry Smith   }
28626861f193SBarry Smith   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
28633649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
28646861f193SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
28656861f193SBarry Smith   PetscFunctionReturn(0);
28666861f193SBarry Smith }
28676861f193SBarry Smith 
28686861f193SBarry Smith #undef __FUNCT__
28696861f193SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_N"
28706861f193SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
28716861f193SBarry Smith {
28726861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
28736861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
28746861f193SBarry Smith   const PetscScalar *x,*v,*alpha;
28756861f193SBarry Smith   PetscScalar       *y;
28766861f193SBarry Smith   PetscErrorCode    ierr;
28776861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
28786861f193SBarry Smith   PetscInt          n,i,k;
28796861f193SBarry Smith 
28806861f193SBarry Smith   PetscFunctionBegin;
28816861f193SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
28823649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
28836861f193SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
28846861f193SBarry Smith   for (i=0; i<m; i++) {
28856861f193SBarry Smith     idx    = a->j + a->i[i] ;
28866861f193SBarry Smith     v      = a->a + a->i[i] ;
28876861f193SBarry Smith     n      = a->i[i+1] - a->i[i];
28886861f193SBarry Smith     alpha  = x + dof*i;
28896861f193SBarry Smith     while (n-->0) {
28906861f193SBarry Smith       for (k=0; k<dof; k++) {
28916861f193SBarry Smith         y[dof*(*idx)+k] += alpha[k]*(*v);
28926861f193SBarry Smith       }
289383ce7366SBarry Smith       idx++; v++;
28946861f193SBarry Smith     }
28956861f193SBarry Smith   }
28966861f193SBarry Smith   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
28973649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
28986861f193SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
28996861f193SBarry Smith   PetscFunctionReturn(0);
29006861f193SBarry Smith }
29016861f193SBarry Smith 
2902f4a53059SBarry Smith /*===================================================================================*/
29034a2ae208SSatish Balay #undef __FUNCT__
29044a2ae208SSatish Balay #define __FUNCT__ "MatMult_MPIMAIJ_dof"
2905dfbe8321SBarry Smith PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2906f4a53059SBarry Smith {
29074cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2908dfbe8321SBarry Smith   PetscErrorCode ierr;
2909f4a53059SBarry Smith 
2910b24ad042SBarry Smith   PetscFunctionBegin;
2911f4a53059SBarry Smith   /* start the scatter */
2912ca9f406cSSatish Balay   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
29134cb9d9b8SBarry Smith   ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr);
2914ca9f406cSSatish Balay   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
29154cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr);
2916f4a53059SBarry Smith   PetscFunctionReturn(0);
2917f4a53059SBarry Smith }
2918f4a53059SBarry Smith 
29194a2ae208SSatish Balay #undef __FUNCT__
29204a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof"
2921dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
29224cb9d9b8SBarry Smith {
29234cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2924dfbe8321SBarry Smith   PetscErrorCode ierr;
2925b24ad042SBarry Smith 
29264cb9d9b8SBarry Smith   PetscFunctionBegin;
29274cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
29284cb9d9b8SBarry Smith   ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr);
2929ca9f406cSSatish Balay   ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2930ca9f406cSSatish Balay   ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
29314cb9d9b8SBarry Smith   PetscFunctionReturn(0);
29324cb9d9b8SBarry Smith }
29334cb9d9b8SBarry Smith 
29344a2ae208SSatish Balay #undef __FUNCT__
29354a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof"
2936dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
29374cb9d9b8SBarry Smith {
29384cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2939dfbe8321SBarry Smith   PetscErrorCode ierr;
29404cb9d9b8SBarry Smith 
2941b24ad042SBarry Smith   PetscFunctionBegin;
29424cb9d9b8SBarry Smith   /* start the scatter */
2943ca9f406cSSatish Balay   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2944d72c5c08SBarry Smith   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2945ca9f406cSSatish Balay   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2946717f2ec8SHong Zhang   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
29474cb9d9b8SBarry Smith   PetscFunctionReturn(0);
29484cb9d9b8SBarry Smith }
29494cb9d9b8SBarry Smith 
29504a2ae208SSatish Balay #undef __FUNCT__
29514a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof"
2952dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
29534cb9d9b8SBarry Smith {
29544cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2955dfbe8321SBarry Smith   PetscErrorCode ierr;
2956b24ad042SBarry Smith 
29574cb9d9b8SBarry Smith   PetscFunctionBegin;
29584cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2959ca9f406cSSatish Balay   ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2960d72c5c08SBarry Smith   ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2961ca9f406cSSatish Balay   ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
29624cb9d9b8SBarry Smith   PetscFunctionReturn(0);
29634cb9d9b8SBarry Smith }
29644cb9d9b8SBarry Smith 
296595ddefa2SHong Zhang /* ----------------------------------------------------------------*/
29669c4fc4c7SBarry Smith #undef __FUNCT__
29677ba1a0bfSKris Buschelman #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
29687ba1a0bfSKris Buschelman PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
29697ba1a0bfSKris Buschelman {
29707ba1a0bfSKris Buschelman   PetscErrorCode     ierr;
2971a1a86e44SBarry Smith   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
29727ba1a0bfSKris Buschelman   Mat_SeqMAIJ        *pp=(Mat_SeqMAIJ*)PP->data;
29737ba1a0bfSKris Buschelman   Mat                P=pp->AIJ;
29747ba1a0bfSKris Buschelman   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2975d2840e09SBarry Smith   PetscInt           *pti,*ptj,*ptJ;
29767ba1a0bfSKris Buschelman   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2977d2840e09SBarry Smith   const PetscInt     an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof;
2978d2840e09SBarry Smith   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn;
29797ba1a0bfSKris Buschelman   MatScalar          *ca;
2980d2840e09SBarry Smith   const PetscInt     *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj;
29817ba1a0bfSKris Buschelman 
29827ba1a0bfSKris Buschelman   PetscFunctionBegin;
29837ba1a0bfSKris Buschelman   /* Get ij structure of P^T */
29847ba1a0bfSKris Buschelman   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
29857ba1a0bfSKris Buschelman 
29867ba1a0bfSKris Buschelman   cn = pn*ppdof;
29877ba1a0bfSKris Buschelman   /* Allocate ci array, arrays for fill computation and */
29887ba1a0bfSKris Buschelman   /* free space for accumulating nonzero column info */
29897ba1a0bfSKris Buschelman   ierr = PetscMalloc((cn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
29907ba1a0bfSKris Buschelman   ci[0] = 0;
29917ba1a0bfSKris Buschelman 
29927ba1a0bfSKris Buschelman   /* Work arrays for rows of P^T*A */
299374ed9c26SBarry Smith   ierr = PetscMalloc4(an,PetscInt,&ptadenserow,an,PetscInt,&ptasparserow,cn,PetscInt,&denserow,cn,PetscInt,&sparserow);CHKERRQ(ierr);
2994c43dabc9SBarry Smith   ierr = PetscMemzero(ptadenserow,an*sizeof(PetscInt));CHKERRQ(ierr);
2995c43dabc9SBarry Smith   ierr = PetscMemzero(denserow,cn*sizeof(PetscInt));CHKERRQ(ierr);
29967ba1a0bfSKris Buschelman 
29977ba1a0bfSKris Buschelman   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
29987ba1a0bfSKris Buschelman   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
29997ba1a0bfSKris Buschelman   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
3000a1a86e44SBarry Smith   ierr          = PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space);
30017ba1a0bfSKris Buschelman   current_space = free_space;
30027ba1a0bfSKris Buschelman 
30037ba1a0bfSKris Buschelman   /* Determine symbolic info for each row of C: */
30047ba1a0bfSKris Buschelman   for (i=0;i<pn;i++) {
30057ba1a0bfSKris Buschelman     ptnzi  = pti[i+1] - pti[i];
30067ba1a0bfSKris Buschelman     ptJ    = ptj + pti[i];
30077ba1a0bfSKris Buschelman     for (dof=0;dof<ppdof;dof++) {
30087ba1a0bfSKris Buschelman       ptanzi = 0;
30097ba1a0bfSKris Buschelman       /* Determine symbolic row of PtA: */
30107ba1a0bfSKris Buschelman       for (j=0;j<ptnzi;j++) {
30117ba1a0bfSKris Buschelman         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
30127ba1a0bfSKris Buschelman         arow = ptJ[j]*ppdof + dof;
30137ba1a0bfSKris Buschelman         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
30147ba1a0bfSKris Buschelman         anzj = ai[arow+1] - ai[arow];
30157ba1a0bfSKris Buschelman         ajj  = aj + ai[arow];
30167ba1a0bfSKris Buschelman         for (k=0;k<anzj;k++) {
30177ba1a0bfSKris Buschelman           if (!ptadenserow[ajj[k]]) {
30187ba1a0bfSKris Buschelman             ptadenserow[ajj[k]]    = -1;
30197ba1a0bfSKris Buschelman             ptasparserow[ptanzi++] = ajj[k];
30207ba1a0bfSKris Buschelman           }
30217ba1a0bfSKris Buschelman         }
30227ba1a0bfSKris Buschelman       }
30237ba1a0bfSKris Buschelman       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
30247ba1a0bfSKris Buschelman       ptaj = ptasparserow;
30257ba1a0bfSKris Buschelman       cnzi   = 0;
30267ba1a0bfSKris Buschelman       for (j=0;j<ptanzi;j++) {
30277ba1a0bfSKris Buschelman         /* Get offset within block of P */
30287ba1a0bfSKris Buschelman         pshift = *ptaj%ppdof;
30297ba1a0bfSKris Buschelman         /* Get block row of P */
30307ba1a0bfSKris Buschelman         prow = (*ptaj++)/ppdof; /* integer division */
30317ba1a0bfSKris Buschelman         /* P has same number of nonzeros per row as the compressed form */
30327ba1a0bfSKris Buschelman         pnzj = pi[prow+1] - pi[prow];
30337ba1a0bfSKris Buschelman         pjj  = pj + pi[prow];
30347ba1a0bfSKris Buschelman         for (k=0;k<pnzj;k++) {
30357ba1a0bfSKris Buschelman           /* Locations in C are shifted by the offset within the block */
30367ba1a0bfSKris Buschelman           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
30377ba1a0bfSKris Buschelman           if (!denserow[pjj[k]*ppdof+pshift]) {
30387ba1a0bfSKris Buschelman             denserow[pjj[k]*ppdof+pshift] = -1;
30397ba1a0bfSKris Buschelman             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
30407ba1a0bfSKris Buschelman           }
30417ba1a0bfSKris Buschelman         }
30427ba1a0bfSKris Buschelman       }
30437ba1a0bfSKris Buschelman 
30447ba1a0bfSKris Buschelman       /* sort sparserow */
30457ba1a0bfSKris Buschelman       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
30467ba1a0bfSKris Buschelman 
30477ba1a0bfSKris Buschelman       /* If free space is not available, make more free space */
30487ba1a0bfSKris Buschelman       /* Double the amount of total space in the list */
30497ba1a0bfSKris Buschelman       if (current_space->local_remaining<cnzi) {
30504238b7adSHong Zhang         ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
30517ba1a0bfSKris Buschelman       }
30527ba1a0bfSKris Buschelman 
30537ba1a0bfSKris Buschelman       /* Copy data into free space, and zero out denserows */
30547ba1a0bfSKris Buschelman       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
30557ba1a0bfSKris Buschelman       current_space->array           += cnzi;
30567ba1a0bfSKris Buschelman       current_space->local_used      += cnzi;
30577ba1a0bfSKris Buschelman       current_space->local_remaining -= cnzi;
30587ba1a0bfSKris Buschelman 
30597ba1a0bfSKris Buschelman       for (j=0;j<ptanzi;j++) {
30607ba1a0bfSKris Buschelman         ptadenserow[ptasparserow[j]] = 0;
30617ba1a0bfSKris Buschelman       }
30627ba1a0bfSKris Buschelman       for (j=0;j<cnzi;j++) {
30637ba1a0bfSKris Buschelman         denserow[sparserow[j]] = 0;
30647ba1a0bfSKris Buschelman       }
30657ba1a0bfSKris Buschelman       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
30667ba1a0bfSKris Buschelman       /*        For now, we will recompute what is needed. */
30677ba1a0bfSKris Buschelman       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
30687ba1a0bfSKris Buschelman     }
30697ba1a0bfSKris Buschelman   }
30707ba1a0bfSKris Buschelman   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
30717ba1a0bfSKris Buschelman   /* Allocate space for cj, initialize cj, and */
30727ba1a0bfSKris Buschelman   /* destroy list of free space and other temporary array(s) */
30737ba1a0bfSKris Buschelman   ierr = PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3074a1a86e44SBarry Smith   ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
307574ed9c26SBarry Smith   ierr = PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);CHKERRQ(ierr);
30767ba1a0bfSKris Buschelman 
30777ba1a0bfSKris Buschelman   /* Allocate space for ca */
30787ba1a0bfSKris Buschelman   ierr = PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
30797ba1a0bfSKris Buschelman   ierr = PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
30807ba1a0bfSKris Buschelman 
30817ba1a0bfSKris Buschelman   /* put together the new matrix */
30827adad957SLisandro Dalcin   ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,cn,cn,ci,cj,ca,C);CHKERRQ(ierr);
30837ba1a0bfSKris Buschelman 
30847ba1a0bfSKris Buschelman   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
30857ba1a0bfSKris Buschelman   /* Since these are PETSc arrays, change flags to free them as necessary. */
30867ba1a0bfSKris Buschelman   c          = (Mat_SeqAIJ *)((*C)->data);
3087e6b907acSBarry Smith   c->free_a  = PETSC_TRUE;
3088e6b907acSBarry Smith   c->free_ij = PETSC_TRUE;
30897ba1a0bfSKris Buschelman   c->nonew   = 0;
30902121bac1SHong Zhang   (*C)->ops->ptap = MatPtAP_SeqAIJ_SeqMAIJ;
30917ba1a0bfSKris Buschelman 
30927ba1a0bfSKris Buschelman   /* Clean up. */
30937ba1a0bfSKris Buschelman   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
30947ba1a0bfSKris Buschelman   PetscFunctionReturn(0);
30957ba1a0bfSKris Buschelman }
30967ba1a0bfSKris Buschelman 
30977ba1a0bfSKris Buschelman #undef __FUNCT__
30987ba1a0bfSKris Buschelman #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ"
30997ba1a0bfSKris Buschelman PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
31007ba1a0bfSKris Buschelman {
31017ba1a0bfSKris Buschelman   /* This routine requires testing -- first draft only */
31027ba1a0bfSKris Buschelman   PetscErrorCode  ierr;
31037ba1a0bfSKris Buschelman   Mat_SeqMAIJ     *pp=(Mat_SeqMAIJ*)PP->data;
31047ba1a0bfSKris Buschelman   Mat             P=pp->AIJ;
31057ba1a0bfSKris Buschelman   Mat_SeqAIJ      *a  = (Mat_SeqAIJ *) A->data;
31067ba1a0bfSKris Buschelman   Mat_SeqAIJ      *p  = (Mat_SeqAIJ *) P->data;
31077ba1a0bfSKris Buschelman   Mat_SeqAIJ      *c  = (Mat_SeqAIJ *) C->data;
3108d2840e09SBarry Smith   const PetscInt  *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
3109d2840e09SBarry Smith   const PetscInt  *ci=c->i,*cj=c->j,*cjj;
3110d2840e09SBarry Smith   const PetscInt  am=A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
3111d2840e09SBarry Smith   PetscInt        i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense;
3112d2840e09SBarry Smith   const MatScalar *aa=a->a,*pa=p->a,*pA=p->a,*paj;
3113d2840e09SBarry Smith   MatScalar       *ca=c->a,*caj,*apa;
31147ba1a0bfSKris Buschelman 
31157ba1a0bfSKris Buschelman   PetscFunctionBegin;
31167ba1a0bfSKris Buschelman   /* Allocate temporary array for storage of one row of A*P */
311774ed9c26SBarry Smith   ierr = PetscMalloc3(cn,MatScalar,&apa,cn,PetscInt,&apj,cn,PetscInt,&apjdense);CHKERRQ(ierr);
311874ed9c26SBarry Smith   ierr = PetscMemzero(apa,cn*sizeof(MatScalar));CHKERRQ(ierr);
311974ed9c26SBarry Smith   ierr = PetscMemzero(apj,cn*sizeof(PetscInt));CHKERRQ(ierr);
312074ed9c26SBarry Smith   ierr = PetscMemzero(apjdense,cn*sizeof(PetscInt));CHKERRQ(ierr);
31217ba1a0bfSKris Buschelman 
31227ba1a0bfSKris Buschelman   /* Clear old values in C */
31237ba1a0bfSKris Buschelman   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
31247ba1a0bfSKris Buschelman 
31257ba1a0bfSKris Buschelman   for (i=0;i<am;i++) {
31267ba1a0bfSKris Buschelman     /* Form sparse row of A*P */
31277ba1a0bfSKris Buschelman     anzi  = ai[i+1] - ai[i];
31287ba1a0bfSKris Buschelman     apnzj = 0;
31297ba1a0bfSKris Buschelman     for (j=0;j<anzi;j++) {
31307ba1a0bfSKris Buschelman       /* Get offset within block of P */
31317ba1a0bfSKris Buschelman       pshift = *aj%ppdof;
31327ba1a0bfSKris Buschelman       /* Get block row of P */
31337ba1a0bfSKris Buschelman       prow   = *aj++/ppdof; /* integer division */
31347ba1a0bfSKris Buschelman       pnzj = pi[prow+1] - pi[prow];
31357ba1a0bfSKris Buschelman       pjj  = pj + pi[prow];
31367ba1a0bfSKris Buschelman       paj  = pa + pi[prow];
31377ba1a0bfSKris Buschelman       for (k=0;k<pnzj;k++) {
31387ba1a0bfSKris Buschelman         poffset = pjj[k]*ppdof+pshift;
31397ba1a0bfSKris Buschelman         if (!apjdense[poffset]) {
31407ba1a0bfSKris Buschelman           apjdense[poffset] = -1;
31417ba1a0bfSKris Buschelman           apj[apnzj++]      = poffset;
31427ba1a0bfSKris Buschelman         }
31437ba1a0bfSKris Buschelman         apa[poffset] += (*aa)*paj[k];
31447ba1a0bfSKris Buschelman       }
3145dc0b31edSSatish Balay       ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr);
31467ba1a0bfSKris Buschelman       aa++;
31477ba1a0bfSKris Buschelman     }
31487ba1a0bfSKris Buschelman 
31497ba1a0bfSKris Buschelman     /* Sort the j index array for quick sparse axpy. */
31507ba1a0bfSKris Buschelman     /* Note: a array does not need sorting as it is in dense storage locations. */
31517ba1a0bfSKris Buschelman     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
31527ba1a0bfSKris Buschelman 
31537ba1a0bfSKris Buschelman     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
31547ba1a0bfSKris Buschelman     prow    = i/ppdof; /* integer division */
31557ba1a0bfSKris Buschelman     pshift  = i%ppdof;
31567ba1a0bfSKris Buschelman     poffset = pi[prow];
31577ba1a0bfSKris Buschelman     pnzi = pi[prow+1] - poffset;
31587ba1a0bfSKris Buschelman     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
31597ba1a0bfSKris Buschelman     pJ   = pj+poffset;
31607ba1a0bfSKris Buschelman     pA   = pa+poffset;
31617ba1a0bfSKris Buschelman     for (j=0;j<pnzi;j++) {
31627ba1a0bfSKris Buschelman       crow   = (*pJ)*ppdof+pshift;
31637ba1a0bfSKris Buschelman       cjj    = cj + ci[crow];
31647ba1a0bfSKris Buschelman       caj    = ca + ci[crow];
31657ba1a0bfSKris Buschelman       pJ++;
31667ba1a0bfSKris Buschelman       /* Perform sparse axpy operation.  Note cjj includes apj. */
31677ba1a0bfSKris Buschelman       for (k=0,nextap=0;nextap<apnzj;k++) {
31687ba1a0bfSKris Buschelman         if (cjj[k]==apj[nextap]) {
31697ba1a0bfSKris Buschelman           caj[k] += (*pA)*apa[apj[nextap++]];
31707ba1a0bfSKris Buschelman         }
31717ba1a0bfSKris Buschelman       }
3172dc0b31edSSatish Balay       ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr);
31737ba1a0bfSKris Buschelman       pA++;
31747ba1a0bfSKris Buschelman     }
31757ba1a0bfSKris Buschelman 
31767ba1a0bfSKris Buschelman     /* Zero the current row info for A*P */
31777ba1a0bfSKris Buschelman     for (j=0;j<apnzj;j++) {
31787ba1a0bfSKris Buschelman       apa[apj[j]]      = 0.;
31797ba1a0bfSKris Buschelman       apjdense[apj[j]] = 0;
31807ba1a0bfSKris Buschelman     }
31817ba1a0bfSKris Buschelman   }
31827ba1a0bfSKris Buschelman 
31837ba1a0bfSKris Buschelman   /* Assemble the final matrix and clean up */
31847ba1a0bfSKris Buschelman   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
31857ba1a0bfSKris Buschelman   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
318674ed9c26SBarry Smith   ierr = PetscFree3(apa,apj,apjdense);CHKERRQ(ierr);
318795ddefa2SHong Zhang   PetscFunctionReturn(0);
318895ddefa2SHong Zhang }
31897ba1a0bfSKris Buschelman 
31902121bac1SHong Zhang #undef __FUNCT__
31912121bac1SHong Zhang #define __FUNCT__ "MatPtAP_SeqAIJ_SeqMAIJ"
3192*15221df2SJed Brown PETSC_EXTERN_C PetscErrorCode MatPtAP_SeqAIJ_SeqMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
31932121bac1SHong Zhang {
31942121bac1SHong Zhang   PetscErrorCode ierr;
31952121bac1SHong Zhang 
31962121bac1SHong Zhang   PetscFunctionBegin;
31972121bac1SHong Zhang   if (scall == MAT_INITIAL_MATRIX){
31982121bac1SHong Zhang     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
31992121bac1SHong Zhang     ierr = MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,fill,C);CHKERRQ(ierr);
32002121bac1SHong Zhang     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
32012121bac1SHong Zhang   }
32022121bac1SHong Zhang   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
32032121bac1SHong Zhang   ierr = MatPtAPNumeric_SeqAIJ_SeqMAIJ(A,P,*C);CHKERRQ(ierr);
32042121bac1SHong Zhang   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
32052121bac1SHong Zhang   PetscFunctionReturn(0);
32062121bac1SHong Zhang }
32072121bac1SHong Zhang 
320895ddefa2SHong Zhang #undef __FUNCT__
320995ddefa2SHong Zhang #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIMAIJ"
3210*15221df2SJed Brown PETSC_EXTERN_C PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
321195ddefa2SHong Zhang {
321295ddefa2SHong Zhang   PetscErrorCode    ierr;
321395ddefa2SHong Zhang 
321495ddefa2SHong Zhang   PetscFunctionBegin;
321595ddefa2SHong Zhang   /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */
321695ddefa2SHong Zhang   ierr = MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);CHKERRQ(ierr);
321795ddefa2SHong Zhang   ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);CHKERRQ(ierr);
321895ddefa2SHong Zhang   PetscFunctionReturn(0);
321995ddefa2SHong Zhang }
322095ddefa2SHong Zhang 
322195ddefa2SHong Zhang #undef __FUNCT__
322295ddefa2SHong Zhang #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIMAIJ"
3223*15221df2SJed Brown PETSC_EXTERN_C PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
322495ddefa2SHong Zhang {
322595ddefa2SHong Zhang   PetscFunctionBegin;
3226e32f2f54SBarry Smith   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
32277ba1a0bfSKris Buschelman   PetscFunctionReturn(0);
32287ba1a0bfSKris Buschelman }
32297ba1a0bfSKris Buschelman 
32307ba1a0bfSKris Buschelman #undef __FUNCT__
32310fd73130SBarry Smith #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ"
3232*15221df2SJed Brown PETSC_EXTERN_C PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
32339c4fc4c7SBarry Smith {
32349c4fc4c7SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
3235ceb03754SKris Buschelman   Mat               a = b->AIJ,B;
32369c4fc4c7SBarry Smith   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)a->data;
32379c4fc4c7SBarry Smith   PetscErrorCode    ierr;
32380fd73130SBarry Smith   PetscInt          m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
3239ba8c8a56SBarry Smith   PetscInt          *cols;
3240ba8c8a56SBarry Smith   PetscScalar       *vals;
32419c4fc4c7SBarry Smith 
32429c4fc4c7SBarry Smith   PetscFunctionBegin;
32439c4fc4c7SBarry Smith   ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr);
32447c307921SBarry Smith   ierr = PetscMalloc(dof*m*sizeof(PetscInt),&ilen);CHKERRQ(ierr);
32459c4fc4c7SBarry Smith   for (i=0; i<m; i++) {
32469c4fc4c7SBarry Smith     nmax = PetscMax(nmax,aij->ilen[i]);
32470fd73130SBarry Smith     for (j=0; j<dof; j++) {
32480fd73130SBarry Smith       ilen[dof*i+j] = aij->ilen[i];
32499c4fc4c7SBarry Smith     }
32509c4fc4c7SBarry Smith   }
3251ceb03754SKris Buschelman   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr);
32529c4fc4c7SBarry Smith   ierr = PetscFree(ilen);CHKERRQ(ierr);
32539c4fc4c7SBarry Smith   ierr = PetscMalloc(nmax*sizeof(PetscInt),&icols);CHKERRQ(ierr);
32549c4fc4c7SBarry Smith   ii   = 0;
32559c4fc4c7SBarry Smith   for (i=0; i<m; i++) {
3256ba8c8a56SBarry Smith     ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
32570fd73130SBarry Smith     for (j=0; j<dof; j++) {
32589c4fc4c7SBarry Smith       for (k=0; k<ncols; k++) {
32590fd73130SBarry Smith         icols[k] = dof*cols[k]+j;
32609c4fc4c7SBarry Smith       }
3261ceb03754SKris Buschelman       ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
32629c4fc4c7SBarry Smith       ii++;
32639c4fc4c7SBarry Smith     }
3264ba8c8a56SBarry Smith     ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
32659c4fc4c7SBarry Smith   }
32669c4fc4c7SBarry Smith   ierr = PetscFree(icols);CHKERRQ(ierr);
3267ceb03754SKris Buschelman   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3268ceb03754SKris Buschelman   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3269ceb03754SKris Buschelman 
3270ceb03754SKris Buschelman   if (reuse == MAT_REUSE_MATRIX) {
32718ab5b326SKris Buschelman     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
3272c3d102feSKris Buschelman   } else {
3273ceb03754SKris Buschelman     *newmat = B;
3274c3d102feSKris Buschelman   }
32759c4fc4c7SBarry Smith   PetscFunctionReturn(0);
32769c4fc4c7SBarry Smith }
32779c4fc4c7SBarry Smith 
3278c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h>
3279be1d678aSKris Buschelman 
32800fd73130SBarry Smith #undef __FUNCT__
32810fd73130SBarry Smith #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ"
3282*15221df2SJed Brown PETSC_EXTERN_C PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
32830fd73130SBarry Smith {
32840fd73130SBarry Smith   Mat_MPIMAIJ       *maij = (Mat_MPIMAIJ*)A->data;
3285ceb03754SKris Buschelman   Mat               MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
32860fd73130SBarry Smith   Mat               MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
32870fd73130SBarry Smith   Mat_SeqAIJ        *AIJ = (Mat_SeqAIJ*) MatAIJ->data;
32880fd73130SBarry Smith   Mat_SeqAIJ        *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data;
32890fd73130SBarry Smith   Mat_MPIAIJ        *mpiaij = (Mat_MPIAIJ*) maij->A->data;
3290910ba992SMatthew Knepley   PetscInt          dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0;
3291910ba992SMatthew Knepley   PetscInt          *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL;
32920fd73130SBarry Smith   PetscInt          rstart,cstart,*garray,ii,k;
32930fd73130SBarry Smith   PetscErrorCode    ierr;
32940fd73130SBarry Smith   PetscScalar       *vals,*ovals;
32950fd73130SBarry Smith 
32960fd73130SBarry Smith   PetscFunctionBegin;
3297d0f46423SBarry Smith   ierr = PetscMalloc2(A->rmap->n,PetscInt,&dnz,A->rmap->n,PetscInt,&onz);CHKERRQ(ierr);
3298d0f46423SBarry Smith   for (i=0; i<A->rmap->n/dof; i++) {
32990fd73130SBarry Smith     nmax  = PetscMax(nmax,AIJ->ilen[i]);
33000fd73130SBarry Smith     onmax = PetscMax(onmax,OAIJ->ilen[i]);
33010fd73130SBarry Smith     for (j=0; j<dof; j++) {
33020fd73130SBarry Smith       dnz[dof*i+j] = AIJ->ilen[i];
33030fd73130SBarry Smith       onz[dof*i+j] = OAIJ->ilen[i];
33040fd73130SBarry Smith     }
33050fd73130SBarry Smith   }
330669b1f4b7SBarry Smith   ierr = MatCreateAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);CHKERRQ(ierr);
33070fd73130SBarry Smith   ierr = PetscFree2(dnz,onz);CHKERRQ(ierr);
33080fd73130SBarry Smith 
33097a1a7badSBarry Smith   ierr   = PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);CHKERRQ(ierr);
3310d0f46423SBarry Smith   rstart = dof*maij->A->rmap->rstart;
3311d0f46423SBarry Smith   cstart = dof*maij->A->cmap->rstart;
33120fd73130SBarry Smith   garray = mpiaij->garray;
33130fd73130SBarry Smith 
33140fd73130SBarry Smith   ii = rstart;
3315d0f46423SBarry Smith   for (i=0; i<A->rmap->n/dof; i++) {
33160fd73130SBarry Smith     ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
33170fd73130SBarry Smith     ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
33180fd73130SBarry Smith     for (j=0; j<dof; j++) {
33190fd73130SBarry Smith       for (k=0; k<ncols; k++) {
33200fd73130SBarry Smith         icols[k] = cstart + dof*cols[k]+j;
33210fd73130SBarry Smith       }
33220fd73130SBarry Smith       for (k=0; k<oncols; k++) {
33230fd73130SBarry Smith         oicols[k] = dof*garray[ocols[k]]+j;
33240fd73130SBarry Smith       }
3325ceb03754SKris Buschelman       ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
3326ceb03754SKris Buschelman       ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr);
33270fd73130SBarry Smith       ii++;
33280fd73130SBarry Smith     }
33290fd73130SBarry Smith     ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
33300fd73130SBarry Smith     ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
33310fd73130SBarry Smith   }
33320fd73130SBarry Smith   ierr = PetscFree2(icols,oicols);CHKERRQ(ierr);
33330fd73130SBarry Smith 
3334ceb03754SKris Buschelman   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3335ceb03754SKris Buschelman   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3336ceb03754SKris Buschelman 
3337ceb03754SKris Buschelman   if (reuse == MAT_REUSE_MATRIX) {
33387adad957SLisandro Dalcin     PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
33397adad957SLisandro Dalcin     ((PetscObject)A)->refct = 1;
33408ab5b326SKris Buschelman     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
33417adad957SLisandro Dalcin     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3342c3d102feSKris Buschelman   } else {
3343ceb03754SKris Buschelman     *newmat = B;
3344c3d102feSKris Buschelman   }
33450fd73130SBarry Smith   PetscFunctionReturn(0);
33460fd73130SBarry Smith }
33470fd73130SBarry Smith 
33489e516c8fSBarry Smith #undef __FUNCT__
33499e516c8fSBarry Smith #define __FUNCT__ "MatGetSubMatrix_MAIJ"
33509e516c8fSBarry Smith PetscErrorCode  MatGetSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
33519e516c8fSBarry Smith {
33529e516c8fSBarry Smith   PetscErrorCode ierr;
33539e516c8fSBarry Smith   Mat            A;
33549e516c8fSBarry Smith 
33559e516c8fSBarry Smith   PetscFunctionBegin;
33569e516c8fSBarry Smith   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
33579e516c8fSBarry Smith   ierr = MatGetSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
33589e516c8fSBarry Smith   ierr = MatDestroy(&A);CHKERRQ(ierr);
33599e516c8fSBarry Smith   PetscFunctionReturn(0);
33609e516c8fSBarry Smith }
33610fd73130SBarry Smith 
3362bcc973b7SBarry Smith /* ---------------------------------------------------------------------------------- */
3363ff585edeSJed Brown #undef __FUNCT__
3364ff585edeSJed Brown #define __FUNCT__ "MatCreateMAIJ"
3365ff585edeSJed Brown /*@C
33660bad9183SKris Buschelman   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
33670bad9183SKris Buschelman   operations for multicomponent problems.  It interpolates each component the same
33680bad9183SKris Buschelman   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
33690bad9183SKris Buschelman   and MATMPIAIJ for distributed matrices.
33700bad9183SKris Buschelman 
3371ff585edeSJed Brown   Collective
3372ff585edeSJed Brown 
3373ff585edeSJed Brown   Input Parameters:
3374ff585edeSJed Brown + A - the AIJ matrix describing the action on blocks
3375ff585edeSJed Brown - dof - the block size (number of components per node)
3376ff585edeSJed Brown 
3377ff585edeSJed Brown   Output Parameter:
3378ff585edeSJed Brown . maij - the new MAIJ matrix
3379ff585edeSJed Brown 
33800bad9183SKris Buschelman   Operations provided:
33810fd73130SBarry Smith + MatMult
33820bad9183SKris Buschelman . MatMultTranspose
33830bad9183SKris Buschelman . MatMultAdd
33840bad9183SKris Buschelman . MatMultTransposeAdd
33850fd73130SBarry Smith - MatView
33860bad9183SKris Buschelman 
33870bad9183SKris Buschelman   Level: advanced
33880bad9183SKris Buschelman 
3389b409243cSBarry Smith .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ
3390ff585edeSJed Brown @*/
33917087cfbeSBarry Smith PetscErrorCode  MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
339282b1193eSBarry Smith {
3393dfbe8321SBarry Smith   PetscErrorCode ierr;
3394b24ad042SBarry Smith   PetscMPIInt    size;
3395b24ad042SBarry Smith   PetscInt       n;
339682b1193eSBarry Smith   Mat            B;
339782b1193eSBarry Smith 
339882b1193eSBarry Smith   PetscFunctionBegin;
3399d72c5c08SBarry Smith   ierr   = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3400d72c5c08SBarry Smith 
3401d72c5c08SBarry Smith   if (dof == 1) {
3402d72c5c08SBarry Smith     *maij = A;
3403d72c5c08SBarry Smith   } else {
34047adad957SLisandro Dalcin     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
3405d0f46423SBarry Smith     ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr);
3406bccba955SJed Brown     ierr = PetscLayoutSetBlockSize(B->rmap,dof);CHKERRQ(ierr);
3407bccba955SJed Brown     ierr = PetscLayoutSetBlockSize(B->cmap,dof);CHKERRQ(ierr);
3408bccba955SJed Brown     ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3409bccba955SJed Brown     ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3410cd3bbe55SBarry Smith     B->assembled    = PETSC_TRUE;
3411d72c5c08SBarry Smith 
34127adad957SLisandro Dalcin     ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
3413f4a53059SBarry Smith     if (size == 1) {
3414feb9fe23SJed Brown       Mat_SeqMAIJ    *b;
3415feb9fe23SJed Brown 
3416b9b97703SBarry Smith       ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr);
3417356c569eSBarry Smith       B->ops->setup   = PETSC_NULL;
34184cb9d9b8SBarry Smith       B->ops->destroy = MatDestroy_SeqMAIJ;
34190fd73130SBarry Smith       B->ops->view    = MatView_SeqMAIJ;
3420feb9fe23SJed Brown       b      = (Mat_SeqMAIJ*)B->data;
3421b9b97703SBarry Smith       b->dof = dof;
34224cb9d9b8SBarry Smith       b->AIJ = A;
3423d72c5c08SBarry Smith       if (dof == 2) {
3424cd3bbe55SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_2;
3425cd3bbe55SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
3426cd3bbe55SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
3427cd3bbe55SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3428bcc973b7SBarry Smith       } else if (dof == 3) {
3429bcc973b7SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_3;
3430bcc973b7SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
3431bcc973b7SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
3432bcc973b7SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3433bcc973b7SBarry Smith       } else if (dof == 4) {
3434bcc973b7SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_4;
3435bcc973b7SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
3436bcc973b7SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
3437bcc973b7SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3438f9fae5adSBarry Smith       } else if (dof == 5) {
3439f9fae5adSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_5;
3440f9fae5adSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
3441f9fae5adSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
3442f9fae5adSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
34436cd98798SBarry Smith       } else if (dof == 6) {
34446cd98798SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_6;
34456cd98798SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
34466cd98798SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
34476cd98798SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3448ed8eea03SBarry Smith       } else if (dof == 7) {
3449ed8eea03SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_7;
3450ed8eea03SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
3451ed8eea03SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
3452ed8eea03SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
345366ed3db0SBarry Smith       } else if (dof == 8) {
345466ed3db0SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_8;
345566ed3db0SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
345666ed3db0SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
345766ed3db0SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
34582b692628SMatthew Knepley       } else if (dof == 9) {
34592b692628SMatthew Knepley         B->ops->mult             = MatMult_SeqMAIJ_9;
34602b692628SMatthew Knepley         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
34612b692628SMatthew Knepley         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
34622b692628SMatthew Knepley         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3463b9cda22cSBarry Smith       } else if (dof == 10) {
3464b9cda22cSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_10;
3465b9cda22cSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
3466b9cda22cSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
3467b9cda22cSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3468dbdd7285SBarry Smith       } else if (dof == 11) {
3469dbdd7285SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_11;
3470dbdd7285SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
3471dbdd7285SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
3472dbdd7285SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
34732f7816d4SBarry Smith       } else if (dof == 16) {
34742f7816d4SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_16;
34752f7816d4SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
34762f7816d4SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
34772f7816d4SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3478ed1c418cSBarry Smith       } else if (dof == 18) {
3479ed1c418cSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_18;
3480ed1c418cSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
3481ed1c418cSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
3482ed1c418cSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
348382b1193eSBarry Smith       } else {
34846861f193SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_N;
34856861f193SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
34866861f193SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
34876861f193SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
348882b1193eSBarry Smith       }
34899c4fc4c7SBarry Smith       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr);
34902121bac1SHong Zhang       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatPtAP_seqaij_seqmaij_C","MatPtAP_SeqAIJ_SeqMAIJ",MatPtAP_SeqAIJ_SeqMAIJ);CHKERRQ(ierr);
3491f4a53059SBarry Smith     } else {
3492f4a53059SBarry Smith       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ *)A->data;
3493feb9fe23SJed Brown       Mat_MPIMAIJ *b;
3494f4a53059SBarry Smith       IS          from,to;
3495f4a53059SBarry Smith       Vec         gvec;
3496f4a53059SBarry Smith 
3497b9b97703SBarry Smith       ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr);
3498356c569eSBarry Smith       B->ops->setup   = PETSC_NULL;
3499d72c5c08SBarry Smith       B->ops->destroy = MatDestroy_MPIMAIJ;
35000fd73130SBarry Smith       B->ops->view    = MatView_MPIMAIJ;
3501b9b97703SBarry Smith       b      = (Mat_MPIMAIJ*)B->data;
3502b9b97703SBarry Smith       b->dof = dof;
3503b9b97703SBarry Smith       b->A   = A;
35044cb9d9b8SBarry Smith       ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr);
35054cb9d9b8SBarry Smith       ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr);
35064cb9d9b8SBarry Smith 
3507f4a53059SBarry Smith       ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
3508a34bdc0bSBarry Smith       ierr = VecCreate(PETSC_COMM_SELF,&b->w);CHKERRQ(ierr);
3509a34bdc0bSBarry Smith       ierr = VecSetSizes(b->w,n*dof,n*dof);CHKERRQ(ierr);
351013288a74SBarry Smith       ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr);
3511a34bdc0bSBarry Smith       ierr = VecSetType(b->w,VECSEQ);CHKERRQ(ierr);
3512f4a53059SBarry Smith 
3513f4a53059SBarry Smith       /* create two temporary Index sets for build scatter gather */
3514deff0451SBarry Smith       ierr = ISCreateBlock(((PetscObject)A)->comm,dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
3515f4a53059SBarry Smith       ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);
3516f4a53059SBarry Smith 
3517f4a53059SBarry Smith       /* create temporary global vector to generate scatter context */
3518778a2246SBarry Smith       ierr = VecCreateMPIWithArray(((PetscObject)A)->comm,dof,dof*A->cmap->n,dof*A->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr);
3519f4a53059SBarry Smith 
3520f4a53059SBarry Smith       /* generate the scatter context */
3521f4a53059SBarry Smith       ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
3522f4a53059SBarry Smith 
35236bf464f9SBarry Smith       ierr = ISDestroy(&from);CHKERRQ(ierr);
35246bf464f9SBarry Smith       ierr = ISDestroy(&to);CHKERRQ(ierr);
35256bf464f9SBarry Smith       ierr = VecDestroy(&gvec);CHKERRQ(ierr);
3526f4a53059SBarry Smith 
3527f4a53059SBarry Smith       B->ops->mult                = MatMult_MPIMAIJ_dof;
35284cb9d9b8SBarry Smith       B->ops->multtranspose       = MatMultTranspose_MPIMAIJ_dof;
35294cb9d9b8SBarry Smith       B->ops->multadd             = MatMultAdd_MPIMAIJ_dof;
35304cb9d9b8SBarry Smith       B->ops->multtransposeadd    = MatMultTransposeAdd_MPIMAIJ_dof;
35310fd73130SBarry Smith       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr);
3532f4a53059SBarry Smith     }
35339e516c8fSBarry Smith     B->ops->getsubmatrix        = MatGetSubMatrix_MAIJ;
35344994cf47SJed Brown     ierr = MatSetUp(B);CHKERRQ(ierr);
3535cd3bbe55SBarry Smith     *maij = B;
35360fd73130SBarry Smith     ierr = MatView_Private(B);CHKERRQ(ierr);
3537d72c5c08SBarry Smith   }
353882b1193eSBarry Smith   PetscFunctionReturn(0);
353982b1193eSBarry Smith }
3540