xref: /petsc/src/mat/impls/maij/maij.c (revision ed1c418c74a84dfffa7cb7499f3869e15faa8f53)
1be1d678aSKris Buschelman #define PETSCMAT_DLL
2be1d678aSKris Buschelman 
382b1193eSBarry Smith /*
4cd3bbe55SBarry Smith     Defines the basic matrix operations for the MAIJ  matrix storage format.
5cd3bbe55SBarry Smith   This format is used for restriction and interpolation operations for
6cd3bbe55SBarry Smith   multicomponent problems. It interpolates each component the same way
7cd3bbe55SBarry Smith   independently.
8cd3bbe55SBarry Smith 
9cd3bbe55SBarry Smith      We provide:
10cd3bbe55SBarry Smith          MatMult()
11cd3bbe55SBarry Smith          MatMultTranspose()
12cd3bbe55SBarry Smith          MatMultTransposeAdd()
13cd3bbe55SBarry Smith          MatMultAdd()
14cd3bbe55SBarry Smith           and
15cd3bbe55SBarry Smith          MatCreateMAIJ(Mat,dof,Mat*)
16f4a53059SBarry Smith 
17f4a53059SBarry Smith      This single directory handles both the sequential and parallel codes
1882b1193eSBarry Smith */
1982b1193eSBarry Smith 
20be911618SKris Buschelman #include "src/mat/impls/maij/maij.h"
217ba1a0bfSKris Buschelman #include "src/mat/utils/freespace.h"
221d8d5f9aSSatish Balay #include "private/vecimpl.h"
2382b1193eSBarry Smith 
244a2ae208SSatish Balay #undef __FUNCT__
254a2ae208SSatish Balay #define __FUNCT__ "MatMAIJGetAIJ"
26be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJGetAIJ(Mat A,Mat *B)
27b9b97703SBarry Smith {
28dfbe8321SBarry Smith   PetscErrorCode ierr;
29b9b97703SBarry Smith   PetscTruth     ismpimaij,isseqmaij;
30b9b97703SBarry Smith 
31b9b97703SBarry Smith   PetscFunctionBegin;
32b9b97703SBarry Smith   ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr);
33b9b97703SBarry Smith   ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr);
34b9b97703SBarry Smith   if (ismpimaij) {
35b9b97703SBarry Smith     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
36b9b97703SBarry Smith 
37b9b97703SBarry Smith     *B = b->A;
38b9b97703SBarry Smith   } else if (isseqmaij) {
39b9b97703SBarry Smith     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
40b9b97703SBarry Smith 
41b9b97703SBarry Smith     *B = b->AIJ;
42b9b97703SBarry Smith   } else {
43b9b97703SBarry Smith     *B = A;
44b9b97703SBarry Smith   }
45b9b97703SBarry Smith   PetscFunctionReturn(0);
46b9b97703SBarry Smith }
47b9b97703SBarry Smith 
484a2ae208SSatish Balay #undef __FUNCT__
494a2ae208SSatish Balay #define __FUNCT__ "MatMAIJRedimension"
50be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
51b9b97703SBarry Smith {
52dfbe8321SBarry Smith   PetscErrorCode ierr;
53b9b97703SBarry Smith   Mat            Aij;
54b9b97703SBarry Smith 
55b9b97703SBarry Smith   PetscFunctionBegin;
56b9b97703SBarry Smith   ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr);
57b9b97703SBarry Smith   ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr);
58b9b97703SBarry Smith   PetscFunctionReturn(0);
59b9b97703SBarry Smith }
60b9b97703SBarry Smith 
614a2ae208SSatish Balay #undef __FUNCT__
624a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_SeqMAIJ"
63dfbe8321SBarry Smith PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
6482b1193eSBarry Smith {
65dfbe8321SBarry Smith   PetscErrorCode ierr;
664cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
6782b1193eSBarry Smith 
6882b1193eSBarry Smith   PetscFunctionBegin;
69cd3bbe55SBarry Smith   if (b->AIJ) {
70cd3bbe55SBarry Smith     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
7182b1193eSBarry Smith   }
724cb9d9b8SBarry Smith   ierr = PetscFree(b);CHKERRQ(ierr);
734cb9d9b8SBarry Smith   PetscFunctionReturn(0);
744cb9d9b8SBarry Smith }
754cb9d9b8SBarry Smith 
764a2ae208SSatish Balay #undef __FUNCT__
770fd73130SBarry Smith #define __FUNCT__ "MatView_SeqMAIJ"
780fd73130SBarry Smith PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
790fd73130SBarry Smith {
800fd73130SBarry Smith   PetscErrorCode ierr;
810fd73130SBarry Smith   Mat            B;
820fd73130SBarry Smith 
830fd73130SBarry Smith   PetscFunctionBegin;
84ceb03754SKris Buschelman   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
850fd73130SBarry Smith   ierr = MatView(B,viewer);CHKERRQ(ierr);
860fd73130SBarry Smith   ierr = MatDestroy(B);CHKERRQ(ierr);
870fd73130SBarry Smith   PetscFunctionReturn(0);
880fd73130SBarry Smith }
890fd73130SBarry Smith 
900fd73130SBarry Smith #undef __FUNCT__
910fd73130SBarry Smith #define __FUNCT__ "MatView_MPIMAIJ"
920fd73130SBarry Smith PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
930fd73130SBarry Smith {
940fd73130SBarry Smith   PetscErrorCode ierr;
950fd73130SBarry Smith   Mat            B;
960fd73130SBarry Smith 
970fd73130SBarry Smith   PetscFunctionBegin;
98ceb03754SKris Buschelman   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
990fd73130SBarry Smith   ierr = MatView(B,viewer);CHKERRQ(ierr);
1000fd73130SBarry Smith   ierr = MatDestroy(B);CHKERRQ(ierr);
1010fd73130SBarry Smith   PetscFunctionReturn(0);
1020fd73130SBarry Smith }
1030fd73130SBarry Smith 
1040fd73130SBarry Smith #undef __FUNCT__
1054a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_MPIMAIJ"
106dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
1074cb9d9b8SBarry Smith {
108dfbe8321SBarry Smith   PetscErrorCode ierr;
1094cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
1104cb9d9b8SBarry Smith 
1114cb9d9b8SBarry Smith   PetscFunctionBegin;
1124cb9d9b8SBarry Smith   if (b->AIJ) {
1134cb9d9b8SBarry Smith     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
1144cb9d9b8SBarry Smith   }
115f4a53059SBarry Smith   if (b->OAIJ) {
116f4a53059SBarry Smith     ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr);
117f4a53059SBarry Smith   }
118b9b97703SBarry Smith   if (b->A) {
119b9b97703SBarry Smith     ierr = MatDestroy(b->A);CHKERRQ(ierr);
120b9b97703SBarry Smith   }
121f4a53059SBarry Smith   if (b->ctx) {
122f4a53059SBarry Smith     ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr);
123f4a53059SBarry Smith   }
124f4a53059SBarry Smith   if (b->w) {
125f4a53059SBarry Smith     ierr = VecDestroy(b->w);CHKERRQ(ierr);
126f4a53059SBarry Smith   }
127cd3bbe55SBarry Smith   ierr = PetscFree(b);CHKERRQ(ierr);
128dbd8c25aSHong Zhang   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
129b9b97703SBarry Smith   PetscFunctionReturn(0);
130b9b97703SBarry Smith }
131b9b97703SBarry Smith 
1320bad9183SKris Buschelman /*MC
133fafad747SKris Buschelman   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
1340bad9183SKris Buschelman   multicomponent problems, interpolating or restricting each component the same way independently.
1350bad9183SKris Buschelman   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
1360bad9183SKris Buschelman 
1370bad9183SKris Buschelman   Operations provided:
1380bad9183SKris Buschelman . MatMult
1390bad9183SKris Buschelman . MatMultTranspose
1400bad9183SKris Buschelman . MatMultAdd
1410bad9183SKris Buschelman . MatMultTransposeAdd
1420bad9183SKris Buschelman 
1430bad9183SKris Buschelman   Level: advanced
1440bad9183SKris Buschelman 
1450bad9183SKris Buschelman .seealso: MatCreateSeqDense
1460bad9183SKris Buschelman M*/
1470bad9183SKris Buschelman 
14882b1193eSBarry Smith EXTERN_C_BEGIN
1494a2ae208SSatish Balay #undef __FUNCT__
1504a2ae208SSatish Balay #define __FUNCT__ "MatCreate_MAIJ"
151be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MAIJ(Mat A)
15282b1193eSBarry Smith {
153dfbe8321SBarry Smith   PetscErrorCode ierr;
1544cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b;
15564b52464SHong Zhang   PetscMPIInt    size;
15682b1193eSBarry Smith 
15782b1193eSBarry Smith   PetscFunctionBegin;
15838f2d2fdSLisandro Dalcin   ierr     = PetscNewLog(A,Mat_MPIMAIJ,&b);CHKERRQ(ierr);
159b0a32e0cSBarry Smith   A->data  = (void*)b;
160cd3bbe55SBarry Smith   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
161cd3bbe55SBarry Smith   A->factor           = 0;
162cd3bbe55SBarry Smith   A->mapping          = 0;
163f4a53059SBarry Smith 
164cd3bbe55SBarry Smith   b->AIJ  = 0;
165cd3bbe55SBarry Smith   b->dof  = 0;
166f4a53059SBarry Smith   b->OAIJ = 0;
167f4a53059SBarry Smith   b->ctx  = 0;
168f4a53059SBarry Smith   b->w    = 0;
1697adad957SLisandro Dalcin   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
17064b52464SHong Zhang   if (size == 1){
17164b52464SHong Zhang     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr);
17264b52464SHong Zhang   } else {
17364b52464SHong Zhang     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr);
17464b52464SHong Zhang   }
17582b1193eSBarry Smith   PetscFunctionReturn(0);
17682b1193eSBarry Smith }
17782b1193eSBarry Smith EXTERN_C_END
17882b1193eSBarry Smith 
179cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/
1804a2ae208SSatish Balay #undef __FUNCT__
181b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_2"
182dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
18382b1193eSBarry Smith {
1844cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
185bcc973b7SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
18687828ca2SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2;
187dfbe8321SBarry Smith   PetscErrorCode ierr;
188b3c51c66SMatthew Knepley   PetscInt       m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
189b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
19082b1193eSBarry Smith 
191bcc973b7SBarry Smith   PetscFunctionBegin;
1921ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1931ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
194bcc973b7SBarry Smith   idx  = a->j;
195bcc973b7SBarry Smith   v    = a->a;
196bcc973b7SBarry Smith   ii   = a->i;
197bcc973b7SBarry Smith 
198bcc973b7SBarry Smith   for (i=0; i<m; i++) {
199bcc973b7SBarry Smith     jrow = ii[i];
200bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
201bcc973b7SBarry Smith     sum1  = 0.0;
202bcc973b7SBarry Smith     sum2  = 0.0;
203b3c51c66SMatthew Knepley     nonzerorow += (n>0);
204bcc973b7SBarry Smith     for (j=0; j<n; j++) {
205bcc973b7SBarry Smith       sum1 += v[jrow]*x[2*idx[jrow]];
206bcc973b7SBarry Smith       sum2 += v[jrow]*x[2*idx[jrow]+1];
207bcc973b7SBarry Smith       jrow++;
208bcc973b7SBarry Smith      }
209bcc973b7SBarry Smith     y[2*i]   = sum1;
210bcc973b7SBarry Smith     y[2*i+1] = sum2;
211bcc973b7SBarry Smith   }
212bcc973b7SBarry Smith 
213b3c51c66SMatthew Knepley   ierr = PetscLogFlops(4*a->nz - 2*nonzerorow);CHKERRQ(ierr);
2141ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2151ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
21682b1193eSBarry Smith   PetscFunctionReturn(0);
21782b1193eSBarry Smith }
218bcc973b7SBarry Smith 
2194a2ae208SSatish Balay #undef __FUNCT__
220b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2"
221dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
22282b1193eSBarry Smith {
2234cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
224bcc973b7SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
22587828ca2SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,zero = 0.0;
226dfbe8321SBarry Smith   PetscErrorCode ierr;
227899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
22882b1193eSBarry Smith 
229bcc973b7SBarry Smith   PetscFunctionBegin;
2302dcb1b2aSMatthew Knepley   ierr = VecSet(yy,zero);CHKERRQ(ierr);
2311ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2321ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
233bfec09a0SHong Zhang 
234bcc973b7SBarry Smith   for (i=0; i<m; i++) {
235bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
236bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
237bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
238bcc973b7SBarry Smith     alpha1 = x[2*i];
239bcc973b7SBarry Smith     alpha2 = x[2*i+1];
240bcc973b7SBarry Smith     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
241bcc973b7SBarry Smith   }
242b3c51c66SMatthew Knepley   ierr = PetscLogFlops(4*a->nz);CHKERRQ(ierr);
2431ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2441ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
24582b1193eSBarry Smith   PetscFunctionReturn(0);
24682b1193eSBarry Smith }
247bcc973b7SBarry Smith 
2484a2ae208SSatish Balay #undef __FUNCT__
249b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_2"
250dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
25182b1193eSBarry Smith {
2524cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
253bcc973b7SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
25487828ca2SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2;
255dfbe8321SBarry Smith   PetscErrorCode ierr;
256899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
257b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
25882b1193eSBarry Smith 
259bcc973b7SBarry Smith   PetscFunctionBegin;
260f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2611ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2621ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
263bcc973b7SBarry Smith   idx  = a->j;
264bcc973b7SBarry Smith   v    = a->a;
265bcc973b7SBarry Smith   ii   = a->i;
266bcc973b7SBarry Smith 
267bcc973b7SBarry Smith   for (i=0; i<m; i++) {
268bcc973b7SBarry Smith     jrow = ii[i];
269bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
270bcc973b7SBarry Smith     sum1  = 0.0;
271bcc973b7SBarry Smith     sum2  = 0.0;
272bcc973b7SBarry Smith     for (j=0; j<n; j++) {
273bcc973b7SBarry Smith       sum1 += v[jrow]*x[2*idx[jrow]];
274bcc973b7SBarry Smith       sum2 += v[jrow]*x[2*idx[jrow]+1];
275bcc973b7SBarry Smith       jrow++;
276bcc973b7SBarry Smith      }
277bcc973b7SBarry Smith     y[2*i]   += sum1;
278bcc973b7SBarry Smith     y[2*i+1] += sum2;
279bcc973b7SBarry Smith   }
280bcc973b7SBarry Smith 
281b3c51c66SMatthew Knepley   ierr = PetscLogFlops(4*a->nz);CHKERRQ(ierr);
2821ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2831ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
284bcc973b7SBarry Smith   PetscFunctionReturn(0);
28582b1193eSBarry Smith }
2864a2ae208SSatish Balay #undef __FUNCT__
287b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2"
288dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
28982b1193eSBarry Smith {
2904cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
291bcc973b7SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
29287828ca2SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2;
293dfbe8321SBarry Smith   PetscErrorCode ierr;
294899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
29582b1193eSBarry Smith 
296bcc973b7SBarry Smith   PetscFunctionBegin;
297f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2981ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2991ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
300bfec09a0SHong Zhang 
301bcc973b7SBarry Smith   for (i=0; i<m; i++) {
302bfec09a0SHong Zhang     idx   = a->j + a->i[i] ;
303bfec09a0SHong Zhang     v     = a->a + a->i[i] ;
304bcc973b7SBarry Smith     n     = a->i[i+1] - a->i[i];
305bcc973b7SBarry Smith     alpha1 = x[2*i];
306bcc973b7SBarry Smith     alpha2 = x[2*i+1];
307bcc973b7SBarry Smith     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
308bcc973b7SBarry Smith   }
309b3c51c66SMatthew Knepley   ierr = PetscLogFlops(4*a->nz);CHKERRQ(ierr);
3101ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3111ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
312bcc973b7SBarry Smith   PetscFunctionReturn(0);
31382b1193eSBarry Smith }
314cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/
3154a2ae208SSatish Balay #undef __FUNCT__
316b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_3"
317dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
318bcc973b7SBarry Smith {
3194cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
320bcc973b7SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
32187828ca2SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3;
322dfbe8321SBarry Smith   PetscErrorCode ierr;
323b3c51c66SMatthew Knepley   PetscInt       m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
324b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
32582b1193eSBarry Smith 
326bcc973b7SBarry Smith   PetscFunctionBegin;
3271ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3281ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
329bcc973b7SBarry Smith   idx  = a->j;
330bcc973b7SBarry Smith   v    = a->a;
331bcc973b7SBarry Smith   ii   = a->i;
332bcc973b7SBarry Smith 
333bcc973b7SBarry Smith   for (i=0; i<m; i++) {
334bcc973b7SBarry Smith     jrow = ii[i];
335bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
336bcc973b7SBarry Smith     sum1  = 0.0;
337bcc973b7SBarry Smith     sum2  = 0.0;
338bcc973b7SBarry Smith     sum3  = 0.0;
339b3c51c66SMatthew Knepley     nonzerorow += (n>0);
340bcc973b7SBarry Smith     for (j=0; j<n; j++) {
341bcc973b7SBarry Smith       sum1 += v[jrow]*x[3*idx[jrow]];
342bcc973b7SBarry Smith       sum2 += v[jrow]*x[3*idx[jrow]+1];
343bcc973b7SBarry Smith       sum3 += v[jrow]*x[3*idx[jrow]+2];
344bcc973b7SBarry Smith       jrow++;
345bcc973b7SBarry Smith      }
346bcc973b7SBarry Smith     y[3*i]   = sum1;
347bcc973b7SBarry Smith     y[3*i+1] = sum2;
348bcc973b7SBarry Smith     y[3*i+2] = sum3;
349bcc973b7SBarry Smith   }
350bcc973b7SBarry Smith 
351b3c51c66SMatthew Knepley   ierr = PetscLogFlops(6*a->nz - 3*nonzerorow);CHKERRQ(ierr);
3521ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3531ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
354bcc973b7SBarry Smith   PetscFunctionReturn(0);
355bcc973b7SBarry Smith }
356bcc973b7SBarry Smith 
3574a2ae208SSatish Balay #undef __FUNCT__
358b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3"
359dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
360bcc973b7SBarry Smith {
3614cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
362bcc973b7SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
36387828ca2SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
364dfbe8321SBarry Smith   PetscErrorCode ierr;
365899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
366bcc973b7SBarry Smith 
367bcc973b7SBarry Smith   PetscFunctionBegin;
3682dcb1b2aSMatthew Knepley   ierr = VecSet(yy,zero);CHKERRQ(ierr);
3691ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3701ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
371bfec09a0SHong Zhang 
372bcc973b7SBarry Smith   for (i=0; i<m; i++) {
373bfec09a0SHong Zhang     idx    = a->j + a->i[i];
374bfec09a0SHong Zhang     v      = a->a + a->i[i];
375bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
376bcc973b7SBarry Smith     alpha1 = x[3*i];
377bcc973b7SBarry Smith     alpha2 = x[3*i+1];
378bcc973b7SBarry Smith     alpha3 = x[3*i+2];
379bcc973b7SBarry Smith     while (n-->0) {
380bcc973b7SBarry Smith       y[3*(*idx)]   += alpha1*(*v);
381bcc973b7SBarry Smith       y[3*(*idx)+1] += alpha2*(*v);
382bcc973b7SBarry Smith       y[3*(*idx)+2] += alpha3*(*v);
383bcc973b7SBarry Smith       idx++; v++;
384bcc973b7SBarry Smith     }
385bcc973b7SBarry Smith   }
386b3c51c66SMatthew Knepley   ierr = PetscLogFlops(6*a->nz);CHKERRQ(ierr);
3871ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3881ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
389bcc973b7SBarry Smith   PetscFunctionReturn(0);
390bcc973b7SBarry Smith }
391bcc973b7SBarry Smith 
3924a2ae208SSatish Balay #undef __FUNCT__
393b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_3"
394dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
395bcc973b7SBarry Smith {
3964cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
397bcc973b7SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
39887828ca2SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3;
399dfbe8321SBarry Smith   PetscErrorCode ierr;
400899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
401b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
402bcc973b7SBarry Smith 
403bcc973b7SBarry Smith   PetscFunctionBegin;
404f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
4051ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
4061ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
407bcc973b7SBarry Smith   idx  = a->j;
408bcc973b7SBarry Smith   v    = a->a;
409bcc973b7SBarry Smith   ii   = a->i;
410bcc973b7SBarry Smith 
411bcc973b7SBarry Smith   for (i=0; i<m; i++) {
412bcc973b7SBarry Smith     jrow = ii[i];
413bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
414bcc973b7SBarry Smith     sum1  = 0.0;
415bcc973b7SBarry Smith     sum2  = 0.0;
416bcc973b7SBarry Smith     sum3  = 0.0;
417bcc973b7SBarry Smith     for (j=0; j<n; j++) {
418bcc973b7SBarry Smith       sum1 += v[jrow]*x[3*idx[jrow]];
419bcc973b7SBarry Smith       sum2 += v[jrow]*x[3*idx[jrow]+1];
420bcc973b7SBarry Smith       sum3 += v[jrow]*x[3*idx[jrow]+2];
421bcc973b7SBarry Smith       jrow++;
422bcc973b7SBarry Smith      }
423bcc973b7SBarry Smith     y[3*i]   += sum1;
424bcc973b7SBarry Smith     y[3*i+1] += sum2;
425bcc973b7SBarry Smith     y[3*i+2] += sum3;
426bcc973b7SBarry Smith   }
427bcc973b7SBarry Smith 
428efee365bSSatish Balay   ierr = PetscLogFlops(6*a->nz);CHKERRQ(ierr);
4291ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
4301ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
431bcc973b7SBarry Smith   PetscFunctionReturn(0);
432bcc973b7SBarry Smith }
4334a2ae208SSatish Balay #undef __FUNCT__
434b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3"
435dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
436bcc973b7SBarry Smith {
4374cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
438bcc973b7SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
43987828ca2SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3;
440dfbe8321SBarry Smith   PetscErrorCode ierr;
441899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
442bcc973b7SBarry Smith 
443bcc973b7SBarry Smith   PetscFunctionBegin;
444f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
4451ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
4461ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
447bcc973b7SBarry Smith   for (i=0; i<m; i++) {
448bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
449bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
450bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
451bcc973b7SBarry Smith     alpha1 = x[3*i];
452bcc973b7SBarry Smith     alpha2 = x[3*i+1];
453bcc973b7SBarry Smith     alpha3 = x[3*i+2];
454bcc973b7SBarry Smith     while (n-->0) {
455bcc973b7SBarry Smith       y[3*(*idx)]   += alpha1*(*v);
456bcc973b7SBarry Smith       y[3*(*idx)+1] += alpha2*(*v);
457bcc973b7SBarry Smith       y[3*(*idx)+2] += alpha3*(*v);
458bcc973b7SBarry Smith       idx++; v++;
459bcc973b7SBarry Smith     }
460bcc973b7SBarry Smith   }
461efee365bSSatish Balay   ierr = PetscLogFlops(6*a->nz);CHKERRQ(ierr);
4621ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
4631ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
464bcc973b7SBarry Smith   PetscFunctionReturn(0);
465bcc973b7SBarry Smith }
466bcc973b7SBarry Smith 
467bcc973b7SBarry Smith /* ------------------------------------------------------------------------------*/
4684a2ae208SSatish Balay #undef __FUNCT__
469b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_4"
470dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
471bcc973b7SBarry Smith {
4724cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
473bcc973b7SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
47487828ca2SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4;
475dfbe8321SBarry Smith   PetscErrorCode ierr;
476b3c51c66SMatthew Knepley   PetscInt       m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
477b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
478bcc973b7SBarry Smith 
479bcc973b7SBarry Smith   PetscFunctionBegin;
4801ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
4811ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
482bcc973b7SBarry Smith   idx  = a->j;
483bcc973b7SBarry Smith   v    = a->a;
484bcc973b7SBarry Smith   ii   = a->i;
485bcc973b7SBarry Smith 
486bcc973b7SBarry Smith   for (i=0; i<m; i++) {
487bcc973b7SBarry Smith     jrow = ii[i];
488bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
489bcc973b7SBarry Smith     sum1  = 0.0;
490bcc973b7SBarry Smith     sum2  = 0.0;
491bcc973b7SBarry Smith     sum3  = 0.0;
492bcc973b7SBarry Smith     sum4  = 0.0;
493b3c51c66SMatthew Knepley     nonzerorow += (n>0);
494bcc973b7SBarry Smith     for (j=0; j<n; j++) {
495bcc973b7SBarry Smith       sum1 += v[jrow]*x[4*idx[jrow]];
496bcc973b7SBarry Smith       sum2 += v[jrow]*x[4*idx[jrow]+1];
497bcc973b7SBarry Smith       sum3 += v[jrow]*x[4*idx[jrow]+2];
498bcc973b7SBarry Smith       sum4 += v[jrow]*x[4*idx[jrow]+3];
499bcc973b7SBarry Smith       jrow++;
500bcc973b7SBarry Smith      }
501bcc973b7SBarry Smith     y[4*i]   = sum1;
502bcc973b7SBarry Smith     y[4*i+1] = sum2;
503bcc973b7SBarry Smith     y[4*i+2] = sum3;
504bcc973b7SBarry Smith     y[4*i+3] = sum4;
505bcc973b7SBarry Smith   }
506bcc973b7SBarry Smith 
507b3c51c66SMatthew Knepley   ierr = PetscLogFlops(8*a->nz - 4*nonzerorow);CHKERRQ(ierr);
5081ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
5091ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
510bcc973b7SBarry Smith   PetscFunctionReturn(0);
511bcc973b7SBarry Smith }
512bcc973b7SBarry Smith 
5134a2ae208SSatish Balay #undef __FUNCT__
514b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4"
515dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
516bcc973b7SBarry Smith {
5174cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
518bcc973b7SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
51987828ca2SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
520dfbe8321SBarry Smith   PetscErrorCode ierr;
521899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
522bcc973b7SBarry Smith 
523bcc973b7SBarry Smith   PetscFunctionBegin;
5242dcb1b2aSMatthew Knepley   ierr = VecSet(yy,zero);CHKERRQ(ierr);
5251ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
5261ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
527bcc973b7SBarry Smith   for (i=0; i<m; i++) {
528bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
529bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
530bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
531bcc973b7SBarry Smith     alpha1 = x[4*i];
532bcc973b7SBarry Smith     alpha2 = x[4*i+1];
533bcc973b7SBarry Smith     alpha3 = x[4*i+2];
534bcc973b7SBarry Smith     alpha4 = x[4*i+3];
535bcc973b7SBarry Smith     while (n-->0) {
536bcc973b7SBarry Smith       y[4*(*idx)]   += alpha1*(*v);
537bcc973b7SBarry Smith       y[4*(*idx)+1] += alpha2*(*v);
538bcc973b7SBarry Smith       y[4*(*idx)+2] += alpha3*(*v);
539bcc973b7SBarry Smith       y[4*(*idx)+3] += alpha4*(*v);
540bcc973b7SBarry Smith       idx++; v++;
541bcc973b7SBarry Smith     }
542bcc973b7SBarry Smith   }
543b3c51c66SMatthew Knepley   ierr = PetscLogFlops(8*a->nz);CHKERRQ(ierr);
5441ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
5451ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
546bcc973b7SBarry Smith   PetscFunctionReturn(0);
547bcc973b7SBarry Smith }
548bcc973b7SBarry Smith 
5494a2ae208SSatish Balay #undef __FUNCT__
550b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_4"
551dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
552bcc973b7SBarry Smith {
5534cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
554f9fae5adSBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
55587828ca2SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4;
556dfbe8321SBarry Smith   PetscErrorCode ierr;
557899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
558b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
559f9fae5adSBarry Smith 
560f9fae5adSBarry Smith   PetscFunctionBegin;
561f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
5621ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
5631ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
564f9fae5adSBarry Smith   idx  = a->j;
565f9fae5adSBarry Smith   v    = a->a;
566f9fae5adSBarry Smith   ii   = a->i;
567f9fae5adSBarry Smith 
568f9fae5adSBarry Smith   for (i=0; i<m; i++) {
569f9fae5adSBarry Smith     jrow = ii[i];
570f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
571f9fae5adSBarry Smith     sum1  = 0.0;
572f9fae5adSBarry Smith     sum2  = 0.0;
573f9fae5adSBarry Smith     sum3  = 0.0;
574f9fae5adSBarry Smith     sum4  = 0.0;
575f9fae5adSBarry Smith     for (j=0; j<n; j++) {
576f9fae5adSBarry Smith       sum1 += v[jrow]*x[4*idx[jrow]];
577f9fae5adSBarry Smith       sum2 += v[jrow]*x[4*idx[jrow]+1];
578f9fae5adSBarry Smith       sum3 += v[jrow]*x[4*idx[jrow]+2];
579f9fae5adSBarry Smith       sum4 += v[jrow]*x[4*idx[jrow]+3];
580f9fae5adSBarry Smith       jrow++;
581f9fae5adSBarry Smith      }
582f9fae5adSBarry Smith     y[4*i]   += sum1;
583f9fae5adSBarry Smith     y[4*i+1] += sum2;
584f9fae5adSBarry Smith     y[4*i+2] += sum3;
585f9fae5adSBarry Smith     y[4*i+3] += sum4;
586f9fae5adSBarry Smith   }
587f9fae5adSBarry Smith 
588b3c51c66SMatthew Knepley   ierr = PetscLogFlops(8*a->nz);CHKERRQ(ierr);
5891ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
5901ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
591f9fae5adSBarry Smith   PetscFunctionReturn(0);
592bcc973b7SBarry Smith }
5934a2ae208SSatish Balay #undef __FUNCT__
594b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4"
595dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
596bcc973b7SBarry Smith {
5974cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
598f9fae5adSBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
59987828ca2SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
600dfbe8321SBarry Smith   PetscErrorCode ierr;
601899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
602f9fae5adSBarry Smith 
603f9fae5adSBarry Smith   PetscFunctionBegin;
604f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
6051ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
6061ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
607bfec09a0SHong Zhang 
608f9fae5adSBarry Smith   for (i=0; i<m; i++) {
609bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
610bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
611f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
612f9fae5adSBarry Smith     alpha1 = x[4*i];
613f9fae5adSBarry Smith     alpha2 = x[4*i+1];
614f9fae5adSBarry Smith     alpha3 = x[4*i+2];
615f9fae5adSBarry Smith     alpha4 = x[4*i+3];
616f9fae5adSBarry Smith     while (n-->0) {
617f9fae5adSBarry Smith       y[4*(*idx)]   += alpha1*(*v);
618f9fae5adSBarry Smith       y[4*(*idx)+1] += alpha2*(*v);
619f9fae5adSBarry Smith       y[4*(*idx)+2] += alpha3*(*v);
620f9fae5adSBarry Smith       y[4*(*idx)+3] += alpha4*(*v);
621f9fae5adSBarry Smith       idx++; v++;
622f9fae5adSBarry Smith     }
623f9fae5adSBarry Smith   }
624b3c51c66SMatthew Knepley   ierr = PetscLogFlops(8*a->nz);CHKERRQ(ierr);
6251ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
6261ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
627f9fae5adSBarry Smith   PetscFunctionReturn(0);
628f9fae5adSBarry Smith }
629f9fae5adSBarry Smith /* ------------------------------------------------------------------------------*/
6306cd98798SBarry Smith 
6314a2ae208SSatish Balay #undef __FUNCT__
632b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_5"
633dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
634f9fae5adSBarry Smith {
6354cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
636f9fae5adSBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
63787828ca2SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
638dfbe8321SBarry Smith   PetscErrorCode ierr;
639b3c51c66SMatthew Knepley   PetscInt       m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
640b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
641f9fae5adSBarry Smith 
642f9fae5adSBarry Smith   PetscFunctionBegin;
6431ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
6441ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
645f9fae5adSBarry Smith   idx  = a->j;
646f9fae5adSBarry Smith   v    = a->a;
647f9fae5adSBarry Smith   ii   = a->i;
648f9fae5adSBarry Smith 
649f9fae5adSBarry Smith   for (i=0; i<m; i++) {
650f9fae5adSBarry Smith     jrow = ii[i];
651f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
652f9fae5adSBarry Smith     sum1  = 0.0;
653f9fae5adSBarry Smith     sum2  = 0.0;
654f9fae5adSBarry Smith     sum3  = 0.0;
655f9fae5adSBarry Smith     sum4  = 0.0;
656f9fae5adSBarry Smith     sum5  = 0.0;
657b3c51c66SMatthew Knepley     nonzerorow += (n>0);
658f9fae5adSBarry Smith     for (j=0; j<n; j++) {
659f9fae5adSBarry Smith       sum1 += v[jrow]*x[5*idx[jrow]];
660f9fae5adSBarry Smith       sum2 += v[jrow]*x[5*idx[jrow]+1];
661f9fae5adSBarry Smith       sum3 += v[jrow]*x[5*idx[jrow]+2];
662f9fae5adSBarry Smith       sum4 += v[jrow]*x[5*idx[jrow]+3];
663f9fae5adSBarry Smith       sum5 += v[jrow]*x[5*idx[jrow]+4];
664f9fae5adSBarry Smith       jrow++;
665f9fae5adSBarry Smith      }
666f9fae5adSBarry Smith     y[5*i]   = sum1;
667f9fae5adSBarry Smith     y[5*i+1] = sum2;
668f9fae5adSBarry Smith     y[5*i+2] = sum3;
669f9fae5adSBarry Smith     y[5*i+3] = sum4;
670f9fae5adSBarry Smith     y[5*i+4] = sum5;
671f9fae5adSBarry Smith   }
672f9fae5adSBarry Smith 
673b3c51c66SMatthew Knepley   ierr = PetscLogFlops(10*a->nz - 5*nonzerorow);CHKERRQ(ierr);
6741ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
6751ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
676f9fae5adSBarry Smith   PetscFunctionReturn(0);
677f9fae5adSBarry Smith }
678f9fae5adSBarry Smith 
6794a2ae208SSatish Balay #undef __FUNCT__
680b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5"
681dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
682f9fae5adSBarry Smith {
6834cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
684f9fae5adSBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
68587828ca2SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
686dfbe8321SBarry Smith   PetscErrorCode ierr;
687899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
688f9fae5adSBarry Smith 
689f9fae5adSBarry Smith   PetscFunctionBegin;
6902dcb1b2aSMatthew Knepley   ierr = VecSet(yy,zero);CHKERRQ(ierr);
6911ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
6921ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
693bfec09a0SHong Zhang 
694f9fae5adSBarry Smith   for (i=0; i<m; i++) {
695bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
696bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
697f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
698f9fae5adSBarry Smith     alpha1 = x[5*i];
699f9fae5adSBarry Smith     alpha2 = x[5*i+1];
700f9fae5adSBarry Smith     alpha3 = x[5*i+2];
701f9fae5adSBarry Smith     alpha4 = x[5*i+3];
702f9fae5adSBarry Smith     alpha5 = x[5*i+4];
703f9fae5adSBarry Smith     while (n-->0) {
704f9fae5adSBarry Smith       y[5*(*idx)]   += alpha1*(*v);
705f9fae5adSBarry Smith       y[5*(*idx)+1] += alpha2*(*v);
706f9fae5adSBarry Smith       y[5*(*idx)+2] += alpha3*(*v);
707f9fae5adSBarry Smith       y[5*(*idx)+3] += alpha4*(*v);
708f9fae5adSBarry Smith       y[5*(*idx)+4] += alpha5*(*v);
709f9fae5adSBarry Smith       idx++; v++;
710f9fae5adSBarry Smith     }
711f9fae5adSBarry Smith   }
712b3c51c66SMatthew Knepley   ierr = PetscLogFlops(10*a->nz);CHKERRQ(ierr);
7131ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
7141ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
715f9fae5adSBarry Smith   PetscFunctionReturn(0);
716f9fae5adSBarry Smith }
717f9fae5adSBarry Smith 
7184a2ae208SSatish Balay #undef __FUNCT__
719b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_5"
720dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
721f9fae5adSBarry Smith {
7224cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
723f9fae5adSBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
72487828ca2SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
725dfbe8321SBarry Smith   PetscErrorCode ierr;
726899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
727b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
728f9fae5adSBarry Smith 
729f9fae5adSBarry Smith   PetscFunctionBegin;
730f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
7311ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
7321ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
733f9fae5adSBarry Smith   idx  = a->j;
734f9fae5adSBarry Smith   v    = a->a;
735f9fae5adSBarry Smith   ii   = a->i;
736f9fae5adSBarry Smith 
737f9fae5adSBarry Smith   for (i=0; i<m; i++) {
738f9fae5adSBarry Smith     jrow = ii[i];
739f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
740f9fae5adSBarry Smith     sum1  = 0.0;
741f9fae5adSBarry Smith     sum2  = 0.0;
742f9fae5adSBarry Smith     sum3  = 0.0;
743f9fae5adSBarry Smith     sum4  = 0.0;
744f9fae5adSBarry Smith     sum5  = 0.0;
745f9fae5adSBarry Smith     for (j=0; j<n; j++) {
746f9fae5adSBarry Smith       sum1 += v[jrow]*x[5*idx[jrow]];
747f9fae5adSBarry Smith       sum2 += v[jrow]*x[5*idx[jrow]+1];
748f9fae5adSBarry Smith       sum3 += v[jrow]*x[5*idx[jrow]+2];
749f9fae5adSBarry Smith       sum4 += v[jrow]*x[5*idx[jrow]+3];
750f9fae5adSBarry Smith       sum5 += v[jrow]*x[5*idx[jrow]+4];
751f9fae5adSBarry Smith       jrow++;
752f9fae5adSBarry Smith      }
753f9fae5adSBarry Smith     y[5*i]   += sum1;
754f9fae5adSBarry Smith     y[5*i+1] += sum2;
755f9fae5adSBarry Smith     y[5*i+2] += sum3;
756f9fae5adSBarry Smith     y[5*i+3] += sum4;
757f9fae5adSBarry Smith     y[5*i+4] += sum5;
758f9fae5adSBarry Smith   }
759f9fae5adSBarry Smith 
760efee365bSSatish Balay   ierr = PetscLogFlops(10*a->nz);CHKERRQ(ierr);
7611ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
7621ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
763f9fae5adSBarry Smith   PetscFunctionReturn(0);
764f9fae5adSBarry Smith }
765f9fae5adSBarry Smith 
7664a2ae208SSatish Balay #undef __FUNCT__
767b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5"
768dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
769f9fae5adSBarry Smith {
7704cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
771f9fae5adSBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
77287828ca2SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
773dfbe8321SBarry Smith   PetscErrorCode ierr;
774899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
775f9fae5adSBarry Smith 
776f9fae5adSBarry Smith   PetscFunctionBegin;
777f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
7781ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
7791ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
780bfec09a0SHong Zhang 
781f9fae5adSBarry Smith   for (i=0; i<m; i++) {
782bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
783bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
784f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
785f9fae5adSBarry Smith     alpha1 = x[5*i];
786f9fae5adSBarry Smith     alpha2 = x[5*i+1];
787f9fae5adSBarry Smith     alpha3 = x[5*i+2];
788f9fae5adSBarry Smith     alpha4 = x[5*i+3];
789f9fae5adSBarry Smith     alpha5 = x[5*i+4];
790f9fae5adSBarry Smith     while (n-->0) {
791f9fae5adSBarry Smith       y[5*(*idx)]   += alpha1*(*v);
792f9fae5adSBarry Smith       y[5*(*idx)+1] += alpha2*(*v);
793f9fae5adSBarry Smith       y[5*(*idx)+2] += alpha3*(*v);
794f9fae5adSBarry Smith       y[5*(*idx)+3] += alpha4*(*v);
795f9fae5adSBarry Smith       y[5*(*idx)+4] += alpha5*(*v);
796f9fae5adSBarry Smith       idx++; v++;
797f9fae5adSBarry Smith     }
798f9fae5adSBarry Smith   }
799efee365bSSatish Balay   ierr = PetscLogFlops(10*a->nz);CHKERRQ(ierr);
8001ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
8011ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
802f9fae5adSBarry Smith   PetscFunctionReturn(0);
803bcc973b7SBarry Smith }
804bcc973b7SBarry Smith 
8056cd98798SBarry Smith /* ------------------------------------------------------------------------------*/
8064a2ae208SSatish Balay #undef __FUNCT__
807b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_6"
808dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
8096cd98798SBarry Smith {
8106cd98798SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
8116cd98798SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
81287828ca2SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
813dfbe8321SBarry Smith   PetscErrorCode ierr;
814b3c51c66SMatthew Knepley   PetscInt       m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
815b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
8166cd98798SBarry Smith 
8176cd98798SBarry Smith   PetscFunctionBegin;
8181ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
8191ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
8206cd98798SBarry Smith   idx  = a->j;
8216cd98798SBarry Smith   v    = a->a;
8226cd98798SBarry Smith   ii   = a->i;
8236cd98798SBarry Smith 
8246cd98798SBarry Smith   for (i=0; i<m; i++) {
8256cd98798SBarry Smith     jrow = ii[i];
8266cd98798SBarry Smith     n    = ii[i+1] - jrow;
8276cd98798SBarry Smith     sum1  = 0.0;
8286cd98798SBarry Smith     sum2  = 0.0;
8296cd98798SBarry Smith     sum3  = 0.0;
8306cd98798SBarry Smith     sum4  = 0.0;
8316cd98798SBarry Smith     sum5  = 0.0;
8326cd98798SBarry Smith     sum6  = 0.0;
833b3c51c66SMatthew Knepley     nonzerorow += (n>0);
8346cd98798SBarry Smith     for (j=0; j<n; j++) {
8356cd98798SBarry Smith       sum1 += v[jrow]*x[6*idx[jrow]];
8366cd98798SBarry Smith       sum2 += v[jrow]*x[6*idx[jrow]+1];
8376cd98798SBarry Smith       sum3 += v[jrow]*x[6*idx[jrow]+2];
8386cd98798SBarry Smith       sum4 += v[jrow]*x[6*idx[jrow]+3];
8396cd98798SBarry Smith       sum5 += v[jrow]*x[6*idx[jrow]+4];
8406cd98798SBarry Smith       sum6 += v[jrow]*x[6*idx[jrow]+5];
8416cd98798SBarry Smith       jrow++;
8426cd98798SBarry Smith      }
8436cd98798SBarry Smith     y[6*i]   = sum1;
8446cd98798SBarry Smith     y[6*i+1] = sum2;
8456cd98798SBarry Smith     y[6*i+2] = sum3;
8466cd98798SBarry Smith     y[6*i+3] = sum4;
8476cd98798SBarry Smith     y[6*i+4] = sum5;
8486cd98798SBarry Smith     y[6*i+5] = sum6;
8496cd98798SBarry Smith   }
8506cd98798SBarry Smith 
851b3c51c66SMatthew Knepley   ierr = PetscLogFlops(12*a->nz - 6*nonzerorow);CHKERRQ(ierr);
8521ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
8531ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
8546cd98798SBarry Smith   PetscFunctionReturn(0);
8556cd98798SBarry Smith }
8566cd98798SBarry Smith 
8574a2ae208SSatish Balay #undef __FUNCT__
858b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6"
859dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
8606cd98798SBarry Smith {
8616cd98798SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
8626cd98798SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
86387828ca2SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0;
864dfbe8321SBarry Smith   PetscErrorCode ierr;
865899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
8666cd98798SBarry Smith 
8676cd98798SBarry Smith   PetscFunctionBegin;
8682dcb1b2aSMatthew Knepley   ierr = VecSet(yy,zero);CHKERRQ(ierr);
8691ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
8701ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
871bfec09a0SHong Zhang 
8726cd98798SBarry Smith   for (i=0; i<m; i++) {
873bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
874bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
8756cd98798SBarry Smith     n      = a->i[i+1] - a->i[i];
8766cd98798SBarry Smith     alpha1 = x[6*i];
8776cd98798SBarry Smith     alpha2 = x[6*i+1];
8786cd98798SBarry Smith     alpha3 = x[6*i+2];
8796cd98798SBarry Smith     alpha4 = x[6*i+3];
8806cd98798SBarry Smith     alpha5 = x[6*i+4];
8816cd98798SBarry Smith     alpha6 = x[6*i+5];
8826cd98798SBarry Smith     while (n-->0) {
8836cd98798SBarry Smith       y[6*(*idx)]   += alpha1*(*v);
8846cd98798SBarry Smith       y[6*(*idx)+1] += alpha2*(*v);
8856cd98798SBarry Smith       y[6*(*idx)+2] += alpha3*(*v);
8866cd98798SBarry Smith       y[6*(*idx)+3] += alpha4*(*v);
8876cd98798SBarry Smith       y[6*(*idx)+4] += alpha5*(*v);
8886cd98798SBarry Smith       y[6*(*idx)+5] += alpha6*(*v);
8896cd98798SBarry Smith       idx++; v++;
8906cd98798SBarry Smith     }
8916cd98798SBarry Smith   }
892b3c51c66SMatthew Knepley   ierr = PetscLogFlops(12*a->nz);CHKERRQ(ierr);
8931ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
8941ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
8956cd98798SBarry Smith   PetscFunctionReturn(0);
8966cd98798SBarry Smith }
8976cd98798SBarry Smith 
8984a2ae208SSatish Balay #undef __FUNCT__
899b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_6"
900dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
9016cd98798SBarry Smith {
9026cd98798SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
9036cd98798SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
90487828ca2SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
905dfbe8321SBarry Smith   PetscErrorCode ierr;
906899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
907b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
9086cd98798SBarry Smith 
9096cd98798SBarry Smith   PetscFunctionBegin;
9106cd98798SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
9111ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
9121ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
9136cd98798SBarry Smith   idx  = a->j;
9146cd98798SBarry Smith   v    = a->a;
9156cd98798SBarry Smith   ii   = a->i;
9166cd98798SBarry Smith 
9176cd98798SBarry Smith   for (i=0; i<m; i++) {
9186cd98798SBarry Smith     jrow = ii[i];
9196cd98798SBarry Smith     n    = ii[i+1] - jrow;
9206cd98798SBarry Smith     sum1  = 0.0;
9216cd98798SBarry Smith     sum2  = 0.0;
9226cd98798SBarry Smith     sum3  = 0.0;
9236cd98798SBarry Smith     sum4  = 0.0;
9246cd98798SBarry Smith     sum5  = 0.0;
9256cd98798SBarry Smith     sum6  = 0.0;
9266cd98798SBarry Smith     for (j=0; j<n; j++) {
9276cd98798SBarry Smith       sum1 += v[jrow]*x[6*idx[jrow]];
9286cd98798SBarry Smith       sum2 += v[jrow]*x[6*idx[jrow]+1];
9296cd98798SBarry Smith       sum3 += v[jrow]*x[6*idx[jrow]+2];
9306cd98798SBarry Smith       sum4 += v[jrow]*x[6*idx[jrow]+3];
9316cd98798SBarry Smith       sum5 += v[jrow]*x[6*idx[jrow]+4];
9326cd98798SBarry Smith       sum6 += v[jrow]*x[6*idx[jrow]+5];
9336cd98798SBarry Smith       jrow++;
9346cd98798SBarry Smith      }
9356cd98798SBarry Smith     y[6*i]   += sum1;
9366cd98798SBarry Smith     y[6*i+1] += sum2;
9376cd98798SBarry Smith     y[6*i+2] += sum3;
9386cd98798SBarry Smith     y[6*i+3] += sum4;
9396cd98798SBarry Smith     y[6*i+4] += sum5;
9406cd98798SBarry Smith     y[6*i+5] += sum6;
9416cd98798SBarry Smith   }
9426cd98798SBarry Smith 
943efee365bSSatish Balay   ierr = PetscLogFlops(12*a->nz);CHKERRQ(ierr);
9441ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
9451ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
9466cd98798SBarry Smith   PetscFunctionReturn(0);
9476cd98798SBarry Smith }
9486cd98798SBarry Smith 
9494a2ae208SSatish Balay #undef __FUNCT__
950b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6"
951dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
9526cd98798SBarry Smith {
9536cd98798SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
9546cd98798SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
95587828ca2SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
956dfbe8321SBarry Smith   PetscErrorCode ierr;
957899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
9586cd98798SBarry Smith 
9596cd98798SBarry Smith   PetscFunctionBegin;
9606cd98798SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
9611ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
9621ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
963bfec09a0SHong Zhang 
9646cd98798SBarry Smith   for (i=0; i<m; i++) {
965bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
966bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
9676cd98798SBarry Smith     n      = a->i[i+1] - a->i[i];
9686cd98798SBarry Smith     alpha1 = x[6*i];
9696cd98798SBarry Smith     alpha2 = x[6*i+1];
9706cd98798SBarry Smith     alpha3 = x[6*i+2];
9716cd98798SBarry Smith     alpha4 = x[6*i+3];
9726cd98798SBarry Smith     alpha5 = x[6*i+4];
9736cd98798SBarry Smith     alpha6 = x[6*i+5];
9746cd98798SBarry Smith     while (n-->0) {
9756cd98798SBarry Smith       y[6*(*idx)]   += alpha1*(*v);
9766cd98798SBarry Smith       y[6*(*idx)+1] += alpha2*(*v);
9776cd98798SBarry Smith       y[6*(*idx)+2] += alpha3*(*v);
9786cd98798SBarry Smith       y[6*(*idx)+3] += alpha4*(*v);
9796cd98798SBarry Smith       y[6*(*idx)+4] += alpha5*(*v);
9806cd98798SBarry Smith       y[6*(*idx)+5] += alpha6*(*v);
9816cd98798SBarry Smith       idx++; v++;
9826cd98798SBarry Smith     }
9836cd98798SBarry Smith   }
984efee365bSSatish Balay   ierr = PetscLogFlops(12*a->nz);CHKERRQ(ierr);
9851ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
9861ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
9876cd98798SBarry Smith   PetscFunctionReturn(0);
9886cd98798SBarry Smith }
9896cd98798SBarry Smith 
99066ed3db0SBarry Smith /* ------------------------------------------------------------------------------*/
99166ed3db0SBarry Smith #undef __FUNCT__
992ed8eea03SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_7"
993ed8eea03SBarry Smith PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
994ed8eea03SBarry Smith {
995ed8eea03SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
996ed8eea03SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
997ed8eea03SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
998ed8eea03SBarry Smith   PetscErrorCode ierr;
999b3c51c66SMatthew Knepley   PetscInt       m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
1000ed8eea03SBarry Smith   PetscInt       n,i,jrow,j;
1001ed8eea03SBarry Smith 
1002ed8eea03SBarry Smith   PetscFunctionBegin;
1003ed8eea03SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1004ed8eea03SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1005ed8eea03SBarry Smith   idx  = a->j;
1006ed8eea03SBarry Smith   v    = a->a;
1007ed8eea03SBarry Smith   ii   = a->i;
1008ed8eea03SBarry Smith 
1009ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1010ed8eea03SBarry Smith     jrow = ii[i];
1011ed8eea03SBarry Smith     n    = ii[i+1] - jrow;
1012ed8eea03SBarry Smith     sum1  = 0.0;
1013ed8eea03SBarry Smith     sum2  = 0.0;
1014ed8eea03SBarry Smith     sum3  = 0.0;
1015ed8eea03SBarry Smith     sum4  = 0.0;
1016ed8eea03SBarry Smith     sum5  = 0.0;
1017ed8eea03SBarry Smith     sum6  = 0.0;
1018ed8eea03SBarry Smith     sum7  = 0.0;
1019b3c51c66SMatthew Knepley     nonzerorow += (n>0);
1020ed8eea03SBarry Smith     for (j=0; j<n; j++) {
1021ed8eea03SBarry Smith       sum1 += v[jrow]*x[7*idx[jrow]];
1022ed8eea03SBarry Smith       sum2 += v[jrow]*x[7*idx[jrow]+1];
1023ed8eea03SBarry Smith       sum3 += v[jrow]*x[7*idx[jrow]+2];
1024ed8eea03SBarry Smith       sum4 += v[jrow]*x[7*idx[jrow]+3];
1025ed8eea03SBarry Smith       sum5 += v[jrow]*x[7*idx[jrow]+4];
1026ed8eea03SBarry Smith       sum6 += v[jrow]*x[7*idx[jrow]+5];
1027ed8eea03SBarry Smith       sum7 += v[jrow]*x[7*idx[jrow]+6];
1028ed8eea03SBarry Smith       jrow++;
1029ed8eea03SBarry Smith      }
1030ed8eea03SBarry Smith     y[7*i]   = sum1;
1031ed8eea03SBarry Smith     y[7*i+1] = sum2;
1032ed8eea03SBarry Smith     y[7*i+2] = sum3;
1033ed8eea03SBarry Smith     y[7*i+3] = sum4;
1034ed8eea03SBarry Smith     y[7*i+4] = sum5;
1035ed8eea03SBarry Smith     y[7*i+5] = sum6;
1036ed8eea03SBarry Smith     y[7*i+6] = sum7;
1037ed8eea03SBarry Smith   }
1038ed8eea03SBarry Smith 
1039b3c51c66SMatthew Knepley   ierr = PetscLogFlops(14*a->nz - 7*nonzerorow);CHKERRQ(ierr);
1040ed8eea03SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1041ed8eea03SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1042ed8eea03SBarry Smith   PetscFunctionReturn(0);
1043ed8eea03SBarry Smith }
1044ed8eea03SBarry Smith 
1045ed8eea03SBarry Smith #undef __FUNCT__
1046ed8eea03SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_7"
1047ed8eea03SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1048ed8eea03SBarry Smith {
1049ed8eea03SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1050ed8eea03SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1051ed8eea03SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,zero = 0.0;
1052ed8eea03SBarry Smith   PetscErrorCode ierr;
1053899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
1054ed8eea03SBarry Smith 
1055ed8eea03SBarry Smith   PetscFunctionBegin;
10562dcb1b2aSMatthew Knepley   ierr = VecSet(yy,zero);CHKERRQ(ierr);
1057ed8eea03SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1058ed8eea03SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1059ed8eea03SBarry Smith 
1060ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1061ed8eea03SBarry Smith     idx    = a->j + a->i[i] ;
1062ed8eea03SBarry Smith     v      = a->a + a->i[i] ;
1063ed8eea03SBarry Smith     n      = a->i[i+1] - a->i[i];
1064ed8eea03SBarry Smith     alpha1 = x[7*i];
1065ed8eea03SBarry Smith     alpha2 = x[7*i+1];
1066ed8eea03SBarry Smith     alpha3 = x[7*i+2];
1067ed8eea03SBarry Smith     alpha4 = x[7*i+3];
1068ed8eea03SBarry Smith     alpha5 = x[7*i+4];
1069ed8eea03SBarry Smith     alpha6 = x[7*i+5];
1070ed8eea03SBarry Smith     alpha7 = x[7*i+6];
1071ed8eea03SBarry Smith     while (n-->0) {
1072ed8eea03SBarry Smith       y[7*(*idx)]   += alpha1*(*v);
1073ed8eea03SBarry Smith       y[7*(*idx)+1] += alpha2*(*v);
1074ed8eea03SBarry Smith       y[7*(*idx)+2] += alpha3*(*v);
1075ed8eea03SBarry Smith       y[7*(*idx)+3] += alpha4*(*v);
1076ed8eea03SBarry Smith       y[7*(*idx)+4] += alpha5*(*v);
1077ed8eea03SBarry Smith       y[7*(*idx)+5] += alpha6*(*v);
1078ed8eea03SBarry Smith       y[7*(*idx)+6] += alpha7*(*v);
1079ed8eea03SBarry Smith       idx++; v++;
1080ed8eea03SBarry Smith     }
1081ed8eea03SBarry Smith   }
1082b3c51c66SMatthew Knepley   ierr = PetscLogFlops(14*a->nz);CHKERRQ(ierr);
1083ed8eea03SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1084ed8eea03SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1085ed8eea03SBarry Smith   PetscFunctionReturn(0);
1086ed8eea03SBarry Smith }
1087ed8eea03SBarry Smith 
1088ed8eea03SBarry Smith #undef __FUNCT__
1089ed8eea03SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_7"
1090ed8eea03SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1091ed8eea03SBarry Smith {
1092ed8eea03SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1093ed8eea03SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1094ed8eea03SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1095ed8eea03SBarry Smith   PetscErrorCode ierr;
1096899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1097ed8eea03SBarry Smith   PetscInt       n,i,jrow,j;
1098ed8eea03SBarry Smith 
1099ed8eea03SBarry Smith   PetscFunctionBegin;
1100ed8eea03SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1101ed8eea03SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1102ed8eea03SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1103ed8eea03SBarry Smith   idx  = a->j;
1104ed8eea03SBarry Smith   v    = a->a;
1105ed8eea03SBarry Smith   ii   = a->i;
1106ed8eea03SBarry Smith 
1107ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1108ed8eea03SBarry Smith     jrow = ii[i];
1109ed8eea03SBarry Smith     n    = ii[i+1] - jrow;
1110ed8eea03SBarry Smith     sum1  = 0.0;
1111ed8eea03SBarry Smith     sum2  = 0.0;
1112ed8eea03SBarry Smith     sum3  = 0.0;
1113ed8eea03SBarry Smith     sum4  = 0.0;
1114ed8eea03SBarry Smith     sum5  = 0.0;
1115ed8eea03SBarry Smith     sum6  = 0.0;
1116ed8eea03SBarry Smith     sum7  = 0.0;
1117ed8eea03SBarry Smith     for (j=0; j<n; j++) {
1118ed8eea03SBarry Smith       sum1 += v[jrow]*x[7*idx[jrow]];
1119ed8eea03SBarry Smith       sum2 += v[jrow]*x[7*idx[jrow]+1];
1120ed8eea03SBarry Smith       sum3 += v[jrow]*x[7*idx[jrow]+2];
1121ed8eea03SBarry Smith       sum4 += v[jrow]*x[7*idx[jrow]+3];
1122ed8eea03SBarry Smith       sum5 += v[jrow]*x[7*idx[jrow]+4];
1123ed8eea03SBarry Smith       sum6 += v[jrow]*x[7*idx[jrow]+5];
1124ed8eea03SBarry Smith       sum7 += v[jrow]*x[7*idx[jrow]+6];
1125ed8eea03SBarry Smith       jrow++;
1126ed8eea03SBarry Smith      }
1127ed8eea03SBarry Smith     y[7*i]   += sum1;
1128ed8eea03SBarry Smith     y[7*i+1] += sum2;
1129ed8eea03SBarry Smith     y[7*i+2] += sum3;
1130ed8eea03SBarry Smith     y[7*i+3] += sum4;
1131ed8eea03SBarry Smith     y[7*i+4] += sum5;
1132ed8eea03SBarry Smith     y[7*i+5] += sum6;
1133ed8eea03SBarry Smith     y[7*i+6] += sum7;
1134ed8eea03SBarry Smith   }
1135ed8eea03SBarry Smith 
1136efee365bSSatish Balay   ierr = PetscLogFlops(14*a->nz);CHKERRQ(ierr);
1137ed8eea03SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1138ed8eea03SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1139ed8eea03SBarry Smith   PetscFunctionReturn(0);
1140ed8eea03SBarry Smith }
1141ed8eea03SBarry Smith 
1142ed8eea03SBarry Smith #undef __FUNCT__
1143ed8eea03SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_7"
1144ed8eea03SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1145ed8eea03SBarry Smith {
1146ed8eea03SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1147ed8eea03SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1148ed8eea03SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1149ed8eea03SBarry Smith   PetscErrorCode ierr;
1150899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
1151ed8eea03SBarry Smith 
1152ed8eea03SBarry Smith   PetscFunctionBegin;
1153ed8eea03SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1154ed8eea03SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1155ed8eea03SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1156ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1157ed8eea03SBarry Smith     idx    = a->j + a->i[i] ;
1158ed8eea03SBarry Smith     v      = a->a + a->i[i] ;
1159ed8eea03SBarry Smith     n      = a->i[i+1] - a->i[i];
1160ed8eea03SBarry Smith     alpha1 = x[7*i];
1161ed8eea03SBarry Smith     alpha2 = x[7*i+1];
1162ed8eea03SBarry Smith     alpha3 = x[7*i+2];
1163ed8eea03SBarry Smith     alpha4 = x[7*i+3];
1164ed8eea03SBarry Smith     alpha5 = x[7*i+4];
1165ed8eea03SBarry Smith     alpha6 = x[7*i+5];
1166ed8eea03SBarry Smith     alpha7 = x[7*i+6];
1167ed8eea03SBarry Smith     while (n-->0) {
1168ed8eea03SBarry Smith       y[7*(*idx)]   += alpha1*(*v);
1169ed8eea03SBarry Smith       y[7*(*idx)+1] += alpha2*(*v);
1170ed8eea03SBarry Smith       y[7*(*idx)+2] += alpha3*(*v);
1171ed8eea03SBarry Smith       y[7*(*idx)+3] += alpha4*(*v);
1172ed8eea03SBarry Smith       y[7*(*idx)+4] += alpha5*(*v);
1173ed8eea03SBarry Smith       y[7*(*idx)+5] += alpha6*(*v);
1174ed8eea03SBarry Smith       y[7*(*idx)+6] += alpha7*(*v);
1175ed8eea03SBarry Smith       idx++; v++;
1176ed8eea03SBarry Smith     }
1177ed8eea03SBarry Smith   }
1178efee365bSSatish Balay   ierr = PetscLogFlops(14*a->nz);CHKERRQ(ierr);
1179ed8eea03SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1180ed8eea03SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1181ed8eea03SBarry Smith   PetscFunctionReturn(0);
1182ed8eea03SBarry Smith }
1183ed8eea03SBarry Smith 
1184ed8eea03SBarry Smith #undef __FUNCT__
118566ed3db0SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_8"
1186dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
118766ed3db0SBarry Smith {
118866ed3db0SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
118966ed3db0SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
119087828ca2SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1191dfbe8321SBarry Smith   PetscErrorCode ierr;
1192b3c51c66SMatthew Knepley   PetscInt       m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
1193b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
119466ed3db0SBarry Smith 
119566ed3db0SBarry Smith   PetscFunctionBegin;
11961ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
11971ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
119866ed3db0SBarry Smith   idx  = a->j;
119966ed3db0SBarry Smith   v    = a->a;
120066ed3db0SBarry Smith   ii   = a->i;
120166ed3db0SBarry Smith 
120266ed3db0SBarry Smith   for (i=0; i<m; i++) {
120366ed3db0SBarry Smith     jrow = ii[i];
120466ed3db0SBarry Smith     n    = ii[i+1] - jrow;
120566ed3db0SBarry Smith     sum1  = 0.0;
120666ed3db0SBarry Smith     sum2  = 0.0;
120766ed3db0SBarry Smith     sum3  = 0.0;
120866ed3db0SBarry Smith     sum4  = 0.0;
120966ed3db0SBarry Smith     sum5  = 0.0;
121066ed3db0SBarry Smith     sum6  = 0.0;
121166ed3db0SBarry Smith     sum7  = 0.0;
121266ed3db0SBarry Smith     sum8  = 0.0;
1213b3c51c66SMatthew Knepley     nonzerorow += (n>0);
121466ed3db0SBarry Smith     for (j=0; j<n; j++) {
121566ed3db0SBarry Smith       sum1 += v[jrow]*x[8*idx[jrow]];
121666ed3db0SBarry Smith       sum2 += v[jrow]*x[8*idx[jrow]+1];
121766ed3db0SBarry Smith       sum3 += v[jrow]*x[8*idx[jrow]+2];
121866ed3db0SBarry Smith       sum4 += v[jrow]*x[8*idx[jrow]+3];
121966ed3db0SBarry Smith       sum5 += v[jrow]*x[8*idx[jrow]+4];
122066ed3db0SBarry Smith       sum6 += v[jrow]*x[8*idx[jrow]+5];
122166ed3db0SBarry Smith       sum7 += v[jrow]*x[8*idx[jrow]+6];
122266ed3db0SBarry Smith       sum8 += v[jrow]*x[8*idx[jrow]+7];
122366ed3db0SBarry Smith       jrow++;
122466ed3db0SBarry Smith      }
122566ed3db0SBarry Smith     y[8*i]   = sum1;
122666ed3db0SBarry Smith     y[8*i+1] = sum2;
122766ed3db0SBarry Smith     y[8*i+2] = sum3;
122866ed3db0SBarry Smith     y[8*i+3] = sum4;
122966ed3db0SBarry Smith     y[8*i+4] = sum5;
123066ed3db0SBarry Smith     y[8*i+5] = sum6;
123166ed3db0SBarry Smith     y[8*i+6] = sum7;
123266ed3db0SBarry Smith     y[8*i+7] = sum8;
123366ed3db0SBarry Smith   }
123466ed3db0SBarry Smith 
1235b3c51c66SMatthew Knepley   ierr = PetscLogFlops(16*a->nz - 8*nonzerorow);CHKERRQ(ierr);
12361ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
12371ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
123866ed3db0SBarry Smith   PetscFunctionReturn(0);
123966ed3db0SBarry Smith }
124066ed3db0SBarry Smith 
124166ed3db0SBarry Smith #undef __FUNCT__
124266ed3db0SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8"
1243dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
124466ed3db0SBarry Smith {
124566ed3db0SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
124666ed3db0SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
124787828ca2SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1248dfbe8321SBarry Smith   PetscErrorCode ierr;
1249899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
125066ed3db0SBarry Smith 
125166ed3db0SBarry Smith   PetscFunctionBegin;
12522dcb1b2aSMatthew Knepley   ierr = VecSet(yy,zero);CHKERRQ(ierr);
12531ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
12541ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1255bfec09a0SHong Zhang 
125666ed3db0SBarry Smith   for (i=0; i<m; i++) {
1257bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
1258bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
125966ed3db0SBarry Smith     n      = a->i[i+1] - a->i[i];
126066ed3db0SBarry Smith     alpha1 = x[8*i];
126166ed3db0SBarry Smith     alpha2 = x[8*i+1];
126266ed3db0SBarry Smith     alpha3 = x[8*i+2];
126366ed3db0SBarry Smith     alpha4 = x[8*i+3];
126466ed3db0SBarry Smith     alpha5 = x[8*i+4];
126566ed3db0SBarry Smith     alpha6 = x[8*i+5];
126666ed3db0SBarry Smith     alpha7 = x[8*i+6];
126766ed3db0SBarry Smith     alpha8 = x[8*i+7];
126866ed3db0SBarry Smith     while (n-->0) {
126966ed3db0SBarry Smith       y[8*(*idx)]   += alpha1*(*v);
127066ed3db0SBarry Smith       y[8*(*idx)+1] += alpha2*(*v);
127166ed3db0SBarry Smith       y[8*(*idx)+2] += alpha3*(*v);
127266ed3db0SBarry Smith       y[8*(*idx)+3] += alpha4*(*v);
127366ed3db0SBarry Smith       y[8*(*idx)+4] += alpha5*(*v);
127466ed3db0SBarry Smith       y[8*(*idx)+5] += alpha6*(*v);
127566ed3db0SBarry Smith       y[8*(*idx)+6] += alpha7*(*v);
127666ed3db0SBarry Smith       y[8*(*idx)+7] += alpha8*(*v);
127766ed3db0SBarry Smith       idx++; v++;
127866ed3db0SBarry Smith     }
127966ed3db0SBarry Smith   }
1280b3c51c66SMatthew Knepley   ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr);
12811ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
12821ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
128366ed3db0SBarry Smith   PetscFunctionReturn(0);
128466ed3db0SBarry Smith }
128566ed3db0SBarry Smith 
128666ed3db0SBarry Smith #undef __FUNCT__
128766ed3db0SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_8"
1288dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
128966ed3db0SBarry Smith {
129066ed3db0SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
129166ed3db0SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
129287828ca2SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1293dfbe8321SBarry Smith   PetscErrorCode ierr;
1294899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1295b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
129666ed3db0SBarry Smith 
129766ed3db0SBarry Smith   PetscFunctionBegin;
129866ed3db0SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
12991ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
13001ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
130166ed3db0SBarry Smith   idx  = a->j;
130266ed3db0SBarry Smith   v    = a->a;
130366ed3db0SBarry Smith   ii   = a->i;
130466ed3db0SBarry Smith 
130566ed3db0SBarry Smith   for (i=0; i<m; i++) {
130666ed3db0SBarry Smith     jrow = ii[i];
130766ed3db0SBarry Smith     n    = ii[i+1] - jrow;
130866ed3db0SBarry Smith     sum1  = 0.0;
130966ed3db0SBarry Smith     sum2  = 0.0;
131066ed3db0SBarry Smith     sum3  = 0.0;
131166ed3db0SBarry Smith     sum4  = 0.0;
131266ed3db0SBarry Smith     sum5  = 0.0;
131366ed3db0SBarry Smith     sum6  = 0.0;
131466ed3db0SBarry Smith     sum7  = 0.0;
131566ed3db0SBarry Smith     sum8  = 0.0;
131666ed3db0SBarry Smith     for (j=0; j<n; j++) {
131766ed3db0SBarry Smith       sum1 += v[jrow]*x[8*idx[jrow]];
131866ed3db0SBarry Smith       sum2 += v[jrow]*x[8*idx[jrow]+1];
131966ed3db0SBarry Smith       sum3 += v[jrow]*x[8*idx[jrow]+2];
132066ed3db0SBarry Smith       sum4 += v[jrow]*x[8*idx[jrow]+3];
132166ed3db0SBarry Smith       sum5 += v[jrow]*x[8*idx[jrow]+4];
132266ed3db0SBarry Smith       sum6 += v[jrow]*x[8*idx[jrow]+5];
132366ed3db0SBarry Smith       sum7 += v[jrow]*x[8*idx[jrow]+6];
132466ed3db0SBarry Smith       sum8 += v[jrow]*x[8*idx[jrow]+7];
132566ed3db0SBarry Smith       jrow++;
132666ed3db0SBarry Smith      }
132766ed3db0SBarry Smith     y[8*i]   += sum1;
132866ed3db0SBarry Smith     y[8*i+1] += sum2;
132966ed3db0SBarry Smith     y[8*i+2] += sum3;
133066ed3db0SBarry Smith     y[8*i+3] += sum4;
133166ed3db0SBarry Smith     y[8*i+4] += sum5;
133266ed3db0SBarry Smith     y[8*i+5] += sum6;
133366ed3db0SBarry Smith     y[8*i+6] += sum7;
133466ed3db0SBarry Smith     y[8*i+7] += sum8;
133566ed3db0SBarry Smith   }
133666ed3db0SBarry Smith 
1337efee365bSSatish Balay   ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr);
13381ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
13391ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
134066ed3db0SBarry Smith   PetscFunctionReturn(0);
134166ed3db0SBarry Smith }
134266ed3db0SBarry Smith 
134366ed3db0SBarry Smith #undef __FUNCT__
134466ed3db0SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8"
1345dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
134666ed3db0SBarry Smith {
134766ed3db0SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
134866ed3db0SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
134987828ca2SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1350dfbe8321SBarry Smith   PetscErrorCode ierr;
1351899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
135266ed3db0SBarry Smith 
135366ed3db0SBarry Smith   PetscFunctionBegin;
135466ed3db0SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
13551ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
13561ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
135766ed3db0SBarry Smith   for (i=0; i<m; i++) {
1358bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
1359bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
136066ed3db0SBarry Smith     n      = a->i[i+1] - a->i[i];
136166ed3db0SBarry Smith     alpha1 = x[8*i];
136266ed3db0SBarry Smith     alpha2 = x[8*i+1];
136366ed3db0SBarry Smith     alpha3 = x[8*i+2];
136466ed3db0SBarry Smith     alpha4 = x[8*i+3];
136566ed3db0SBarry Smith     alpha5 = x[8*i+4];
136666ed3db0SBarry Smith     alpha6 = x[8*i+5];
136766ed3db0SBarry Smith     alpha7 = x[8*i+6];
136866ed3db0SBarry Smith     alpha8 = x[8*i+7];
136966ed3db0SBarry Smith     while (n-->0) {
137066ed3db0SBarry Smith       y[8*(*idx)]   += alpha1*(*v);
137166ed3db0SBarry Smith       y[8*(*idx)+1] += alpha2*(*v);
137266ed3db0SBarry Smith       y[8*(*idx)+2] += alpha3*(*v);
137366ed3db0SBarry Smith       y[8*(*idx)+3] += alpha4*(*v);
137466ed3db0SBarry Smith       y[8*(*idx)+4] += alpha5*(*v);
137566ed3db0SBarry Smith       y[8*(*idx)+5] += alpha6*(*v);
137666ed3db0SBarry Smith       y[8*(*idx)+6] += alpha7*(*v);
137766ed3db0SBarry Smith       y[8*(*idx)+7] += alpha8*(*v);
137866ed3db0SBarry Smith       idx++; v++;
137966ed3db0SBarry Smith     }
138066ed3db0SBarry Smith   }
1381efee365bSSatish Balay   ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr);
13821ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
13831ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
13842f7816d4SBarry Smith   PetscFunctionReturn(0);
13852f7816d4SBarry Smith }
13862f7816d4SBarry Smith 
13872b692628SMatthew Knepley /* ------------------------------------------------------------------------------*/
13882b692628SMatthew Knepley #undef __FUNCT__
13892b692628SMatthew Knepley #define __FUNCT__ "MatMult_SeqMAIJ_9"
1390dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
13912b692628SMatthew Knepley {
13922b692628SMatthew Knepley   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
13932b692628SMatthew Knepley   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
13942b692628SMatthew Knepley   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1395dfbe8321SBarry Smith   PetscErrorCode ierr;
1396b3c51c66SMatthew Knepley   PetscInt       m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
1397b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
13982b692628SMatthew Knepley 
13992b692628SMatthew Knepley   PetscFunctionBegin;
14001ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
14011ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
14022b692628SMatthew Knepley   idx  = a->j;
14032b692628SMatthew Knepley   v    = a->a;
14042b692628SMatthew Knepley   ii   = a->i;
14052b692628SMatthew Knepley 
14062b692628SMatthew Knepley   for (i=0; i<m; i++) {
14072b692628SMatthew Knepley     jrow = ii[i];
14082b692628SMatthew Knepley     n    = ii[i+1] - jrow;
14092b692628SMatthew Knepley     sum1  = 0.0;
14102b692628SMatthew Knepley     sum2  = 0.0;
14112b692628SMatthew Knepley     sum3  = 0.0;
14122b692628SMatthew Knepley     sum4  = 0.0;
14132b692628SMatthew Knepley     sum5  = 0.0;
14142b692628SMatthew Knepley     sum6  = 0.0;
14152b692628SMatthew Knepley     sum7  = 0.0;
14162b692628SMatthew Knepley     sum8  = 0.0;
14172b692628SMatthew Knepley     sum9  = 0.0;
1418b3c51c66SMatthew Knepley     nonzerorow += (n>0);
14192b692628SMatthew Knepley     for (j=0; j<n; j++) {
14202b692628SMatthew Knepley       sum1 += v[jrow]*x[9*idx[jrow]];
14212b692628SMatthew Knepley       sum2 += v[jrow]*x[9*idx[jrow]+1];
14222b692628SMatthew Knepley       sum3 += v[jrow]*x[9*idx[jrow]+2];
14232b692628SMatthew Knepley       sum4 += v[jrow]*x[9*idx[jrow]+3];
14242b692628SMatthew Knepley       sum5 += v[jrow]*x[9*idx[jrow]+4];
14252b692628SMatthew Knepley       sum6 += v[jrow]*x[9*idx[jrow]+5];
14262b692628SMatthew Knepley       sum7 += v[jrow]*x[9*idx[jrow]+6];
14272b692628SMatthew Knepley       sum8 += v[jrow]*x[9*idx[jrow]+7];
14282b692628SMatthew Knepley       sum9 += v[jrow]*x[9*idx[jrow]+8];
14292b692628SMatthew Knepley       jrow++;
14302b692628SMatthew Knepley      }
14312b692628SMatthew Knepley     y[9*i]   = sum1;
14322b692628SMatthew Knepley     y[9*i+1] = sum2;
14332b692628SMatthew Knepley     y[9*i+2] = sum3;
14342b692628SMatthew Knepley     y[9*i+3] = sum4;
14352b692628SMatthew Knepley     y[9*i+4] = sum5;
14362b692628SMatthew Knepley     y[9*i+5] = sum6;
14372b692628SMatthew Knepley     y[9*i+6] = sum7;
14382b692628SMatthew Knepley     y[9*i+7] = sum8;
14392b692628SMatthew Knepley     y[9*i+8] = sum9;
14402b692628SMatthew Knepley   }
14412b692628SMatthew Knepley 
1442b3c51c66SMatthew Knepley   ierr = PetscLogFlops(18*a->nz - 9*nonzerorow);CHKERRQ(ierr);
14431ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
14441ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
14452b692628SMatthew Knepley   PetscFunctionReturn(0);
14462b692628SMatthew Knepley }
14472b692628SMatthew Knepley 
1448b9cda22cSBarry Smith /* ------------------------------------------------------------------------------*/
1449b9cda22cSBarry Smith 
14502b692628SMatthew Knepley #undef __FUNCT__
14512b692628SMatthew Knepley #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9"
1452dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
14532b692628SMatthew Knepley {
14542b692628SMatthew Knepley   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
14552b692628SMatthew Knepley   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
14562b692628SMatthew Knepley   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0;
1457dfbe8321SBarry Smith   PetscErrorCode ierr;
1458899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
14592b692628SMatthew Knepley 
14602b692628SMatthew Knepley   PetscFunctionBegin;
14612dcb1b2aSMatthew Knepley   ierr = VecSet(yy,zero);CHKERRQ(ierr);
14621ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
14631ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
14642b692628SMatthew Knepley 
14652b692628SMatthew Knepley   for (i=0; i<m; i++) {
14662b692628SMatthew Knepley     idx    = a->j + a->i[i] ;
14672b692628SMatthew Knepley     v      = a->a + a->i[i] ;
14682b692628SMatthew Knepley     n      = a->i[i+1] - a->i[i];
14692b692628SMatthew Knepley     alpha1 = x[9*i];
14702b692628SMatthew Knepley     alpha2 = x[9*i+1];
14712b692628SMatthew Knepley     alpha3 = x[9*i+2];
14722b692628SMatthew Knepley     alpha4 = x[9*i+3];
14732b692628SMatthew Knepley     alpha5 = x[9*i+4];
14742b692628SMatthew Knepley     alpha6 = x[9*i+5];
14752b692628SMatthew Knepley     alpha7 = x[9*i+6];
14762b692628SMatthew Knepley     alpha8 = x[9*i+7];
14772f6bd0c6SMatthew Knepley     alpha9 = x[9*i+8];
14782b692628SMatthew Knepley     while (n-->0) {
14792b692628SMatthew Knepley       y[9*(*idx)]   += alpha1*(*v);
14802b692628SMatthew Knepley       y[9*(*idx)+1] += alpha2*(*v);
14812b692628SMatthew Knepley       y[9*(*idx)+2] += alpha3*(*v);
14822b692628SMatthew Knepley       y[9*(*idx)+3] += alpha4*(*v);
14832b692628SMatthew Knepley       y[9*(*idx)+4] += alpha5*(*v);
14842b692628SMatthew Knepley       y[9*(*idx)+5] += alpha6*(*v);
14852b692628SMatthew Knepley       y[9*(*idx)+6] += alpha7*(*v);
14862b692628SMatthew Knepley       y[9*(*idx)+7] += alpha8*(*v);
14872b692628SMatthew Knepley       y[9*(*idx)+8] += alpha9*(*v);
14882b692628SMatthew Knepley       idx++; v++;
14892b692628SMatthew Knepley     }
14902b692628SMatthew Knepley   }
1491b3c51c66SMatthew Knepley   ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr);
14921ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
14931ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
14942b692628SMatthew Knepley   PetscFunctionReturn(0);
14952b692628SMatthew Knepley }
14962b692628SMatthew Knepley 
14972b692628SMatthew Knepley #undef __FUNCT__
14982b692628SMatthew Knepley #define __FUNCT__ "MatMultAdd_SeqMAIJ_9"
1499dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
15002b692628SMatthew Knepley {
15012b692628SMatthew Knepley   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
15022b692628SMatthew Knepley   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
15032b692628SMatthew Knepley   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1504dfbe8321SBarry Smith   PetscErrorCode ierr;
1505899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1506b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
15072b692628SMatthew Knepley 
15082b692628SMatthew Knepley   PetscFunctionBegin;
15092b692628SMatthew Knepley   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
15101ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
15111ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
15122b692628SMatthew Knepley   idx  = a->j;
15132b692628SMatthew Knepley   v    = a->a;
15142b692628SMatthew Knepley   ii   = a->i;
15152b692628SMatthew Knepley 
15162b692628SMatthew Knepley   for (i=0; i<m; i++) {
15172b692628SMatthew Knepley     jrow = ii[i];
15182b692628SMatthew Knepley     n    = ii[i+1] - jrow;
15192b692628SMatthew Knepley     sum1  = 0.0;
15202b692628SMatthew Knepley     sum2  = 0.0;
15212b692628SMatthew Knepley     sum3  = 0.0;
15222b692628SMatthew Knepley     sum4  = 0.0;
15232b692628SMatthew Knepley     sum5  = 0.0;
15242b692628SMatthew Knepley     sum6  = 0.0;
15252b692628SMatthew Knepley     sum7  = 0.0;
15262b692628SMatthew Knepley     sum8  = 0.0;
15272b692628SMatthew Knepley     sum9  = 0.0;
15282b692628SMatthew Knepley     for (j=0; j<n; j++) {
15292b692628SMatthew Knepley       sum1 += v[jrow]*x[9*idx[jrow]];
15302b692628SMatthew Knepley       sum2 += v[jrow]*x[9*idx[jrow]+1];
15312b692628SMatthew Knepley       sum3 += v[jrow]*x[9*idx[jrow]+2];
15322b692628SMatthew Knepley       sum4 += v[jrow]*x[9*idx[jrow]+3];
15332b692628SMatthew Knepley       sum5 += v[jrow]*x[9*idx[jrow]+4];
15342b692628SMatthew Knepley       sum6 += v[jrow]*x[9*idx[jrow]+5];
15352b692628SMatthew Knepley       sum7 += v[jrow]*x[9*idx[jrow]+6];
15362b692628SMatthew Knepley       sum8 += v[jrow]*x[9*idx[jrow]+7];
15372b692628SMatthew Knepley       sum9 += v[jrow]*x[9*idx[jrow]+8];
15382b692628SMatthew Knepley       jrow++;
15392b692628SMatthew Knepley      }
15402b692628SMatthew Knepley     y[9*i]   += sum1;
15412b692628SMatthew Knepley     y[9*i+1] += sum2;
15422b692628SMatthew Knepley     y[9*i+2] += sum3;
15432b692628SMatthew Knepley     y[9*i+3] += sum4;
15442b692628SMatthew Knepley     y[9*i+4] += sum5;
15452b692628SMatthew Knepley     y[9*i+5] += sum6;
15462b692628SMatthew Knepley     y[9*i+6] += sum7;
15472b692628SMatthew Knepley     y[9*i+7] += sum8;
15482b692628SMatthew Knepley     y[9*i+8] += sum9;
15492b692628SMatthew Knepley   }
15502b692628SMatthew Knepley 
1551efee365bSSatish Balay   ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr);
15521ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
15531ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
15542b692628SMatthew Knepley   PetscFunctionReturn(0);
15552b692628SMatthew Knepley }
15562b692628SMatthew Knepley 
15572b692628SMatthew Knepley #undef __FUNCT__
15582b692628SMatthew Knepley #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9"
1559dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
15602b692628SMatthew Knepley {
15612b692628SMatthew Knepley   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
15622b692628SMatthew Knepley   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
15632b692628SMatthew Knepley   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1564dfbe8321SBarry Smith   PetscErrorCode ierr;
1565899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
15662b692628SMatthew Knepley 
15672b692628SMatthew Knepley   PetscFunctionBegin;
15682b692628SMatthew Knepley   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
15691ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
15701ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
15712b692628SMatthew Knepley   for (i=0; i<m; i++) {
15722b692628SMatthew Knepley     idx    = a->j + a->i[i] ;
15732b692628SMatthew Knepley     v      = a->a + a->i[i] ;
15742b692628SMatthew Knepley     n      = a->i[i+1] - a->i[i];
15752b692628SMatthew Knepley     alpha1 = x[9*i];
15762b692628SMatthew Knepley     alpha2 = x[9*i+1];
15772b692628SMatthew Knepley     alpha3 = x[9*i+2];
15782b692628SMatthew Knepley     alpha4 = x[9*i+3];
15792b692628SMatthew Knepley     alpha5 = x[9*i+4];
15802b692628SMatthew Knepley     alpha6 = x[9*i+5];
15812b692628SMatthew Knepley     alpha7 = x[9*i+6];
15822b692628SMatthew Knepley     alpha8 = x[9*i+7];
15832b692628SMatthew Knepley     alpha9 = x[9*i+8];
15842b692628SMatthew Knepley     while (n-->0) {
15852b692628SMatthew Knepley       y[9*(*idx)]   += alpha1*(*v);
15862b692628SMatthew Knepley       y[9*(*idx)+1] += alpha2*(*v);
15872b692628SMatthew Knepley       y[9*(*idx)+2] += alpha3*(*v);
15882b692628SMatthew Knepley       y[9*(*idx)+3] += alpha4*(*v);
15892b692628SMatthew Knepley       y[9*(*idx)+4] += alpha5*(*v);
15902b692628SMatthew Knepley       y[9*(*idx)+5] += alpha6*(*v);
15912b692628SMatthew Knepley       y[9*(*idx)+6] += alpha7*(*v);
15922b692628SMatthew Knepley       y[9*(*idx)+7] += alpha8*(*v);
15932b692628SMatthew Knepley       y[9*(*idx)+8] += alpha9*(*v);
15942b692628SMatthew Knepley       idx++; v++;
15952b692628SMatthew Knepley     }
15962b692628SMatthew Knepley   }
1597efee365bSSatish Balay   ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr);
15981ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
15991ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
16002b692628SMatthew Knepley   PetscFunctionReturn(0);
16012b692628SMatthew Knepley }
1602b9cda22cSBarry Smith /*--------------------------------------------------------------------------------------------*/
1603b9cda22cSBarry Smith #undef __FUNCT__
1604b9cda22cSBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_10"
1605b9cda22cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1606b9cda22cSBarry Smith {
1607b9cda22cSBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1608b9cda22cSBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1609b9cda22cSBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1610b9cda22cSBarry Smith   PetscErrorCode ierr;
1611b3c51c66SMatthew Knepley   PetscInt       m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
1612b9cda22cSBarry Smith   PetscInt       n,i,jrow,j;
1613b9cda22cSBarry Smith 
1614b9cda22cSBarry Smith   PetscFunctionBegin;
1615b9cda22cSBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1616b9cda22cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1617b9cda22cSBarry Smith   idx  = a->j;
1618b9cda22cSBarry Smith   v    = a->a;
1619b9cda22cSBarry Smith   ii   = a->i;
1620b9cda22cSBarry Smith 
1621b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1622b9cda22cSBarry Smith     jrow = ii[i];
1623b9cda22cSBarry Smith     n    = ii[i+1] - jrow;
1624b9cda22cSBarry Smith     sum1  = 0.0;
1625b9cda22cSBarry Smith     sum2  = 0.0;
1626b9cda22cSBarry Smith     sum3  = 0.0;
1627b9cda22cSBarry Smith     sum4  = 0.0;
1628b9cda22cSBarry Smith     sum5  = 0.0;
1629b9cda22cSBarry Smith     sum6  = 0.0;
1630b9cda22cSBarry Smith     sum7  = 0.0;
1631b9cda22cSBarry Smith     sum8  = 0.0;
1632b9cda22cSBarry Smith     sum9  = 0.0;
1633b9cda22cSBarry Smith     sum10 = 0.0;
1634b3c51c66SMatthew Knepley     nonzerorow += (n>0);
1635b9cda22cSBarry Smith     for (j=0; j<n; j++) {
1636b9cda22cSBarry Smith       sum1  += v[jrow]*x[10*idx[jrow]];
1637b9cda22cSBarry Smith       sum2  += v[jrow]*x[10*idx[jrow]+1];
1638b9cda22cSBarry Smith       sum3  += v[jrow]*x[10*idx[jrow]+2];
1639b9cda22cSBarry Smith       sum4  += v[jrow]*x[10*idx[jrow]+3];
1640b9cda22cSBarry Smith       sum5  += v[jrow]*x[10*idx[jrow]+4];
1641b9cda22cSBarry Smith       sum6  += v[jrow]*x[10*idx[jrow]+5];
1642b9cda22cSBarry Smith       sum7  += v[jrow]*x[10*idx[jrow]+6];
1643b9cda22cSBarry Smith       sum8  += v[jrow]*x[10*idx[jrow]+7];
1644b9cda22cSBarry Smith       sum9  += v[jrow]*x[10*idx[jrow]+8];
1645b9cda22cSBarry Smith       sum10 += v[jrow]*x[10*idx[jrow]+9];
1646b9cda22cSBarry Smith       jrow++;
1647b9cda22cSBarry Smith      }
1648b9cda22cSBarry Smith     y[10*i]   = sum1;
1649b9cda22cSBarry Smith     y[10*i+1] = sum2;
1650b9cda22cSBarry Smith     y[10*i+2] = sum3;
1651b9cda22cSBarry Smith     y[10*i+3] = sum4;
1652b9cda22cSBarry Smith     y[10*i+4] = sum5;
1653b9cda22cSBarry Smith     y[10*i+5] = sum6;
1654b9cda22cSBarry Smith     y[10*i+6] = sum7;
1655b9cda22cSBarry Smith     y[10*i+7] = sum8;
1656b9cda22cSBarry Smith     y[10*i+8] = sum9;
1657b9cda22cSBarry Smith     y[10*i+9] = sum10;
1658b9cda22cSBarry Smith   }
1659b9cda22cSBarry Smith 
1660b3c51c66SMatthew Knepley   ierr = PetscLogFlops(20*a->nz - 10*nonzerorow);CHKERRQ(ierr);
1661b9cda22cSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1662b9cda22cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1663b9cda22cSBarry Smith   PetscFunctionReturn(0);
1664b9cda22cSBarry Smith }
1665b9cda22cSBarry Smith 
1666b9cda22cSBarry Smith #undef __FUNCT__
1667b9cda22cSBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_10"
1668b9cda22cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1669b9cda22cSBarry Smith {
1670b9cda22cSBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1671b9cda22cSBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1672b9cda22cSBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1673b9cda22cSBarry Smith   PetscErrorCode ierr;
1674899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1675b9cda22cSBarry Smith   PetscInt       n,i,jrow,j;
1676b9cda22cSBarry Smith 
1677b9cda22cSBarry Smith   PetscFunctionBegin;
1678b9cda22cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1679b9cda22cSBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1680b9cda22cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1681b9cda22cSBarry Smith   idx  = a->j;
1682b9cda22cSBarry Smith   v    = a->a;
1683b9cda22cSBarry Smith   ii   = a->i;
1684b9cda22cSBarry Smith 
1685b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1686b9cda22cSBarry Smith     jrow = ii[i];
1687b9cda22cSBarry Smith     n    = ii[i+1] - jrow;
1688b9cda22cSBarry Smith     sum1  = 0.0;
1689b9cda22cSBarry Smith     sum2  = 0.0;
1690b9cda22cSBarry Smith     sum3  = 0.0;
1691b9cda22cSBarry Smith     sum4  = 0.0;
1692b9cda22cSBarry Smith     sum5  = 0.0;
1693b9cda22cSBarry Smith     sum6  = 0.0;
1694b9cda22cSBarry Smith     sum7  = 0.0;
1695b9cda22cSBarry Smith     sum8  = 0.0;
1696b9cda22cSBarry Smith     sum9  = 0.0;
1697b9cda22cSBarry Smith     sum10 = 0.0;
1698b9cda22cSBarry Smith     for (j=0; j<n; j++) {
1699b9cda22cSBarry Smith       sum1  += v[jrow]*x[10*idx[jrow]];
1700b9cda22cSBarry Smith       sum2  += v[jrow]*x[10*idx[jrow]+1];
1701b9cda22cSBarry Smith       sum3  += v[jrow]*x[10*idx[jrow]+2];
1702b9cda22cSBarry Smith       sum4  += v[jrow]*x[10*idx[jrow]+3];
1703b9cda22cSBarry Smith       sum5  += v[jrow]*x[10*idx[jrow]+4];
1704b9cda22cSBarry Smith       sum6  += v[jrow]*x[10*idx[jrow]+5];
1705b9cda22cSBarry Smith       sum7  += v[jrow]*x[10*idx[jrow]+6];
1706b9cda22cSBarry Smith       sum8  += v[jrow]*x[10*idx[jrow]+7];
1707b9cda22cSBarry Smith       sum9  += v[jrow]*x[10*idx[jrow]+8];
1708b9cda22cSBarry Smith       sum10 += v[jrow]*x[10*idx[jrow]+9];
1709b9cda22cSBarry Smith       jrow++;
1710b9cda22cSBarry Smith      }
1711b9cda22cSBarry Smith     y[10*i]   += sum1;
1712b9cda22cSBarry Smith     y[10*i+1] += sum2;
1713b9cda22cSBarry Smith     y[10*i+2] += sum3;
1714b9cda22cSBarry Smith     y[10*i+3] += sum4;
1715b9cda22cSBarry Smith     y[10*i+4] += sum5;
1716b9cda22cSBarry Smith     y[10*i+5] += sum6;
1717b9cda22cSBarry Smith     y[10*i+6] += sum7;
1718b9cda22cSBarry Smith     y[10*i+7] += sum8;
1719b9cda22cSBarry Smith     y[10*i+8] += sum9;
1720b9cda22cSBarry Smith     y[10*i+9] += sum10;
1721b9cda22cSBarry Smith   }
1722b9cda22cSBarry Smith 
1723b3c51c66SMatthew Knepley   ierr = PetscLogFlops(20*a->nz);CHKERRQ(ierr);
1724b9cda22cSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1725b9cda22cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1726b9cda22cSBarry Smith   PetscFunctionReturn(0);
1727b9cda22cSBarry Smith }
1728b9cda22cSBarry Smith 
1729b9cda22cSBarry Smith #undef __FUNCT__
1730b9cda22cSBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_10"
1731b9cda22cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1732b9cda22cSBarry Smith {
1733b9cda22cSBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1734b9cda22cSBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1735b9cda22cSBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,zero = 0.0;
1736b9cda22cSBarry Smith   PetscErrorCode ierr;
1737899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
1738b9cda22cSBarry Smith 
1739b9cda22cSBarry Smith   PetscFunctionBegin;
1740b9cda22cSBarry Smith   ierr = VecSet(yy,zero);CHKERRQ(ierr);
1741b9cda22cSBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1742b9cda22cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1743b9cda22cSBarry Smith 
1744b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1745b9cda22cSBarry Smith     idx    = a->j + a->i[i] ;
1746b9cda22cSBarry Smith     v      = a->a + a->i[i] ;
1747b9cda22cSBarry Smith     n      = a->i[i+1] - a->i[i];
1748e29fdc22SBarry Smith     alpha1 = x[10*i];
1749e29fdc22SBarry Smith     alpha2 = x[10*i+1];
1750e29fdc22SBarry Smith     alpha3 = x[10*i+2];
1751e29fdc22SBarry Smith     alpha4 = x[10*i+3];
1752e29fdc22SBarry Smith     alpha5 = x[10*i+4];
1753e29fdc22SBarry Smith     alpha6 = x[10*i+5];
1754e29fdc22SBarry Smith     alpha7 = x[10*i+6];
1755e29fdc22SBarry Smith     alpha8 = x[10*i+7];
1756e29fdc22SBarry Smith     alpha9 = x[10*i+8];
1757b9cda22cSBarry Smith     alpha10 = x[10*i+9];
1758b9cda22cSBarry Smith     while (n-->0) {
1759e29fdc22SBarry Smith       y[10*(*idx)]   += alpha1*(*v);
1760e29fdc22SBarry Smith       y[10*(*idx)+1] += alpha2*(*v);
1761e29fdc22SBarry Smith       y[10*(*idx)+2] += alpha3*(*v);
1762e29fdc22SBarry Smith       y[10*(*idx)+3] += alpha4*(*v);
1763e29fdc22SBarry Smith       y[10*(*idx)+4] += alpha5*(*v);
1764e29fdc22SBarry Smith       y[10*(*idx)+5] += alpha6*(*v);
1765e29fdc22SBarry Smith       y[10*(*idx)+6] += alpha7*(*v);
1766e29fdc22SBarry Smith       y[10*(*idx)+7] += alpha8*(*v);
1767e29fdc22SBarry Smith       y[10*(*idx)+8] += alpha9*(*v);
1768b9cda22cSBarry Smith       y[10*(*idx)+9] += alpha10*(*v);
1769b9cda22cSBarry Smith       idx++; v++;
1770b9cda22cSBarry Smith     }
1771b9cda22cSBarry Smith   }
1772b3c51c66SMatthew Knepley   ierr = PetscLogFlops(20*a->nz);CHKERRQ(ierr);
1773b9cda22cSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1774b9cda22cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1775b9cda22cSBarry Smith   PetscFunctionReturn(0);
1776b9cda22cSBarry Smith }
1777b9cda22cSBarry Smith 
1778b9cda22cSBarry Smith #undef __FUNCT__
1779b9cda22cSBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_10"
1780b9cda22cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1781b9cda22cSBarry Smith {
1782b9cda22cSBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1783b9cda22cSBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1784b9cda22cSBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1785b9cda22cSBarry Smith   PetscErrorCode ierr;
1786899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
1787b9cda22cSBarry Smith 
1788b9cda22cSBarry Smith   PetscFunctionBegin;
1789b9cda22cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1790b9cda22cSBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1791b9cda22cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1792b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1793b9cda22cSBarry Smith     idx    = a->j + a->i[i] ;
1794b9cda22cSBarry Smith     v      = a->a + a->i[i] ;
1795b9cda22cSBarry Smith     n      = a->i[i+1] - a->i[i];
1796b9cda22cSBarry Smith     alpha1 = x[10*i];
1797b9cda22cSBarry Smith     alpha2 = x[10*i+1];
1798b9cda22cSBarry Smith     alpha3 = x[10*i+2];
1799b9cda22cSBarry Smith     alpha4 = x[10*i+3];
1800b9cda22cSBarry Smith     alpha5 = x[10*i+4];
1801b9cda22cSBarry Smith     alpha6 = x[10*i+5];
1802b9cda22cSBarry Smith     alpha7 = x[10*i+6];
1803b9cda22cSBarry Smith     alpha8 = x[10*i+7];
1804b9cda22cSBarry Smith     alpha9 = x[10*i+8];
1805b9cda22cSBarry Smith     alpha10 = x[10*i+9];
1806b9cda22cSBarry Smith     while (n-->0) {
1807b9cda22cSBarry Smith       y[10*(*idx)]   += alpha1*(*v);
1808b9cda22cSBarry Smith       y[10*(*idx)+1] += alpha2*(*v);
1809b9cda22cSBarry Smith       y[10*(*idx)+2] += alpha3*(*v);
1810b9cda22cSBarry Smith       y[10*(*idx)+3] += alpha4*(*v);
1811b9cda22cSBarry Smith       y[10*(*idx)+4] += alpha5*(*v);
1812b9cda22cSBarry Smith       y[10*(*idx)+5] += alpha6*(*v);
1813b9cda22cSBarry Smith       y[10*(*idx)+6] += alpha7*(*v);
1814b9cda22cSBarry Smith       y[10*(*idx)+7] += alpha8*(*v);
1815b9cda22cSBarry Smith       y[10*(*idx)+8] += alpha9*(*v);
1816b9cda22cSBarry Smith       y[10*(*idx)+9] += alpha10*(*v);
1817b9cda22cSBarry Smith       idx++; v++;
1818b9cda22cSBarry Smith     }
1819b9cda22cSBarry Smith   }
1820b9cda22cSBarry Smith   ierr = PetscLogFlops(20*a->nz);CHKERRQ(ierr);
1821b9cda22cSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1822b9cda22cSBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1823b9cda22cSBarry Smith   PetscFunctionReturn(0);
1824b9cda22cSBarry Smith }
1825b9cda22cSBarry Smith 
18262b692628SMatthew Knepley 
18272f7816d4SBarry Smith /*--------------------------------------------------------------------------------------------*/
18282f7816d4SBarry Smith #undef __FUNCT__
18292f7816d4SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_16"
1830dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
18312f7816d4SBarry Smith {
18322f7816d4SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
18332f7816d4SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
18342f7816d4SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
18352f7816d4SBarry Smith   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1836dfbe8321SBarry Smith   PetscErrorCode ierr;
1837b3c51c66SMatthew Knepley   PetscInt       m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
1838b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
18392f7816d4SBarry Smith 
18402f7816d4SBarry Smith   PetscFunctionBegin;
18411ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
18421ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
18432f7816d4SBarry Smith   idx  = a->j;
18442f7816d4SBarry Smith   v    = a->a;
18452f7816d4SBarry Smith   ii   = a->i;
18462f7816d4SBarry Smith 
18472f7816d4SBarry Smith   for (i=0; i<m; i++) {
18482f7816d4SBarry Smith     jrow = ii[i];
18492f7816d4SBarry Smith     n    = ii[i+1] - jrow;
18502f7816d4SBarry Smith     sum1  = 0.0;
18512f7816d4SBarry Smith     sum2  = 0.0;
18522f7816d4SBarry Smith     sum3  = 0.0;
18532f7816d4SBarry Smith     sum4  = 0.0;
18542f7816d4SBarry Smith     sum5  = 0.0;
18552f7816d4SBarry Smith     sum6  = 0.0;
18562f7816d4SBarry Smith     sum7  = 0.0;
18572f7816d4SBarry Smith     sum8  = 0.0;
18582f7816d4SBarry Smith     sum9  = 0.0;
18592f7816d4SBarry Smith     sum10 = 0.0;
18602f7816d4SBarry Smith     sum11 = 0.0;
18612f7816d4SBarry Smith     sum12 = 0.0;
18622f7816d4SBarry Smith     sum13 = 0.0;
18632f7816d4SBarry Smith     sum14 = 0.0;
18642f7816d4SBarry Smith     sum15 = 0.0;
18652f7816d4SBarry Smith     sum16 = 0.0;
1866b3c51c66SMatthew Knepley     nonzerorow += (n>0);
18672f7816d4SBarry Smith     for (j=0; j<n; j++) {
18682f7816d4SBarry Smith       sum1  += v[jrow]*x[16*idx[jrow]];
18692f7816d4SBarry Smith       sum2  += v[jrow]*x[16*idx[jrow]+1];
18702f7816d4SBarry Smith       sum3  += v[jrow]*x[16*idx[jrow]+2];
18712f7816d4SBarry Smith       sum4  += v[jrow]*x[16*idx[jrow]+3];
18722f7816d4SBarry Smith       sum5  += v[jrow]*x[16*idx[jrow]+4];
18732f7816d4SBarry Smith       sum6  += v[jrow]*x[16*idx[jrow]+5];
18742f7816d4SBarry Smith       sum7  += v[jrow]*x[16*idx[jrow]+6];
18752f7816d4SBarry Smith       sum8  += v[jrow]*x[16*idx[jrow]+7];
18762f7816d4SBarry Smith       sum9  += v[jrow]*x[16*idx[jrow]+8];
18772f7816d4SBarry Smith       sum10 += v[jrow]*x[16*idx[jrow]+9];
18782f7816d4SBarry Smith       sum11 += v[jrow]*x[16*idx[jrow]+10];
18792f7816d4SBarry Smith       sum12 += v[jrow]*x[16*idx[jrow]+11];
18802f7816d4SBarry Smith       sum13 += v[jrow]*x[16*idx[jrow]+12];
18812f7816d4SBarry Smith       sum14 += v[jrow]*x[16*idx[jrow]+13];
18822f7816d4SBarry Smith       sum15 += v[jrow]*x[16*idx[jrow]+14];
18832f7816d4SBarry Smith       sum16 += v[jrow]*x[16*idx[jrow]+15];
18842f7816d4SBarry Smith       jrow++;
18852f7816d4SBarry Smith      }
18862f7816d4SBarry Smith     y[16*i]    = sum1;
18872f7816d4SBarry Smith     y[16*i+1]  = sum2;
18882f7816d4SBarry Smith     y[16*i+2]  = sum3;
18892f7816d4SBarry Smith     y[16*i+3]  = sum4;
18902f7816d4SBarry Smith     y[16*i+4]  = sum5;
18912f7816d4SBarry Smith     y[16*i+5]  = sum6;
18922f7816d4SBarry Smith     y[16*i+6]  = sum7;
18932f7816d4SBarry Smith     y[16*i+7]  = sum8;
18942f7816d4SBarry Smith     y[16*i+8]  = sum9;
18952f7816d4SBarry Smith     y[16*i+9]  = sum10;
18962f7816d4SBarry Smith     y[16*i+10] = sum11;
18972f7816d4SBarry Smith     y[16*i+11] = sum12;
18982f7816d4SBarry Smith     y[16*i+12] = sum13;
18992f7816d4SBarry Smith     y[16*i+13] = sum14;
19002f7816d4SBarry Smith     y[16*i+14] = sum15;
19012f7816d4SBarry Smith     y[16*i+15] = sum16;
19022f7816d4SBarry Smith   }
19032f7816d4SBarry Smith 
1904b3c51c66SMatthew Knepley   ierr = PetscLogFlops(32*a->nz - 16*nonzerorow);CHKERRQ(ierr);
19051ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
19061ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
19072f7816d4SBarry Smith   PetscFunctionReturn(0);
19082f7816d4SBarry Smith }
19092f7816d4SBarry Smith 
19102f7816d4SBarry Smith #undef __FUNCT__
19112f7816d4SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16"
1912dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
19132f7816d4SBarry Smith {
19142f7816d4SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
19152f7816d4SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
19162f7816d4SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
19172f7816d4SBarry Smith   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1918dfbe8321SBarry Smith   PetscErrorCode ierr;
1919899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
19202f7816d4SBarry Smith 
19212f7816d4SBarry Smith   PetscFunctionBegin;
19222dcb1b2aSMatthew Knepley   ierr = VecSet(yy,zero);CHKERRQ(ierr);
19231ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
19241ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1925bfec09a0SHong Zhang 
19262f7816d4SBarry Smith   for (i=0; i<m; i++) {
1927bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
1928bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
19292f7816d4SBarry Smith     n      = a->i[i+1] - a->i[i];
19302f7816d4SBarry Smith     alpha1  = x[16*i];
19312f7816d4SBarry Smith     alpha2  = x[16*i+1];
19322f7816d4SBarry Smith     alpha3  = x[16*i+2];
19332f7816d4SBarry Smith     alpha4  = x[16*i+3];
19342f7816d4SBarry Smith     alpha5  = x[16*i+4];
19352f7816d4SBarry Smith     alpha6  = x[16*i+5];
19362f7816d4SBarry Smith     alpha7  = x[16*i+6];
19372f7816d4SBarry Smith     alpha8  = x[16*i+7];
19382f7816d4SBarry Smith     alpha9  = x[16*i+8];
19392f7816d4SBarry Smith     alpha10 = x[16*i+9];
19402f7816d4SBarry Smith     alpha11 = x[16*i+10];
19412f7816d4SBarry Smith     alpha12 = x[16*i+11];
19422f7816d4SBarry Smith     alpha13 = x[16*i+12];
19432f7816d4SBarry Smith     alpha14 = x[16*i+13];
19442f7816d4SBarry Smith     alpha15 = x[16*i+14];
19452f7816d4SBarry Smith     alpha16 = x[16*i+15];
19462f7816d4SBarry Smith     while (n-->0) {
19472f7816d4SBarry Smith       y[16*(*idx)]    += alpha1*(*v);
19482f7816d4SBarry Smith       y[16*(*idx)+1]  += alpha2*(*v);
19492f7816d4SBarry Smith       y[16*(*idx)+2]  += alpha3*(*v);
19502f7816d4SBarry Smith       y[16*(*idx)+3]  += alpha4*(*v);
19512f7816d4SBarry Smith       y[16*(*idx)+4]  += alpha5*(*v);
19522f7816d4SBarry Smith       y[16*(*idx)+5]  += alpha6*(*v);
19532f7816d4SBarry Smith       y[16*(*idx)+6]  += alpha7*(*v);
19542f7816d4SBarry Smith       y[16*(*idx)+7]  += alpha8*(*v);
19552f7816d4SBarry Smith       y[16*(*idx)+8]  += alpha9*(*v);
19562f7816d4SBarry Smith       y[16*(*idx)+9]  += alpha10*(*v);
19572f7816d4SBarry Smith       y[16*(*idx)+10] += alpha11*(*v);
19582f7816d4SBarry Smith       y[16*(*idx)+11] += alpha12*(*v);
19592f7816d4SBarry Smith       y[16*(*idx)+12] += alpha13*(*v);
19602f7816d4SBarry Smith       y[16*(*idx)+13] += alpha14*(*v);
19612f7816d4SBarry Smith       y[16*(*idx)+14] += alpha15*(*v);
19622f7816d4SBarry Smith       y[16*(*idx)+15] += alpha16*(*v);
19632f7816d4SBarry Smith       idx++; v++;
19642f7816d4SBarry Smith     }
19652f7816d4SBarry Smith   }
1966b3c51c66SMatthew Knepley   ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr);
19671ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
19681ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
19692f7816d4SBarry Smith   PetscFunctionReturn(0);
19702f7816d4SBarry Smith }
19712f7816d4SBarry Smith 
19722f7816d4SBarry Smith #undef __FUNCT__
19732f7816d4SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_16"
1974dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
19752f7816d4SBarry Smith {
19762f7816d4SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
19772f7816d4SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
19782f7816d4SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
19792f7816d4SBarry Smith   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1980dfbe8321SBarry Smith   PetscErrorCode ierr;
1981899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1982b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
19832f7816d4SBarry Smith 
19842f7816d4SBarry Smith   PetscFunctionBegin;
19852f7816d4SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
19861ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
19871ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
19882f7816d4SBarry Smith   idx  = a->j;
19892f7816d4SBarry Smith   v    = a->a;
19902f7816d4SBarry Smith   ii   = a->i;
19912f7816d4SBarry Smith 
19922f7816d4SBarry Smith   for (i=0; i<m; i++) {
19932f7816d4SBarry Smith     jrow = ii[i];
19942f7816d4SBarry Smith     n    = ii[i+1] - jrow;
19952f7816d4SBarry Smith     sum1  = 0.0;
19962f7816d4SBarry Smith     sum2  = 0.0;
19972f7816d4SBarry Smith     sum3  = 0.0;
19982f7816d4SBarry Smith     sum4  = 0.0;
19992f7816d4SBarry Smith     sum5  = 0.0;
20002f7816d4SBarry Smith     sum6  = 0.0;
20012f7816d4SBarry Smith     sum7  = 0.0;
20022f7816d4SBarry Smith     sum8  = 0.0;
20032f7816d4SBarry Smith     sum9  = 0.0;
20042f7816d4SBarry Smith     sum10 = 0.0;
20052f7816d4SBarry Smith     sum11 = 0.0;
20062f7816d4SBarry Smith     sum12 = 0.0;
20072f7816d4SBarry Smith     sum13 = 0.0;
20082f7816d4SBarry Smith     sum14 = 0.0;
20092f7816d4SBarry Smith     sum15 = 0.0;
20102f7816d4SBarry Smith     sum16 = 0.0;
20112f7816d4SBarry Smith     for (j=0; j<n; j++) {
20122f7816d4SBarry Smith       sum1  += v[jrow]*x[16*idx[jrow]];
20132f7816d4SBarry Smith       sum2  += v[jrow]*x[16*idx[jrow]+1];
20142f7816d4SBarry Smith       sum3  += v[jrow]*x[16*idx[jrow]+2];
20152f7816d4SBarry Smith       sum4  += v[jrow]*x[16*idx[jrow]+3];
20162f7816d4SBarry Smith       sum5  += v[jrow]*x[16*idx[jrow]+4];
20172f7816d4SBarry Smith       sum6  += v[jrow]*x[16*idx[jrow]+5];
20182f7816d4SBarry Smith       sum7  += v[jrow]*x[16*idx[jrow]+6];
20192f7816d4SBarry Smith       sum8  += v[jrow]*x[16*idx[jrow]+7];
20202f7816d4SBarry Smith       sum9  += v[jrow]*x[16*idx[jrow]+8];
20212f7816d4SBarry Smith       sum10 += v[jrow]*x[16*idx[jrow]+9];
20222f7816d4SBarry Smith       sum11 += v[jrow]*x[16*idx[jrow]+10];
20232f7816d4SBarry Smith       sum12 += v[jrow]*x[16*idx[jrow]+11];
20242f7816d4SBarry Smith       sum13 += v[jrow]*x[16*idx[jrow]+12];
20252f7816d4SBarry Smith       sum14 += v[jrow]*x[16*idx[jrow]+13];
20262f7816d4SBarry Smith       sum15 += v[jrow]*x[16*idx[jrow]+14];
20272f7816d4SBarry Smith       sum16 += v[jrow]*x[16*idx[jrow]+15];
20282f7816d4SBarry Smith       jrow++;
20292f7816d4SBarry Smith      }
20302f7816d4SBarry Smith     y[16*i]    += sum1;
20312f7816d4SBarry Smith     y[16*i+1]  += sum2;
20322f7816d4SBarry Smith     y[16*i+2]  += sum3;
20332f7816d4SBarry Smith     y[16*i+3]  += sum4;
20342f7816d4SBarry Smith     y[16*i+4]  += sum5;
20352f7816d4SBarry Smith     y[16*i+5]  += sum6;
20362f7816d4SBarry Smith     y[16*i+6]  += sum7;
20372f7816d4SBarry Smith     y[16*i+7]  += sum8;
20382f7816d4SBarry Smith     y[16*i+8]  += sum9;
20392f7816d4SBarry Smith     y[16*i+9]  += sum10;
20402f7816d4SBarry Smith     y[16*i+10] += sum11;
20412f7816d4SBarry Smith     y[16*i+11] += sum12;
20422f7816d4SBarry Smith     y[16*i+12] += sum13;
20432f7816d4SBarry Smith     y[16*i+13] += sum14;
20442f7816d4SBarry Smith     y[16*i+14] += sum15;
20452f7816d4SBarry Smith     y[16*i+15] += sum16;
20462f7816d4SBarry Smith   }
20472f7816d4SBarry Smith 
2048efee365bSSatish Balay   ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr);
20491ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
20501ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
20512f7816d4SBarry Smith   PetscFunctionReturn(0);
20522f7816d4SBarry Smith }
20532f7816d4SBarry Smith 
20542f7816d4SBarry Smith #undef __FUNCT__
20552f7816d4SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16"
2056dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
20572f7816d4SBarry Smith {
20582f7816d4SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
20592f7816d4SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
20602f7816d4SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
20612f7816d4SBarry Smith   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2062dfbe8321SBarry Smith   PetscErrorCode ierr;
2063899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
20642f7816d4SBarry Smith 
20652f7816d4SBarry Smith   PetscFunctionBegin;
20662f7816d4SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
20671ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
20681ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
20692f7816d4SBarry Smith   for (i=0; i<m; i++) {
2070bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
2071bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
20722f7816d4SBarry Smith     n      = a->i[i+1] - a->i[i];
20732f7816d4SBarry Smith     alpha1 = x[16*i];
20742f7816d4SBarry Smith     alpha2 = x[16*i+1];
20752f7816d4SBarry Smith     alpha3 = x[16*i+2];
20762f7816d4SBarry Smith     alpha4 = x[16*i+3];
20772f7816d4SBarry Smith     alpha5 = x[16*i+4];
20782f7816d4SBarry Smith     alpha6 = x[16*i+5];
20792f7816d4SBarry Smith     alpha7 = x[16*i+6];
20802f7816d4SBarry Smith     alpha8 = x[16*i+7];
20812f7816d4SBarry Smith     alpha9  = x[16*i+8];
20822f7816d4SBarry Smith     alpha10 = x[16*i+9];
20832f7816d4SBarry Smith     alpha11 = x[16*i+10];
20842f7816d4SBarry Smith     alpha12 = x[16*i+11];
20852f7816d4SBarry Smith     alpha13 = x[16*i+12];
20862f7816d4SBarry Smith     alpha14 = x[16*i+13];
20872f7816d4SBarry Smith     alpha15 = x[16*i+14];
20882f7816d4SBarry Smith     alpha16 = x[16*i+15];
20892f7816d4SBarry Smith     while (n-->0) {
20902f7816d4SBarry Smith       y[16*(*idx)]   += alpha1*(*v);
20912f7816d4SBarry Smith       y[16*(*idx)+1] += alpha2*(*v);
20922f7816d4SBarry Smith       y[16*(*idx)+2] += alpha3*(*v);
20932f7816d4SBarry Smith       y[16*(*idx)+3] += alpha4*(*v);
20942f7816d4SBarry Smith       y[16*(*idx)+4] += alpha5*(*v);
20952f7816d4SBarry Smith       y[16*(*idx)+5] += alpha6*(*v);
20962f7816d4SBarry Smith       y[16*(*idx)+6] += alpha7*(*v);
20972f7816d4SBarry Smith       y[16*(*idx)+7] += alpha8*(*v);
20982f7816d4SBarry Smith       y[16*(*idx)+8]  += alpha9*(*v);
20992f7816d4SBarry Smith       y[16*(*idx)+9]  += alpha10*(*v);
21002f7816d4SBarry Smith       y[16*(*idx)+10] += alpha11*(*v);
21012f7816d4SBarry Smith       y[16*(*idx)+11] += alpha12*(*v);
21022f7816d4SBarry Smith       y[16*(*idx)+12] += alpha13*(*v);
21032f7816d4SBarry Smith       y[16*(*idx)+13] += alpha14*(*v);
21042f7816d4SBarry Smith       y[16*(*idx)+14] += alpha15*(*v);
21052f7816d4SBarry Smith       y[16*(*idx)+15] += alpha16*(*v);
21062f7816d4SBarry Smith       idx++; v++;
21072f7816d4SBarry Smith     }
21082f7816d4SBarry Smith   }
2109efee365bSSatish Balay   ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr);
21101ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
21111ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
211266ed3db0SBarry Smith   PetscFunctionReturn(0);
211366ed3db0SBarry Smith }
211466ed3db0SBarry Smith 
2115*ed1c418cSBarry Smith /*--------------------------------------------------------------------------------------------*/
2116*ed1c418cSBarry Smith #undef __FUNCT__
2117*ed1c418cSBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_18"
2118*ed1c418cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2119*ed1c418cSBarry Smith {
2120*ed1c418cSBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
2121*ed1c418cSBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
2122*ed1c418cSBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2123*ed1c418cSBarry Smith   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2124*ed1c418cSBarry Smith   PetscErrorCode ierr;
2125*ed1c418cSBarry Smith   PetscInt       m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
2126*ed1c418cSBarry Smith   PetscInt       n,i,jrow,j;
2127*ed1c418cSBarry Smith 
2128*ed1c418cSBarry Smith   PetscFunctionBegin;
2129*ed1c418cSBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2130*ed1c418cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2131*ed1c418cSBarry Smith   idx  = a->j;
2132*ed1c418cSBarry Smith   v    = a->a;
2133*ed1c418cSBarry Smith   ii   = a->i;
2134*ed1c418cSBarry Smith 
2135*ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2136*ed1c418cSBarry Smith     jrow = ii[i];
2137*ed1c418cSBarry Smith     n    = ii[i+1] - jrow;
2138*ed1c418cSBarry Smith     sum1  = 0.0;
2139*ed1c418cSBarry Smith     sum2  = 0.0;
2140*ed1c418cSBarry Smith     sum3  = 0.0;
2141*ed1c418cSBarry Smith     sum4  = 0.0;
2142*ed1c418cSBarry Smith     sum5  = 0.0;
2143*ed1c418cSBarry Smith     sum6  = 0.0;
2144*ed1c418cSBarry Smith     sum7  = 0.0;
2145*ed1c418cSBarry Smith     sum8  = 0.0;
2146*ed1c418cSBarry Smith     sum9  = 0.0;
2147*ed1c418cSBarry Smith     sum10 = 0.0;
2148*ed1c418cSBarry Smith     sum11 = 0.0;
2149*ed1c418cSBarry Smith     sum12 = 0.0;
2150*ed1c418cSBarry Smith     sum13 = 0.0;
2151*ed1c418cSBarry Smith     sum14 = 0.0;
2152*ed1c418cSBarry Smith     sum15 = 0.0;
2153*ed1c418cSBarry Smith     sum16 = 0.0;
2154*ed1c418cSBarry Smith     sum17 = 0.0;
2155*ed1c418cSBarry Smith     sum18 = 0.0;
2156*ed1c418cSBarry Smith     nonzerorow += (n>0);
2157*ed1c418cSBarry Smith     for (j=0; j<n; j++) {
2158*ed1c418cSBarry Smith       sum1  += v[jrow]*x[18*idx[jrow]];
2159*ed1c418cSBarry Smith       sum2  += v[jrow]*x[18*idx[jrow]+1];
2160*ed1c418cSBarry Smith       sum3  += v[jrow]*x[18*idx[jrow]+2];
2161*ed1c418cSBarry Smith       sum4  += v[jrow]*x[18*idx[jrow]+3];
2162*ed1c418cSBarry Smith       sum5  += v[jrow]*x[18*idx[jrow]+4];
2163*ed1c418cSBarry Smith       sum6  += v[jrow]*x[18*idx[jrow]+5];
2164*ed1c418cSBarry Smith       sum7  += v[jrow]*x[18*idx[jrow]+6];
2165*ed1c418cSBarry Smith       sum8  += v[jrow]*x[18*idx[jrow]+7];
2166*ed1c418cSBarry Smith       sum9  += v[jrow]*x[18*idx[jrow]+8];
2167*ed1c418cSBarry Smith       sum10 += v[jrow]*x[18*idx[jrow]+9];
2168*ed1c418cSBarry Smith       sum11 += v[jrow]*x[18*idx[jrow]+10];
2169*ed1c418cSBarry Smith       sum12 += v[jrow]*x[18*idx[jrow]+11];
2170*ed1c418cSBarry Smith       sum13 += v[jrow]*x[18*idx[jrow]+12];
2171*ed1c418cSBarry Smith       sum14 += v[jrow]*x[18*idx[jrow]+13];
2172*ed1c418cSBarry Smith       sum15 += v[jrow]*x[18*idx[jrow]+14];
2173*ed1c418cSBarry Smith       sum16 += v[jrow]*x[18*idx[jrow]+15];
2174*ed1c418cSBarry Smith       sum17 += v[jrow]*x[18*idx[jrow]+16];
2175*ed1c418cSBarry Smith       sum18 += v[jrow]*x[18*idx[jrow]+17];
2176*ed1c418cSBarry Smith       jrow++;
2177*ed1c418cSBarry Smith      }
2178*ed1c418cSBarry Smith     y[18*i]    = sum1;
2179*ed1c418cSBarry Smith     y[18*i+1]  = sum2;
2180*ed1c418cSBarry Smith     y[18*i+2]  = sum3;
2181*ed1c418cSBarry Smith     y[18*i+3]  = sum4;
2182*ed1c418cSBarry Smith     y[18*i+4]  = sum5;
2183*ed1c418cSBarry Smith     y[18*i+5]  = sum6;
2184*ed1c418cSBarry Smith     y[18*i+6]  = sum7;
2185*ed1c418cSBarry Smith     y[18*i+7]  = sum8;
2186*ed1c418cSBarry Smith     y[18*i+8]  = sum9;
2187*ed1c418cSBarry Smith     y[18*i+9]  = sum10;
2188*ed1c418cSBarry Smith     y[18*i+10] = sum11;
2189*ed1c418cSBarry Smith     y[18*i+11] = sum12;
2190*ed1c418cSBarry Smith     y[18*i+12] = sum13;
2191*ed1c418cSBarry Smith     y[18*i+13] = sum14;
2192*ed1c418cSBarry Smith     y[18*i+14] = sum15;
2193*ed1c418cSBarry Smith     y[18*i+15] = sum16;
2194*ed1c418cSBarry Smith     y[18*i+16] = sum17;
2195*ed1c418cSBarry Smith     y[18*i+17] = sum18;
2196*ed1c418cSBarry Smith   }
2197*ed1c418cSBarry Smith 
2198*ed1c418cSBarry Smith   ierr = PetscLogFlops(36*a->nz - 18*nonzerorow);CHKERRQ(ierr);
2199*ed1c418cSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2200*ed1c418cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2201*ed1c418cSBarry Smith   PetscFunctionReturn(0);
2202*ed1c418cSBarry Smith }
2203*ed1c418cSBarry Smith 
2204*ed1c418cSBarry Smith #undef __FUNCT__
2205*ed1c418cSBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_18"
2206*ed1c418cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2207*ed1c418cSBarry Smith {
2208*ed1c418cSBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
2209*ed1c418cSBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
2210*ed1c418cSBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
2211*ed1c418cSBarry Smith   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2212*ed1c418cSBarry Smith   PetscErrorCode ierr;
2213*ed1c418cSBarry Smith   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
2214*ed1c418cSBarry Smith 
2215*ed1c418cSBarry Smith   PetscFunctionBegin;
2216*ed1c418cSBarry Smith   ierr = VecSet(yy,zero);CHKERRQ(ierr);
2217*ed1c418cSBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2218*ed1c418cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2219*ed1c418cSBarry Smith 
2220*ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2221*ed1c418cSBarry Smith     idx    = a->j + a->i[i] ;
2222*ed1c418cSBarry Smith     v      = a->a + a->i[i] ;
2223*ed1c418cSBarry Smith     n      = a->i[i+1] - a->i[i];
2224*ed1c418cSBarry Smith     alpha1  = x[18*i];
2225*ed1c418cSBarry Smith     alpha2  = x[18*i+1];
2226*ed1c418cSBarry Smith     alpha3  = x[18*i+2];
2227*ed1c418cSBarry Smith     alpha4  = x[18*i+3];
2228*ed1c418cSBarry Smith     alpha5  = x[18*i+4];
2229*ed1c418cSBarry Smith     alpha6  = x[18*i+5];
2230*ed1c418cSBarry Smith     alpha7  = x[18*i+6];
2231*ed1c418cSBarry Smith     alpha8  = x[18*i+7];
2232*ed1c418cSBarry Smith     alpha9  = x[18*i+8];
2233*ed1c418cSBarry Smith     alpha10 = x[18*i+9];
2234*ed1c418cSBarry Smith     alpha11 = x[18*i+10];
2235*ed1c418cSBarry Smith     alpha12 = x[18*i+11];
2236*ed1c418cSBarry Smith     alpha13 = x[18*i+12];
2237*ed1c418cSBarry Smith     alpha14 = x[18*i+13];
2238*ed1c418cSBarry Smith     alpha15 = x[18*i+14];
2239*ed1c418cSBarry Smith     alpha16 = x[18*i+15];
2240*ed1c418cSBarry Smith     alpha17 = x[18*i+16];
2241*ed1c418cSBarry Smith     alpha18 = x[18*i+17];
2242*ed1c418cSBarry Smith     while (n-->0) {
2243*ed1c418cSBarry Smith       y[18*(*idx)]    += alpha1*(*v);
2244*ed1c418cSBarry Smith       y[18*(*idx)+1]  += alpha2*(*v);
2245*ed1c418cSBarry Smith       y[18*(*idx)+2]  += alpha3*(*v);
2246*ed1c418cSBarry Smith       y[18*(*idx)+3]  += alpha4*(*v);
2247*ed1c418cSBarry Smith       y[18*(*idx)+4]  += alpha5*(*v);
2248*ed1c418cSBarry Smith       y[18*(*idx)+5]  += alpha6*(*v);
2249*ed1c418cSBarry Smith       y[18*(*idx)+6]  += alpha7*(*v);
2250*ed1c418cSBarry Smith       y[18*(*idx)+7]  += alpha8*(*v);
2251*ed1c418cSBarry Smith       y[18*(*idx)+8]  += alpha9*(*v);
2252*ed1c418cSBarry Smith       y[18*(*idx)+9]  += alpha10*(*v);
2253*ed1c418cSBarry Smith       y[18*(*idx)+10] += alpha11*(*v);
2254*ed1c418cSBarry Smith       y[18*(*idx)+11] += alpha12*(*v);
2255*ed1c418cSBarry Smith       y[18*(*idx)+12] += alpha13*(*v);
2256*ed1c418cSBarry Smith       y[18*(*idx)+13] += alpha14*(*v);
2257*ed1c418cSBarry Smith       y[18*(*idx)+14] += alpha15*(*v);
2258*ed1c418cSBarry Smith       y[18*(*idx)+15] += alpha16*(*v);
2259*ed1c418cSBarry Smith       y[18*(*idx)+16] += alpha17*(*v);
2260*ed1c418cSBarry Smith       y[18*(*idx)+17] += alpha18*(*v);
2261*ed1c418cSBarry Smith       idx++; v++;
2262*ed1c418cSBarry Smith     }
2263*ed1c418cSBarry Smith   }
2264*ed1c418cSBarry Smith   ierr = PetscLogFlops(36*a->nz);CHKERRQ(ierr);
2265*ed1c418cSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2266*ed1c418cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2267*ed1c418cSBarry Smith   PetscFunctionReturn(0);
2268*ed1c418cSBarry Smith }
2269*ed1c418cSBarry Smith 
2270*ed1c418cSBarry Smith #undef __FUNCT__
2271*ed1c418cSBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_18"
2272*ed1c418cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2273*ed1c418cSBarry Smith {
2274*ed1c418cSBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
2275*ed1c418cSBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
2276*ed1c418cSBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2277*ed1c418cSBarry Smith   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2278*ed1c418cSBarry Smith   PetscErrorCode ierr;
2279*ed1c418cSBarry Smith   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
2280*ed1c418cSBarry Smith   PetscInt       n,i,jrow,j;
2281*ed1c418cSBarry Smith 
2282*ed1c418cSBarry Smith   PetscFunctionBegin;
2283*ed1c418cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2284*ed1c418cSBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2285*ed1c418cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2286*ed1c418cSBarry Smith   idx  = a->j;
2287*ed1c418cSBarry Smith   v    = a->a;
2288*ed1c418cSBarry Smith   ii   = a->i;
2289*ed1c418cSBarry Smith 
2290*ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2291*ed1c418cSBarry Smith     jrow = ii[i];
2292*ed1c418cSBarry Smith     n    = ii[i+1] - jrow;
2293*ed1c418cSBarry Smith     sum1  = 0.0;
2294*ed1c418cSBarry Smith     sum2  = 0.0;
2295*ed1c418cSBarry Smith     sum3  = 0.0;
2296*ed1c418cSBarry Smith     sum4  = 0.0;
2297*ed1c418cSBarry Smith     sum5  = 0.0;
2298*ed1c418cSBarry Smith     sum6  = 0.0;
2299*ed1c418cSBarry Smith     sum7  = 0.0;
2300*ed1c418cSBarry Smith     sum8  = 0.0;
2301*ed1c418cSBarry Smith     sum9  = 0.0;
2302*ed1c418cSBarry Smith     sum10 = 0.0;
2303*ed1c418cSBarry Smith     sum11 = 0.0;
2304*ed1c418cSBarry Smith     sum12 = 0.0;
2305*ed1c418cSBarry Smith     sum13 = 0.0;
2306*ed1c418cSBarry Smith     sum14 = 0.0;
2307*ed1c418cSBarry Smith     sum15 = 0.0;
2308*ed1c418cSBarry Smith     sum16 = 0.0;
2309*ed1c418cSBarry Smith     sum17 = 0.0;
2310*ed1c418cSBarry Smith     sum18 = 0.0;
2311*ed1c418cSBarry Smith     for (j=0; j<n; j++) {
2312*ed1c418cSBarry Smith       sum1  += v[jrow]*x[18*idx[jrow]];
2313*ed1c418cSBarry Smith       sum2  += v[jrow]*x[18*idx[jrow]+1];
2314*ed1c418cSBarry Smith       sum3  += v[jrow]*x[18*idx[jrow]+2];
2315*ed1c418cSBarry Smith       sum4  += v[jrow]*x[18*idx[jrow]+3];
2316*ed1c418cSBarry Smith       sum5  += v[jrow]*x[18*idx[jrow]+4];
2317*ed1c418cSBarry Smith       sum6  += v[jrow]*x[18*idx[jrow]+5];
2318*ed1c418cSBarry Smith       sum7  += v[jrow]*x[18*idx[jrow]+6];
2319*ed1c418cSBarry Smith       sum8  += v[jrow]*x[18*idx[jrow]+7];
2320*ed1c418cSBarry Smith       sum9  += v[jrow]*x[18*idx[jrow]+8];
2321*ed1c418cSBarry Smith       sum10 += v[jrow]*x[18*idx[jrow]+9];
2322*ed1c418cSBarry Smith       sum11 += v[jrow]*x[18*idx[jrow]+10];
2323*ed1c418cSBarry Smith       sum12 += v[jrow]*x[18*idx[jrow]+11];
2324*ed1c418cSBarry Smith       sum13 += v[jrow]*x[18*idx[jrow]+12];
2325*ed1c418cSBarry Smith       sum14 += v[jrow]*x[18*idx[jrow]+13];
2326*ed1c418cSBarry Smith       sum15 += v[jrow]*x[18*idx[jrow]+14];
2327*ed1c418cSBarry Smith       sum16 += v[jrow]*x[18*idx[jrow]+15];
2328*ed1c418cSBarry Smith       sum17 += v[jrow]*x[18*idx[jrow]+16];
2329*ed1c418cSBarry Smith       sum18 += v[jrow]*x[18*idx[jrow]+17];
2330*ed1c418cSBarry Smith       jrow++;
2331*ed1c418cSBarry Smith      }
2332*ed1c418cSBarry Smith     y[18*i]    += sum1;
2333*ed1c418cSBarry Smith     y[18*i+1]  += sum2;
2334*ed1c418cSBarry Smith     y[18*i+2]  += sum3;
2335*ed1c418cSBarry Smith     y[18*i+3]  += sum4;
2336*ed1c418cSBarry Smith     y[18*i+4]  += sum5;
2337*ed1c418cSBarry Smith     y[18*i+5]  += sum6;
2338*ed1c418cSBarry Smith     y[18*i+6]  += sum7;
2339*ed1c418cSBarry Smith     y[18*i+7]  += sum8;
2340*ed1c418cSBarry Smith     y[18*i+8]  += sum9;
2341*ed1c418cSBarry Smith     y[18*i+9]  += sum10;
2342*ed1c418cSBarry Smith     y[18*i+10] += sum11;
2343*ed1c418cSBarry Smith     y[18*i+11] += sum12;
2344*ed1c418cSBarry Smith     y[18*i+12] += sum13;
2345*ed1c418cSBarry Smith     y[18*i+13] += sum14;
2346*ed1c418cSBarry Smith     y[18*i+14] += sum15;
2347*ed1c418cSBarry Smith     y[18*i+15] += sum16;
2348*ed1c418cSBarry Smith     y[18*i+16] += sum17;
2349*ed1c418cSBarry Smith     y[18*i+17] += sum18;
2350*ed1c418cSBarry Smith   }
2351*ed1c418cSBarry Smith 
2352*ed1c418cSBarry Smith   ierr = PetscLogFlops(36*a->nz);CHKERRQ(ierr);
2353*ed1c418cSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2354*ed1c418cSBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2355*ed1c418cSBarry Smith   PetscFunctionReturn(0);
2356*ed1c418cSBarry Smith }
2357*ed1c418cSBarry Smith 
2358*ed1c418cSBarry Smith #undef __FUNCT__
2359*ed1c418cSBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_18"
2360*ed1c418cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2361*ed1c418cSBarry Smith {
2362*ed1c418cSBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
2363*ed1c418cSBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
2364*ed1c418cSBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2365*ed1c418cSBarry Smith   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2366*ed1c418cSBarry Smith   PetscErrorCode ierr;
2367*ed1c418cSBarry Smith   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
2368*ed1c418cSBarry Smith 
2369*ed1c418cSBarry Smith   PetscFunctionBegin;
2370*ed1c418cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2371*ed1c418cSBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2372*ed1c418cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2373*ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2374*ed1c418cSBarry Smith     idx    = a->j + a->i[i] ;
2375*ed1c418cSBarry Smith     v      = a->a + a->i[i] ;
2376*ed1c418cSBarry Smith     n      = a->i[i+1] - a->i[i];
2377*ed1c418cSBarry Smith     alpha1 = x[18*i];
2378*ed1c418cSBarry Smith     alpha2 = x[18*i+1];
2379*ed1c418cSBarry Smith     alpha3 = x[18*i+2];
2380*ed1c418cSBarry Smith     alpha4 = x[18*i+3];
2381*ed1c418cSBarry Smith     alpha5 = x[18*i+4];
2382*ed1c418cSBarry Smith     alpha6 = x[18*i+5];
2383*ed1c418cSBarry Smith     alpha7 = x[18*i+6];
2384*ed1c418cSBarry Smith     alpha8 = x[18*i+7];
2385*ed1c418cSBarry Smith     alpha9  = x[18*i+8];
2386*ed1c418cSBarry Smith     alpha10 = x[18*i+9];
2387*ed1c418cSBarry Smith     alpha11 = x[18*i+10];
2388*ed1c418cSBarry Smith     alpha12 = x[18*i+11];
2389*ed1c418cSBarry Smith     alpha13 = x[18*i+12];
2390*ed1c418cSBarry Smith     alpha14 = x[18*i+13];
2391*ed1c418cSBarry Smith     alpha15 = x[18*i+14];
2392*ed1c418cSBarry Smith     alpha16 = x[18*i+15];
2393*ed1c418cSBarry Smith     alpha17 = x[18*i+16];
2394*ed1c418cSBarry Smith     alpha18 = x[18*i+17];
2395*ed1c418cSBarry Smith     while (n-->0) {
2396*ed1c418cSBarry Smith       y[18*(*idx)]   += alpha1*(*v);
2397*ed1c418cSBarry Smith       y[18*(*idx)+1] += alpha2*(*v);
2398*ed1c418cSBarry Smith       y[18*(*idx)+2] += alpha3*(*v);
2399*ed1c418cSBarry Smith       y[18*(*idx)+3] += alpha4*(*v);
2400*ed1c418cSBarry Smith       y[18*(*idx)+4] += alpha5*(*v);
2401*ed1c418cSBarry Smith       y[18*(*idx)+5] += alpha6*(*v);
2402*ed1c418cSBarry Smith       y[18*(*idx)+6] += alpha7*(*v);
2403*ed1c418cSBarry Smith       y[18*(*idx)+7] += alpha8*(*v);
2404*ed1c418cSBarry Smith       y[18*(*idx)+8]  += alpha9*(*v);
2405*ed1c418cSBarry Smith       y[18*(*idx)+9]  += alpha10*(*v);
2406*ed1c418cSBarry Smith       y[18*(*idx)+10] += alpha11*(*v);
2407*ed1c418cSBarry Smith       y[18*(*idx)+11] += alpha12*(*v);
2408*ed1c418cSBarry Smith       y[18*(*idx)+12] += alpha13*(*v);
2409*ed1c418cSBarry Smith       y[18*(*idx)+13] += alpha14*(*v);
2410*ed1c418cSBarry Smith       y[18*(*idx)+14] += alpha15*(*v);
2411*ed1c418cSBarry Smith       y[18*(*idx)+15] += alpha16*(*v);
2412*ed1c418cSBarry Smith       y[18*(*idx)+16] += alpha17*(*v);
2413*ed1c418cSBarry Smith       y[18*(*idx)+17] += alpha18*(*v);
2414*ed1c418cSBarry Smith       idx++; v++;
2415*ed1c418cSBarry Smith     }
2416*ed1c418cSBarry Smith   }
2417*ed1c418cSBarry Smith   ierr = PetscLogFlops(36*a->nz);CHKERRQ(ierr);
2418*ed1c418cSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2419*ed1c418cSBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2420*ed1c418cSBarry Smith   PetscFunctionReturn(0);
2421*ed1c418cSBarry Smith }
2422*ed1c418cSBarry Smith 
2423f4a53059SBarry Smith /*===================================================================================*/
24244a2ae208SSatish Balay #undef __FUNCT__
24254a2ae208SSatish Balay #define __FUNCT__ "MatMult_MPIMAIJ_dof"
2426dfbe8321SBarry Smith PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2427f4a53059SBarry Smith {
24284cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2429dfbe8321SBarry Smith   PetscErrorCode ierr;
2430f4a53059SBarry Smith 
2431b24ad042SBarry Smith   PetscFunctionBegin;
2432f4a53059SBarry Smith   /* start the scatter */
2433ca9f406cSSatish Balay   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
24344cb9d9b8SBarry Smith   ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr);
2435ca9f406cSSatish Balay   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
24364cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr);
2437f4a53059SBarry Smith   PetscFunctionReturn(0);
2438f4a53059SBarry Smith }
2439f4a53059SBarry Smith 
24404a2ae208SSatish Balay #undef __FUNCT__
24414a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof"
2442dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
24434cb9d9b8SBarry Smith {
24444cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2445dfbe8321SBarry Smith   PetscErrorCode ierr;
2446b24ad042SBarry Smith 
24474cb9d9b8SBarry Smith   PetscFunctionBegin;
24484cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
24494cb9d9b8SBarry Smith   ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr);
2450ca9f406cSSatish Balay   ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2451ca9f406cSSatish Balay   ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
24524cb9d9b8SBarry Smith   PetscFunctionReturn(0);
24534cb9d9b8SBarry Smith }
24544cb9d9b8SBarry Smith 
24554a2ae208SSatish Balay #undef __FUNCT__
24564a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof"
2457dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
24584cb9d9b8SBarry Smith {
24594cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2460dfbe8321SBarry Smith   PetscErrorCode ierr;
24614cb9d9b8SBarry Smith 
2462b24ad042SBarry Smith   PetscFunctionBegin;
24634cb9d9b8SBarry Smith   /* start the scatter */
2464ca9f406cSSatish Balay   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2465d72c5c08SBarry Smith   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2466ca9f406cSSatish Balay   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2467717f2ec8SHong Zhang   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
24684cb9d9b8SBarry Smith   PetscFunctionReturn(0);
24694cb9d9b8SBarry Smith }
24704cb9d9b8SBarry Smith 
24714a2ae208SSatish Balay #undef __FUNCT__
24724a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof"
2473dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
24744cb9d9b8SBarry Smith {
24754cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2476dfbe8321SBarry Smith   PetscErrorCode ierr;
2477b24ad042SBarry Smith 
24784cb9d9b8SBarry Smith   PetscFunctionBegin;
24794cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2480ca9f406cSSatish Balay   ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2481d72c5c08SBarry Smith   ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2482ca9f406cSSatish Balay   ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
24834cb9d9b8SBarry Smith   PetscFunctionReturn(0);
24844cb9d9b8SBarry Smith }
24854cb9d9b8SBarry Smith 
248695ddefa2SHong Zhang /* ----------------------------------------------------------------*/
24879c4fc4c7SBarry Smith #undef __FUNCT__
24887ba1a0bfSKris Buschelman #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
24897ba1a0bfSKris Buschelman PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
24907ba1a0bfSKris Buschelman {
24917ba1a0bfSKris Buschelman   /* This routine requires testing -- but it's getting better. */
24927ba1a0bfSKris Buschelman   PetscErrorCode     ierr;
2493a1a86e44SBarry Smith   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
24947ba1a0bfSKris Buschelman   Mat_SeqMAIJ        *pp=(Mat_SeqMAIJ*)PP->data;
24957ba1a0bfSKris Buschelman   Mat                P=pp->AIJ;
24967ba1a0bfSKris Buschelman   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
24977ba1a0bfSKris Buschelman   PetscInt           *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
24987ba1a0bfSKris Buschelman   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2499899cda47SBarry Smith   PetscInt           an=A->cmap.N,am=A->rmap.N,pn=P->cmap.N,pm=P->rmap.N,ppdof=pp->dof,cn;
25007ba1a0bfSKris Buschelman   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
25017ba1a0bfSKris Buschelman   MatScalar          *ca;
25027ba1a0bfSKris Buschelman 
25037ba1a0bfSKris Buschelman   PetscFunctionBegin;
25047ba1a0bfSKris Buschelman   /* Start timer */
25057ba1a0bfSKris Buschelman   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
25067ba1a0bfSKris Buschelman 
25077ba1a0bfSKris Buschelman   /* Get ij structure of P^T */
25087ba1a0bfSKris Buschelman   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
25097ba1a0bfSKris Buschelman 
25107ba1a0bfSKris Buschelman   cn = pn*ppdof;
25117ba1a0bfSKris Buschelman   /* Allocate ci array, arrays for fill computation and */
25127ba1a0bfSKris Buschelman   /* free space for accumulating nonzero column info */
25137ba1a0bfSKris Buschelman   ierr = PetscMalloc((cn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
25147ba1a0bfSKris Buschelman   ci[0] = 0;
25157ba1a0bfSKris Buschelman 
25167ba1a0bfSKris Buschelman   /* Work arrays for rows of P^T*A */
25177ba1a0bfSKris Buschelman   ierr = PetscMalloc((2*cn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
25187ba1a0bfSKris Buschelman   ierr = PetscMemzero(ptadenserow,(2*cn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
25197ba1a0bfSKris Buschelman   ptasparserow = ptadenserow  + an;
25207ba1a0bfSKris Buschelman   denserow     = ptasparserow + an;
25217ba1a0bfSKris Buschelman   sparserow    = denserow     + cn;
25227ba1a0bfSKris Buschelman 
25237ba1a0bfSKris Buschelman   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
25247ba1a0bfSKris Buschelman   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
25257ba1a0bfSKris Buschelman   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2526a1a86e44SBarry Smith   ierr          = PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space);
25277ba1a0bfSKris Buschelman   current_space = free_space;
25287ba1a0bfSKris Buschelman 
25297ba1a0bfSKris Buschelman   /* Determine symbolic info for each row of C: */
25307ba1a0bfSKris Buschelman   for (i=0;i<pn;i++) {
25317ba1a0bfSKris Buschelman     ptnzi  = pti[i+1] - pti[i];
25327ba1a0bfSKris Buschelman     ptJ    = ptj + pti[i];
25337ba1a0bfSKris Buschelman     for (dof=0;dof<ppdof;dof++) {
25347ba1a0bfSKris Buschelman       ptanzi = 0;
25357ba1a0bfSKris Buschelman       /* Determine symbolic row of PtA: */
25367ba1a0bfSKris Buschelman       for (j=0;j<ptnzi;j++) {
25377ba1a0bfSKris Buschelman         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
25387ba1a0bfSKris Buschelman         arow = ptJ[j]*ppdof + dof;
25397ba1a0bfSKris Buschelman         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
25407ba1a0bfSKris Buschelman         anzj = ai[arow+1] - ai[arow];
25417ba1a0bfSKris Buschelman         ajj  = aj + ai[arow];
25427ba1a0bfSKris Buschelman         for (k=0;k<anzj;k++) {
25437ba1a0bfSKris Buschelman           if (!ptadenserow[ajj[k]]) {
25447ba1a0bfSKris Buschelman             ptadenserow[ajj[k]]    = -1;
25457ba1a0bfSKris Buschelman             ptasparserow[ptanzi++] = ajj[k];
25467ba1a0bfSKris Buschelman           }
25477ba1a0bfSKris Buschelman         }
25487ba1a0bfSKris Buschelman       }
25497ba1a0bfSKris Buschelman       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
25507ba1a0bfSKris Buschelman       ptaj = ptasparserow;
25517ba1a0bfSKris Buschelman       cnzi   = 0;
25527ba1a0bfSKris Buschelman       for (j=0;j<ptanzi;j++) {
25537ba1a0bfSKris Buschelman         /* Get offset within block of P */
25547ba1a0bfSKris Buschelman         pshift = *ptaj%ppdof;
25557ba1a0bfSKris Buschelman         /* Get block row of P */
25567ba1a0bfSKris Buschelman         prow = (*ptaj++)/ppdof; /* integer division */
25577ba1a0bfSKris Buschelman         /* P has same number of nonzeros per row as the compressed form */
25587ba1a0bfSKris Buschelman         pnzj = pi[prow+1] - pi[prow];
25597ba1a0bfSKris Buschelman         pjj  = pj + pi[prow];
25607ba1a0bfSKris Buschelman         for (k=0;k<pnzj;k++) {
25617ba1a0bfSKris Buschelman           /* Locations in C are shifted by the offset within the block */
25627ba1a0bfSKris Buschelman           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
25637ba1a0bfSKris Buschelman           if (!denserow[pjj[k]*ppdof+pshift]) {
25647ba1a0bfSKris Buschelman             denserow[pjj[k]*ppdof+pshift] = -1;
25657ba1a0bfSKris Buschelman             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
25667ba1a0bfSKris Buschelman           }
25677ba1a0bfSKris Buschelman         }
25687ba1a0bfSKris Buschelman       }
25697ba1a0bfSKris Buschelman 
25707ba1a0bfSKris Buschelman       /* sort sparserow */
25717ba1a0bfSKris Buschelman       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
25727ba1a0bfSKris Buschelman 
25737ba1a0bfSKris Buschelman       /* If free space is not available, make more free space */
25747ba1a0bfSKris Buschelman       /* Double the amount of total space in the list */
25757ba1a0bfSKris Buschelman       if (current_space->local_remaining<cnzi) {
2576a1a86e44SBarry Smith         ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr);
25777ba1a0bfSKris Buschelman       }
25787ba1a0bfSKris Buschelman 
25797ba1a0bfSKris Buschelman       /* Copy data into free space, and zero out denserows */
25807ba1a0bfSKris Buschelman       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
25817ba1a0bfSKris Buschelman       current_space->array           += cnzi;
25827ba1a0bfSKris Buschelman       current_space->local_used      += cnzi;
25837ba1a0bfSKris Buschelman       current_space->local_remaining -= cnzi;
25847ba1a0bfSKris Buschelman 
25857ba1a0bfSKris Buschelman       for (j=0;j<ptanzi;j++) {
25867ba1a0bfSKris Buschelman         ptadenserow[ptasparserow[j]] = 0;
25877ba1a0bfSKris Buschelman       }
25887ba1a0bfSKris Buschelman       for (j=0;j<cnzi;j++) {
25897ba1a0bfSKris Buschelman         denserow[sparserow[j]] = 0;
25907ba1a0bfSKris Buschelman       }
25917ba1a0bfSKris Buschelman       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
25927ba1a0bfSKris Buschelman       /*        For now, we will recompute what is needed. */
25937ba1a0bfSKris Buschelman       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
25947ba1a0bfSKris Buschelman     }
25957ba1a0bfSKris Buschelman   }
25967ba1a0bfSKris Buschelman   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
25977ba1a0bfSKris Buschelman   /* Allocate space for cj, initialize cj, and */
25987ba1a0bfSKris Buschelman   /* destroy list of free space and other temporary array(s) */
25997ba1a0bfSKris Buschelman   ierr = PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
2600a1a86e44SBarry Smith   ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
26017ba1a0bfSKris Buschelman   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
26027ba1a0bfSKris Buschelman 
26037ba1a0bfSKris Buschelman   /* Allocate space for ca */
26047ba1a0bfSKris Buschelman   ierr = PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
26057ba1a0bfSKris Buschelman   ierr = PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
26067ba1a0bfSKris Buschelman 
26077ba1a0bfSKris Buschelman   /* put together the new matrix */
26087adad957SLisandro Dalcin   ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,cn,cn,ci,cj,ca,C);CHKERRQ(ierr);
26097ba1a0bfSKris Buschelman 
26107ba1a0bfSKris Buschelman   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
26117ba1a0bfSKris Buschelman   /* Since these are PETSc arrays, change flags to free them as necessary. */
26127ba1a0bfSKris Buschelman   c          = (Mat_SeqAIJ *)((*C)->data);
2613e6b907acSBarry Smith   c->free_a  = PETSC_TRUE;
2614e6b907acSBarry Smith   c->free_ij = PETSC_TRUE;
26157ba1a0bfSKris Buschelman   c->nonew   = 0;
26167ba1a0bfSKris Buschelman 
26177ba1a0bfSKris Buschelman   /* Clean up. */
26187ba1a0bfSKris Buschelman   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
26197ba1a0bfSKris Buschelman 
26207ba1a0bfSKris Buschelman   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
26217ba1a0bfSKris Buschelman   PetscFunctionReturn(0);
26227ba1a0bfSKris Buschelman }
26237ba1a0bfSKris Buschelman 
26247ba1a0bfSKris Buschelman #undef __FUNCT__
26257ba1a0bfSKris Buschelman #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ"
26267ba1a0bfSKris Buschelman PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
26277ba1a0bfSKris Buschelman {
26287ba1a0bfSKris Buschelman   /* This routine requires testing -- first draft only */
26297ba1a0bfSKris Buschelman   PetscErrorCode ierr;
26307ba1a0bfSKris Buschelman   PetscInt       flops=0;
26317ba1a0bfSKris Buschelman   Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
26327ba1a0bfSKris Buschelman   Mat            P=pp->AIJ;
26337ba1a0bfSKris Buschelman   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
26347ba1a0bfSKris Buschelman   Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
26357ba1a0bfSKris Buschelman   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
26367ba1a0bfSKris Buschelman   PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
26377ba1a0bfSKris Buschelman   PetscInt       *ci=c->i,*cj=c->j,*cjj;
2638899cda47SBarry Smith   PetscInt       am=A->rmap.N,cn=C->cmap.N,cm=C->rmap.N,ppdof=pp->dof;
26397ba1a0bfSKris Buschelman   PetscInt       i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
26407ba1a0bfSKris Buschelman   MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
26417ba1a0bfSKris Buschelman 
26427ba1a0bfSKris Buschelman   PetscFunctionBegin;
26437ba1a0bfSKris Buschelman   /* Allocate temporary array for storage of one row of A*P */
26447ba1a0bfSKris Buschelman   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
26457ba1a0bfSKris Buschelman   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
26467ba1a0bfSKris Buschelman 
26477ba1a0bfSKris Buschelman   apj      = (PetscInt *)(apa + cn);
26487ba1a0bfSKris Buschelman   apjdense = apj + cn;
26497ba1a0bfSKris Buschelman 
26507ba1a0bfSKris Buschelman   /* Clear old values in C */
26517ba1a0bfSKris Buschelman   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
26527ba1a0bfSKris Buschelman 
26537ba1a0bfSKris Buschelman   for (i=0;i<am;i++) {
26547ba1a0bfSKris Buschelman     /* Form sparse row of A*P */
26557ba1a0bfSKris Buschelman     anzi  = ai[i+1] - ai[i];
26567ba1a0bfSKris Buschelman     apnzj = 0;
26577ba1a0bfSKris Buschelman     for (j=0;j<anzi;j++) {
26587ba1a0bfSKris Buschelman       /* Get offset within block of P */
26597ba1a0bfSKris Buschelman       pshift = *aj%ppdof;
26607ba1a0bfSKris Buschelman       /* Get block row of P */
26617ba1a0bfSKris Buschelman       prow   = *aj++/ppdof; /* integer division */
26627ba1a0bfSKris Buschelman       pnzj = pi[prow+1] - pi[prow];
26637ba1a0bfSKris Buschelman       pjj  = pj + pi[prow];
26647ba1a0bfSKris Buschelman       paj  = pa + pi[prow];
26657ba1a0bfSKris Buschelman       for (k=0;k<pnzj;k++) {
26667ba1a0bfSKris Buschelman         poffset = pjj[k]*ppdof+pshift;
26677ba1a0bfSKris Buschelman         if (!apjdense[poffset]) {
26687ba1a0bfSKris Buschelman           apjdense[poffset] = -1;
26697ba1a0bfSKris Buschelman           apj[apnzj++]      = poffset;
26707ba1a0bfSKris Buschelman         }
26717ba1a0bfSKris Buschelman         apa[poffset] += (*aa)*paj[k];
26727ba1a0bfSKris Buschelman       }
26737ba1a0bfSKris Buschelman       flops += 2*pnzj;
26747ba1a0bfSKris Buschelman       aa++;
26757ba1a0bfSKris Buschelman     }
26767ba1a0bfSKris Buschelman 
26777ba1a0bfSKris Buschelman     /* Sort the j index array for quick sparse axpy. */
26787ba1a0bfSKris Buschelman     /* Note: a array does not need sorting as it is in dense storage locations. */
26797ba1a0bfSKris Buschelman     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
26807ba1a0bfSKris Buschelman 
26817ba1a0bfSKris Buschelman     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
26827ba1a0bfSKris Buschelman     prow    = i/ppdof; /* integer division */
26837ba1a0bfSKris Buschelman     pshift  = i%ppdof;
26847ba1a0bfSKris Buschelman     poffset = pi[prow];
26857ba1a0bfSKris Buschelman     pnzi = pi[prow+1] - poffset;
26867ba1a0bfSKris Buschelman     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
26877ba1a0bfSKris Buschelman     pJ   = pj+poffset;
26887ba1a0bfSKris Buschelman     pA   = pa+poffset;
26897ba1a0bfSKris Buschelman     for (j=0;j<pnzi;j++) {
26907ba1a0bfSKris Buschelman       crow   = (*pJ)*ppdof+pshift;
26917ba1a0bfSKris Buschelman       cjj    = cj + ci[crow];
26927ba1a0bfSKris Buschelman       caj    = ca + ci[crow];
26937ba1a0bfSKris Buschelman       pJ++;
26947ba1a0bfSKris Buschelman       /* Perform sparse axpy operation.  Note cjj includes apj. */
26957ba1a0bfSKris Buschelman       for (k=0,nextap=0;nextap<apnzj;k++) {
26967ba1a0bfSKris Buschelman         if (cjj[k]==apj[nextap]) {
26977ba1a0bfSKris Buschelman           caj[k] += (*pA)*apa[apj[nextap++]];
26987ba1a0bfSKris Buschelman         }
26997ba1a0bfSKris Buschelman       }
27007ba1a0bfSKris Buschelman       flops += 2*apnzj;
27017ba1a0bfSKris Buschelman       pA++;
27027ba1a0bfSKris Buschelman     }
27037ba1a0bfSKris Buschelman 
27047ba1a0bfSKris Buschelman     /* Zero the current row info for A*P */
27057ba1a0bfSKris Buschelman     for (j=0;j<apnzj;j++) {
27067ba1a0bfSKris Buschelman       apa[apj[j]]      = 0.;
27077ba1a0bfSKris Buschelman       apjdense[apj[j]] = 0;
27087ba1a0bfSKris Buschelman     }
27097ba1a0bfSKris Buschelman   }
27107ba1a0bfSKris Buschelman 
27117ba1a0bfSKris Buschelman   /* Assemble the final matrix and clean up */
27127ba1a0bfSKris Buschelman   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
27137ba1a0bfSKris Buschelman   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
27147ba1a0bfSKris Buschelman   ierr = PetscFree(apa);CHKERRQ(ierr);
27157ba1a0bfSKris Buschelman   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
271695ddefa2SHong Zhang   PetscFunctionReturn(0);
271795ddefa2SHong Zhang }
27187ba1a0bfSKris Buschelman 
271995ddefa2SHong Zhang #undef __FUNCT__
272095ddefa2SHong Zhang #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIMAIJ"
272195ddefa2SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
272295ddefa2SHong Zhang {
272395ddefa2SHong Zhang   PetscErrorCode    ierr;
272495ddefa2SHong Zhang 
272595ddefa2SHong Zhang   PetscFunctionBegin;
272695ddefa2SHong Zhang   /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */
272795ddefa2SHong Zhang   ierr = MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);CHKERRQ(ierr);
272895ddefa2SHong Zhang   ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);CHKERRQ(ierr);
272995ddefa2SHong Zhang   PetscFunctionReturn(0);
273095ddefa2SHong Zhang }
273195ddefa2SHong Zhang 
273295ddefa2SHong Zhang #undef __FUNCT__
273395ddefa2SHong Zhang #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIMAIJ"
273495ddefa2SHong Zhang PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
273595ddefa2SHong Zhang {
273695ddefa2SHong Zhang   PetscFunctionBegin;
273795ddefa2SHong Zhang   SETERRQ(PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
27387ba1a0bfSKris Buschelman   PetscFunctionReturn(0);
27397ba1a0bfSKris Buschelman }
27407ba1a0bfSKris Buschelman 
2741be1d678aSKris Buschelman EXTERN_C_BEGIN
27427ba1a0bfSKris Buschelman #undef __FUNCT__
27430fd73130SBarry Smith #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ"
2744f69a0ea3SMatthew Knepley PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
27459c4fc4c7SBarry Smith {
27469c4fc4c7SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2747ceb03754SKris Buschelman   Mat               a = b->AIJ,B;
27489c4fc4c7SBarry Smith   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)a->data;
27499c4fc4c7SBarry Smith   PetscErrorCode    ierr;
27500fd73130SBarry Smith   PetscInt          m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
2751ba8c8a56SBarry Smith   PetscInt          *cols;
2752ba8c8a56SBarry Smith   PetscScalar       *vals;
27539c4fc4c7SBarry Smith 
27549c4fc4c7SBarry Smith   PetscFunctionBegin;
27559c4fc4c7SBarry Smith   ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr);
27567c307921SBarry Smith   ierr = PetscMalloc(dof*m*sizeof(PetscInt),&ilen);CHKERRQ(ierr);
27579c4fc4c7SBarry Smith   for (i=0; i<m; i++) {
27589c4fc4c7SBarry Smith     nmax = PetscMax(nmax,aij->ilen[i]);
27590fd73130SBarry Smith     for (j=0; j<dof; j++) {
27600fd73130SBarry Smith       ilen[dof*i+j] = aij->ilen[i];
27619c4fc4c7SBarry Smith     }
27629c4fc4c7SBarry Smith   }
2763ceb03754SKris Buschelman   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr);
27649c4fc4c7SBarry Smith   ierr = PetscFree(ilen);CHKERRQ(ierr);
27659c4fc4c7SBarry Smith   ierr = PetscMalloc(nmax*sizeof(PetscInt),&icols);CHKERRQ(ierr);
27669c4fc4c7SBarry Smith   ii   = 0;
27679c4fc4c7SBarry Smith   for (i=0; i<m; i++) {
2768ba8c8a56SBarry Smith     ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
27690fd73130SBarry Smith     for (j=0; j<dof; j++) {
27709c4fc4c7SBarry Smith       for (k=0; k<ncols; k++) {
27710fd73130SBarry Smith         icols[k] = dof*cols[k]+j;
27729c4fc4c7SBarry Smith       }
2773ceb03754SKris Buschelman       ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
27749c4fc4c7SBarry Smith       ii++;
27759c4fc4c7SBarry Smith     }
2776ba8c8a56SBarry Smith     ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
27779c4fc4c7SBarry Smith   }
27789c4fc4c7SBarry Smith   ierr = PetscFree(icols);CHKERRQ(ierr);
2779ceb03754SKris Buschelman   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2780ceb03754SKris Buschelman   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2781ceb03754SKris Buschelman 
2782ceb03754SKris Buschelman   if (reuse == MAT_REUSE_MATRIX) {
27838ab5b326SKris Buschelman     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
2784c3d102feSKris Buschelman   } else {
2785ceb03754SKris Buschelman     *newmat = B;
2786c3d102feSKris Buschelman   }
27879c4fc4c7SBarry Smith   PetscFunctionReturn(0);
27889c4fc4c7SBarry Smith }
2789be1d678aSKris Buschelman EXTERN_C_END
27909c4fc4c7SBarry Smith 
27910fd73130SBarry Smith #include "src/mat/impls/aij/mpi/mpiaij.h"
2792be1d678aSKris Buschelman 
2793be1d678aSKris Buschelman EXTERN_C_BEGIN
27940fd73130SBarry Smith #undef __FUNCT__
27950fd73130SBarry Smith #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ"
2796f69a0ea3SMatthew Knepley PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
27970fd73130SBarry Smith {
27980fd73130SBarry Smith   Mat_MPIMAIJ       *maij = (Mat_MPIMAIJ*)A->data;
2799ceb03754SKris Buschelman   Mat               MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
28000fd73130SBarry Smith   Mat               MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
28010fd73130SBarry Smith   Mat_SeqAIJ        *AIJ = (Mat_SeqAIJ*) MatAIJ->data;
28020fd73130SBarry Smith   Mat_SeqAIJ        *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data;
28030fd73130SBarry Smith   Mat_MPIAIJ        *mpiaij = (Mat_MPIAIJ*) maij->A->data;
2804910ba992SMatthew Knepley   PetscInt          dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0;
2805910ba992SMatthew Knepley   PetscInt          *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL;
28060fd73130SBarry Smith   PetscInt          rstart,cstart,*garray,ii,k;
28070fd73130SBarry Smith   PetscErrorCode    ierr;
28080fd73130SBarry Smith   PetscScalar       *vals,*ovals;
28090fd73130SBarry Smith 
28100fd73130SBarry Smith   PetscFunctionBegin;
2811899cda47SBarry Smith   ierr = PetscMalloc2(A->rmap.n,PetscInt,&dnz,A->rmap.n,PetscInt,&onz);CHKERRQ(ierr);
2812899cda47SBarry Smith   for (i=0; i<A->rmap.n/dof; i++) {
28130fd73130SBarry Smith     nmax  = PetscMax(nmax,AIJ->ilen[i]);
28140fd73130SBarry Smith     onmax = PetscMax(onmax,OAIJ->ilen[i]);
28150fd73130SBarry Smith     for (j=0; j<dof; j++) {
28160fd73130SBarry Smith       dnz[dof*i+j] = AIJ->ilen[i];
28170fd73130SBarry Smith       onz[dof*i+j] = OAIJ->ilen[i];
28180fd73130SBarry Smith     }
28190fd73130SBarry Smith   }
28207adad957SLisandro Dalcin   ierr = MatCreateMPIAIJ(((PetscObject)A)->comm,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N,0,dnz,0,onz,&B);CHKERRQ(ierr);
28210fd73130SBarry Smith   ierr = PetscFree2(dnz,onz);CHKERRQ(ierr);
28220fd73130SBarry Smith 
28237a1a7badSBarry Smith   ierr   = PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);CHKERRQ(ierr);
28249b5a856fSSatish Balay   rstart = dof*maij->A->rmap.rstart;
28259b5a856fSSatish Balay   cstart = dof*maij->A->cmap.rstart;
28260fd73130SBarry Smith   garray = mpiaij->garray;
28270fd73130SBarry Smith 
28280fd73130SBarry Smith   ii = rstart;
2829899cda47SBarry Smith   for (i=0; i<A->rmap.n/dof; i++) {
28300fd73130SBarry Smith     ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
28310fd73130SBarry Smith     ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
28320fd73130SBarry Smith     for (j=0; j<dof; j++) {
28330fd73130SBarry Smith       for (k=0; k<ncols; k++) {
28340fd73130SBarry Smith         icols[k] = cstart + dof*cols[k]+j;
28350fd73130SBarry Smith       }
28360fd73130SBarry Smith       for (k=0; k<oncols; k++) {
28370fd73130SBarry Smith         oicols[k] = dof*garray[ocols[k]]+j;
28380fd73130SBarry Smith       }
2839ceb03754SKris Buschelman       ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
2840ceb03754SKris Buschelman       ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr);
28410fd73130SBarry Smith       ii++;
28420fd73130SBarry Smith     }
28430fd73130SBarry Smith     ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
28440fd73130SBarry Smith     ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
28450fd73130SBarry Smith   }
28460fd73130SBarry Smith   ierr = PetscFree2(icols,oicols);CHKERRQ(ierr);
28470fd73130SBarry Smith 
2848ceb03754SKris Buschelman   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2849ceb03754SKris Buschelman   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2850ceb03754SKris Buschelman 
2851ceb03754SKris Buschelman   if (reuse == MAT_REUSE_MATRIX) {
28527adad957SLisandro Dalcin     PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
28537adad957SLisandro Dalcin     ((PetscObject)A)->refct = 1;
28548ab5b326SKris Buschelman     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
28557adad957SLisandro Dalcin     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
2856c3d102feSKris Buschelman   } else {
2857ceb03754SKris Buschelman     *newmat = B;
2858c3d102feSKris Buschelman   }
28590fd73130SBarry Smith   PetscFunctionReturn(0);
28600fd73130SBarry Smith }
2861be1d678aSKris Buschelman EXTERN_C_END
28620fd73130SBarry Smith 
28630fd73130SBarry Smith 
2864bcc973b7SBarry Smith /* ---------------------------------------------------------------------------------- */
28655983afb6SSatish Balay /*MC
28660bad9183SKris Buschelman   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
28670bad9183SKris Buschelman   operations for multicomponent problems.  It interpolates each component the same
28680bad9183SKris Buschelman   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
28690bad9183SKris Buschelman   and MATMPIAIJ for distributed matrices.
28700bad9183SKris Buschelman 
28710bad9183SKris Buschelman   Operations provided:
28720fd73130SBarry Smith + MatMult
28730bad9183SKris Buschelman . MatMultTranspose
28740bad9183SKris Buschelman . MatMultAdd
28750bad9183SKris Buschelman . MatMultTransposeAdd
28760fd73130SBarry Smith - MatView
28770bad9183SKris Buschelman 
28780bad9183SKris Buschelman   Level: advanced
28790bad9183SKris Buschelman 
28800bad9183SKris Buschelman M*/
28814a2ae208SSatish Balay #undef __FUNCT__
28824a2ae208SSatish Balay #define __FUNCT__ "MatCreateMAIJ"
2883be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
288482b1193eSBarry Smith {
2885dfbe8321SBarry Smith   PetscErrorCode ierr;
2886b24ad042SBarry Smith   PetscMPIInt    size;
2887b24ad042SBarry Smith   PetscInt       n;
28884cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b;
288982b1193eSBarry Smith   Mat            B;
289082b1193eSBarry Smith 
289182b1193eSBarry Smith   PetscFunctionBegin;
2892d72c5c08SBarry Smith   ierr   = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
2893d72c5c08SBarry Smith 
2894d72c5c08SBarry Smith   if (dof == 1) {
2895d72c5c08SBarry Smith     *maij = A;
2896d72c5c08SBarry Smith   } else {
28977adad957SLisandro Dalcin     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2898899cda47SBarry Smith     ierr = MatSetSizes(B,dof*A->rmap.n,dof*A->cmap.n,dof*A->rmap.N,dof*A->cmap.N);CHKERRQ(ierr);
2899cd3bbe55SBarry Smith     B->assembled    = PETSC_TRUE;
2900d72c5c08SBarry Smith 
29017adad957SLisandro Dalcin     ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2902f4a53059SBarry Smith     if (size == 1) {
2903b9b97703SBarry Smith       ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr);
29044cb9d9b8SBarry Smith       B->ops->destroy = MatDestroy_SeqMAIJ;
29050fd73130SBarry Smith       B->ops->view    = MatView_SeqMAIJ;
2906b9b97703SBarry Smith       b      = (Mat_MPIMAIJ*)B->data;
2907b9b97703SBarry Smith       b->dof = dof;
29084cb9d9b8SBarry Smith       b->AIJ = A;
2909d72c5c08SBarry Smith       if (dof == 2) {
2910cd3bbe55SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_2;
2911cd3bbe55SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
2912cd3bbe55SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
2913cd3bbe55SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
2914bcc973b7SBarry Smith       } else if (dof == 3) {
2915bcc973b7SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_3;
2916bcc973b7SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
2917bcc973b7SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
2918bcc973b7SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
2919bcc973b7SBarry Smith       } else if (dof == 4) {
2920bcc973b7SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_4;
2921bcc973b7SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
2922bcc973b7SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
2923bcc973b7SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
2924f9fae5adSBarry Smith       } else if (dof == 5) {
2925f9fae5adSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_5;
2926f9fae5adSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
2927f9fae5adSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
2928f9fae5adSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
29296cd98798SBarry Smith       } else if (dof == 6) {
29306cd98798SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_6;
29316cd98798SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
29326cd98798SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
29336cd98798SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
2934ed8eea03SBarry Smith       } else if (dof == 7) {
2935ed8eea03SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_7;
2936ed8eea03SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
2937ed8eea03SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
2938ed8eea03SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
293966ed3db0SBarry Smith       } else if (dof == 8) {
294066ed3db0SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_8;
294166ed3db0SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
294266ed3db0SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
294366ed3db0SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
29442b692628SMatthew Knepley       } else if (dof == 9) {
29452b692628SMatthew Knepley         B->ops->mult             = MatMult_SeqMAIJ_9;
29462b692628SMatthew Knepley         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
29472b692628SMatthew Knepley         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
29482b692628SMatthew Knepley         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
2949b9cda22cSBarry Smith       } else if (dof == 10) {
2950b9cda22cSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_10;
2951b9cda22cSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
2952b9cda22cSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
2953b9cda22cSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
29542f7816d4SBarry Smith       } else if (dof == 16) {
29552f7816d4SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_16;
29562f7816d4SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
29572f7816d4SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
29582f7816d4SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
2959*ed1c418cSBarry Smith       } else if (dof == 18) {
2960*ed1c418cSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_18;
2961*ed1c418cSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
2962*ed1c418cSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
2963*ed1c418cSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
296482b1193eSBarry Smith       } else {
296577431f27SBarry Smith         SETERRQ1(PETSC_ERR_SUP,"Cannot handle a dof of %D. Send request for code to petsc-maint@mcs.anl.gov\n",dof);
296682b1193eSBarry Smith       }
29677ba1a0bfSKris Buschelman       B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ;
29687ba1a0bfSKris Buschelman       B->ops->ptapnumeric_seqaij  = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
29699c4fc4c7SBarry Smith       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr);
2970f4a53059SBarry Smith     } else {
2971f4a53059SBarry Smith       Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
2972f4a53059SBarry Smith       IS         from,to;
2973f4a53059SBarry Smith       Vec        gvec;
2974b24ad042SBarry Smith       PetscInt   *garray,i;
2975f4a53059SBarry Smith 
2976b9b97703SBarry Smith       ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr);
2977d72c5c08SBarry Smith       B->ops->destroy = MatDestroy_MPIMAIJ;
29780fd73130SBarry Smith       B->ops->view    = MatView_MPIMAIJ;
2979b9b97703SBarry Smith       b      = (Mat_MPIMAIJ*)B->data;
2980b9b97703SBarry Smith       b->dof = dof;
2981b9b97703SBarry Smith       b->A   = A;
29824cb9d9b8SBarry Smith       ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr);
29834cb9d9b8SBarry Smith       ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr);
29844cb9d9b8SBarry Smith 
2985f4a53059SBarry Smith       ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
2986f4a53059SBarry Smith       ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr);
298713288a74SBarry Smith       ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr);
2988f4a53059SBarry Smith 
2989f4a53059SBarry Smith       /* create two temporary Index sets for build scatter gather */
2990b24ad042SBarry Smith       ierr = PetscMalloc((n+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2991f4a53059SBarry Smith       for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
29927adad957SLisandro Dalcin       ierr = ISCreateBlock(((PetscObject)A)->comm,dof,n,garray,&from);CHKERRQ(ierr);
2993f4a53059SBarry Smith       ierr = PetscFree(garray);CHKERRQ(ierr);
2994f4a53059SBarry Smith       ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);
2995f4a53059SBarry Smith 
2996f4a53059SBarry Smith       /* create temporary global vector to generate scatter context */
29977adad957SLisandro Dalcin       ierr = VecCreateMPI(((PetscObject)A)->comm,dof*A->cmap.n,dof*A->cmap.N,&gvec);CHKERRQ(ierr);
299813288a74SBarry Smith       ierr = VecSetBlockSize(gvec,dof);CHKERRQ(ierr);
2999f4a53059SBarry Smith 
3000f4a53059SBarry Smith       /* generate the scatter context */
3001f4a53059SBarry Smith       ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
3002f4a53059SBarry Smith 
3003f4a53059SBarry Smith       ierr = ISDestroy(from);CHKERRQ(ierr);
3004f4a53059SBarry Smith       ierr = ISDestroy(to);CHKERRQ(ierr);
3005f4a53059SBarry Smith       ierr = VecDestroy(gvec);CHKERRQ(ierr);
3006f4a53059SBarry Smith 
3007f4a53059SBarry Smith       B->ops->mult             = MatMult_MPIMAIJ_dof;
30084cb9d9b8SBarry Smith       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
30094cb9d9b8SBarry Smith       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
30104cb9d9b8SBarry Smith       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
301195ddefa2SHong Zhang       B->ops->ptapsymbolic_mpiaij = MatPtAPSymbolic_MPIAIJ_MPIMAIJ;
301295ddefa2SHong Zhang       B->ops->ptapnumeric_mpiaij  = MatPtAPNumeric_MPIAIJ_MPIMAIJ;
30130fd73130SBarry Smith       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr);
3014f4a53059SBarry Smith     }
3015cd3bbe55SBarry Smith     *maij = B;
30160fd73130SBarry Smith     ierr = MatView_Private(B);CHKERRQ(ierr);
3017d72c5c08SBarry Smith   }
301882b1193eSBarry Smith   PetscFunctionReturn(0);
301982b1193eSBarry Smith }
302082b1193eSBarry Smith 
302182b1193eSBarry Smith 
302282b1193eSBarry Smith 
302382b1193eSBarry Smith 
302482b1193eSBarry Smith 
302582b1193eSBarry Smith 
302682b1193eSBarry Smith 
302782b1193eSBarry Smith 
302882b1193eSBarry Smith 
302982b1193eSBarry Smith 
303082b1193eSBarry Smith 
303182b1193eSBarry Smith 
3032