xref: /petsc/src/mat/impls/aij/seq/aijperm/aijperm.c (revision 938d9b047269c8ce7cdcee3b5e5291f4da012931)
1*938d9b04SRichard Tran Mills 
2*938d9b04SRichard Tran Mills /*
3*938d9b04SRichard Tran Mills   Defines basic operations for the MATSEQAIJPERM matrix class.
4*938d9b04SRichard Tran Mills   This class is derived from the MATSEQAIJ class and retains the
5*938d9b04SRichard Tran Mills   compressed row storage (aka Yale sparse matrix format) but augments
6*938d9b04SRichard Tran Mills   it with some permutation information that enables some operations
7*938d9b04SRichard Tran Mills   to be more vectorizable.  A physically rearranged copy of the matrix
8*938d9b04SRichard Tran Mills   may be stored if the user desires.
9*938d9b04SRichard Tran Mills 
10*938d9b04SRichard Tran Mills   Eventually a variety of permutations may be supported.
11*938d9b04SRichard Tran Mills */
12*938d9b04SRichard Tran Mills 
13*938d9b04SRichard Tran Mills #include <../src/mat/impls/aij/seq/aij.h>
14*938d9b04SRichard Tran Mills 
15*938d9b04SRichard Tran Mills #define NDIM 512
16*938d9b04SRichard Tran Mills /* NDIM specifies how many rows at a time we should work with when
17*938d9b04SRichard Tran Mills  * performing the vectorized mat-vec.  This depends on various factors
18*938d9b04SRichard Tran Mills  * such as vector register length, etc., and I really need to add a
19*938d9b04SRichard Tran Mills  * way for the user (or the library) to tune this.  I'm setting it to
20*938d9b04SRichard Tran Mills  * 512 for now since that is what Ed D'Azevedo was using in his Fortran
21*938d9b04SRichard Tran Mills  * routines. */
22*938d9b04SRichard Tran Mills 
23*938d9b04SRichard Tran Mills typedef struct {
24*938d9b04SRichard Tran Mills   PetscObjectState nonzerostate; /* used to determine if the nonzero structure has changed and hence the permutations need updating */
25*938d9b04SRichard Tran Mills 
26*938d9b04SRichard Tran Mills   PetscInt         ngroup;
27*938d9b04SRichard Tran Mills   PetscInt         *xgroup;
28*938d9b04SRichard Tran Mills   /* Denotes where groups of rows with same number of nonzeros
29*938d9b04SRichard Tran Mills    * begin and end, i.e., xgroup[i] gives us the position in iperm[]
30*938d9b04SRichard Tran Mills    * where the ith group begins. */
31*938d9b04SRichard Tran Mills 
32*938d9b04SRichard Tran Mills   PetscInt         *nzgroup; /*  how many nonzeros each row that is a member of group i has. */
33*938d9b04SRichard Tran Mills   PetscInt         *iperm;  /* The permutation vector. */
34*938d9b04SRichard Tran Mills 
35*938d9b04SRichard Tran Mills   /* Some of this stuff is for Ed's recursive triangular solve.
36*938d9b04SRichard Tran Mills    * I'm not sure what I need yet. */
37*938d9b04SRichard Tran Mills   PetscInt         blocksize;
38*938d9b04SRichard Tran Mills   PetscInt         nstep;
39*938d9b04SRichard Tran Mills   PetscInt         *jstart_list;
40*938d9b04SRichard Tran Mills   PetscInt         *jend_list;
41*938d9b04SRichard Tran Mills   PetscInt         *action_list;
42*938d9b04SRichard Tran Mills   PetscInt         *ngroup_list;
43*938d9b04SRichard Tran Mills   PetscInt         **ipointer_list;
44*938d9b04SRichard Tran Mills   PetscInt         **xgroup_list;
45*938d9b04SRichard Tran Mills   PetscInt         **nzgroup_list;
46*938d9b04SRichard Tran Mills   PetscInt         **iperm_list;
47*938d9b04SRichard Tran Mills } Mat_SeqAIJPERM;
48*938d9b04SRichard Tran Mills 
49*938d9b04SRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJPERM_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
50*938d9b04SRichard Tran Mills {
51*938d9b04SRichard Tran Mills   /* This routine is only called to convert a MATAIJPERM to its base PETSc type, */
52*938d9b04SRichard Tran Mills   /* so we will ignore 'MatType type'. */
53*938d9b04SRichard Tran Mills   PetscErrorCode ierr;
54*938d9b04SRichard Tran Mills   Mat            B       = *newmat;
55*938d9b04SRichard Tran Mills   Mat_SeqAIJPERM *aijperm=(Mat_SeqAIJPERM*)A->spptr;
56*938d9b04SRichard Tran Mills 
57*938d9b04SRichard Tran Mills   PetscFunctionBegin;
58*938d9b04SRichard Tran Mills   if (reuse == MAT_INITIAL_MATRIX) {
59*938d9b04SRichard Tran Mills     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
60*938d9b04SRichard Tran Mills     aijperm=(Mat_SeqAIJPERM*)B->spptr;
61*938d9b04SRichard Tran Mills   }
62*938d9b04SRichard Tran Mills 
63*938d9b04SRichard Tran Mills   /* Reset the original function pointers. */
64*938d9b04SRichard Tran Mills   B->ops->assemblyend = MatAssemblyEnd_SeqAIJ;
65*938d9b04SRichard Tran Mills   B->ops->destroy     = MatDestroy_SeqAIJ;
66*938d9b04SRichard Tran Mills   B->ops->duplicate   = MatDuplicate_SeqAIJ;
67*938d9b04SRichard Tran Mills   B->ops->mult        = MatMult_SeqAIJ;
68*938d9b04SRichard Tran Mills   B->ops->multadd     = MatMultAdd_SeqAIJ;
69*938d9b04SRichard Tran Mills 
70*938d9b04SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijperm_seqaij_C",NULL);CHKERRQ(ierr);
71*938d9b04SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijperm_C",NULL);CHKERRQ(ierr);
72*938d9b04SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijperm_C",NULL);CHKERRQ(ierr);
73*938d9b04SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijperm_C",NULL);CHKERRQ(ierr);
74*938d9b04SRichard Tran Mills 
75*938d9b04SRichard Tran Mills   /* Free everything in the Mat_SeqAIJPERM data structure.*/
76*938d9b04SRichard Tran Mills   ierr = PetscFree(aijperm->xgroup);CHKERRQ(ierr);
77*938d9b04SRichard Tran Mills   ierr = PetscFree(aijperm->nzgroup);CHKERRQ(ierr);
78*938d9b04SRichard Tran Mills   ierr = PetscFree(aijperm->iperm);CHKERRQ(ierr);
79*938d9b04SRichard Tran Mills   ierr = PetscFree(B->spptr);CHKERRQ(ierr);
80*938d9b04SRichard Tran Mills 
81*938d9b04SRichard Tran Mills   /* Change the type of B to MATSEQAIJ. */
82*938d9b04SRichard Tran Mills   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr);
83*938d9b04SRichard Tran Mills 
84*938d9b04SRichard Tran Mills   *newmat = B;
85*938d9b04SRichard Tran Mills   PetscFunctionReturn(0);
86*938d9b04SRichard Tran Mills }
87*938d9b04SRichard Tran Mills 
88*938d9b04SRichard Tran Mills PetscErrorCode MatDestroy_SeqAIJPERM(Mat A)
89*938d9b04SRichard Tran Mills {
90*938d9b04SRichard Tran Mills   PetscErrorCode ierr;
91*938d9b04SRichard Tran Mills   Mat_SeqAIJPERM *aijperm = (Mat_SeqAIJPERM*) A->spptr;
92*938d9b04SRichard Tran Mills 
93*938d9b04SRichard Tran Mills   PetscFunctionBegin;
94*938d9b04SRichard Tran Mills   if (aijperm) {
95*938d9b04SRichard Tran Mills     /* If MatHeaderMerge() was used then this SeqAIJPERM matrix will not have a spprt. */
96*938d9b04SRichard Tran Mills     ierr = PetscFree(aijperm->xgroup);CHKERRQ(ierr);
97*938d9b04SRichard Tran Mills     ierr = PetscFree(aijperm->nzgroup);CHKERRQ(ierr);
98*938d9b04SRichard Tran Mills     ierr = PetscFree(aijperm->iperm);CHKERRQ(ierr);
99*938d9b04SRichard Tran Mills     ierr = PetscFree(A->spptr);CHKERRQ(ierr);
100*938d9b04SRichard Tran Mills   }
101*938d9b04SRichard Tran Mills   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
102*938d9b04SRichard Tran Mills    * to destroy everything that remains. */
103*938d9b04SRichard Tran Mills   ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
104*938d9b04SRichard Tran Mills   /* Note that I don't call MatSetType().  I believe this is because that
105*938d9b04SRichard Tran Mills    * is only to be called when *building* a matrix.  I could be wrong, but
106*938d9b04SRichard Tran Mills    * that is how things work for the SuperLU matrix class. */
107*938d9b04SRichard Tran Mills   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
108*938d9b04SRichard Tran Mills   PetscFunctionReturn(0);
109*938d9b04SRichard Tran Mills }
110*938d9b04SRichard Tran Mills 
111*938d9b04SRichard Tran Mills PetscErrorCode MatDuplicate_SeqAIJPERM(Mat A, MatDuplicateOption op, Mat *M)
112*938d9b04SRichard Tran Mills {
113*938d9b04SRichard Tran Mills   PetscErrorCode ierr;
114*938d9b04SRichard Tran Mills   Mat_SeqAIJPERM *aijperm      = (Mat_SeqAIJPERM*) A->spptr;
115*938d9b04SRichard Tran Mills   Mat_SeqAIJPERM *aijperm_dest;
116*938d9b04SRichard Tran Mills   PetscBool      perm;
117*938d9b04SRichard Tran Mills 
118*938d9b04SRichard Tran Mills   PetscFunctionBegin;
119*938d9b04SRichard Tran Mills   ierr        = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr);
120*938d9b04SRichard Tran Mills   ierr        = PetscObjectTypeCompare((PetscObject)*M,MATSEQAIJPERM,&perm);CHKERRQ(ierr);
121*938d9b04SRichard Tran Mills   if (perm) {
122*938d9b04SRichard Tran Mills     aijperm_dest = (Mat_SeqAIJPERM *) (*M)->spptr;
123*938d9b04SRichard Tran Mills     ierr = PetscFree(aijperm_dest->xgroup);CHKERRQ(ierr);
124*938d9b04SRichard Tran Mills     ierr = PetscFree(aijperm_dest->nzgroup);CHKERRQ(ierr);
125*938d9b04SRichard Tran Mills     ierr = PetscFree(aijperm_dest->iperm);CHKERRQ(ierr);
126*938d9b04SRichard Tran Mills   } else {
127*938d9b04SRichard Tran Mills     ierr        = PetscNewLog(*M,&aijperm_dest);CHKERRQ(ierr);
128*938d9b04SRichard Tran Mills     (*M)->spptr = (void*) aijperm_dest;
129*938d9b04SRichard Tran Mills     ierr        = PetscObjectChangeTypeName((PetscObject)*M,MATSEQAIJPERM);CHKERRQ(ierr);
130*938d9b04SRichard Tran Mills     ierr        = PetscObjectComposeFunction((PetscObject)*M,"MatConvert_seqaijperm_seqaij_C",MatConvert_SeqAIJPERM_SeqAIJ);CHKERRQ(ierr);
131*938d9b04SRichard Tran Mills     ierr        = PetscObjectComposeFunction((PetscObject)*M,"MatMatMult_seqdense_seqaijperm_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr);
132*938d9b04SRichard Tran Mills     ierr        = PetscObjectComposeFunction((PetscObject)*M,"MatMatMultSymbolic_seqdense_seqaijperm_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr);
133*938d9b04SRichard Tran Mills     ierr        = PetscObjectComposeFunction((PetscObject)*M,"MatMatMultNumeric_seqdense_seqaijperm_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr);
134*938d9b04SRichard Tran Mills   }
135*938d9b04SRichard Tran Mills   ierr        = PetscMemcpy(aijperm_dest,aijperm,sizeof(Mat_SeqAIJPERM));CHKERRQ(ierr);
136*938d9b04SRichard Tran Mills   /* Allocate space for, and copy the grouping and permutation info.
137*938d9b04SRichard Tran Mills    * I note that when the groups are initially determined in
138*938d9b04SRichard Tran Mills    * MatSeqAIJPERM_create_perm, xgroup and nzgroup may be sized larger than
139*938d9b04SRichard Tran Mills    * necessary.  But at this point, we know how large they need to be, and
140*938d9b04SRichard Tran Mills    * allocate only the necessary amount of memory.  So the duplicated matrix
141*938d9b04SRichard Tran Mills    * may actually use slightly less storage than the original! */
142*938d9b04SRichard Tran Mills   ierr = PetscMalloc1(A->rmap->n, &aijperm_dest->iperm);CHKERRQ(ierr);
143*938d9b04SRichard Tran Mills   ierr = PetscMalloc1(aijperm->ngroup+1, &aijperm_dest->xgroup);CHKERRQ(ierr);
144*938d9b04SRichard Tran Mills   ierr = PetscMalloc1(aijperm->ngroup, &aijperm_dest->nzgroup);CHKERRQ(ierr);
145*938d9b04SRichard Tran Mills   ierr = PetscMemcpy(aijperm_dest->iperm,aijperm->iperm,sizeof(PetscInt)*A->rmap->n);CHKERRQ(ierr);
146*938d9b04SRichard Tran Mills   ierr = PetscMemcpy(aijperm_dest->xgroup,aijperm->xgroup,sizeof(PetscInt)*(aijperm->ngroup+1));CHKERRQ(ierr);
147*938d9b04SRichard Tran Mills   ierr = PetscMemcpy(aijperm_dest->nzgroup,aijperm->nzgroup,sizeof(PetscInt)*aijperm->ngroup);CHKERRQ(ierr);
148*938d9b04SRichard Tran Mills   PetscFunctionReturn(0);
149*938d9b04SRichard Tran Mills }
150*938d9b04SRichard Tran Mills 
151*938d9b04SRichard Tran Mills PetscErrorCode MatSeqAIJPERM_create_perm(Mat A)
152*938d9b04SRichard Tran Mills {
153*938d9b04SRichard Tran Mills   PetscErrorCode ierr;
154*938d9b04SRichard Tran Mills   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)(A)->data;
155*938d9b04SRichard Tran Mills   Mat_SeqAIJPERM *aijperm = (Mat_SeqAIJPERM*) A->spptr;
156*938d9b04SRichard Tran Mills   PetscInt       m;       /* Number of rows in the matrix. */
157*938d9b04SRichard Tran Mills   PetscInt       *ia;       /* From the CSR representation; points to the beginning  of each row. */
158*938d9b04SRichard Tran Mills   PetscInt       maxnz;      /* Maximum number of nonzeros in any row. */
159*938d9b04SRichard Tran Mills   PetscInt       *rows_in_bucket;
160*938d9b04SRichard Tran Mills   /* To construct the permutation, we sort each row into one of maxnz
161*938d9b04SRichard Tran Mills    * buckets based on how many nonzeros are in the row. */
162*938d9b04SRichard Tran Mills   PetscInt       nz;
163*938d9b04SRichard Tran Mills   PetscInt       *nz_in_row;         /* the number of nonzero elements in row k. */
164*938d9b04SRichard Tran Mills   PetscInt       *ipnz;
165*938d9b04SRichard Tran Mills   /* When constructing the iperm permutation vector,
166*938d9b04SRichard Tran Mills    * ipnz[nz] is used to point to the next place in the permutation vector
167*938d9b04SRichard Tran Mills    * that a row with nz nonzero elements should be placed.*/
168*938d9b04SRichard Tran Mills   PetscInt       i, ngroup, istart, ipos;
169*938d9b04SRichard Tran Mills 
170*938d9b04SRichard Tran Mills   PetscFunctionBegin;
171*938d9b04SRichard Tran Mills   if (aijperm->nonzerostate == A->nonzerostate) PetscFunctionReturn(0); /* permutation exists and matches current nonzero structure */
172*938d9b04SRichard Tran Mills   aijperm->nonzerostate = A->nonzerostate;
173*938d9b04SRichard Tran Mills  /* Free anything previously put in the Mat_SeqAIJPERM data structure. */
174*938d9b04SRichard Tran Mills   ierr = PetscFree(aijperm->xgroup);CHKERRQ(ierr);
175*938d9b04SRichard Tran Mills   ierr = PetscFree(aijperm->nzgroup);CHKERRQ(ierr);
176*938d9b04SRichard Tran Mills   ierr = PetscFree(aijperm->iperm);CHKERRQ(ierr);
177*938d9b04SRichard Tran Mills 
178*938d9b04SRichard Tran Mills   m  = A->rmap->n;
179*938d9b04SRichard Tran Mills   ia = a->i;
180*938d9b04SRichard Tran Mills 
181*938d9b04SRichard Tran Mills   /* Allocate the arrays that will hold the permutation vector. */
182*938d9b04SRichard Tran Mills   ierr = PetscMalloc1(m, &aijperm->iperm);CHKERRQ(ierr);
183*938d9b04SRichard Tran Mills 
184*938d9b04SRichard Tran Mills   /* Allocate some temporary work arrays that will be used in
185*938d9b04SRichard Tran Mills    * calculating the permuation vector and groupings. */
186*938d9b04SRichard Tran Mills   ierr = PetscMalloc1(m, &nz_in_row);CHKERRQ(ierr);
187*938d9b04SRichard Tran Mills 
188*938d9b04SRichard Tran Mills   /* Now actually figure out the permutation and grouping. */
189*938d9b04SRichard Tran Mills 
190*938d9b04SRichard Tran Mills   /* First pass: Determine number of nonzeros in each row, maximum
191*938d9b04SRichard Tran Mills    * number of nonzeros in any row, and how many rows fall into each
192*938d9b04SRichard Tran Mills    * "bucket" of rows with same number of nonzeros. */
193*938d9b04SRichard Tran Mills   maxnz = 0;
194*938d9b04SRichard Tran Mills   for (i=0; i<m; i++) {
195*938d9b04SRichard Tran Mills     nz_in_row[i] = ia[i+1]-ia[i];
196*938d9b04SRichard Tran Mills     if (nz_in_row[i] > maxnz) maxnz = nz_in_row[i];
197*938d9b04SRichard Tran Mills   }
198*938d9b04SRichard Tran Mills   ierr = PetscMalloc1(PetscMax(maxnz,m)+1, &rows_in_bucket);CHKERRQ(ierr);
199*938d9b04SRichard Tran Mills   ierr = PetscMalloc1(PetscMax(maxnz,m)+1, &ipnz);CHKERRQ(ierr);
200*938d9b04SRichard Tran Mills 
201*938d9b04SRichard Tran Mills   for (i=0; i<=maxnz; i++) {
202*938d9b04SRichard Tran Mills     rows_in_bucket[i] = 0;
203*938d9b04SRichard Tran Mills   }
204*938d9b04SRichard Tran Mills   for (i=0; i<m; i++) {
205*938d9b04SRichard Tran Mills     nz = nz_in_row[i];
206*938d9b04SRichard Tran Mills     rows_in_bucket[nz]++;
207*938d9b04SRichard Tran Mills   }
208*938d9b04SRichard Tran Mills 
209*938d9b04SRichard Tran Mills   /* Allocate space for the grouping info.  There will be at most (maxnz + 1)
210*938d9b04SRichard Tran Mills    * groups.  (It is maxnz + 1 instead of simply maxnz because there may be
211*938d9b04SRichard Tran Mills    * rows with no nonzero elements.)  If there are (maxnz + 1) groups,
212*938d9b04SRichard Tran Mills    * then xgroup[] must consist of (maxnz + 2) elements, since the last
213*938d9b04SRichard Tran Mills    * element of xgroup will tell us where the (maxnz + 1)th group ends.
214*938d9b04SRichard Tran Mills    * We allocate space for the maximum number of groups;
215*938d9b04SRichard Tran Mills    * that is potentially a little wasteful, but not too much so.
216*938d9b04SRichard Tran Mills    * Perhaps I should fix it later. */
217*938d9b04SRichard Tran Mills   ierr = PetscMalloc1(maxnz+2, &aijperm->xgroup);CHKERRQ(ierr);
218*938d9b04SRichard Tran Mills   ierr = PetscMalloc1(maxnz+1, &aijperm->nzgroup);CHKERRQ(ierr);
219*938d9b04SRichard Tran Mills 
220*938d9b04SRichard Tran Mills   /* Second pass.  Look at what is in the buckets and create the groupings.
221*938d9b04SRichard Tran Mills    * Note that it is OK to have a group of rows with no non-zero values. */
222*938d9b04SRichard Tran Mills   ngroup = 0;
223*938d9b04SRichard Tran Mills   istart = 0;
224*938d9b04SRichard Tran Mills   for (i=0; i<=maxnz; i++) {
225*938d9b04SRichard Tran Mills     if (rows_in_bucket[i] > 0) {
226*938d9b04SRichard Tran Mills       aijperm->nzgroup[ngroup] = i;
227*938d9b04SRichard Tran Mills       aijperm->xgroup[ngroup]  = istart;
228*938d9b04SRichard Tran Mills       ngroup++;
229*938d9b04SRichard Tran Mills       istart += rows_in_bucket[i];
230*938d9b04SRichard Tran Mills     }
231*938d9b04SRichard Tran Mills   }
232*938d9b04SRichard Tran Mills 
233*938d9b04SRichard Tran Mills   aijperm->xgroup[ngroup] = istart;
234*938d9b04SRichard Tran Mills   aijperm->ngroup         = ngroup;
235*938d9b04SRichard Tran Mills 
236*938d9b04SRichard Tran Mills   /* Now fill in the permutation vector iperm. */
237*938d9b04SRichard Tran Mills   ipnz[0] = 0;
238*938d9b04SRichard Tran Mills   for (i=0; i<maxnz; i++) {
239*938d9b04SRichard Tran Mills     ipnz[i+1] = ipnz[i] + rows_in_bucket[i];
240*938d9b04SRichard Tran Mills   }
241*938d9b04SRichard Tran Mills 
242*938d9b04SRichard Tran Mills   for (i=0; i<m; i++) {
243*938d9b04SRichard Tran Mills     nz                   = nz_in_row[i];
244*938d9b04SRichard Tran Mills     ipos                 = ipnz[nz];
245*938d9b04SRichard Tran Mills     aijperm->iperm[ipos] = i;
246*938d9b04SRichard Tran Mills     ipnz[nz]++;
247*938d9b04SRichard Tran Mills   }
248*938d9b04SRichard Tran Mills 
249*938d9b04SRichard Tran Mills   /* Clean up temporary work arrays. */
250*938d9b04SRichard Tran Mills   ierr = PetscFree(rows_in_bucket);CHKERRQ(ierr);
251*938d9b04SRichard Tran Mills   ierr = PetscFree(ipnz);CHKERRQ(ierr);
252*938d9b04SRichard Tran Mills   ierr = PetscFree(nz_in_row);CHKERRQ(ierr);
253*938d9b04SRichard Tran Mills   PetscFunctionReturn(0);
254*938d9b04SRichard Tran Mills }
255*938d9b04SRichard Tran Mills 
256*938d9b04SRichard Tran Mills 
257*938d9b04SRichard Tran Mills PetscErrorCode MatAssemblyEnd_SeqAIJPERM(Mat A, MatAssemblyType mode)
258*938d9b04SRichard Tran Mills {
259*938d9b04SRichard Tran Mills   PetscErrorCode ierr;
260*938d9b04SRichard Tran Mills   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
261*938d9b04SRichard Tran Mills 
262*938d9b04SRichard Tran Mills   PetscFunctionBegin;
263*938d9b04SRichard Tran Mills   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
264*938d9b04SRichard Tran Mills 
265*938d9b04SRichard Tran Mills   /* Since a MATSEQAIJPERM matrix is really just a MATSEQAIJ with some
266*938d9b04SRichard Tran Mills    * extra information, call the AssemblyEnd routine for a MATSEQAIJ.
267*938d9b04SRichard Tran Mills    * I'm not sure if this is the best way to do this, but it avoids
268*938d9b04SRichard Tran Mills    * a lot of code duplication.
269*938d9b04SRichard Tran Mills    * I also note that currently MATSEQAIJPERM doesn't know anything about
270*938d9b04SRichard Tran Mills    * the Mat_CompressedRow data structure that SeqAIJ now uses when there
271*938d9b04SRichard Tran Mills    * are many zero rows.  If the SeqAIJ assembly end routine decides to use
272*938d9b04SRichard Tran Mills    * this, this may break things.  (Don't know... haven't looked at it.) */
273*938d9b04SRichard Tran Mills   a->inode.use = PETSC_FALSE;
274*938d9b04SRichard Tran Mills   ierr         = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr);
275*938d9b04SRichard Tran Mills 
276*938d9b04SRichard Tran Mills   /* Now calculate the permutation and grouping information. */
277*938d9b04SRichard Tran Mills   ierr = MatSeqAIJPERM_create_perm(A);CHKERRQ(ierr);
278*938d9b04SRichard Tran Mills   PetscFunctionReturn(0);
279*938d9b04SRichard Tran Mills }
280*938d9b04SRichard Tran Mills 
281*938d9b04SRichard Tran Mills PetscErrorCode MatMult_SeqAIJPERM(Mat A,Vec xx,Vec yy)
282*938d9b04SRichard Tran Mills {
283*938d9b04SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
284*938d9b04SRichard Tran Mills   const PetscScalar *x;
285*938d9b04SRichard Tran Mills   PetscScalar       *y;
286*938d9b04SRichard Tran Mills   const MatScalar   *aa;
287*938d9b04SRichard Tran Mills   PetscErrorCode    ierr;
288*938d9b04SRichard Tran Mills   const PetscInt    *aj,*ai;
289*938d9b04SRichard Tran Mills #if !(defined(PETSC_USE_FORTRAN_KERNEL_MULTAIJPERM) && defined(notworking))
290*938d9b04SRichard Tran Mills   PetscInt          i,j;
291*938d9b04SRichard Tran Mills #endif
292*938d9b04SRichard Tran Mills 
293*938d9b04SRichard Tran Mills   /* Variables that don't appear in MatMult_SeqAIJ. */
294*938d9b04SRichard Tran Mills   Mat_SeqAIJPERM    *aijperm = (Mat_SeqAIJPERM*) A->spptr;
295*938d9b04SRichard Tran Mills   PetscInt          *iperm;  /* Points to the permutation vector. */
296*938d9b04SRichard Tran Mills   PetscInt          *xgroup;
297*938d9b04SRichard Tran Mills   /* Denotes where groups of rows with same number of nonzeros
298*938d9b04SRichard Tran Mills    * begin and end in iperm. */
299*938d9b04SRichard Tran Mills   PetscInt          *nzgroup;
300*938d9b04SRichard Tran Mills   PetscInt          ngroup;
301*938d9b04SRichard Tran Mills   PetscInt          igroup;
302*938d9b04SRichard Tran Mills   PetscInt          jstart,jend;
303*938d9b04SRichard Tran Mills   /* jstart is used in loops to denote the position in iperm where a
304*938d9b04SRichard Tran Mills    * group starts; jend denotes the position where it ends.
305*938d9b04SRichard Tran Mills    * (jend + 1 is where the next group starts.) */
306*938d9b04SRichard Tran Mills   PetscInt          iold,nz;
307*938d9b04SRichard Tran Mills   PetscInt          istart,iend,isize;
308*938d9b04SRichard Tran Mills   PetscInt          ipos;
309*938d9b04SRichard Tran Mills   PetscScalar       yp[NDIM];
310*938d9b04SRichard Tran Mills   PetscInt          ip[NDIM];    /* yp[] and ip[] are treated as vector "registers" for performing the mat-vec. */
311*938d9b04SRichard Tran Mills 
312*938d9b04SRichard Tran Mills #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
313*938d9b04SRichard Tran Mills #pragma disjoint(*x,*y,*aa)
314*938d9b04SRichard Tran Mills #endif
315*938d9b04SRichard Tran Mills 
316*938d9b04SRichard Tran Mills   PetscFunctionBegin;
317*938d9b04SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
318*938d9b04SRichard Tran Mills   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
319*938d9b04SRichard Tran Mills   aj   = a->j;   /* aj[k] gives column index for element aa[k]. */
320*938d9b04SRichard Tran Mills   aa   = a->a; /* Nonzero elements stored row-by-row. */
321*938d9b04SRichard Tran Mills   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
322*938d9b04SRichard Tran Mills 
323*938d9b04SRichard Tran Mills   /* Get the info we need about the permutations and groupings. */
324*938d9b04SRichard Tran Mills   iperm   = aijperm->iperm;
325*938d9b04SRichard Tran Mills   ngroup  = aijperm->ngroup;
326*938d9b04SRichard Tran Mills   xgroup  = aijperm->xgroup;
327*938d9b04SRichard Tran Mills   nzgroup = aijperm->nzgroup;
328*938d9b04SRichard Tran Mills 
329*938d9b04SRichard Tran Mills #if defined(PETSC_USE_FORTRAN_KERNEL_MULTAIJPERM) && defined(notworking)
330*938d9b04SRichard Tran Mills   fortranmultaijperm_(&m,x,ii,aj,aa,y);
331*938d9b04SRichard Tran Mills #else
332*938d9b04SRichard Tran Mills 
333*938d9b04SRichard Tran Mills   for (igroup=0; igroup<ngroup; igroup++) {
334*938d9b04SRichard Tran Mills     jstart = xgroup[igroup];
335*938d9b04SRichard Tran Mills     jend   = xgroup[igroup+1] - 1;
336*938d9b04SRichard Tran Mills     nz     = nzgroup[igroup];
337*938d9b04SRichard Tran Mills 
338*938d9b04SRichard Tran Mills     /* Handle the special cases where the number of nonzeros per row
339*938d9b04SRichard Tran Mills      * in the group is either 0 or 1. */
340*938d9b04SRichard Tran Mills     if (nz == 0) {
341*938d9b04SRichard Tran Mills       for (i=jstart; i<=jend; i++) {
342*938d9b04SRichard Tran Mills         y[iperm[i]] = 0.0;
343*938d9b04SRichard Tran Mills       }
344*938d9b04SRichard Tran Mills     } else if (nz == 1) {
345*938d9b04SRichard Tran Mills       for (i=jstart; i<=jend; i++) {
346*938d9b04SRichard Tran Mills         iold    = iperm[i];
347*938d9b04SRichard Tran Mills         ipos    = ai[iold];
348*938d9b04SRichard Tran Mills         y[iold] = aa[ipos] * x[aj[ipos]];
349*938d9b04SRichard Tran Mills       }
350*938d9b04SRichard Tran Mills     } else {
351*938d9b04SRichard Tran Mills 
352*938d9b04SRichard Tran Mills       /* We work our way through the current group in chunks of NDIM rows
353*938d9b04SRichard Tran Mills        * at a time. */
354*938d9b04SRichard Tran Mills 
355*938d9b04SRichard Tran Mills       for (istart=jstart; istart<=jend; istart+=NDIM) {
356*938d9b04SRichard Tran Mills         /* Figure out where the chunk of 'isize' rows ends in iperm.
357*938d9b04SRichard Tran Mills          * 'isize may of course be less than NDIM for the last chunk. */
358*938d9b04SRichard Tran Mills         iend = istart + (NDIM - 1);
359*938d9b04SRichard Tran Mills 
360*938d9b04SRichard Tran Mills         if (iend > jend) iend = jend;
361*938d9b04SRichard Tran Mills 
362*938d9b04SRichard Tran Mills         isize = iend - istart + 1;
363*938d9b04SRichard Tran Mills 
364*938d9b04SRichard Tran Mills         /* Initialize the yp[] array that will be used to hold part of
365*938d9b04SRichard Tran Mills          * the permuted results vector, and figure out where in aa each
366*938d9b04SRichard Tran Mills          * row of the chunk will begin. */
367*938d9b04SRichard Tran Mills         for (i=0; i<isize; i++) {
368*938d9b04SRichard Tran Mills           iold = iperm[istart + i];
369*938d9b04SRichard Tran Mills           /* iold is a row number from the matrix A *before* reordering. */
370*938d9b04SRichard Tran Mills           ip[i] = ai[iold];
371*938d9b04SRichard Tran Mills           /* ip[i] tells us where the ith row of the chunk begins in aa. */
372*938d9b04SRichard Tran Mills           yp[i] = (PetscScalar) 0.0;
373*938d9b04SRichard Tran Mills         }
374*938d9b04SRichard Tran Mills 
375*938d9b04SRichard Tran Mills         /* If the number of zeros per row exceeds the number of rows in
376*938d9b04SRichard Tran Mills          * the chunk, we should vectorize along nz, that is, perform the
377*938d9b04SRichard Tran Mills          * mat-vec one row at a time as in the usual CSR case. */
378*938d9b04SRichard Tran Mills         if (nz > isize) {
379*938d9b04SRichard Tran Mills #if defined(PETSC_HAVE_CRAY_VECTOR)
380*938d9b04SRichard Tran Mills #pragma _CRI preferstream
381*938d9b04SRichard Tran Mills #endif
382*938d9b04SRichard Tran Mills           for (i=0; i<isize; i++) {
383*938d9b04SRichard Tran Mills #if defined(PETSC_HAVE_CRAY_VECTOR)
384*938d9b04SRichard Tran Mills #pragma _CRI prefervector
385*938d9b04SRichard Tran Mills #endif
386*938d9b04SRichard Tran Mills             for (j=0; j<nz; j++) {
387*938d9b04SRichard Tran Mills               ipos   = ip[i] + j;
388*938d9b04SRichard Tran Mills               yp[i] += aa[ipos] * x[aj[ipos]];
389*938d9b04SRichard Tran Mills             }
390*938d9b04SRichard Tran Mills           }
391*938d9b04SRichard Tran Mills         } else {
392*938d9b04SRichard Tran Mills           /* Otherwise, there are enough rows in the chunk to make it
393*938d9b04SRichard Tran Mills            * worthwhile to vectorize across the rows, that is, to do the
394*938d9b04SRichard Tran Mills            * matvec by operating with "columns" of the chunk. */
395*938d9b04SRichard Tran Mills           for (j=0; j<nz; j++) {
396*938d9b04SRichard Tran Mills             for (i=0; i<isize; i++) {
397*938d9b04SRichard Tran Mills               ipos   = ip[i] + j;
398*938d9b04SRichard Tran Mills               yp[i] += aa[ipos] * x[aj[ipos]];
399*938d9b04SRichard Tran Mills             }
400*938d9b04SRichard Tran Mills           }
401*938d9b04SRichard Tran Mills         }
402*938d9b04SRichard Tran Mills 
403*938d9b04SRichard Tran Mills #if defined(PETSC_HAVE_CRAY_VECTOR)
404*938d9b04SRichard Tran Mills #pragma _CRI ivdep
405*938d9b04SRichard Tran Mills #endif
406*938d9b04SRichard Tran Mills         /* Put results from yp[] into non-permuted result vector y. */
407*938d9b04SRichard Tran Mills         for (i=0; i<isize; i++) {
408*938d9b04SRichard Tran Mills           y[iperm[istart+i]] = yp[i];
409*938d9b04SRichard Tran Mills         }
410*938d9b04SRichard Tran Mills       } /* End processing chunk of isize rows of a group. */
411*938d9b04SRichard Tran Mills     } /* End handling matvec for chunk with nz > 1. */
412*938d9b04SRichard Tran Mills   } /* End loop over igroup. */
413*938d9b04SRichard Tran Mills #endif
414*938d9b04SRichard Tran Mills   ierr = PetscLogFlops(PetscMax(2.0*a->nz - A->rmap->n,0));CHKERRQ(ierr);
415*938d9b04SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
416*938d9b04SRichard Tran Mills   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
417*938d9b04SRichard Tran Mills   PetscFunctionReturn(0);
418*938d9b04SRichard Tran Mills }
419*938d9b04SRichard Tran Mills 
420*938d9b04SRichard Tran Mills 
421*938d9b04SRichard Tran Mills /* MatMultAdd_SeqAIJPERM() calculates yy = ww + A * xx.
422*938d9b04SRichard Tran Mills  * Note that the names I used to designate the vectors differs from that
423*938d9b04SRichard Tran Mills  * used in MatMultAdd_SeqAIJ().  I did this to keep my notation consistent
424*938d9b04SRichard Tran Mills  * with the MatMult_SeqAIJPERM() routine, which is very similar to this one. */
425*938d9b04SRichard Tran Mills /*
426*938d9b04SRichard Tran Mills     I hate having virtually identical code for the mult and the multadd!!!
427*938d9b04SRichard Tran Mills */
428*938d9b04SRichard Tran Mills PetscErrorCode MatMultAdd_SeqAIJPERM(Mat A,Vec xx,Vec ww,Vec yy)
429*938d9b04SRichard Tran Mills {
430*938d9b04SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
431*938d9b04SRichard Tran Mills   const PetscScalar *x;
432*938d9b04SRichard Tran Mills   PetscScalar       *y,*w;
433*938d9b04SRichard Tran Mills   const MatScalar   *aa;
434*938d9b04SRichard Tran Mills   PetscErrorCode    ierr;
435*938d9b04SRichard Tran Mills   const PetscInt    *aj,*ai;
436*938d9b04SRichard Tran Mills #if !defined(PETSC_USE_FORTRAN_KERNEL_MULTADDAIJPERM)
437*938d9b04SRichard Tran Mills   PetscInt i,j;
438*938d9b04SRichard Tran Mills #endif
439*938d9b04SRichard Tran Mills 
440*938d9b04SRichard Tran Mills   /* Variables that don't appear in MatMultAdd_SeqAIJ. */
441*938d9b04SRichard Tran Mills   Mat_SeqAIJPERM * aijperm;
442*938d9b04SRichard Tran Mills   PetscInt       *iperm;    /* Points to the permutation vector. */
443*938d9b04SRichard Tran Mills   PetscInt       *xgroup;
444*938d9b04SRichard Tran Mills   /* Denotes where groups of rows with same number of nonzeros
445*938d9b04SRichard Tran Mills    * begin and end in iperm. */
446*938d9b04SRichard Tran Mills   PetscInt *nzgroup;
447*938d9b04SRichard Tran Mills   PetscInt ngroup;
448*938d9b04SRichard Tran Mills   PetscInt igroup;
449*938d9b04SRichard Tran Mills   PetscInt jstart,jend;
450*938d9b04SRichard Tran Mills   /* jstart is used in loops to denote the position in iperm where a
451*938d9b04SRichard Tran Mills    * group starts; jend denotes the position where it ends.
452*938d9b04SRichard Tran Mills    * (jend + 1 is where the next group starts.) */
453*938d9b04SRichard Tran Mills   PetscInt    iold,nz;
454*938d9b04SRichard Tran Mills   PetscInt    istart,iend,isize;
455*938d9b04SRichard Tran Mills   PetscInt    ipos;
456*938d9b04SRichard Tran Mills   PetscScalar yp[NDIM];
457*938d9b04SRichard Tran Mills   PetscInt    ip[NDIM];
458*938d9b04SRichard Tran Mills   /* yp[] and ip[] are treated as vector "registers" for performing
459*938d9b04SRichard Tran Mills    * the mat-vec. */
460*938d9b04SRichard Tran Mills 
461*938d9b04SRichard Tran Mills #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
462*938d9b04SRichard Tran Mills #pragma disjoint(*x,*y,*aa)
463*938d9b04SRichard Tran Mills #endif
464*938d9b04SRichard Tran Mills 
465*938d9b04SRichard Tran Mills   PetscFunctionBegin;
466*938d9b04SRichard Tran Mills   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
467*938d9b04SRichard Tran Mills   ierr = VecGetArrayPair(yy,ww,&y,&w);CHKERRQ(ierr);
468*938d9b04SRichard Tran Mills 
469*938d9b04SRichard Tran Mills   aj = a->j;   /* aj[k] gives column index for element aa[k]. */
470*938d9b04SRichard Tran Mills   aa = a->a;   /* Nonzero elements stored row-by-row. */
471*938d9b04SRichard Tran Mills   ai = a->i;   /* ai[k] is the position in aa and aj where row k starts. */
472*938d9b04SRichard Tran Mills 
473*938d9b04SRichard Tran Mills   /* Get the info we need about the permutations and groupings. */
474*938d9b04SRichard Tran Mills   aijperm = (Mat_SeqAIJPERM*) A->spptr;
475*938d9b04SRichard Tran Mills   iperm   = aijperm->iperm;
476*938d9b04SRichard Tran Mills   ngroup  = aijperm->ngroup;
477*938d9b04SRichard Tran Mills   xgroup  = aijperm->xgroup;
478*938d9b04SRichard Tran Mills   nzgroup = aijperm->nzgroup;
479*938d9b04SRichard Tran Mills 
480*938d9b04SRichard Tran Mills #if defined(PETSC_USE_FORTRAN_KERNEL_MULTADDAIJPERM)
481*938d9b04SRichard Tran Mills   fortranmultaddaijperm_(&m,x,ii,aj,aa,y,w);
482*938d9b04SRichard Tran Mills #else
483*938d9b04SRichard Tran Mills 
484*938d9b04SRichard Tran Mills   for (igroup=0; igroup<ngroup; igroup++) {
485*938d9b04SRichard Tran Mills     jstart = xgroup[igroup];
486*938d9b04SRichard Tran Mills     jend   = xgroup[igroup+1] - 1;
487*938d9b04SRichard Tran Mills 
488*938d9b04SRichard Tran Mills     nz = nzgroup[igroup];
489*938d9b04SRichard Tran Mills 
490*938d9b04SRichard Tran Mills     /* Handle the special cases where the number of nonzeros per row
491*938d9b04SRichard Tran Mills      * in the group is either 0 or 1. */
492*938d9b04SRichard Tran Mills     if (nz == 0) {
493*938d9b04SRichard Tran Mills       for (i=jstart; i<=jend; i++) {
494*938d9b04SRichard Tran Mills         iold    = iperm[i];
495*938d9b04SRichard Tran Mills         y[iold] = w[iold];
496*938d9b04SRichard Tran Mills       }
497*938d9b04SRichard Tran Mills     }
498*938d9b04SRichard Tran Mills     else if (nz == 1) {
499*938d9b04SRichard Tran Mills       for (i=jstart; i<=jend; i++) {
500*938d9b04SRichard Tran Mills         iold    = iperm[i];
501*938d9b04SRichard Tran Mills         ipos    = ai[iold];
502*938d9b04SRichard Tran Mills         y[iold] = w[iold] + aa[ipos] * x[aj[ipos]];
503*938d9b04SRichard Tran Mills       }
504*938d9b04SRichard Tran Mills     }
505*938d9b04SRichard Tran Mills     /* For the general case: */
506*938d9b04SRichard Tran Mills     else {
507*938d9b04SRichard Tran Mills 
508*938d9b04SRichard Tran Mills       /* We work our way through the current group in chunks of NDIM rows
509*938d9b04SRichard Tran Mills        * at a time. */
510*938d9b04SRichard Tran Mills 
511*938d9b04SRichard Tran Mills       for (istart=jstart; istart<=jend; istart+=NDIM) {
512*938d9b04SRichard Tran Mills         /* Figure out where the chunk of 'isize' rows ends in iperm.
513*938d9b04SRichard Tran Mills          * 'isize may of course be less than NDIM for the last chunk. */
514*938d9b04SRichard Tran Mills         iend = istart + (NDIM - 1);
515*938d9b04SRichard Tran Mills         if (iend > jend) iend = jend;
516*938d9b04SRichard Tran Mills         isize = iend - istart + 1;
517*938d9b04SRichard Tran Mills 
518*938d9b04SRichard Tran Mills         /* Initialize the yp[] array that will be used to hold part of
519*938d9b04SRichard Tran Mills          * the permuted results vector, and figure out where in aa each
520*938d9b04SRichard Tran Mills          * row of the chunk will begin. */
521*938d9b04SRichard Tran Mills         for (i=0; i<isize; i++) {
522*938d9b04SRichard Tran Mills           iold = iperm[istart + i];
523*938d9b04SRichard Tran Mills           /* iold is a row number from the matrix A *before* reordering. */
524*938d9b04SRichard Tran Mills           ip[i] = ai[iold];
525*938d9b04SRichard Tran Mills           /* ip[i] tells us where the ith row of the chunk begins in aa. */
526*938d9b04SRichard Tran Mills           yp[i] = w[iold];
527*938d9b04SRichard Tran Mills         }
528*938d9b04SRichard Tran Mills 
529*938d9b04SRichard Tran Mills         /* If the number of zeros per row exceeds the number of rows in
530*938d9b04SRichard Tran Mills          * the chunk, we should vectorize along nz, that is, perform the
531*938d9b04SRichard Tran Mills          * mat-vec one row at a time as in the usual CSR case. */
532*938d9b04SRichard Tran Mills         if (nz > isize) {
533*938d9b04SRichard Tran Mills #if defined(PETSC_HAVE_CRAY_VECTOR)
534*938d9b04SRichard Tran Mills #pragma _CRI preferstream
535*938d9b04SRichard Tran Mills #endif
536*938d9b04SRichard Tran Mills           for (i=0; i<isize; i++) {
537*938d9b04SRichard Tran Mills #if defined(PETSC_HAVE_CRAY_VECTOR)
538*938d9b04SRichard Tran Mills #pragma _CRI prefervector
539*938d9b04SRichard Tran Mills #endif
540*938d9b04SRichard Tran Mills             for (j=0; j<nz; j++) {
541*938d9b04SRichard Tran Mills               ipos   = ip[i] + j;
542*938d9b04SRichard Tran Mills               yp[i] += aa[ipos] * x[aj[ipos]];
543*938d9b04SRichard Tran Mills             }
544*938d9b04SRichard Tran Mills           }
545*938d9b04SRichard Tran Mills         }
546*938d9b04SRichard Tran Mills         /* Otherwise, there are enough rows in the chunk to make it
547*938d9b04SRichard Tran Mills          * worthwhile to vectorize across the rows, that is, to do the
548*938d9b04SRichard Tran Mills          * matvec by operating with "columns" of the chunk. */
549*938d9b04SRichard Tran Mills         else {
550*938d9b04SRichard Tran Mills           for (j=0; j<nz; j++) {
551*938d9b04SRichard Tran Mills             for (i=0; i<isize; i++) {
552*938d9b04SRichard Tran Mills               ipos   = ip[i] + j;
553*938d9b04SRichard Tran Mills               yp[i] += aa[ipos] * x[aj[ipos]];
554*938d9b04SRichard Tran Mills             }
555*938d9b04SRichard Tran Mills           }
556*938d9b04SRichard Tran Mills         }
557*938d9b04SRichard Tran Mills 
558*938d9b04SRichard Tran Mills #if defined(PETSC_HAVE_CRAY_VECTOR)
559*938d9b04SRichard Tran Mills #pragma _CRI ivdep
560*938d9b04SRichard Tran Mills #endif
561*938d9b04SRichard Tran Mills         /* Put results from yp[] into non-permuted result vector y. */
562*938d9b04SRichard Tran Mills         for (i=0; i<isize; i++) {
563*938d9b04SRichard Tran Mills           y[iperm[istart+i]] = yp[i];
564*938d9b04SRichard Tran Mills         }
565*938d9b04SRichard Tran Mills       } /* End processing chunk of isize rows of a group. */
566*938d9b04SRichard Tran Mills 
567*938d9b04SRichard Tran Mills     } /* End handling matvec for chunk with nz > 1. */
568*938d9b04SRichard Tran Mills   } /* End loop over igroup. */
569*938d9b04SRichard Tran Mills 
570*938d9b04SRichard Tran Mills #endif
571*938d9b04SRichard Tran Mills   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
572*938d9b04SRichard Tran Mills   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
573*938d9b04SRichard Tran Mills   ierr = VecRestoreArrayPair(yy,ww,&y,&w);CHKERRQ(ierr);
574*938d9b04SRichard Tran Mills   PetscFunctionReturn(0);
575*938d9b04SRichard Tran Mills }
576*938d9b04SRichard Tran Mills 
577*938d9b04SRichard Tran Mills 
578*938d9b04SRichard Tran Mills /* MatConvert_SeqAIJ_SeqAIJPERM converts a SeqAIJ matrix into a
579*938d9b04SRichard Tran Mills  * SeqAIJPERM matrix.  This routine is called by the MatCreate_SeqAIJPERM()
580*938d9b04SRichard Tran Mills  * routine, but can also be used to convert an assembled SeqAIJ matrix
581*938d9b04SRichard Tran Mills  * into a SeqAIJPERM one. */
582*938d9b04SRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJPERM(Mat A,MatType type,MatReuse reuse,Mat *newmat)
583*938d9b04SRichard Tran Mills {
584*938d9b04SRichard Tran Mills   PetscErrorCode ierr;
585*938d9b04SRichard Tran Mills   Mat            B = *newmat;
586*938d9b04SRichard Tran Mills   Mat_SeqAIJPERM *aijperm;
587*938d9b04SRichard Tran Mills   PetscBool      sametype;
588*938d9b04SRichard Tran Mills 
589*938d9b04SRichard Tran Mills   PetscFunctionBegin;
590*938d9b04SRichard Tran Mills   if (reuse == MAT_INITIAL_MATRIX) {
591*938d9b04SRichard Tran Mills     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
592*938d9b04SRichard Tran Mills   }
593*938d9b04SRichard Tran Mills   ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr);
594*938d9b04SRichard Tran Mills   if (sametype) PetscFunctionReturn(0);
595*938d9b04SRichard Tran Mills 
596*938d9b04SRichard Tran Mills   ierr     = PetscNewLog(B,&aijperm);CHKERRQ(ierr);
597*938d9b04SRichard Tran Mills   B->spptr = (void*) aijperm;
598*938d9b04SRichard Tran Mills 
599*938d9b04SRichard Tran Mills   /* Set function pointers for methods that we inherit from AIJ but override. */
600*938d9b04SRichard Tran Mills   B->ops->duplicate   = MatDuplicate_SeqAIJPERM;
601*938d9b04SRichard Tran Mills   B->ops->assemblyend = MatAssemblyEnd_SeqAIJPERM;
602*938d9b04SRichard Tran Mills   B->ops->destroy     = MatDestroy_SeqAIJPERM;
603*938d9b04SRichard Tran Mills   B->ops->mult        = MatMult_SeqAIJPERM;
604*938d9b04SRichard Tran Mills   B->ops->multadd     = MatMultAdd_SeqAIJPERM;
605*938d9b04SRichard Tran Mills 
606*938d9b04SRichard Tran Mills   aijperm->nonzerostate = -1;  /* this will trigger the generation of the permutation information the first time through MatAssembly()*/
607*938d9b04SRichard Tran Mills   /* If A has already been assembled, compute the permutation. */
608*938d9b04SRichard Tran Mills   if (A->assembled) {
609*938d9b04SRichard Tran Mills     ierr = MatSeqAIJPERM_create_perm(B);CHKERRQ(ierr);
610*938d9b04SRichard Tran Mills   }
611*938d9b04SRichard Tran Mills 
612*938d9b04SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijperm_seqaij_C",MatConvert_SeqAIJPERM_SeqAIJ);CHKERRQ(ierr);
613*938d9b04SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijperm_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr);
614*938d9b04SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijperm_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr);
615*938d9b04SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijperm_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr);
616*938d9b04SRichard Tran Mills 
617*938d9b04SRichard Tran Mills   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJPERM);CHKERRQ(ierr);
618*938d9b04SRichard Tran Mills   *newmat = B;
619*938d9b04SRichard Tran Mills   PetscFunctionReturn(0);
620*938d9b04SRichard Tran Mills }
621*938d9b04SRichard Tran Mills 
622*938d9b04SRichard Tran Mills /*@C
623*938d9b04SRichard Tran Mills    MatCreateSeqAIJPERM - Creates a sparse matrix of type SEQAIJPERM.
624*938d9b04SRichard Tran Mills    This type inherits from AIJ, but calculates some additional permutation
625*938d9b04SRichard Tran Mills    information that is used to allow better vectorization of some
626*938d9b04SRichard Tran Mills    operations.  At the cost of increased storage, the AIJ formatted
627*938d9b04SRichard Tran Mills    matrix can be copied to a format in which pieces of the matrix are
628*938d9b04SRichard Tran Mills    stored in ELLPACK format, allowing the vectorized matrix multiply
629*938d9b04SRichard Tran Mills    routine to use stride-1 memory accesses.  As with the AIJ type, it is
630*938d9b04SRichard Tran Mills    important to preallocate matrix storage in order to get good assembly
631*938d9b04SRichard Tran Mills    performance.
632*938d9b04SRichard Tran Mills 
633*938d9b04SRichard Tran Mills    Collective on MPI_Comm
634*938d9b04SRichard Tran Mills 
635*938d9b04SRichard Tran Mills    Input Parameters:
636*938d9b04SRichard Tran Mills +  comm - MPI communicator, set to PETSC_COMM_SELF
637*938d9b04SRichard Tran Mills .  m - number of rows
638*938d9b04SRichard Tran Mills .  n - number of columns
639*938d9b04SRichard Tran Mills .  nz - number of nonzeros per row (same for all rows)
640*938d9b04SRichard Tran Mills -  nnz - array containing the number of nonzeros in the various rows
641*938d9b04SRichard Tran Mills          (possibly different for each row) or NULL
642*938d9b04SRichard Tran Mills 
643*938d9b04SRichard Tran Mills    Output Parameter:
644*938d9b04SRichard Tran Mills .  A - the matrix
645*938d9b04SRichard Tran Mills 
646*938d9b04SRichard Tran Mills    Notes:
647*938d9b04SRichard Tran Mills    If nnz is given then nz is ignored
648*938d9b04SRichard Tran Mills 
649*938d9b04SRichard Tran Mills    Level: intermediate
650*938d9b04SRichard Tran Mills 
651*938d9b04SRichard Tran Mills .keywords: matrix, cray, sparse, parallel
652*938d9b04SRichard Tran Mills 
653*938d9b04SRichard Tran Mills .seealso: MatCreate(), MatCreateMPIAIJPERM(), MatSetValues()
654*938d9b04SRichard Tran Mills @*/
655*938d9b04SRichard Tran Mills PetscErrorCode  MatCreateSeqAIJPERM(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
656*938d9b04SRichard Tran Mills {
657*938d9b04SRichard Tran Mills   PetscErrorCode ierr;
658*938d9b04SRichard Tran Mills 
659*938d9b04SRichard Tran Mills   PetscFunctionBegin;
660*938d9b04SRichard Tran Mills   ierr = MatCreate(comm,A);CHKERRQ(ierr);
661*938d9b04SRichard Tran Mills   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
662*938d9b04SRichard Tran Mills   ierr = MatSetType(*A,MATSEQAIJPERM);CHKERRQ(ierr);
663*938d9b04SRichard Tran Mills   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
664*938d9b04SRichard Tran Mills   PetscFunctionReturn(0);
665*938d9b04SRichard Tran Mills }
666*938d9b04SRichard Tran Mills 
667*938d9b04SRichard Tran Mills PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJPERM(Mat A)
668*938d9b04SRichard Tran Mills {
669*938d9b04SRichard Tran Mills   PetscErrorCode ierr;
670*938d9b04SRichard Tran Mills 
671*938d9b04SRichard Tran Mills   PetscFunctionBegin;
672*938d9b04SRichard Tran Mills   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
673*938d9b04SRichard Tran Mills   ierr = MatConvert_SeqAIJ_SeqAIJPERM(A,MATSEQAIJPERM,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
674*938d9b04SRichard Tran Mills   PetscFunctionReturn(0);
675*938d9b04SRichard Tran Mills }
676*938d9b04SRichard Tran Mills 
677