xref: /petsc/src/mat/impls/aij/seq/crl/crl.c (revision bcaeba4d41d6ca6c6dc4189db20683073a9959ce)
1 
2 /*
3   Defines a matrix-vector product for the MATSEQAIJCRL matrix class.
4   This class is derived from the MATSEQAIJ class and retains the
5   compressed row storage (aka Yale sparse matrix format) but augments
6   it with a column oriented storage that is more efficient for
7   matrix vector products on Vector machines.
8 
9   CRL stands for constant row length (that is the same number of columns
10   is kept (padded with zeros) for each row of the sparse matrix.
11 */
12 #include <../src/mat/impls/aij/seq/crl/crl.h>
13 
14 #undef __FUNCT__
15 #define __FUNCT__ "MatDestroy_SeqAIJCRL"
16 PetscErrorCode MatDestroy_SeqAIJCRL(Mat A)
17 {
18   PetscErrorCode ierr;
19   Mat_AIJCRL     *aijcrl = (Mat_AIJCRL *) A->spptr;
20 
21   /* Free everything in the Mat_AIJCRL data structure. */
22   if (aijcrl) {
23     ierr = PetscFree2(aijcrl->acols,aijcrl->icols);CHKERRQ(ierr);
24   }
25   ierr = PetscFree(A->spptr);CHKERRQ(ierr);
26   ierr = PetscObjectChangeTypeName( (PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
27   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
28   PetscFunctionReturn(0);
29 }
30 
31 PetscErrorCode MatDuplicate_AIJCRL(Mat A, MatDuplicateOption op, Mat *M)
32 {
33   PetscFunctionBegin;
34   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot duplicate AIJCRL matrices yet");
35   PetscFunctionReturn(0);
36 }
37 
38 #undef __FUNCT__
39 #define __FUNCT__ "MatSeqAIJCRL_create_aijcrl"
40 PetscErrorCode MatSeqAIJCRL_create_aijcrl(Mat A)
41 {
42   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)(A)->data;
43   Mat_AIJCRL     *aijcrl = (Mat_AIJCRL*) A->spptr;
44   PetscInt       m = A->rmap->n;  /* Number of rows in the matrix. */
45   PetscInt       *aj = a->j;  /* From the CSR representation; points to the beginning  of each row. */
46   PetscInt       i, j,rmax = a->rmax,*icols, *ilen = a->ilen;
47   MatScalar      *aa = a->a;
48   PetscScalar    *acols;
49   PetscErrorCode ierr;
50 
51   PetscFunctionBegin;
52   aijcrl->nz   = a->nz;
53   aijcrl->m    = A->rmap->n;
54   aijcrl->rmax = rmax;
55   ierr = PetscFree2(aijcrl->acols,aijcrl->icols);CHKERRQ(ierr);
56   ierr = PetscMalloc2(rmax*m,PetscScalar,&aijcrl->acols,rmax*m,PetscInt,&aijcrl->icols);CHKERRQ(ierr);
57   acols = aijcrl->acols;
58   icols = aijcrl->icols;
59   for (i=0; i<m; i++) {
60     for (j=0; j<ilen[i]; j++) {
61       acols[j*m+i] = *aa++;
62       icols[j*m+i] = *aj++;
63     }
64     for (;j<rmax; j++) { /* empty column entries */
65       acols[j*m+i] = 0.0;
66       icols[j*m+i] = (j) ? icols[(j-1)*m+i] : 0;  /* handle case where row is EMPTY */
67     }
68   }
69   ierr = PetscInfo2(A,"Percentage of 0's introduced for vectorized multiply %g. Rmax= %D\n",1.0-((double)a->nz)/((double)(rmax*m)),rmax);CHKERRQ(ierr);
70   PetscFunctionReturn(0);
71 }
72 
73 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
74 
75 #undef __FUNCT__
76 #define __FUNCT__ "MatAssemblyEnd_SeqAIJCRL"
77 PetscErrorCode MatAssemblyEnd_SeqAIJCRL(Mat A, MatAssemblyType mode)
78 {
79   PetscErrorCode ierr;
80   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
81 
82   PetscFunctionBegin;
83   a->inode.use = PETSC_FALSE;
84   ierr = MatAssemblyEnd_SeqAIJ(A,mode);CHKERRQ(ierr);
85   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
86 
87   /* Now calculate the permutation and grouping information. */
88   ierr = MatSeqAIJCRL_create_aijcrl(A);CHKERRQ(ierr);
89   PetscFunctionReturn(0);
90 }
91 
92 #include <../src/mat/impls/aij/seq/crl/ftn-kernels/fmultcrl.h>
93 
94 #undef __FUNCT__
95 #define __FUNCT__ "MatMult_AIJCRL"
96 /*
97     Shared by both sequential and parallel versions of CRL matrix: MATMPIAIJCRL and MATSEQAIJCRL
98     - the scatter is used only in the parallel version
99 
100 */
101 PetscErrorCode MatMult_AIJCRL(Mat A,Vec xx,Vec yy)
102 {
103   Mat_AIJCRL     *aijcrl = (Mat_AIJCRL*) A->spptr;
104   PetscInt       m = aijcrl->m;  /* Number of rows in the matrix. */
105   PetscInt       rmax = aijcrl->rmax,*icols = aijcrl->icols;
106   PetscScalar    *acols = aijcrl->acols;
107   PetscErrorCode ierr;
108   PetscScalar    *x,*y;
109 #if !defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL)
110   PetscInt       i,j,ii;
111 #endif
112 
113 
114 #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
115 #pragma disjoint(*x,*y,*aa)
116 #endif
117 
118   PetscFunctionBegin;
119   if (aijcrl->xscat) {
120     ierr = VecCopy(xx,aijcrl->xwork);CHKERRQ(ierr);
121     /* get remote values needed for local part of multiply */
122     ierr = VecScatterBegin(aijcrl->xscat,xx,aijcrl->fwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
123     ierr = VecScatterEnd(aijcrl->xscat,xx,aijcrl->fwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
124     xx = aijcrl->xwork;
125   };
126 
127   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
128   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
129 
130 #if defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL)
131   fortranmultcrl_(&m,&rmax,x,y,icols,acols);
132 #else
133 
134   /* first column */
135   for (j=0; j<m; j++) {
136     y[j] = acols[j]*x[icols[j]];
137   }
138 
139   /* other columns */
140 #if defined(PETSC_HAVE_CRAY_VECTOR)
141 #pragma _CRI preferstream
142 #endif
143   for (i=1; i<rmax; i++) {
144     ii = i*m;
145 #if defined(PETSC_HAVE_CRAY_VECTOR)
146 #pragma _CRI prefervector
147 #endif
148     for (j=0; j<m; j++) {
149       y[j] = y[j] + acols[ii+j]*x[icols[ii+j]];
150     }
151   }
152 #if defined(PETSC_HAVE_CRAY_VECTOR)
153 #pragma _CRI ivdep
154 #endif
155 
156 #endif
157   ierr = PetscLogFlops(2.0*aijcrl->nz - m);CHKERRQ(ierr);
158   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
159   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
160   PetscFunctionReturn(0);
161 }
162 
163 
164 /* MatConvert_SeqAIJ_SeqAIJCRL converts a SeqAIJ matrix into a
165  * SeqAIJCRL matrix.  This routine is called by the MatCreate_SeqAIJCRL()
166  * routine, but can also be used to convert an assembled SeqAIJ matrix
167  * into a SeqAIJCRL one. */
168 EXTERN_C_BEGIN
169 #undef __FUNCT__
170 #define __FUNCT__ "MatConvert_SeqAIJ_SeqAIJCRL"
171 PetscErrorCode  MatConvert_SeqAIJ_SeqAIJCRL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
172 {
173   PetscErrorCode ierr;
174   Mat            B = *newmat;
175   Mat_AIJCRL     *aijcrl;
176 
177   PetscFunctionBegin;
178   if (reuse == MAT_INITIAL_MATRIX) {
179     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
180   }
181 
182   ierr = PetscNewLog(B,Mat_AIJCRL,&aijcrl);CHKERRQ(ierr);
183   B->spptr = (void *) aijcrl;
184 
185   /* Set function pointers for methods that we inherit from AIJ but override. */
186   B->ops->duplicate   = MatDuplicate_AIJCRL;
187   B->ops->assemblyend = MatAssemblyEnd_SeqAIJCRL;
188   B->ops->destroy     = MatDestroy_SeqAIJCRL;
189   B->ops->mult        = MatMult_AIJCRL;
190 
191   /* If A has already been assembled, compute the permutation. */
192   if (A->assembled) {
193     ierr = MatSeqAIJCRL_create_aijcrl(B);CHKERRQ(ierr);
194   }
195   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJCRL);CHKERRQ(ierr);
196   *newmat = B;
197   PetscFunctionReturn(0);
198 }
199 EXTERN_C_END
200 
201 
202 #undef __FUNCT__
203 #define __FUNCT__ "MatCreateSeqAIJCRL"
204 /*@C
205    MatCreateSeqAIJCRL - Creates a sparse matrix of type SEQAIJCRL.
206    This type inherits from AIJ, but stores some additional
207    information that is used to allow better vectorization of
208    the matrix-vector product. At the cost of increased storage, the AIJ formatted
209    matrix can be copied to a format in which pieces of the matrix are
210    stored in ELLPACK format, allowing the vectorized matrix multiply
211    routine to use stride-1 memory accesses.  As with the AIJ type, it is
212    important to preallocate matrix storage in order to get good assembly
213    performance.
214 
215    Collective on MPI_Comm
216 
217    Input Parameters:
218 +  comm - MPI communicator, set to PETSC_COMM_SELF
219 .  m - number of rows
220 .  n - number of columns
221 .  nz - number of nonzeros per row (same for all rows)
222 -  nnz - array containing the number of nonzeros in the various rows
223          (possibly different for each row) or PETSC_NULL
224 
225    Output Parameter:
226 .  A - the matrix
227 
228    Notes:
229    If nnz is given then nz is ignored
230 
231    Level: intermediate
232 
233 .keywords: matrix, cray, sparse, parallel
234 
235 .seealso: MatCreate(), MatCreateMPIAIJPERM(), MatSetValues()
236 @*/
237 PetscErrorCode  MatCreateSeqAIJCRL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
238 {
239   PetscErrorCode ierr;
240 
241   PetscFunctionBegin;
242   ierr = MatCreate(comm,A);CHKERRQ(ierr);
243   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
244   ierr = MatSetType(*A,MATSEQAIJCRL);CHKERRQ(ierr);
245   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
246   PetscFunctionReturn(0);
247 }
248 
249 
250 EXTERN_C_BEGIN
251 #undef __FUNCT__
252 #define __FUNCT__ "MatCreate_SeqAIJCRL"
253 PetscErrorCode  MatCreate_SeqAIJCRL(Mat A)
254 {
255   PetscErrorCode ierr;
256 
257   PetscFunctionBegin;
258   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
259   ierr = MatConvert_SeqAIJ_SeqAIJCRL(A,MATSEQAIJCRL,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
260   PetscFunctionReturn(0);
261 }
262 EXTERN_C_END
263 
264