xref: /petsc/src/mat/impls/aij/seq/crl/crl.c (revision 753ebd1d069d7a4e86ed47ccb83df941bd1fb5db)
1 #define PETSCMAT_DLL
2 
3 /*
4   Defines a matrix-vector product for the MATSEQAIJCRL matrix class.
5   This class is derived from the MATSEQAIJ class and retains the
6   compressed row storage (aka Yale sparse matrix format) but augments
7   it with a column oriented storage that is more efficient for
8   matrix vector products on Vector machines.
9 
10   CRL stands for constant row length (that is the same number of columns
11   is kept (padded with zeros) for each row of the sparse matrix.
12 */
13 #include "../src/mat/impls/aij/seq/crl/crl.h"
14 
15 #undef __FUNCT__
16 #define __FUNCT__ "MatDestroy_SeqCRL"
17 PetscErrorCode MatDestroy_SeqCRL(Mat A)
18 {
19   PetscErrorCode ierr;
20   Mat_CRL        *crl = (Mat_CRL *) A->spptr;
21 
22   /* Free everything in the Mat_CRL data structure. */
23   ierr = PetscFree2(crl->acols,crl->icols);CHKERRQ(ierr);
24 
25   ierr = PetscObjectChangeTypeName( (PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
26   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
27   PetscFunctionReturn(0);
28 }
29 
30 PetscErrorCode MatDuplicate_CRL(Mat A, MatDuplicateOption op, Mat *M)
31 {
32   PetscFunctionBegin;
33   SETERRQ(PETSC_ERR_SUP,"Cannot duplicate CRL matrices yet");
34   PetscFunctionReturn(0);
35 }
36 
37 #undef __FUNCT__
38 #define __FUNCT__ "SeqCRL_create_crl"
39 PetscErrorCode SeqCRL_create_crl(Mat A)
40 {
41   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)(A)->data;
42   Mat_CRL        *crl = (Mat_CRL*) A->spptr;
43   PetscInt       m = A->rmap->n;  /* Number of rows in the matrix. */
44   PetscInt       *aj = a->j;  /* From the CSR representation; points to the beginning  of each row. */
45   PetscInt       i, j,rmax = a->rmax,*icols, *ilen = a->ilen;
46   MatScalar      *aa = a->a;
47   PetscScalar    *acols;
48   PetscErrorCode ierr;
49 
50   PetscFunctionBegin;
51   crl->nz   = a->nz;
52   crl->m    = A->rmap->n;
53   crl->rmax = rmax;
54   ierr  = PetscMalloc2(rmax*m,PetscScalar,&crl->acols,rmax*m,PetscInt,&crl->icols);CHKERRQ(ierr);
55   acols = crl->acols;
56   icols = crl->icols;
57   for (i=0; i<m; i++) {
58     for (j=0; j<ilen[i]; j++) {
59       acols[j*m+i] = *aa++;
60       icols[j*m+i] = *aj++;
61     }
62     for (;j<rmax; j++) { /* empty column entries */
63       acols[j*m+i] = 0.0;
64       icols[j*m+i] = (j) ? icols[(j-1)*m+i] : 0;  /* handle case where row is EMPTY */
65     }
66   }
67   ierr = PetscInfo2(A,"Percentage of 0's introduced for vectorized multiply %G. Rmax= %D\n",1.0-((double)a->nz)/((double)(rmax*m)),rmax);
68   PetscFunctionReturn(0);
69 }
70 
71 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
72 
73 #undef __FUNCT__
74 #define __FUNCT__ "MatAssemblyEnd_SeqCRL"
75 PetscErrorCode MatAssemblyEnd_SeqCRL(Mat A, MatAssemblyType mode)
76 {
77   PetscErrorCode ierr;
78   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
79 
80   PetscFunctionBegin;
81   a->inode.use = PETSC_FALSE;
82   ierr = MatAssemblyEnd_SeqAIJ(A,mode);CHKERRQ(ierr);
83   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
84 
85   /* Now calculate the permutation and grouping information. */
86   ierr = SeqCRL_create_crl(A);CHKERRQ(ierr);
87   PetscFunctionReturn(0);
88 }
89 
90 #include "../src/mat/impls/aij/seq/crl/ftn-kernels/fmultcrl.h"
91 
92 #undef __FUNCT__
93 #define __FUNCT__ "MatMult_CRL"
94 /*
95     Shared by both sequential and parallel versions of CRL matrix: MATMPICRL and MATSEQCRL
96     - the scatter is used only in the parallel version
97 
98 */
99 PetscErrorCode MatMult_CRL(Mat A,Vec xx,Vec yy)
100 {
101   Mat_CRL        *crl = (Mat_CRL*) A->spptr;
102   PetscInt       m = crl->m;  /* Number of rows in the matrix. */
103   PetscInt       rmax = crl->rmax,*icols = crl->icols;
104   PetscScalar    *acols = crl->acols;
105   PetscErrorCode ierr;
106   PetscScalar    *x,*y;
107 #if !defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL)
108   PetscInt       i,j,ii;
109 #endif
110 
111 
112 #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
113 #pragma disjoint(*x,*y,*aa)
114 #endif
115 
116   PetscFunctionBegin;
117   if (crl->xscat) {
118     ierr = VecCopy(xx,crl->xwork);CHKERRQ(ierr);
119     /* get remote values needed for local part of multiply */
120     ierr = VecScatterBegin(crl->xscat,xx,crl->fwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
121     ierr = VecScatterEnd(crl->xscat,xx,crl->fwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
122     xx = crl->xwork;
123   };
124 
125   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
126   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
127 
128 #if defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL)
129   fortranmultcrl_(&m,&rmax,x,y,icols,acols);
130 #else
131 
132   /* first column */
133   for (j=0; j<m; j++) {
134     y[j] = acols[j]*x[icols[j]];
135   }
136 
137   /* other columns */
138 #if defined(PETSC_HAVE_CRAYC)
139 #pragma _CRI preferstream
140 #endif
141   for (i=1; i<rmax; i++) {
142     ii = i*m;
143 #if defined(PETSC_HAVE_CRAYC)
144 #pragma _CRI prefervector
145 #endif
146     for (j=0; j<m; j++) {
147       y[j] = y[j] + acols[ii+j]*x[icols[ii+j]];
148     }
149   }
150 #if defined(PETSC_HAVE_CRAYC)
151 #pragma _CRI ivdep
152 #endif
153 
154 #endif
155   ierr = PetscLogFlops(2.0*crl->nz - m);CHKERRQ(ierr);
156   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
157   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
158   PetscFunctionReturn(0);
159 }
160 
161 
162 /* MatConvert_SeqAIJ_SeqCRL converts a SeqAIJ matrix into a
163  * SeqCRL matrix.  This routine is called by the MatCreate_SeqCRL()
164  * routine, but can also be used to convert an assembled SeqAIJ matrix
165  * into a SeqCRL one. */
166 EXTERN_C_BEGIN
167 #undef __FUNCT__
168 #define __FUNCT__ "MatConvert_SeqAIJ_SeqCRL"
169 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqAIJ_SeqCRL(Mat A,const MatType type,MatReuse reuse,Mat *newmat)
170 {
171   PetscErrorCode ierr;
172   Mat            B = *newmat;
173   Mat_CRL        *crl;
174 
175   PetscFunctionBegin;
176   if (reuse == MAT_INITIAL_MATRIX) {
177     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
178   }
179 
180   ierr = PetscNewLog(B,Mat_CRL,&crl);CHKERRQ(ierr);
181   B->spptr = (void *) crl;
182 
183   /* Set function pointers for methods that we inherit from AIJ but override. */
184   B->ops->duplicate   = MatDuplicate_CRL;
185   B->ops->assemblyend = MatAssemblyEnd_SeqCRL;
186   B->ops->destroy     = MatDestroy_SeqCRL;
187   B->ops->mult        = MatMult_CRL;
188 
189   /* If A has already been assembled, compute the permutation. */
190   if (A->assembled) {
191     ierr = SeqCRL_create_crl(B);CHKERRQ(ierr);
192   }
193   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQCRL);CHKERRQ(ierr);
194   *newmat = B;
195   PetscFunctionReturn(0);
196 }
197 EXTERN_C_END
198 
199 
200 #undef __FUNCT__
201 #define __FUNCT__ "MatCreateSeqCRL"
202 /*@C
203    MatCreateSeqCRL - Creates a sparse matrix of type SEQCRL.
204    This type inherits from AIJ, but stores some additional
205    information that is used to allow better vectorization of
206    the matrix-vector product. At the cost of increased storage, the AIJ formatted
207    matrix can be copied to a format in which pieces of the matrix are
208    stored in ELLPACK format, allowing the vectorized matrix multiply
209    routine to use stride-1 memory accesses.  As with the AIJ type, it is
210    important to preallocate matrix storage in order to get good assembly
211    performance.
212 
213    Collective on MPI_Comm
214 
215    Input Parameters:
216 +  comm - MPI communicator, set to PETSC_COMM_SELF
217 .  m - number of rows
218 .  n - number of columns
219 .  nz - number of nonzeros per row (same for all rows)
220 -  nnz - array containing the number of nonzeros in the various rows
221          (possibly different for each row) or PETSC_NULL
222 
223    Output Parameter:
224 .  A - the matrix
225 
226    Notes:
227    If nnz is given then nz is ignored
228 
229    Level: intermediate
230 
231 .keywords: matrix, cray, sparse, parallel
232 
233 .seealso: MatCreate(), MatCreateMPICSRPERM(), MatSetValues()
234 @*/
235 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqCRL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
236 {
237   PetscErrorCode ierr;
238 
239   PetscFunctionBegin;
240   ierr = MatCreate(comm,A);CHKERRQ(ierr);
241   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
242   ierr = MatSetType(*A,MATSEQCRL);CHKERRQ(ierr);
243   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);CHKERRQ(ierr);
244   PetscFunctionReturn(0);
245 }
246 
247 
248 EXTERN_C_BEGIN
249 #undef __FUNCT__
250 #define __FUNCT__ "MatCreate_SeqCRL"
251 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqCRL(Mat A)
252 {
253   PetscErrorCode ierr;
254 
255   PetscFunctionBegin;
256   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
257   ierr = MatConvert_SeqAIJ_SeqCRL(A,MATSEQCRL,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
258   PetscFunctionReturn(0);
259 }
260 EXTERN_C_END
261 
262