1 2 /* 3 Defines a matrix-vector product for the MATSEQAIJCRL matrix class. 4 This class is derived from the MATSEQAIJ class and retains the 5 compressed row storage (aka Yale sparse matrix format) but augments 6 it with a column oriented storage that is more efficient for 7 matrix vector products on Vector machines. 8 9 CRL stands for constant row length (that is the same number of columns 10 is kept (padded with zeros) for each row of the sparse matrix. 11 */ 12 #include <../src/mat/impls/aij/seq/crl/crl.h> 13 14 PetscErrorCode MatDestroy_SeqAIJCRL(Mat A) 15 { 16 Mat_AIJCRL *aijcrl = (Mat_AIJCRL*) A->spptr; 17 18 PetscFunctionBegin; 19 /* Free everything in the Mat_AIJCRL data structure. */ 20 if (aijcrl) PetscCall(PetscFree2(aijcrl->acols,aijcrl->icols)); 21 PetscCall(PetscFree(A->spptr)); 22 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ)); 23 PetscCall(MatDestroy_SeqAIJ(A)); 24 PetscFunctionReturn(0); 25 } 26 27 PetscErrorCode MatDuplicate_AIJCRL(Mat A, MatDuplicateOption op, Mat *M) 28 { 29 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot duplicate AIJCRL matrices yet"); 30 } 31 32 PetscErrorCode MatSeqAIJCRL_create_aijcrl(Mat A) 33 { 34 Mat_SeqAIJ *a = (Mat_SeqAIJ*)(A)->data; 35 Mat_AIJCRL *aijcrl = (Mat_AIJCRL*) A->spptr; 36 PetscInt m = A->rmap->n; /* Number of rows in the matrix. */ 37 PetscInt *aj = a->j; /* From the CSR representation; points to the beginning of each row. */ 38 PetscInt i, j,rmax = a->rmax,*icols, *ilen = a->ilen; 39 MatScalar *aa = a->a; 40 PetscScalar *acols; 41 42 PetscFunctionBegin; 43 aijcrl->nz = a->nz; 44 aijcrl->m = A->rmap->n; 45 aijcrl->rmax = rmax; 46 47 PetscCall(PetscFree2(aijcrl->acols,aijcrl->icols)); 48 PetscCall(PetscMalloc2(rmax*m,&aijcrl->acols,rmax*m,&aijcrl->icols)); 49 acols = aijcrl->acols; 50 icols = aijcrl->icols; 51 for (i=0; i<m; i++) { 52 for (j=0; j<ilen[i]; j++) { 53 acols[j*m+i] = *aa++; 54 icols[j*m+i] = *aj++; 55 } 56 for (; j<rmax; j++) { /* empty column entries */ 57 acols[j*m+i] = 0.0; 58 icols[j*m+i] = (j) ? icols[(j-1)*m+i] : 0; /* handle case where row is EMPTY */ 59 } 60 } 61 PetscCall(PetscInfo(A,"Percentage of 0's introduced for vectorized multiply %g. Rmax= %" PetscInt_FMT "\n",1.0-((double)a->nz)/((double)(rmax*m)),rmax)); 62 PetscFunctionReturn(0); 63 } 64 65 PetscErrorCode MatAssemblyEnd_SeqAIJCRL(Mat A, MatAssemblyType mode) 66 { 67 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 68 69 PetscFunctionBegin; 70 a->inode.use = PETSC_FALSE; 71 72 PetscCall(MatAssemblyEnd_SeqAIJ(A,mode)); 73 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 74 75 /* Now calculate the permutation and grouping information. */ 76 PetscCall(MatSeqAIJCRL_create_aijcrl(A)); 77 PetscFunctionReturn(0); 78 } 79 80 #include <../src/mat/impls/aij/seq/crl/ftn-kernels/fmultcrl.h> 81 82 /* 83 Shared by both sequential and parallel versions of CRL matrix: MATMPIAIJCRL and MATSEQAIJCRL 84 - the scatter is used only in the parallel version 85 86 */ 87 PetscErrorCode MatMult_AIJCRL(Mat A,Vec xx,Vec yy) 88 { 89 Mat_AIJCRL *aijcrl = (Mat_AIJCRL*) A->spptr; 90 PetscInt m = aijcrl->m; /* Number of rows in the matrix. */ 91 PetscInt rmax = aijcrl->rmax,*icols = aijcrl->icols; 92 PetscScalar *acols = aijcrl->acols; 93 PetscScalar *y; 94 const PetscScalar *x; 95 #if !defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL) 96 PetscInt i,j,ii; 97 #endif 98 99 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 100 #pragma disjoint(*x,*y,*aa) 101 #endif 102 103 PetscFunctionBegin; 104 if (aijcrl->xscat) { 105 PetscCall(VecCopy(xx,aijcrl->xwork)); 106 /* get remote values needed for local part of multiply */ 107 PetscCall(VecScatterBegin(aijcrl->xscat,xx,aijcrl->fwork,INSERT_VALUES,SCATTER_FORWARD)); 108 PetscCall(VecScatterEnd(aijcrl->xscat,xx,aijcrl->fwork,INSERT_VALUES,SCATTER_FORWARD)); 109 xx = aijcrl->xwork; 110 } 111 112 PetscCall(VecGetArrayRead(xx,&x)); 113 PetscCall(VecGetArray(yy,&y)); 114 115 #if defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL) 116 fortranmultcrl_(&m,&rmax,x,y,icols,acols); 117 #else 118 119 /* first column */ 120 for (j=0; j<m; j++) y[j] = acols[j]*x[icols[j]]; 121 122 /* other columns */ 123 #if defined(PETSC_HAVE_CRAY_VECTOR) 124 #pragma _CRI preferstream 125 #endif 126 for (i=1; i<rmax; i++) { 127 ii = i*m; 128 #if defined(PETSC_HAVE_CRAY_VECTOR) 129 #pragma _CRI prefervector 130 #endif 131 for (j=0; j<m; j++) y[j] = y[j] + acols[ii+j]*x[icols[ii+j]]; 132 } 133 #if defined(PETSC_HAVE_CRAY_VECTOR) 134 #pragma _CRI ivdep 135 #endif 136 137 #endif 138 PetscCall(PetscLogFlops(2.0*aijcrl->nz - m)); 139 PetscCall(VecRestoreArrayRead(xx,&x)); 140 PetscCall(VecRestoreArray(yy,&y)); 141 PetscFunctionReturn(0); 142 } 143 144 /* MatConvert_SeqAIJ_SeqAIJCRL converts a SeqAIJ matrix into a 145 * SeqAIJCRL matrix. This routine is called by the MatCreate_SeqAIJCRL() 146 * routine, but can also be used to convert an assembled SeqAIJ matrix 147 * into a SeqAIJCRL one. */ 148 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJCRL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 149 { 150 Mat B = *newmat; 151 Mat_AIJCRL *aijcrl; 152 PetscBool sametype; 153 154 PetscFunctionBegin; 155 if (reuse == MAT_INITIAL_MATRIX) { 156 PetscCall(MatDuplicate(A,MAT_COPY_VALUES,&B)); 157 } 158 PetscCall(PetscObjectTypeCompare((PetscObject)A,type,&sametype)); 159 if (sametype) PetscFunctionReturn(0); 160 161 PetscCall(PetscNewLog(B,&aijcrl)); 162 B->spptr = (void*) aijcrl; 163 164 /* Set function pointers for methods that we inherit from AIJ but override. */ 165 B->ops->duplicate = MatDuplicate_AIJCRL; 166 B->ops->assemblyend = MatAssemblyEnd_SeqAIJCRL; 167 B->ops->destroy = MatDestroy_SeqAIJCRL; 168 B->ops->mult = MatMult_AIJCRL; 169 170 /* If A has already been assembled, compute the permutation. */ 171 if (A->assembled) PetscCall(MatSeqAIJCRL_create_aijcrl(B)); 172 PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJCRL)); 173 *newmat = B; 174 PetscFunctionReturn(0); 175 } 176 177 /*@C 178 MatCreateSeqAIJCRL - Creates a sparse matrix of type SEQAIJCRL. 179 This type inherits from AIJ, but stores some additional 180 information that is used to allow better vectorization of 181 the matrix-vector product. At the cost of increased storage, the AIJ formatted 182 matrix can be copied to a format in which pieces of the matrix are 183 stored in ELLPACK format, allowing the vectorized matrix multiply 184 routine to use stride-1 memory accesses. As with the AIJ type, it is 185 important to preallocate matrix storage in order to get good assembly 186 performance. 187 188 Collective 189 190 Input Parameters: 191 + comm - MPI communicator, set to PETSC_COMM_SELF 192 . m - number of rows 193 . n - number of columns 194 . nz - number of nonzeros per row (same for all rows) 195 - nnz - array containing the number of nonzeros in the various rows 196 (possibly different for each row) or NULL 197 198 Output Parameter: 199 . A - the matrix 200 201 Notes: 202 If nnz is given then nz is ignored 203 204 Level: intermediate 205 206 .seealso: `MatCreate()`, `MatCreateMPIAIJPERM()`, `MatSetValues()` 207 @*/ 208 PetscErrorCode MatCreateSeqAIJCRL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 209 { 210 PetscFunctionBegin; 211 PetscCall(MatCreate(comm,A)); 212 PetscCall(MatSetSizes(*A,m,n,m,n)); 213 PetscCall(MatSetType(*A,MATSEQAIJCRL)); 214 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz)); 215 PetscFunctionReturn(0); 216 } 217 218 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJCRL(Mat A) 219 { 220 PetscFunctionBegin; 221 PetscCall(MatSetType(A,MATSEQAIJ)); 222 PetscCall(MatConvert_SeqAIJ_SeqAIJCRL(A,MATSEQAIJCRL,MAT_INPLACE_MATRIX,&A)); 223 PetscFunctionReturn(0); 224 } 225