181824310SBarry Smith #define PETSCMAT_DLL 281824310SBarry Smith 381824310SBarry Smith /* 481824310SBarry Smith Defines a matrix-vector product for the MATSEQAIJCRL matrix class. 581824310SBarry Smith This class is derived from the MATSEQAIJ class and retains the 681824310SBarry Smith compressed row storage (aka Yale sparse matrix format) but augments 781824310SBarry Smith it with a column oriented storage that is more efficient for 881824310SBarry Smith matrix vector products on Vector machines. 981824310SBarry Smith 1081824310SBarry Smith CRL stands for constant row length (that is the same number of columns 1181824310SBarry Smith is kept (padded with zeros) for each row of the sparse matrix. 1281824310SBarry Smith */ 137c4f633dSBarry Smith #include "../src/mat/impls/aij/seq/crl/crl.h" 1481824310SBarry Smith 1581824310SBarry Smith #undef __FUNCT__ 1681824310SBarry Smith #define __FUNCT__ "MatDestroy_SeqCRL" 1781824310SBarry Smith PetscErrorCode MatDestroy_SeqCRL(Mat A) 1881824310SBarry Smith { 1981824310SBarry Smith PetscErrorCode ierr; 20c02b24c5SBarry Smith Mat_CRL *crl = (Mat_CRL *) A->spptr; 2181824310SBarry Smith 22c02b24c5SBarry Smith /* Free everything in the Mat_CRL data structure. */ 2381824310SBarry Smith ierr = PetscFree2(crl->acols,crl->icols);CHKERRQ(ierr); 2405b42c5fSBarry Smith 2581824310SBarry Smith ierr = PetscObjectChangeTypeName( (PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 26*4723c594SBarry Smith ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 2781824310SBarry Smith PetscFunctionReturn(0); 2881824310SBarry Smith } 2981824310SBarry Smith 30c02b24c5SBarry Smith PetscErrorCode MatDuplicate_CRL(Mat A, MatDuplicateOption op, Mat *M) 3181824310SBarry Smith { 3281824310SBarry Smith PetscFunctionBegin; 3381824310SBarry Smith SETERRQ(PETSC_ERR_SUP,"Cannot duplicate CRL matrices yet"); 3481824310SBarry Smith PetscFunctionReturn(0); 3581824310SBarry Smith } 3681824310SBarry Smith 3781824310SBarry Smith #undef __FUNCT__ 3881824310SBarry Smith #define __FUNCT__ "SeqCRL_create_crl" 3981824310SBarry Smith PetscErrorCode SeqCRL_create_crl(Mat A) 4081824310SBarry Smith { 4181824310SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)(A)->data; 42c02b24c5SBarry Smith Mat_CRL *crl = (Mat_CRL*) A->spptr; 43d0f46423SBarry Smith PetscInt m = A->rmap->n; /* Number of rows in the matrix. */ 4481824310SBarry Smith PetscInt *aj = a->j; /* From the CSR representation; points to the beginning of each row. */ 4581824310SBarry Smith PetscInt i, j,rmax = a->rmax,*icols, *ilen = a->ilen; 46dd6ea824SBarry Smith MatScalar *aa = a->a; 47dd6ea824SBarry Smith PetscScalar *acols; 4881824310SBarry Smith PetscErrorCode ierr; 4981824310SBarry Smith 5081824310SBarry Smith PetscFunctionBegin; 51c02b24c5SBarry Smith crl->nz = a->nz; 52d0f46423SBarry Smith crl->m = A->rmap->n; 53c02b24c5SBarry Smith crl->rmax = rmax; 5481824310SBarry Smith ierr = PetscMalloc2(rmax*m,PetscScalar,&crl->acols,rmax*m,PetscInt,&crl->icols);CHKERRQ(ierr); 5581824310SBarry Smith acols = crl->acols; 5681824310SBarry Smith icols = crl->icols; 5781824310SBarry Smith for (i=0; i<m; i++) { 5881824310SBarry Smith for (j=0; j<ilen[i]; j++) { 5981824310SBarry Smith acols[j*m+i] = *aa++; 6081824310SBarry Smith icols[j*m+i] = *aj++; 6181824310SBarry Smith } 6281824310SBarry Smith for (;j<rmax; j++) { /* empty column entries */ 6381824310SBarry Smith acols[j*m+i] = 0.0; 6481824310SBarry Smith icols[j*m+i] = (j) ? icols[(j-1)*m+i] : 0; /* handle case where row is EMPTY */ 6581824310SBarry Smith } 6681824310SBarry Smith } 67ae15b995SBarry Smith ierr = PetscInfo2(A,"Percentage of 0's introduced for vectorized multiply %G. Rmax= %D\n",1.0-((double)a->nz)/((double)(rmax*m)),rmax); 6881824310SBarry Smith PetscFunctionReturn(0); 6981824310SBarry Smith } 7081824310SBarry Smith 71*4723c594SBarry Smith extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 72*4723c594SBarry Smith 7381824310SBarry Smith #undef __FUNCT__ 7481824310SBarry Smith #define __FUNCT__ "MatAssemblyEnd_SeqCRL" 7581824310SBarry Smith PetscErrorCode MatAssemblyEnd_SeqCRL(Mat A, MatAssemblyType mode) 7681824310SBarry Smith { 7781824310SBarry Smith PetscErrorCode ierr; 7881824310SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 7981824310SBarry Smith 80c02b24c5SBarry Smith PetscFunctionBegin; 8181824310SBarry Smith a->inode.use = PETSC_FALSE; 82*4723c594SBarry Smith ierr = MatAssemblyEnd_SeqAIJ(A,mode);CHKERRQ(ierr); 83*4723c594SBarry Smith if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 8481824310SBarry Smith 8581824310SBarry Smith /* Now calculate the permutation and grouping information. */ 8681824310SBarry Smith ierr = SeqCRL_create_crl(A);CHKERRQ(ierr); 8781824310SBarry Smith PetscFunctionReturn(0); 8881824310SBarry Smith } 8981824310SBarry Smith 90a4005a5dSBarry Smith #include "../src/mat/impls/aij/seq/crl/ftn-kernels/fmultcrl.h" 915aeacdfdSBarry Smith 9281824310SBarry Smith #undef __FUNCT__ 93c02b24c5SBarry Smith #define __FUNCT__ "MatMult_CRL" 94*4723c594SBarry Smith /* 95*4723c594SBarry Smith Shared by both sequential and parallel versions of CRL matrix: MATMPICRL and MATSEQCRL 96*4723c594SBarry Smith - the scatter is used only in the parallel version 97*4723c594SBarry Smith 98*4723c594SBarry Smith */ 99c02b24c5SBarry Smith PetscErrorCode MatMult_CRL(Mat A,Vec xx,Vec yy) 10081824310SBarry Smith { 101c02b24c5SBarry Smith Mat_CRL *crl = (Mat_CRL*) A->spptr; 102c02b24c5SBarry Smith PetscInt m = crl->m; /* Number of rows in the matrix. */ 103c02b24c5SBarry Smith PetscInt rmax = crl->rmax,*icols = crl->icols; 10481824310SBarry Smith PetscScalar *acols = crl->acols; 10581824310SBarry Smith PetscErrorCode ierr; 10681824310SBarry Smith PetscScalar *x,*y; 10781824310SBarry Smith #if !defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL) 10881824310SBarry Smith PetscInt i,j,ii; 10981824310SBarry Smith #endif 11081824310SBarry Smith 11181824310SBarry Smith 11281824310SBarry Smith #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 11381824310SBarry Smith #pragma disjoint(*x,*y,*aa) 11481824310SBarry Smith #endif 11581824310SBarry Smith 11681824310SBarry Smith PetscFunctionBegin; 117c02b24c5SBarry Smith if (crl->xscat) { 118c02b24c5SBarry Smith ierr = VecCopy(xx,crl->xwork);CHKERRQ(ierr); 119c02b24c5SBarry Smith /* get remote values needed for local part of multiply */ 120ca9f406cSSatish Balay ierr = VecScatterBegin(crl->xscat,xx,crl->fwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 121ca9f406cSSatish Balay ierr = VecScatterEnd(crl->xscat,xx,crl->fwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 122c02b24c5SBarry Smith xx = crl->xwork; 123c02b24c5SBarry Smith }; 124c02b24c5SBarry Smith 12581824310SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 12681824310SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 12781824310SBarry Smith 12881824310SBarry Smith #if defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL) 12981824310SBarry Smith fortranmultcrl_(&m,&rmax,x,y,icols,acols); 13081824310SBarry Smith #else 13181824310SBarry Smith 1324be65460SBarry Smith /* first column */ 1334be65460SBarry Smith for (j=0; j<m; j++) { 1344be65460SBarry Smith y[j] = acols[j]*x[icols[j]]; 1354be65460SBarry Smith } 1364be65460SBarry Smith 1374be65460SBarry Smith /* other columns */ 13881824310SBarry Smith #if defined(PETSC_HAVE_CRAYC) 13981824310SBarry Smith #pragma _CRI preferstream 14081824310SBarry Smith #endif 1414be65460SBarry Smith for (i=1; i<rmax; i++) { 14281824310SBarry Smith ii = i*m; 14381824310SBarry Smith #if defined(PETSC_HAVE_CRAYC) 14481824310SBarry Smith #pragma _CRI prefervector 14581824310SBarry Smith #endif 14681824310SBarry Smith for (j=0; j<m; j++) { 14781824310SBarry Smith y[j] = y[j] + acols[ii+j]*x[icols[ii+j]]; 14881824310SBarry Smith } 14981824310SBarry Smith } 15081824310SBarry Smith #if defined(PETSC_HAVE_CRAYC) 15181824310SBarry Smith #pragma _CRI ivdep 15281824310SBarry Smith #endif 15381824310SBarry Smith 15481824310SBarry Smith #endif 155dc0b31edSSatish Balay ierr = PetscLogFlops(2.0*crl->nz - m);CHKERRQ(ierr); 15681824310SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 15781824310SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 15881824310SBarry Smith PetscFunctionReturn(0); 15981824310SBarry Smith } 16081824310SBarry Smith 16181824310SBarry Smith 16281824310SBarry Smith /* MatConvert_SeqAIJ_SeqCRL converts a SeqAIJ matrix into a 16381824310SBarry Smith * SeqCRL matrix. This routine is called by the MatCreate_SeqCRL() 16481824310SBarry Smith * routine, but can also be used to convert an assembled SeqAIJ matrix 16581824310SBarry Smith * into a SeqCRL one. */ 16681824310SBarry Smith EXTERN_C_BEGIN 16781824310SBarry Smith #undef __FUNCT__ 16881824310SBarry Smith #define __FUNCT__ "MatConvert_SeqAIJ_SeqCRL" 1698cf70c4bSSatish Balay PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqAIJ_SeqCRL(Mat A,const MatType type,MatReuse reuse,Mat *newmat) 17081824310SBarry Smith { 17181824310SBarry Smith PetscErrorCode ierr; 17281824310SBarry Smith Mat B = *newmat; 173c02b24c5SBarry Smith Mat_CRL *crl; 17481824310SBarry Smith 17581824310SBarry Smith PetscFunctionBegin; 17681824310SBarry Smith if (reuse == MAT_INITIAL_MATRIX) { 17781824310SBarry Smith ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 17881824310SBarry Smith } 17981824310SBarry Smith 18038f2d2fdSLisandro Dalcin ierr = PetscNewLog(B,Mat_CRL,&crl);CHKERRQ(ierr); 18181824310SBarry Smith B->spptr = (void *) crl; 18281824310SBarry Smith 18317667f90SBarry Smith /* Set function pointers for methods that we inherit from AIJ but override. */ 184c02b24c5SBarry Smith B->ops->duplicate = MatDuplicate_CRL; 18581824310SBarry Smith B->ops->assemblyend = MatAssemblyEnd_SeqCRL; 18681824310SBarry Smith B->ops->destroy = MatDestroy_SeqCRL; 187c02b24c5SBarry Smith B->ops->mult = MatMult_CRL; 18881824310SBarry Smith 18981824310SBarry Smith /* If A has already been assembled, compute the permutation. */ 1904eeb8337SBarry Smith if (A->assembled) { 19181824310SBarry Smith ierr = SeqCRL_create_crl(B);CHKERRQ(ierr); 19281824310SBarry Smith } 19381824310SBarry Smith ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQCRL);CHKERRQ(ierr); 19481824310SBarry Smith *newmat = B; 19581824310SBarry Smith PetscFunctionReturn(0); 19681824310SBarry Smith } 19781824310SBarry Smith EXTERN_C_END 19881824310SBarry Smith 19981824310SBarry Smith 20081824310SBarry Smith #undef __FUNCT__ 20181824310SBarry Smith #define __FUNCT__ "MatCreateSeqCRL" 20281824310SBarry Smith /*@C 20381824310SBarry Smith MatCreateSeqCRL - Creates a sparse matrix of type SEQCRL. 20481824310SBarry Smith This type inherits from AIJ, but stores some additional 20581824310SBarry Smith information that is used to allow better vectorization of 20681824310SBarry Smith the matrix-vector product. At the cost of increased storage, the AIJ formatted 20781824310SBarry Smith matrix can be copied to a format in which pieces of the matrix are 20881824310SBarry Smith stored in ELLPACK format, allowing the vectorized matrix multiply 20981824310SBarry Smith routine to use stride-1 memory accesses. As with the AIJ type, it is 21081824310SBarry Smith important to preallocate matrix storage in order to get good assembly 21181824310SBarry Smith performance. 21281824310SBarry Smith 21381824310SBarry Smith Collective on MPI_Comm 21481824310SBarry Smith 21581824310SBarry Smith Input Parameters: 21681824310SBarry Smith + comm - MPI communicator, set to PETSC_COMM_SELF 21781824310SBarry Smith . m - number of rows 21881824310SBarry Smith . n - number of columns 21981824310SBarry Smith . nz - number of nonzeros per row (same for all rows) 22081824310SBarry Smith - nnz - array containing the number of nonzeros in the various rows 22181824310SBarry Smith (possibly different for each row) or PETSC_NULL 22281824310SBarry Smith 22381824310SBarry Smith Output Parameter: 22481824310SBarry Smith . A - the matrix 22581824310SBarry Smith 22681824310SBarry Smith Notes: 22781824310SBarry Smith If nnz is given then nz is ignored 22881824310SBarry Smith 22981824310SBarry Smith Level: intermediate 23081824310SBarry Smith 23181824310SBarry Smith .keywords: matrix, cray, sparse, parallel 23281824310SBarry Smith 23381824310SBarry Smith .seealso: MatCreate(), MatCreateMPICSRPERM(), MatSetValues() 23481824310SBarry Smith @*/ 23581824310SBarry Smith PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqCRL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 23681824310SBarry Smith { 23781824310SBarry Smith PetscErrorCode ierr; 23881824310SBarry Smith 23981824310SBarry Smith PetscFunctionBegin; 24081824310SBarry Smith ierr = MatCreate(comm,A);CHKERRQ(ierr); 24181824310SBarry Smith ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 24281824310SBarry Smith ierr = MatSetType(*A,MATSEQCRL);CHKERRQ(ierr); 24381824310SBarry Smith ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);CHKERRQ(ierr); 24481824310SBarry Smith PetscFunctionReturn(0); 24581824310SBarry Smith } 24681824310SBarry Smith 24781824310SBarry Smith 24881824310SBarry Smith EXTERN_C_BEGIN 24981824310SBarry Smith #undef __FUNCT__ 25081824310SBarry Smith #define __FUNCT__ "MatCreate_SeqCRL" 25181824310SBarry Smith PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqCRL(Mat A) 25281824310SBarry Smith { 25381824310SBarry Smith PetscErrorCode ierr; 25481824310SBarry Smith 25581824310SBarry Smith PetscFunctionBegin; 25681824310SBarry Smith ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 25781824310SBarry Smith ierr = MatConvert_SeqAIJ_SeqCRL(A,MATSEQCRL,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 25881824310SBarry Smith PetscFunctionReturn(0); 25981824310SBarry Smith } 26081824310SBarry Smith EXTERN_C_END 26181824310SBarry Smith 262