181824310SBarry Smith 281824310SBarry Smith /* 381824310SBarry Smith Defines a matrix-vector product for the MATSEQAIJCRL matrix class. 481824310SBarry Smith This class is derived from the MATSEQAIJ class and retains the 581824310SBarry Smith compressed row storage (aka Yale sparse matrix format) but augments 681824310SBarry Smith it with a column oriented storage that is more efficient for 781824310SBarry Smith matrix vector products on Vector machines. 881824310SBarry Smith 981824310SBarry Smith CRL stands for constant row length (that is the same number of columns 1081824310SBarry Smith is kept (padded with zeros) for each row of the sparse matrix. 1181824310SBarry Smith */ 12c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/crl/crl.h> 1381824310SBarry Smith 1481824310SBarry Smith #undef __FUNCT__ 155a11e1b2SBarry Smith #define __FUNCT__ "MatDestroy_SeqAIJCRL" 165a11e1b2SBarry Smith PetscErrorCode MatDestroy_SeqAIJCRL(Mat A) 1781824310SBarry Smith { 1881824310SBarry Smith PetscErrorCode ierr; 195a11e1b2SBarry Smith Mat_AIJCRL *aijcrl = (Mat_AIJCRL*) A->spptr; 2081824310SBarry Smith 215a11e1b2SBarry Smith /* Free everything in the Mat_AIJCRL data structure. */ 22bf0cc555SLisandro Dalcin if (aijcrl) { 235a11e1b2SBarry Smith ierr = PetscFree2(aijcrl->acols,aijcrl->icols);CHKERRQ(ierr); 24bf0cc555SLisandro Dalcin } 25c31cb41cSBarry Smith ierr = PetscFree(A->spptr);CHKERRQ(ierr); 2681824310SBarry Smith ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 274723c594SBarry Smith ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 2881824310SBarry Smith PetscFunctionReturn(0); 2981824310SBarry Smith } 3081824310SBarry Smith 315a11e1b2SBarry Smith PetscErrorCode MatDuplicate_AIJCRL(Mat A, MatDuplicateOption op, Mat *M) 3281824310SBarry Smith { 3381824310SBarry Smith PetscFunctionBegin; 345a11e1b2SBarry Smith SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot duplicate AIJCRL matrices yet"); 3581824310SBarry Smith PetscFunctionReturn(0); 3681824310SBarry Smith } 3781824310SBarry Smith 3881824310SBarry Smith #undef __FUNCT__ 3996b95a6bSBarry Smith #define __FUNCT__ "MatSeqAIJCRL_create_aijcrl" 4096b95a6bSBarry Smith PetscErrorCode MatSeqAIJCRL_create_aijcrl(Mat A) 4181824310SBarry Smith { 4281824310SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)(A)->data; 435a11e1b2SBarry Smith Mat_AIJCRL *aijcrl = (Mat_AIJCRL*) A->spptr; 44d0f46423SBarry Smith PetscInt m = A->rmap->n; /* Number of rows in the matrix. */ 4581824310SBarry Smith PetscInt *aj = a->j; /* From the CSR representation; points to the beginning of each row. */ 4681824310SBarry Smith PetscInt i, j,rmax = a->rmax,*icols, *ilen = a->ilen; 47dd6ea824SBarry Smith MatScalar *aa = a->a; 48dd6ea824SBarry Smith PetscScalar *acols; 4981824310SBarry Smith PetscErrorCode ierr; 5081824310SBarry Smith 5181824310SBarry Smith PetscFunctionBegin; 525a11e1b2SBarry Smith aijcrl->nz = a->nz; 535a11e1b2SBarry Smith aijcrl->m = A->rmap->n; 545a11e1b2SBarry Smith aijcrl->rmax = rmax; 552205254eSKarl Rupp 565a11e1b2SBarry Smith ierr = PetscFree2(aijcrl->acols,aijcrl->icols);CHKERRQ(ierr); 57*dcca6d9dSJed Brown ierr = PetscMalloc2(rmax*m,&aijcrl->acols,rmax*m,&aijcrl->icols);CHKERRQ(ierr); 585a11e1b2SBarry Smith acols = aijcrl->acols; 595a11e1b2SBarry Smith icols = aijcrl->icols; 6081824310SBarry Smith for (i=0; i<m; i++) { 6181824310SBarry Smith for (j=0; j<ilen[i]; j++) { 6281824310SBarry Smith acols[j*m+i] = *aa++; 6381824310SBarry Smith icols[j*m+i] = *aj++; 6481824310SBarry Smith } 6581824310SBarry Smith for (; j<rmax; j++) { /* empty column entries */ 6681824310SBarry Smith acols[j*m+i] = 0.0; 6781824310SBarry Smith icols[j*m+i] = (j) ? icols[(j-1)*m+i] : 0; /* handle case where row is EMPTY */ 6881824310SBarry Smith } 6981824310SBarry Smith } 70a2ea699eSBarry Smith ierr = PetscInfo2(A,"Percentage of 0's introduced for vectorized multiply %g. Rmax= %D\n",1.0-((double)a->nz)/((double)(rmax*m)),rmax);CHKERRQ(ierr); 7181824310SBarry Smith PetscFunctionReturn(0); 7281824310SBarry Smith } 7381824310SBarry Smith 744723c594SBarry Smith extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 754723c594SBarry Smith 7681824310SBarry Smith #undef __FUNCT__ 775a11e1b2SBarry Smith #define __FUNCT__ "MatAssemblyEnd_SeqAIJCRL" 785a11e1b2SBarry Smith PetscErrorCode MatAssemblyEnd_SeqAIJCRL(Mat A, MatAssemblyType mode) 7981824310SBarry Smith { 8081824310SBarry Smith PetscErrorCode ierr; 8181824310SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 8281824310SBarry Smith 83c02b24c5SBarry Smith PetscFunctionBegin; 8481824310SBarry Smith a->inode.use = PETSC_FALSE; 852205254eSKarl Rupp 864723c594SBarry Smith ierr = MatAssemblyEnd_SeqAIJ(A,mode);CHKERRQ(ierr); 874723c594SBarry Smith if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 8881824310SBarry Smith 8981824310SBarry Smith /* Now calculate the permutation and grouping information. */ 9096b95a6bSBarry Smith ierr = MatSeqAIJCRL_create_aijcrl(A);CHKERRQ(ierr); 9181824310SBarry Smith PetscFunctionReturn(0); 9281824310SBarry Smith } 9381824310SBarry Smith 94c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/crl/ftn-kernels/fmultcrl.h> 955aeacdfdSBarry Smith 9681824310SBarry Smith #undef __FUNCT__ 975a11e1b2SBarry Smith #define __FUNCT__ "MatMult_AIJCRL" 984723c594SBarry Smith /* 995a11e1b2SBarry Smith Shared by both sequential and parallel versions of CRL matrix: MATMPIAIJCRL and MATSEQAIJCRL 1004723c594SBarry Smith - the scatter is used only in the parallel version 1014723c594SBarry Smith 1024723c594SBarry Smith */ 1035a11e1b2SBarry Smith PetscErrorCode MatMult_AIJCRL(Mat A,Vec xx,Vec yy) 10481824310SBarry Smith { 1055a11e1b2SBarry Smith Mat_AIJCRL *aijcrl = (Mat_AIJCRL*) A->spptr; 1065a11e1b2SBarry Smith PetscInt m = aijcrl->m; /* Number of rows in the matrix. */ 1075a11e1b2SBarry Smith PetscInt rmax = aijcrl->rmax,*icols = aijcrl->icols; 1085a11e1b2SBarry Smith PetscScalar *acols = aijcrl->acols; 10981824310SBarry Smith PetscErrorCode ierr; 11081824310SBarry Smith PetscScalar *x,*y; 11181824310SBarry Smith #if !defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL) 11281824310SBarry Smith PetscInt i,j,ii; 11381824310SBarry Smith #endif 11481824310SBarry Smith 11581824310SBarry Smith 11681824310SBarry Smith #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 11781824310SBarry Smith #pragma disjoint(*x,*y,*aa) 11881824310SBarry Smith #endif 11981824310SBarry Smith 12081824310SBarry Smith PetscFunctionBegin; 1215a11e1b2SBarry Smith if (aijcrl->xscat) { 1225a11e1b2SBarry Smith ierr = VecCopy(xx,aijcrl->xwork);CHKERRQ(ierr); 123c02b24c5SBarry Smith /* get remote values needed for local part of multiply */ 1245a11e1b2SBarry Smith ierr = VecScatterBegin(aijcrl->xscat,xx,aijcrl->fwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1255a11e1b2SBarry Smith ierr = VecScatterEnd(aijcrl->xscat,xx,aijcrl->fwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1265a11e1b2SBarry Smith xx = aijcrl->xwork; 1272205254eSKarl Rupp } 128c02b24c5SBarry Smith 12981824310SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 13081824310SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 13181824310SBarry Smith 13281824310SBarry Smith #if defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL) 13381824310SBarry Smith fortranmultcrl_(&m,&rmax,x,y,icols,acols); 13481824310SBarry Smith #else 13581824310SBarry Smith 1364be65460SBarry Smith /* first column */ 1372205254eSKarl Rupp for (j=0; j<m; j++) y[j] = acols[j]*x[icols[j]]; 1384be65460SBarry Smith 1394be65460SBarry Smith /* other columns */ 140b409243cSBarry Smith #if defined(PETSC_HAVE_CRAY_VECTOR) 14181824310SBarry Smith #pragma _CRI preferstream 14281824310SBarry Smith #endif 1434be65460SBarry Smith for (i=1; i<rmax; i++) { 14481824310SBarry Smith ii = i*m; 145b409243cSBarry Smith #if defined(PETSC_HAVE_CRAY_VECTOR) 14681824310SBarry Smith #pragma _CRI prefervector 14781824310SBarry Smith #endif 1482205254eSKarl Rupp for (j=0; j<m; j++) y[j] = y[j] + acols[ii+j]*x[icols[ii+j]]; 14981824310SBarry Smith } 150b409243cSBarry Smith #if defined(PETSC_HAVE_CRAY_VECTOR) 15181824310SBarry Smith #pragma _CRI ivdep 15281824310SBarry Smith #endif 15381824310SBarry Smith 15481824310SBarry Smith #endif 1555a11e1b2SBarry Smith ierr = PetscLogFlops(2.0*aijcrl->nz - m);CHKERRQ(ierr); 15681824310SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 15781824310SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 15881824310SBarry Smith PetscFunctionReturn(0); 15981824310SBarry Smith } 16081824310SBarry Smith 16181824310SBarry Smith 1625a11e1b2SBarry Smith /* MatConvert_SeqAIJ_SeqAIJCRL converts a SeqAIJ matrix into a 1635a11e1b2SBarry Smith * SeqAIJCRL matrix. This routine is called by the MatCreate_SeqAIJCRL() 16481824310SBarry Smith * routine, but can also be used to convert an assembled SeqAIJ matrix 1655a11e1b2SBarry Smith * into a SeqAIJCRL one. */ 16681824310SBarry Smith #undef __FUNCT__ 1675a11e1b2SBarry Smith #define __FUNCT__ "MatConvert_SeqAIJ_SeqAIJCRL" 1688cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJCRL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 16981824310SBarry Smith { 17081824310SBarry Smith PetscErrorCode ierr; 17181824310SBarry Smith Mat B = *newmat; 1725a11e1b2SBarry Smith Mat_AIJCRL *aijcrl; 17381824310SBarry Smith 17481824310SBarry Smith PetscFunctionBegin; 17581824310SBarry Smith if (reuse == MAT_INITIAL_MATRIX) { 17681824310SBarry Smith ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 17781824310SBarry Smith } 17881824310SBarry Smith 1795a11e1b2SBarry Smith ierr = PetscNewLog(B,Mat_AIJCRL,&aijcrl);CHKERRQ(ierr); 1805a11e1b2SBarry Smith B->spptr = (void*) aijcrl; 18181824310SBarry Smith 18217667f90SBarry Smith /* Set function pointers for methods that we inherit from AIJ but override. */ 1835a11e1b2SBarry Smith B->ops->duplicate = MatDuplicate_AIJCRL; 1845a11e1b2SBarry Smith B->ops->assemblyend = MatAssemblyEnd_SeqAIJCRL; 1855a11e1b2SBarry Smith B->ops->destroy = MatDestroy_SeqAIJCRL; 1865a11e1b2SBarry Smith B->ops->mult = MatMult_AIJCRL; 18781824310SBarry Smith 18881824310SBarry Smith /* If A has already been assembled, compute the permutation. */ 1894eeb8337SBarry Smith if (A->assembled) { 19096b95a6bSBarry Smith ierr = MatSeqAIJCRL_create_aijcrl(B);CHKERRQ(ierr); 19181824310SBarry Smith } 1925a11e1b2SBarry Smith ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJCRL);CHKERRQ(ierr); 19381824310SBarry Smith *newmat = B; 19481824310SBarry Smith PetscFunctionReturn(0); 19581824310SBarry Smith } 19681824310SBarry Smith 19781824310SBarry Smith #undef __FUNCT__ 1985a11e1b2SBarry Smith #define __FUNCT__ "MatCreateSeqAIJCRL" 19981824310SBarry Smith /*@C 2005a11e1b2SBarry Smith MatCreateSeqAIJCRL - Creates a sparse matrix of type SEQAIJCRL. 20181824310SBarry Smith This type inherits from AIJ, but stores some additional 20281824310SBarry Smith information that is used to allow better vectorization of 20381824310SBarry Smith the matrix-vector product. At the cost of increased storage, the AIJ formatted 20481824310SBarry Smith matrix can be copied to a format in which pieces of the matrix are 20581824310SBarry Smith stored in ELLPACK format, allowing the vectorized matrix multiply 20681824310SBarry Smith routine to use stride-1 memory accesses. As with the AIJ type, it is 20781824310SBarry Smith important to preallocate matrix storage in order to get good assembly 20881824310SBarry Smith performance. 20981824310SBarry Smith 21081824310SBarry Smith Collective on MPI_Comm 21181824310SBarry Smith 21281824310SBarry Smith Input Parameters: 21381824310SBarry Smith + comm - MPI communicator, set to PETSC_COMM_SELF 21481824310SBarry Smith . m - number of rows 21581824310SBarry Smith . n - number of columns 21681824310SBarry Smith . nz - number of nonzeros per row (same for all rows) 21781824310SBarry Smith - nnz - array containing the number of nonzeros in the various rows 2180298fd71SBarry Smith (possibly different for each row) or NULL 21981824310SBarry Smith 22081824310SBarry Smith Output Parameter: 22181824310SBarry Smith . A - the matrix 22281824310SBarry Smith 22381824310SBarry Smith Notes: 22481824310SBarry Smith If nnz is given then nz is ignored 22581824310SBarry Smith 22681824310SBarry Smith Level: intermediate 22781824310SBarry Smith 22881824310SBarry Smith .keywords: matrix, cray, sparse, parallel 22981824310SBarry Smith 2305a11e1b2SBarry Smith .seealso: MatCreate(), MatCreateMPIAIJPERM(), MatSetValues() 23181824310SBarry Smith @*/ 2327087cfbeSBarry Smith PetscErrorCode MatCreateSeqAIJCRL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 23381824310SBarry Smith { 23481824310SBarry Smith PetscErrorCode ierr; 23581824310SBarry Smith 23681824310SBarry Smith PetscFunctionBegin; 23781824310SBarry Smith ierr = MatCreate(comm,A);CHKERRQ(ierr); 23881824310SBarry Smith ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 2395a11e1b2SBarry Smith ierr = MatSetType(*A,MATSEQAIJCRL);CHKERRQ(ierr); 240d28bb7d2SJed Brown ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 24181824310SBarry Smith PetscFunctionReturn(0); 24281824310SBarry Smith } 24381824310SBarry Smith 24481824310SBarry Smith #undef __FUNCT__ 2455a11e1b2SBarry Smith #define __FUNCT__ "MatCreate_SeqAIJCRL" 2468cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJCRL(Mat A) 24781824310SBarry Smith { 24881824310SBarry Smith PetscErrorCode ierr; 24981824310SBarry Smith 25081824310SBarry Smith PetscFunctionBegin; 25181824310SBarry Smith ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 2525a11e1b2SBarry Smith ierr = MatConvert_SeqAIJ_SeqAIJCRL(A,MATSEQAIJCRL,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 25381824310SBarry Smith PetscFunctionReturn(0); 25481824310SBarry Smith } 255b2573a8aSBarry Smith 25681824310SBarry Smith 257