181824310SBarry Smith #define PETSCMAT_DLL 281824310SBarry Smith 381824310SBarry Smith /* 481824310SBarry Smith Defines a matrix-vector product for the MATSEQAIJCRL matrix class. 581824310SBarry Smith This class is derived from the MATSEQAIJ class and retains the 681824310SBarry Smith compressed row storage (aka Yale sparse matrix format) but augments 781824310SBarry Smith it with a column oriented storage that is more efficient for 881824310SBarry Smith matrix vector products on Vector machines. 981824310SBarry Smith 1081824310SBarry Smith CRL stands for constant row length (that is the same number of columns 1181824310SBarry Smith is kept (padded with zeros) for each row of the sparse matrix. 1281824310SBarry Smith */ 13c02b24c5SBarry Smith #include "src/mat/impls/aij/seq/crl/crl.h" 1481824310SBarry Smith 1581824310SBarry Smith #undef __FUNCT__ 1681824310SBarry Smith #define __FUNCT__ "MatDestroy_SeqCRL" 1781824310SBarry Smith PetscErrorCode MatDestroy_SeqCRL(Mat A) 1881824310SBarry Smith { 1981824310SBarry Smith PetscErrorCode ierr; 20c02b24c5SBarry Smith Mat_CRL *crl = (Mat_CRL *) A->spptr; 2181824310SBarry Smith 2281824310SBarry Smith /* We are going to convert A back into a SEQAIJ matrix, since we are 23c02b24c5SBarry Smith * eventually going to use MatDestroy() to destroy everything 2481824310SBarry Smith * that is not specific to CRL. 2581824310SBarry Smith * In preparation for this, reset the operations pointers in A to 2681824310SBarry Smith * their SeqAIJ versions. */ 27c02b24c5SBarry Smith A->ops->assemblyend = crl->AssemblyEnd; 28c02b24c5SBarry Smith A->ops->destroy = crl->MatDestroy; 29c02b24c5SBarry Smith A->ops->duplicate = crl->MatDuplicate; 3081824310SBarry Smith 31c02b24c5SBarry Smith /* Free everything in the Mat_CRL data structure. */ 3281824310SBarry Smith ierr = PetscFree2(crl->acols,crl->icols);CHKERRQ(ierr); 3305b42c5fSBarry Smith 34c02b24c5SBarry Smith /* Change the type of A back to SEQAIJ and use MatDestroy() 3581824310SBarry Smith * to destroy everything that remains. */ 3681824310SBarry Smith ierr = PetscObjectChangeTypeName( (PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 3781824310SBarry Smith /* Note that I don't call MatSetType(). I believe this is because that 3881824310SBarry Smith * is only to be called when *building* a matrix. */ 3981824310SBarry Smith ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 4081824310SBarry Smith PetscFunctionReturn(0); 4181824310SBarry Smith } 4281824310SBarry Smith 43c02b24c5SBarry Smith PetscErrorCode MatDuplicate_CRL(Mat A, MatDuplicateOption op, Mat *M) 4481824310SBarry Smith { 4581824310SBarry Smith PetscErrorCode ierr; 46c02b24c5SBarry Smith Mat_CRL *crl = (Mat_CRL *) A->spptr; 4781824310SBarry Smith 4881824310SBarry Smith PetscFunctionBegin; 49c02b24c5SBarry Smith ierr = (*crl->MatDuplicate)(A,op,M);CHKERRQ(ierr); 5081824310SBarry Smith SETERRQ(PETSC_ERR_SUP,"Cannot duplicate CRL matrices yet"); 5181824310SBarry Smith PetscFunctionReturn(0); 5281824310SBarry Smith } 5381824310SBarry Smith 5481824310SBarry Smith #undef __FUNCT__ 5581824310SBarry Smith #define __FUNCT__ "SeqCRL_create_crl" 5681824310SBarry Smith PetscErrorCode SeqCRL_create_crl(Mat A) 5781824310SBarry Smith { 5881824310SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)(A)->data; 59c02b24c5SBarry Smith Mat_CRL *crl = (Mat_CRL*) A->spptr; 60899cda47SBarry Smith PetscInt m = A->rmap.n; /* Number of rows in the matrix. */ 6181824310SBarry Smith PetscInt *aj = a->j; /* From the CSR representation; points to the beginning of each row. */ 6281824310SBarry Smith PetscInt i, j,rmax = a->rmax,*icols, *ilen = a->ilen; 6381824310SBarry Smith PetscScalar *aa = a->a,*acols; 6481824310SBarry Smith PetscErrorCode ierr; 6581824310SBarry Smith 6681824310SBarry Smith PetscFunctionBegin; 67c02b24c5SBarry Smith crl->nz = a->nz; 68899cda47SBarry Smith crl->m = A->rmap.n; 69c02b24c5SBarry Smith crl->rmax = rmax; 7081824310SBarry Smith ierr = PetscMalloc2(rmax*m,PetscScalar,&crl->acols,rmax*m,PetscInt,&crl->icols);CHKERRQ(ierr); 7181824310SBarry Smith acols = crl->acols; 7281824310SBarry Smith icols = crl->icols; 7381824310SBarry Smith for (i=0; i<m; i++) { 7481824310SBarry Smith for (j=0; j<ilen[i]; j++) { 7581824310SBarry Smith acols[j*m+i] = *aa++; 7681824310SBarry Smith icols[j*m+i] = *aj++; 7781824310SBarry Smith } 7881824310SBarry Smith for (;j<rmax; j++) { /* empty column entries */ 7981824310SBarry Smith acols[j*m+i] = 0.0; 8081824310SBarry Smith icols[j*m+i] = (j) ? icols[(j-1)*m+i] : 0; /* handle case where row is EMPTY */ 8181824310SBarry Smith } 8281824310SBarry Smith } 83ae15b995SBarry Smith ierr = PetscInfo2(A,"Percentage of 0's introduced for vectorized multiply %G. Rmax= %D\n",1.0-((double)a->nz)/((double)(rmax*m)),rmax); 8481824310SBarry Smith PetscFunctionReturn(0); 8581824310SBarry Smith } 8681824310SBarry Smith 8781824310SBarry Smith #undef __FUNCT__ 8881824310SBarry Smith #define __FUNCT__ "MatAssemblyEnd_SeqCRL" 8981824310SBarry Smith PetscErrorCode MatAssemblyEnd_SeqCRL(Mat A, MatAssemblyType mode) 9081824310SBarry Smith { 9181824310SBarry Smith PetscErrorCode ierr; 92c02b24c5SBarry Smith Mat_CRL *crl = (Mat_CRL*) A->spptr; 9381824310SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 9481824310SBarry Smith 95c02b24c5SBarry Smith PetscFunctionBegin; 9681824310SBarry Smith if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 9781824310SBarry Smith 9881824310SBarry Smith /* Since a MATSEQCRL matrix is really just a MATSEQAIJ with some 9981824310SBarry Smith * extra information, call the AssemblyEnd routine for a MATSEQAIJ. 10081824310SBarry Smith * I'm not sure if this is the best way to do this, but it avoids 10181824310SBarry Smith * a lot of code duplication. 10281824310SBarry Smith * I also note that currently MATSEQCRL doesn't know anything about 10381824310SBarry Smith * the Mat_CompressedRow data structure that SeqAIJ now uses when there 10481824310SBarry Smith * are many zero rows. If the SeqAIJ assembly end routine decides to use 10581824310SBarry Smith * this, this may break things. (Don't know... haven't looked at it.) */ 10681824310SBarry Smith a->inode.use = PETSC_FALSE; 107c02b24c5SBarry Smith (*crl->AssemblyEnd)(A, mode); 10881824310SBarry Smith 10981824310SBarry Smith /* Now calculate the permutation and grouping information. */ 11081824310SBarry Smith ierr = SeqCRL_create_crl(A);CHKERRQ(ierr); 11181824310SBarry Smith PetscFunctionReturn(0); 11281824310SBarry Smith } 11381824310SBarry Smith 1145aeacdfdSBarry Smith #include "src/inline/dot.h" 1155aeacdfdSBarry Smith 11681824310SBarry Smith #undef __FUNCT__ 117c02b24c5SBarry Smith #define __FUNCT__ "MatMult_CRL" 118c02b24c5SBarry Smith PetscErrorCode MatMult_CRL(Mat A,Vec xx,Vec yy) 11981824310SBarry Smith { 120c02b24c5SBarry Smith Mat_CRL *crl = (Mat_CRL*) A->spptr; 121c02b24c5SBarry Smith PetscInt m = crl->m; /* Number of rows in the matrix. */ 122c02b24c5SBarry Smith PetscInt rmax = crl->rmax,*icols = crl->icols; 12381824310SBarry Smith PetscScalar *acols = crl->acols; 12481824310SBarry Smith PetscErrorCode ierr; 12581824310SBarry Smith PetscScalar *x,*y; 12681824310SBarry Smith #if !defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL) 12781824310SBarry Smith PetscInt i,j,ii; 12881824310SBarry Smith #endif 12981824310SBarry Smith 13081824310SBarry Smith 13181824310SBarry Smith #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 13281824310SBarry Smith #pragma disjoint(*x,*y,*aa) 13381824310SBarry Smith #endif 13481824310SBarry Smith 13581824310SBarry Smith PetscFunctionBegin; 136c02b24c5SBarry Smith if (crl->xscat) { 137c02b24c5SBarry Smith ierr = VecCopy(xx,crl->xwork);CHKERRQ(ierr); 138c02b24c5SBarry Smith /* get remote values needed for local part of multiply */ 139ca9f406cSSatish Balay ierr = VecScatterBegin(crl->xscat,xx,crl->fwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 140ca9f406cSSatish Balay ierr = VecScatterEnd(crl->xscat,xx,crl->fwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 141c02b24c5SBarry Smith xx = crl->xwork; 142c02b24c5SBarry Smith }; 143c02b24c5SBarry Smith 14481824310SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 14581824310SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 14681824310SBarry Smith 14781824310SBarry Smith #if defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL) 14881824310SBarry Smith fortranmultcrl_(&m,&rmax,x,y,icols,acols); 14981824310SBarry Smith #else 15081824310SBarry Smith 1514be65460SBarry Smith /* first column */ 1524be65460SBarry Smith for (j=0; j<m; j++) { 1534be65460SBarry Smith y[j] = acols[j]*x[icols[j]]; 1544be65460SBarry Smith } 1554be65460SBarry Smith 1564be65460SBarry Smith /* other columns */ 15781824310SBarry Smith #if defined(PETSC_HAVE_CRAYC) 15881824310SBarry Smith #pragma _CRI preferstream 15981824310SBarry Smith #endif 1604be65460SBarry Smith for (i=1; i<rmax; i++) { 16181824310SBarry Smith ii = i*m; 16281824310SBarry Smith #if defined(PETSC_HAVE_CRAYC) 16381824310SBarry Smith #pragma _CRI prefervector 16481824310SBarry Smith #endif 16581824310SBarry Smith for (j=0; j<m; j++) { 16681824310SBarry Smith y[j] = y[j] + acols[ii+j]*x[icols[ii+j]]; 16781824310SBarry Smith } 16881824310SBarry Smith } 16981824310SBarry Smith #if defined(PETSC_HAVE_CRAYC) 17081824310SBarry Smith #pragma _CRI ivdep 17181824310SBarry Smith #endif 17281824310SBarry Smith 17381824310SBarry Smith #endif 174483e0693SBarry Smith ierr = PetscLogFlops(2*crl->nz - m);CHKERRQ(ierr); 17581824310SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 17681824310SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 17781824310SBarry Smith PetscFunctionReturn(0); 17881824310SBarry Smith } 17981824310SBarry Smith 18081824310SBarry Smith 18181824310SBarry Smith /* MatConvert_SeqAIJ_SeqCRL converts a SeqAIJ matrix into a 18281824310SBarry Smith * SeqCRL matrix. This routine is called by the MatCreate_SeqCRL() 18381824310SBarry Smith * routine, but can also be used to convert an assembled SeqAIJ matrix 18481824310SBarry Smith * into a SeqCRL one. */ 18581824310SBarry Smith EXTERN_C_BEGIN 18681824310SBarry Smith #undef __FUNCT__ 18781824310SBarry Smith #define __FUNCT__ "MatConvert_SeqAIJ_SeqCRL" 18881824310SBarry Smith PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqAIJ_SeqCRL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 18981824310SBarry Smith { 19081824310SBarry Smith PetscErrorCode ierr; 19181824310SBarry Smith Mat B = *newmat; 192c02b24c5SBarry Smith Mat_CRL *crl; 19381824310SBarry Smith 19481824310SBarry Smith PetscFunctionBegin; 19581824310SBarry Smith if (reuse == MAT_INITIAL_MATRIX) { 19681824310SBarry Smith ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 19781824310SBarry Smith } 19881824310SBarry Smith 199*38f2d2fdSLisandro Dalcin ierr = PetscNewLog(B,Mat_CRL,&crl);CHKERRQ(ierr); 20081824310SBarry Smith B->spptr = (void *) crl; 20181824310SBarry Smith 202c02b24c5SBarry Smith crl->AssemblyEnd = A->ops->assemblyend; 203c02b24c5SBarry Smith crl->MatDestroy = A->ops->destroy; 204c02b24c5SBarry Smith crl->MatDuplicate = A->ops->duplicate; 20581824310SBarry Smith 20617667f90SBarry Smith /* Set function pointers for methods that we inherit from AIJ but override. */ 207c02b24c5SBarry Smith B->ops->duplicate = MatDuplicate_CRL; 20881824310SBarry Smith B->ops->assemblyend = MatAssemblyEnd_SeqCRL; 20981824310SBarry Smith B->ops->destroy = MatDestroy_SeqCRL; 210c02b24c5SBarry Smith B->ops->mult = MatMult_CRL; 21181824310SBarry Smith 21281824310SBarry Smith /* If A has already been assembled, compute the permutation. */ 21381824310SBarry Smith if (A->assembled == PETSC_TRUE) { 21481824310SBarry Smith ierr = SeqCRL_create_crl(B);CHKERRQ(ierr); 21581824310SBarry Smith } 21681824310SBarry Smith ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQCRL);CHKERRQ(ierr); 21781824310SBarry Smith *newmat = B; 21881824310SBarry Smith PetscFunctionReturn(0); 21981824310SBarry Smith } 22081824310SBarry Smith EXTERN_C_END 22181824310SBarry Smith 22281824310SBarry Smith 22381824310SBarry Smith #undef __FUNCT__ 22481824310SBarry Smith #define __FUNCT__ "MatCreateSeqCRL" 22581824310SBarry Smith /*@C 22681824310SBarry Smith MatCreateSeqCRL - Creates a sparse matrix of type SEQCRL. 22781824310SBarry Smith This type inherits from AIJ, but stores some additional 22881824310SBarry Smith information that is used to allow better vectorization of 22981824310SBarry Smith the matrix-vector product. At the cost of increased storage, the AIJ formatted 23081824310SBarry Smith matrix can be copied to a format in which pieces of the matrix are 23181824310SBarry Smith stored in ELLPACK format, allowing the vectorized matrix multiply 23281824310SBarry Smith routine to use stride-1 memory accesses. As with the AIJ type, it is 23381824310SBarry Smith important to preallocate matrix storage in order to get good assembly 23481824310SBarry Smith performance. 23581824310SBarry Smith 23681824310SBarry Smith Collective on MPI_Comm 23781824310SBarry Smith 23881824310SBarry Smith Input Parameters: 23981824310SBarry Smith + comm - MPI communicator, set to PETSC_COMM_SELF 24081824310SBarry Smith . m - number of rows 24181824310SBarry Smith . n - number of columns 24281824310SBarry Smith . nz - number of nonzeros per row (same for all rows) 24381824310SBarry Smith - nnz - array containing the number of nonzeros in the various rows 24481824310SBarry Smith (possibly different for each row) or PETSC_NULL 24581824310SBarry Smith 24681824310SBarry Smith Output Parameter: 24781824310SBarry Smith . A - the matrix 24881824310SBarry Smith 24981824310SBarry Smith Notes: 25081824310SBarry Smith If nnz is given then nz is ignored 25181824310SBarry Smith 25281824310SBarry Smith Level: intermediate 25381824310SBarry Smith 25481824310SBarry Smith .keywords: matrix, cray, sparse, parallel 25581824310SBarry Smith 25681824310SBarry Smith .seealso: MatCreate(), MatCreateMPICSRPERM(), MatSetValues() 25781824310SBarry Smith @*/ 25881824310SBarry Smith PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqCRL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 25981824310SBarry Smith { 26081824310SBarry Smith PetscErrorCode ierr; 26181824310SBarry Smith 26281824310SBarry Smith PetscFunctionBegin; 26381824310SBarry Smith ierr = MatCreate(comm,A);CHKERRQ(ierr); 26481824310SBarry Smith ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 26581824310SBarry Smith ierr = MatSetType(*A,MATSEQCRL);CHKERRQ(ierr); 26681824310SBarry Smith ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);CHKERRQ(ierr); 26781824310SBarry Smith PetscFunctionReturn(0); 26881824310SBarry Smith } 26981824310SBarry Smith 27081824310SBarry Smith 27181824310SBarry Smith EXTERN_C_BEGIN 27281824310SBarry Smith #undef __FUNCT__ 27381824310SBarry Smith #define __FUNCT__ "MatCreate_SeqCRL" 27481824310SBarry Smith PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqCRL(Mat A) 27581824310SBarry Smith { 27681824310SBarry Smith PetscErrorCode ierr; 27781824310SBarry Smith 27881824310SBarry Smith PetscFunctionBegin; 27981824310SBarry Smith ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 28081824310SBarry Smith ierr = MatConvert_SeqAIJ_SeqCRL(A,MATSEQCRL,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 28181824310SBarry Smith PetscFunctionReturn(0); 28281824310SBarry Smith } 28381824310SBarry Smith EXTERN_C_END 28481824310SBarry Smith 285