181824310SBarry Smith 281824310SBarry Smith /* 381824310SBarry Smith Defines a matrix-vector product for the MATSEQAIJCRL matrix class. 481824310SBarry Smith This class is derived from the MATSEQAIJ class and retains the 581824310SBarry Smith compressed row storage (aka Yale sparse matrix format) but augments 681824310SBarry Smith it with a column oriented storage that is more efficient for 781824310SBarry Smith matrix vector products on Vector machines. 881824310SBarry Smith 981824310SBarry Smith CRL stands for constant row length (that is the same number of columns 1081824310SBarry Smith is kept (padded with zeros) for each row of the sparse matrix. 1181824310SBarry Smith */ 12*c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/crl/crl.h> 1381824310SBarry Smith 1481824310SBarry Smith #undef __FUNCT__ 155a11e1b2SBarry Smith #define __FUNCT__ "MatDestroy_SeqAIJCRL" 165a11e1b2SBarry Smith PetscErrorCode MatDestroy_SeqAIJCRL(Mat A) 1781824310SBarry Smith { 1881824310SBarry Smith PetscErrorCode ierr; 195a11e1b2SBarry Smith Mat_AIJCRL *aijcrl = (Mat_AIJCRL *) A->spptr; 2081824310SBarry Smith 215a11e1b2SBarry Smith /* Free everything in the Mat_AIJCRL data structure. */ 225a11e1b2SBarry Smith ierr = PetscFree2(aijcrl->acols,aijcrl->icols);CHKERRQ(ierr); 23c31cb41cSBarry Smith ierr = PetscFree(A->spptr);CHKERRQ(ierr); 2405b42c5fSBarry Smith 2581824310SBarry Smith ierr = PetscObjectChangeTypeName( (PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 264723c594SBarry Smith ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 2781824310SBarry Smith PetscFunctionReturn(0); 2881824310SBarry Smith } 2981824310SBarry Smith 305a11e1b2SBarry Smith PetscErrorCode MatDuplicate_AIJCRL(Mat A, MatDuplicateOption op, Mat *M) 3181824310SBarry Smith { 3281824310SBarry Smith PetscFunctionBegin; 335a11e1b2SBarry Smith SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot duplicate AIJCRL matrices yet"); 3481824310SBarry Smith PetscFunctionReturn(0); 3581824310SBarry Smith } 3681824310SBarry Smith 3781824310SBarry Smith #undef __FUNCT__ 385a11e1b2SBarry Smith #define __FUNCT__ "SeqAIJCRL_create_aijcrl" 395a11e1b2SBarry Smith PetscErrorCode SeqAIJCRL_create_aijcrl(Mat A) 4081824310SBarry Smith { 4181824310SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)(A)->data; 425a11e1b2SBarry Smith Mat_AIJCRL *aijcrl = (Mat_AIJCRL*) A->spptr; 43d0f46423SBarry Smith PetscInt m = A->rmap->n; /* Number of rows in the matrix. */ 4481824310SBarry Smith PetscInt *aj = a->j; /* From the CSR representation; points to the beginning of each row. */ 4581824310SBarry Smith PetscInt i, j,rmax = a->rmax,*icols, *ilen = a->ilen; 46dd6ea824SBarry Smith MatScalar *aa = a->a; 47dd6ea824SBarry Smith PetscScalar *acols; 4881824310SBarry Smith PetscErrorCode ierr; 4981824310SBarry Smith 5081824310SBarry Smith PetscFunctionBegin; 515a11e1b2SBarry Smith aijcrl->nz = a->nz; 525a11e1b2SBarry Smith aijcrl->m = A->rmap->n; 535a11e1b2SBarry Smith aijcrl->rmax = rmax; 545a11e1b2SBarry Smith ierr = PetscFree2(aijcrl->acols,aijcrl->icols);CHKERRQ(ierr); 555a11e1b2SBarry Smith ierr = PetscMalloc2(rmax*m,PetscScalar,&aijcrl->acols,rmax*m,PetscInt,&aijcrl->icols);CHKERRQ(ierr); 565a11e1b2SBarry Smith acols = aijcrl->acols; 575a11e1b2SBarry Smith icols = aijcrl->icols; 5881824310SBarry Smith for (i=0; i<m; i++) { 5981824310SBarry Smith for (j=0; j<ilen[i]; j++) { 6081824310SBarry Smith acols[j*m+i] = *aa++; 6181824310SBarry Smith icols[j*m+i] = *aj++; 6281824310SBarry Smith } 6381824310SBarry Smith for (;j<rmax; j++) { /* empty column entries */ 6481824310SBarry Smith acols[j*m+i] = 0.0; 6581824310SBarry Smith icols[j*m+i] = (j) ? icols[(j-1)*m+i] : 0; /* handle case where row is EMPTY */ 6681824310SBarry Smith } 6781824310SBarry Smith } 68ae15b995SBarry Smith ierr = PetscInfo2(A,"Percentage of 0's introduced for vectorized multiply %G. Rmax= %D\n",1.0-((double)a->nz)/((double)(rmax*m)),rmax); 6981824310SBarry Smith PetscFunctionReturn(0); 7081824310SBarry Smith } 7181824310SBarry Smith 724723c594SBarry Smith extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 734723c594SBarry Smith 7481824310SBarry Smith #undef __FUNCT__ 755a11e1b2SBarry Smith #define __FUNCT__ "MatAssemblyEnd_SeqAIJCRL" 765a11e1b2SBarry Smith PetscErrorCode MatAssemblyEnd_SeqAIJCRL(Mat A, MatAssemblyType mode) 7781824310SBarry Smith { 7881824310SBarry Smith PetscErrorCode ierr; 7981824310SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 8081824310SBarry Smith 81c02b24c5SBarry Smith PetscFunctionBegin; 8281824310SBarry Smith a->inode.use = PETSC_FALSE; 834723c594SBarry Smith ierr = MatAssemblyEnd_SeqAIJ(A,mode);CHKERRQ(ierr); 844723c594SBarry Smith if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 8581824310SBarry Smith 8681824310SBarry Smith /* Now calculate the permutation and grouping information. */ 875a11e1b2SBarry Smith ierr = SeqAIJCRL_create_aijcrl(A);CHKERRQ(ierr); 8881824310SBarry Smith PetscFunctionReturn(0); 8981824310SBarry Smith } 9081824310SBarry Smith 91*c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/crl/ftn-kernels/fmultcrl.h> 925aeacdfdSBarry Smith 9381824310SBarry Smith #undef __FUNCT__ 945a11e1b2SBarry Smith #define __FUNCT__ "MatMult_AIJCRL" 954723c594SBarry Smith /* 965a11e1b2SBarry Smith Shared by both sequential and parallel versions of CRL matrix: MATMPIAIJCRL and MATSEQAIJCRL 974723c594SBarry Smith - the scatter is used only in the parallel version 984723c594SBarry Smith 994723c594SBarry Smith */ 1005a11e1b2SBarry Smith PetscErrorCode MatMult_AIJCRL(Mat A,Vec xx,Vec yy) 10181824310SBarry Smith { 1025a11e1b2SBarry Smith Mat_AIJCRL *aijcrl = (Mat_AIJCRL*) A->spptr; 1035a11e1b2SBarry Smith PetscInt m = aijcrl->m; /* Number of rows in the matrix. */ 1045a11e1b2SBarry Smith PetscInt rmax = aijcrl->rmax,*icols = aijcrl->icols; 1055a11e1b2SBarry Smith PetscScalar *acols = aijcrl->acols; 10681824310SBarry Smith PetscErrorCode ierr; 10781824310SBarry Smith PetscScalar *x,*y; 10881824310SBarry Smith #if !defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL) 10981824310SBarry Smith PetscInt i,j,ii; 11081824310SBarry Smith #endif 11181824310SBarry Smith 11281824310SBarry Smith 11381824310SBarry Smith #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 11481824310SBarry Smith #pragma disjoint(*x,*y,*aa) 11581824310SBarry Smith #endif 11681824310SBarry Smith 11781824310SBarry Smith PetscFunctionBegin; 1185a11e1b2SBarry Smith if (aijcrl->xscat) { 1195a11e1b2SBarry Smith ierr = VecCopy(xx,aijcrl->xwork);CHKERRQ(ierr); 120c02b24c5SBarry Smith /* get remote values needed for local part of multiply */ 1215a11e1b2SBarry Smith ierr = VecScatterBegin(aijcrl->xscat,xx,aijcrl->fwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1225a11e1b2SBarry Smith ierr = VecScatterEnd(aijcrl->xscat,xx,aijcrl->fwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1235a11e1b2SBarry Smith xx = aijcrl->xwork; 124c02b24c5SBarry Smith }; 125c02b24c5SBarry Smith 12681824310SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 12781824310SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 12881824310SBarry Smith 12981824310SBarry Smith #if defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL) 13081824310SBarry Smith fortranmultcrl_(&m,&rmax,x,y,icols,acols); 13181824310SBarry Smith #else 13281824310SBarry Smith 1334be65460SBarry Smith /* first column */ 1344be65460SBarry Smith for (j=0; j<m; j++) { 1354be65460SBarry Smith y[j] = acols[j]*x[icols[j]]; 1364be65460SBarry Smith } 1374be65460SBarry Smith 1384be65460SBarry Smith /* other columns */ 139b409243cSBarry Smith #if defined(PETSC_HAVE_CRAY_VECTOR) 14081824310SBarry Smith #pragma _CRI preferstream 14181824310SBarry Smith #endif 1424be65460SBarry Smith for (i=1; i<rmax; i++) { 14381824310SBarry Smith ii = i*m; 144b409243cSBarry Smith #if defined(PETSC_HAVE_CRAY_VECTOR) 14581824310SBarry Smith #pragma _CRI prefervector 14681824310SBarry Smith #endif 14781824310SBarry Smith for (j=0; j<m; j++) { 14881824310SBarry Smith y[j] = y[j] + acols[ii+j]*x[icols[ii+j]]; 14981824310SBarry Smith } 15081824310SBarry Smith } 151b409243cSBarry Smith #if defined(PETSC_HAVE_CRAY_VECTOR) 15281824310SBarry Smith #pragma _CRI ivdep 15381824310SBarry Smith #endif 15481824310SBarry Smith 15581824310SBarry Smith #endif 1565a11e1b2SBarry Smith ierr = PetscLogFlops(2.0*aijcrl->nz - m);CHKERRQ(ierr); 15781824310SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 15881824310SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 15981824310SBarry Smith PetscFunctionReturn(0); 16081824310SBarry Smith } 16181824310SBarry Smith 16281824310SBarry Smith 1635a11e1b2SBarry Smith /* MatConvert_SeqAIJ_SeqAIJCRL converts a SeqAIJ matrix into a 1645a11e1b2SBarry Smith * SeqAIJCRL matrix. This routine is called by the MatCreate_SeqAIJCRL() 16581824310SBarry Smith * routine, but can also be used to convert an assembled SeqAIJ matrix 1665a11e1b2SBarry Smith * into a SeqAIJCRL one. */ 16781824310SBarry Smith EXTERN_C_BEGIN 16881824310SBarry Smith #undef __FUNCT__ 1695a11e1b2SBarry Smith #define __FUNCT__ "MatConvert_SeqAIJ_SeqAIJCRL" 1707087cfbeSBarry Smith PetscErrorCode MatConvert_SeqAIJ_SeqAIJCRL(Mat A,const MatType type,MatReuse reuse,Mat *newmat) 17181824310SBarry Smith { 17281824310SBarry Smith PetscErrorCode ierr; 17381824310SBarry Smith Mat B = *newmat; 1745a11e1b2SBarry Smith Mat_AIJCRL *aijcrl; 17581824310SBarry Smith 17681824310SBarry Smith PetscFunctionBegin; 17781824310SBarry Smith if (reuse == MAT_INITIAL_MATRIX) { 17881824310SBarry Smith ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 17981824310SBarry Smith } 18081824310SBarry Smith 1815a11e1b2SBarry Smith ierr = PetscNewLog(B,Mat_AIJCRL,&aijcrl);CHKERRQ(ierr); 1825a11e1b2SBarry Smith B->spptr = (void *) aijcrl; 18381824310SBarry Smith 18417667f90SBarry Smith /* Set function pointers for methods that we inherit from AIJ but override. */ 1855a11e1b2SBarry Smith B->ops->duplicate = MatDuplicate_AIJCRL; 1865a11e1b2SBarry Smith B->ops->assemblyend = MatAssemblyEnd_SeqAIJCRL; 1875a11e1b2SBarry Smith B->ops->destroy = MatDestroy_SeqAIJCRL; 1885a11e1b2SBarry Smith B->ops->mult = MatMult_AIJCRL; 18981824310SBarry Smith 19081824310SBarry Smith /* If A has already been assembled, compute the permutation. */ 1914eeb8337SBarry Smith if (A->assembled) { 1925a11e1b2SBarry Smith ierr = SeqAIJCRL_create_aijcrl(B);CHKERRQ(ierr); 19381824310SBarry Smith } 1945a11e1b2SBarry Smith ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJCRL);CHKERRQ(ierr); 19581824310SBarry Smith *newmat = B; 19681824310SBarry Smith PetscFunctionReturn(0); 19781824310SBarry Smith } 19881824310SBarry Smith EXTERN_C_END 19981824310SBarry Smith 20081824310SBarry Smith 20181824310SBarry Smith #undef __FUNCT__ 2025a11e1b2SBarry Smith #define __FUNCT__ "MatCreateSeqAIJCRL" 20381824310SBarry Smith /*@C 2045a11e1b2SBarry Smith MatCreateSeqAIJCRL - Creates a sparse matrix of type SEQAIJCRL. 20581824310SBarry Smith This type inherits from AIJ, but stores some additional 20681824310SBarry Smith information that is used to allow better vectorization of 20781824310SBarry Smith the matrix-vector product. At the cost of increased storage, the AIJ formatted 20881824310SBarry Smith matrix can be copied to a format in which pieces of the matrix are 20981824310SBarry Smith stored in ELLPACK format, allowing the vectorized matrix multiply 21081824310SBarry Smith routine to use stride-1 memory accesses. As with the AIJ type, it is 21181824310SBarry Smith important to preallocate matrix storage in order to get good assembly 21281824310SBarry Smith performance. 21381824310SBarry Smith 21481824310SBarry Smith Collective on MPI_Comm 21581824310SBarry Smith 21681824310SBarry Smith Input Parameters: 21781824310SBarry Smith + comm - MPI communicator, set to PETSC_COMM_SELF 21881824310SBarry Smith . m - number of rows 21981824310SBarry Smith . n - number of columns 22081824310SBarry Smith . nz - number of nonzeros per row (same for all rows) 22181824310SBarry Smith - nnz - array containing the number of nonzeros in the various rows 22281824310SBarry Smith (possibly different for each row) or PETSC_NULL 22381824310SBarry Smith 22481824310SBarry Smith Output Parameter: 22581824310SBarry Smith . A - the matrix 22681824310SBarry Smith 22781824310SBarry Smith Notes: 22881824310SBarry Smith If nnz is given then nz is ignored 22981824310SBarry Smith 23081824310SBarry Smith Level: intermediate 23181824310SBarry Smith 23281824310SBarry Smith .keywords: matrix, cray, sparse, parallel 23381824310SBarry Smith 2345a11e1b2SBarry Smith .seealso: MatCreate(), MatCreateMPIAIJPERM(), MatSetValues() 23581824310SBarry Smith @*/ 2367087cfbeSBarry Smith PetscErrorCode MatCreateSeqAIJCRL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 23781824310SBarry Smith { 23881824310SBarry Smith PetscErrorCode ierr; 23981824310SBarry Smith 24081824310SBarry Smith PetscFunctionBegin; 24181824310SBarry Smith ierr = MatCreate(comm,A);CHKERRQ(ierr); 24281824310SBarry Smith ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 2435a11e1b2SBarry Smith ierr = MatSetType(*A,MATSEQAIJCRL);CHKERRQ(ierr); 244d28bb7d2SJed Brown ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 24581824310SBarry Smith PetscFunctionReturn(0); 24681824310SBarry Smith } 24781824310SBarry Smith 24881824310SBarry Smith 24981824310SBarry Smith EXTERN_C_BEGIN 25081824310SBarry Smith #undef __FUNCT__ 2515a11e1b2SBarry Smith #define __FUNCT__ "MatCreate_SeqAIJCRL" 2527087cfbeSBarry Smith PetscErrorCode MatCreate_SeqAIJCRL(Mat A) 25381824310SBarry Smith { 25481824310SBarry Smith PetscErrorCode ierr; 25581824310SBarry Smith 25681824310SBarry Smith PetscFunctionBegin; 25781824310SBarry Smith ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 2585a11e1b2SBarry Smith ierr = MatConvert_SeqAIJ_SeqAIJCRL(A,MATSEQAIJCRL,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 25981824310SBarry Smith PetscFunctionReturn(0); 26081824310SBarry Smith } 26181824310SBarry Smith EXTERN_C_END 26281824310SBarry Smith 263