181824310SBarry Smith 281824310SBarry Smith /* 381824310SBarry Smith Defines a matrix-vector product for the MATSEQAIJCRL matrix class. 481824310SBarry Smith This class is derived from the MATSEQAIJ class and retains the 581824310SBarry Smith compressed row storage (aka Yale sparse matrix format) but augments 681824310SBarry Smith it with a column oriented storage that is more efficient for 781824310SBarry Smith matrix vector products on Vector machines. 881824310SBarry Smith 981824310SBarry Smith CRL stands for constant row length (that is the same number of columns 1081824310SBarry Smith is kept (padded with zeros) for each row of the sparse matrix. 1181824310SBarry Smith */ 12c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/crl/crl.h> 1381824310SBarry Smith 145a11e1b2SBarry Smith PetscErrorCode MatDestroy_SeqAIJCRL(Mat A) 1581824310SBarry Smith { 1681824310SBarry Smith PetscErrorCode ierr; 175a11e1b2SBarry Smith Mat_AIJCRL *aijcrl = (Mat_AIJCRL*) A->spptr; 1881824310SBarry Smith 19*362febeeSStefano Zampini PetscFunctionBegin; 205a11e1b2SBarry Smith /* Free everything in the Mat_AIJCRL data structure. */ 21bf0cc555SLisandro Dalcin if (aijcrl) { 225a11e1b2SBarry Smith ierr = PetscFree2(aijcrl->acols,aijcrl->icols);CHKERRQ(ierr); 23bf0cc555SLisandro Dalcin } 24c31cb41cSBarry Smith ierr = PetscFree(A->spptr);CHKERRQ(ierr); 2581824310SBarry Smith ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 264723c594SBarry Smith ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 2781824310SBarry Smith PetscFunctionReturn(0); 2881824310SBarry Smith } 2981824310SBarry Smith 305a11e1b2SBarry Smith PetscErrorCode MatDuplicate_AIJCRL(Mat A, MatDuplicateOption op, Mat *M) 3181824310SBarry Smith { 325a11e1b2SBarry Smith SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot duplicate AIJCRL matrices yet"); 3381824310SBarry Smith } 3481824310SBarry Smith 3596b95a6bSBarry Smith PetscErrorCode MatSeqAIJCRL_create_aijcrl(Mat A) 3681824310SBarry Smith { 3781824310SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)(A)->data; 385a11e1b2SBarry Smith Mat_AIJCRL *aijcrl = (Mat_AIJCRL*) A->spptr; 39d0f46423SBarry Smith PetscInt m = A->rmap->n; /* Number of rows in the matrix. */ 4081824310SBarry Smith PetscInt *aj = a->j; /* From the CSR representation; points to the beginning of each row. */ 4181824310SBarry Smith PetscInt i, j,rmax = a->rmax,*icols, *ilen = a->ilen; 42dd6ea824SBarry Smith MatScalar *aa = a->a; 43dd6ea824SBarry Smith PetscScalar *acols; 4481824310SBarry Smith PetscErrorCode ierr; 4581824310SBarry Smith 4681824310SBarry Smith PetscFunctionBegin; 475a11e1b2SBarry Smith aijcrl->nz = a->nz; 485a11e1b2SBarry Smith aijcrl->m = A->rmap->n; 495a11e1b2SBarry Smith aijcrl->rmax = rmax; 502205254eSKarl Rupp 515a11e1b2SBarry Smith ierr = PetscFree2(aijcrl->acols,aijcrl->icols);CHKERRQ(ierr); 52dcca6d9dSJed Brown ierr = PetscMalloc2(rmax*m,&aijcrl->acols,rmax*m,&aijcrl->icols);CHKERRQ(ierr); 535a11e1b2SBarry Smith acols = aijcrl->acols; 545a11e1b2SBarry Smith icols = aijcrl->icols; 5581824310SBarry Smith for (i=0; i<m; i++) { 5681824310SBarry Smith for (j=0; j<ilen[i]; j++) { 5781824310SBarry Smith acols[j*m+i] = *aa++; 5881824310SBarry Smith icols[j*m+i] = *aj++; 5981824310SBarry Smith } 6081824310SBarry Smith for (; j<rmax; j++) { /* empty column entries */ 6181824310SBarry Smith acols[j*m+i] = 0.0; 6281824310SBarry Smith icols[j*m+i] = (j) ? icols[(j-1)*m+i] : 0; /* handle case where row is EMPTY */ 6381824310SBarry Smith } 6481824310SBarry Smith } 65a2ea699eSBarry Smith ierr = PetscInfo2(A,"Percentage of 0's introduced for vectorized multiply %g. Rmax= %D\n",1.0-((double)a->nz)/((double)(rmax*m)),rmax);CHKERRQ(ierr); 6681824310SBarry Smith PetscFunctionReturn(0); 6781824310SBarry Smith } 6881824310SBarry Smith 695a11e1b2SBarry Smith PetscErrorCode MatAssemblyEnd_SeqAIJCRL(Mat A, MatAssemblyType mode) 7081824310SBarry Smith { 7181824310SBarry Smith PetscErrorCode ierr; 7281824310SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 7381824310SBarry Smith 74c02b24c5SBarry Smith PetscFunctionBegin; 7581824310SBarry Smith a->inode.use = PETSC_FALSE; 762205254eSKarl Rupp 774723c594SBarry Smith ierr = MatAssemblyEnd_SeqAIJ(A,mode);CHKERRQ(ierr); 784723c594SBarry Smith if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 7981824310SBarry Smith 8081824310SBarry Smith /* Now calculate the permutation and grouping information. */ 8196b95a6bSBarry Smith ierr = MatSeqAIJCRL_create_aijcrl(A);CHKERRQ(ierr); 8281824310SBarry Smith PetscFunctionReturn(0); 8381824310SBarry Smith } 8481824310SBarry Smith 85c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/crl/ftn-kernels/fmultcrl.h> 865aeacdfdSBarry Smith 874723c594SBarry Smith /* 885a11e1b2SBarry Smith Shared by both sequential and parallel versions of CRL matrix: MATMPIAIJCRL and MATSEQAIJCRL 894723c594SBarry Smith - the scatter is used only in the parallel version 904723c594SBarry Smith 914723c594SBarry Smith */ 925a11e1b2SBarry Smith PetscErrorCode MatMult_AIJCRL(Mat A,Vec xx,Vec yy) 9381824310SBarry Smith { 945a11e1b2SBarry Smith Mat_AIJCRL *aijcrl = (Mat_AIJCRL*) A->spptr; 955a11e1b2SBarry Smith PetscInt m = aijcrl->m; /* Number of rows in the matrix. */ 965a11e1b2SBarry Smith PetscInt rmax = aijcrl->rmax,*icols = aijcrl->icols; 975a11e1b2SBarry Smith PetscScalar *acols = aijcrl->acols; 9881824310SBarry Smith PetscErrorCode ierr; 99d9ca1df4SBarry Smith PetscScalar *y; 100d9ca1df4SBarry Smith const PetscScalar *x; 10181824310SBarry Smith #if !defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL) 10281824310SBarry Smith PetscInt i,j,ii; 10381824310SBarry Smith #endif 10481824310SBarry Smith 10581824310SBarry Smith #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 10681824310SBarry Smith #pragma disjoint(*x,*y,*aa) 10781824310SBarry Smith #endif 10881824310SBarry Smith 10981824310SBarry Smith PetscFunctionBegin; 1105a11e1b2SBarry Smith if (aijcrl->xscat) { 1115a11e1b2SBarry Smith ierr = VecCopy(xx,aijcrl->xwork);CHKERRQ(ierr); 112c02b24c5SBarry Smith /* get remote values needed for local part of multiply */ 1135a11e1b2SBarry Smith ierr = VecScatterBegin(aijcrl->xscat,xx,aijcrl->fwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1145a11e1b2SBarry Smith ierr = VecScatterEnd(aijcrl->xscat,xx,aijcrl->fwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1155a11e1b2SBarry Smith xx = aijcrl->xwork; 1162205254eSKarl Rupp } 117c02b24c5SBarry Smith 118d9ca1df4SBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 11981824310SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 12081824310SBarry Smith 12181824310SBarry Smith #if defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL) 12281824310SBarry Smith fortranmultcrl_(&m,&rmax,x,y,icols,acols); 12381824310SBarry Smith #else 12481824310SBarry Smith 1254be65460SBarry Smith /* first column */ 1262205254eSKarl Rupp for (j=0; j<m; j++) y[j] = acols[j]*x[icols[j]]; 1274be65460SBarry Smith 1284be65460SBarry Smith /* other columns */ 129b409243cSBarry Smith #if defined(PETSC_HAVE_CRAY_VECTOR) 13081824310SBarry Smith #pragma _CRI preferstream 13181824310SBarry Smith #endif 1324be65460SBarry Smith for (i=1; i<rmax; i++) { 13381824310SBarry Smith ii = i*m; 134b409243cSBarry Smith #if defined(PETSC_HAVE_CRAY_VECTOR) 13581824310SBarry Smith #pragma _CRI prefervector 13681824310SBarry Smith #endif 1372205254eSKarl Rupp for (j=0; j<m; j++) y[j] = y[j] + acols[ii+j]*x[icols[ii+j]]; 13881824310SBarry Smith } 139b409243cSBarry Smith #if defined(PETSC_HAVE_CRAY_VECTOR) 14081824310SBarry Smith #pragma _CRI ivdep 14181824310SBarry Smith #endif 14281824310SBarry Smith 14381824310SBarry Smith #endif 1445a11e1b2SBarry Smith ierr = PetscLogFlops(2.0*aijcrl->nz - m);CHKERRQ(ierr); 145d9ca1df4SBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 14681824310SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 14781824310SBarry Smith PetscFunctionReturn(0); 14881824310SBarry Smith } 14981824310SBarry Smith 1505a11e1b2SBarry Smith /* MatConvert_SeqAIJ_SeqAIJCRL converts a SeqAIJ matrix into a 1515a11e1b2SBarry Smith * SeqAIJCRL matrix. This routine is called by the MatCreate_SeqAIJCRL() 15281824310SBarry Smith * routine, but can also be used to convert an assembled SeqAIJ matrix 1535a11e1b2SBarry Smith * into a SeqAIJCRL one. */ 154cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJCRL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 15581824310SBarry Smith { 15681824310SBarry Smith PetscErrorCode ierr; 15781824310SBarry Smith Mat B = *newmat; 1585a11e1b2SBarry Smith Mat_AIJCRL *aijcrl; 1594099cc6bSBarry Smith PetscBool sametype; 16081824310SBarry Smith 16181824310SBarry Smith PetscFunctionBegin; 16281824310SBarry Smith if (reuse == MAT_INITIAL_MATRIX) { 16381824310SBarry Smith ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 16481824310SBarry Smith } 1654099cc6bSBarry Smith ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 1664099cc6bSBarry Smith if (sametype) PetscFunctionReturn(0); 16781824310SBarry Smith 168b00a9115SJed Brown ierr = PetscNewLog(B,&aijcrl);CHKERRQ(ierr); 1695a11e1b2SBarry Smith B->spptr = (void*) aijcrl; 17081824310SBarry Smith 17117667f90SBarry Smith /* Set function pointers for methods that we inherit from AIJ but override. */ 1725a11e1b2SBarry Smith B->ops->duplicate = MatDuplicate_AIJCRL; 1735a11e1b2SBarry Smith B->ops->assemblyend = MatAssemblyEnd_SeqAIJCRL; 1745a11e1b2SBarry Smith B->ops->destroy = MatDestroy_SeqAIJCRL; 1755a11e1b2SBarry Smith B->ops->mult = MatMult_AIJCRL; 17681824310SBarry Smith 17781824310SBarry Smith /* If A has already been assembled, compute the permutation. */ 1784eeb8337SBarry Smith if (A->assembled) { 17996b95a6bSBarry Smith ierr = MatSeqAIJCRL_create_aijcrl(B);CHKERRQ(ierr); 18081824310SBarry Smith } 1815a11e1b2SBarry Smith ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJCRL);CHKERRQ(ierr); 18281824310SBarry Smith *newmat = B; 18381824310SBarry Smith PetscFunctionReturn(0); 18481824310SBarry Smith } 18581824310SBarry Smith 18681824310SBarry Smith /*@C 1875a11e1b2SBarry Smith MatCreateSeqAIJCRL - Creates a sparse matrix of type SEQAIJCRL. 18881824310SBarry Smith This type inherits from AIJ, but stores some additional 18981824310SBarry Smith information that is used to allow better vectorization of 19081824310SBarry Smith the matrix-vector product. At the cost of increased storage, the AIJ formatted 19181824310SBarry Smith matrix can be copied to a format in which pieces of the matrix are 19281824310SBarry Smith stored in ELLPACK format, allowing the vectorized matrix multiply 19381824310SBarry Smith routine to use stride-1 memory accesses. As with the AIJ type, it is 19481824310SBarry Smith important to preallocate matrix storage in order to get good assembly 19581824310SBarry Smith performance. 19681824310SBarry Smith 197d083f849SBarry Smith Collective 19881824310SBarry Smith 19981824310SBarry Smith Input Parameters: 20081824310SBarry Smith + comm - MPI communicator, set to PETSC_COMM_SELF 20181824310SBarry Smith . m - number of rows 20281824310SBarry Smith . n - number of columns 20381824310SBarry Smith . nz - number of nonzeros per row (same for all rows) 20481824310SBarry Smith - nnz - array containing the number of nonzeros in the various rows 2050298fd71SBarry Smith (possibly different for each row) or NULL 20681824310SBarry Smith 20781824310SBarry Smith Output Parameter: 20881824310SBarry Smith . A - the matrix 20981824310SBarry Smith 21081824310SBarry Smith Notes: 21181824310SBarry Smith If nnz is given then nz is ignored 21281824310SBarry Smith 21381824310SBarry Smith Level: intermediate 21481824310SBarry Smith 2155a11e1b2SBarry Smith .seealso: MatCreate(), MatCreateMPIAIJPERM(), MatSetValues() 21681824310SBarry Smith @*/ 2177087cfbeSBarry Smith PetscErrorCode MatCreateSeqAIJCRL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 21881824310SBarry Smith { 21981824310SBarry Smith PetscErrorCode ierr; 22081824310SBarry Smith 22181824310SBarry Smith PetscFunctionBegin; 22281824310SBarry Smith ierr = MatCreate(comm,A);CHKERRQ(ierr); 22381824310SBarry Smith ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 2245a11e1b2SBarry Smith ierr = MatSetType(*A,MATSEQAIJCRL);CHKERRQ(ierr); 225d28bb7d2SJed Brown ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 22681824310SBarry Smith PetscFunctionReturn(0); 22781824310SBarry Smith } 22881824310SBarry Smith 2298cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJCRL(Mat A) 23081824310SBarry Smith { 23181824310SBarry Smith PetscErrorCode ierr; 23281824310SBarry Smith 23381824310SBarry Smith PetscFunctionBegin; 23481824310SBarry Smith ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 235511c6705SHong Zhang ierr = MatConvert_SeqAIJ_SeqAIJCRL(A,MATSEQAIJCRL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 23681824310SBarry Smith PetscFunctionReturn(0); 23781824310SBarry Smith } 238b2573a8aSBarry Smith 239