15fa1c062SBarry Smith #define PETSCMAT_DLL 25fa1c062SBarry Smith 35fa1c062SBarry Smith /* 41472f72bSBarry Smith Defines a matrix-vector product for the MATMPIAIJCRL matrix class. 51472f72bSBarry Smith This class is derived from the MATMPIAIJ class and retains the 65fa1c062SBarry Smith compressed row storage (aka Yale sparse matrix format) but augments 75fa1c062SBarry Smith it with a column oriented storage that is more efficient for 85fa1c062SBarry Smith matrix vector products on Vector machines. 95fa1c062SBarry Smith 105fa1c062SBarry Smith CRL stands for constant row length (that is the same number of columns 115fa1c062SBarry Smith is kept (padded with zeros) for each row of the sparse matrix. 121472f72bSBarry Smith 131472f72bSBarry Smith See src/mat/impls/aij/seq/crl/crl.c for the sequential version 145fa1c062SBarry Smith */ 155fa1c062SBarry Smith 167c4f633dSBarry Smith #include "../src/mat/impls/aij/mpi/mpiaij.h" 177c4f633dSBarry Smith #include "../src/mat/impls/aij/seq/crl/crl.h" 185fa1c062SBarry Smith 195fa1c062SBarry Smith #undef __FUNCT__ 201472f72bSBarry Smith #define __FUNCT__ "MatDestroy_MPICRL" 211472f72bSBarry Smith PetscErrorCode MatDestroy_MPICRL(Mat A) 225fa1c062SBarry Smith { 235fa1c062SBarry Smith PetscErrorCode ierr; 241472f72bSBarry Smith Mat_CRL *crl = (Mat_CRL *) A->spptr; 255fa1c062SBarry Smith 261472f72bSBarry Smith /* We are going to convert A back into a MPIAIJ matrix, since we are 271472f72bSBarry Smith * eventually going to use MatDestroy_MPIAIJ() to destroy everything 285fa1c062SBarry Smith * that is not specific to CRL. 295fa1c062SBarry Smith * In preparation for this, reset the operations pointers in A to 301472f72bSBarry Smith * their MPIAIJ versions. */ 311472f72bSBarry Smith A->ops->assemblyend = crl->AssemblyEnd; 321472f72bSBarry Smith A->ops->destroy = crl->MatDestroy; 331472f72bSBarry Smith A->ops->duplicate = crl->MatDuplicate; 345fa1c062SBarry Smith 351472f72bSBarry Smith /* Free everything in the Mat_CRL data structure. */ 365fa1c062SBarry Smith ierr = PetscFree2(crl->acols,crl->icols);CHKERRQ(ierr); 379222b4acSBarry Smith if (crl->fwork) { 389222b4acSBarry Smith ierr = VecDestroy(crl->fwork);CHKERRQ(ierr); 399222b4acSBarry Smith } 409222b4acSBarry Smith if (crl->xwork) { 419222b4acSBarry Smith ierr = VecDestroy(crl->xwork);CHKERRQ(ierr); 429222b4acSBarry Smith } 439222b4acSBarry Smith ierr = PetscFree(crl->array);CHKERRQ(ierr); 445fa1c062SBarry Smith ierr = PetscFree(crl);CHKERRQ(ierr); 45c7c224ccSLisandro Dalcin A->spptr = 0; 465fa1c062SBarry Smith 471472f72bSBarry Smith /* Change the type of A back to MPIAIJ and use MatDestroy_MPIAIJ() 485fa1c062SBarry Smith * to destroy everything that remains. */ 491472f72bSBarry Smith ierr = PetscObjectChangeTypeName( (PetscObject)A, MATMPIAIJ);CHKERRQ(ierr); 505fa1c062SBarry Smith /* Note that I don't call MatSetType(). I believe this is because that 515fa1c062SBarry Smith * is only to be called when *building* a matrix. */ 525fa1c062SBarry Smith ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 535fa1c062SBarry Smith PetscFunctionReturn(0); 545fa1c062SBarry Smith } 555fa1c062SBarry Smith 565fa1c062SBarry Smith #undef __FUNCT__ 571472f72bSBarry Smith #define __FUNCT__ "MPICRL_create_crl" 581472f72bSBarry Smith PetscErrorCode MPICRL_create_crl(Mat A) 595fa1c062SBarry Smith { 601472f72bSBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *)(A)->data; 6111285404SBarry Smith Mat_SeqAIJ *Aij = (Mat_SeqAIJ*)(a->A->data), *Bij = (Mat_SeqAIJ*)(a->B->data); 621472f72bSBarry Smith Mat_CRL *crl = (Mat_CRL*) A->spptr; 63d0f46423SBarry Smith PetscInt m = A->rmap->n; /* Number of rows in the matrix. */ 64d0f46423SBarry Smith PetscInt nd = a->A->cmap->n; /* number of columns in diagonal portion */ 651472f72bSBarry Smith PetscInt *aj = Aij->j,*bj = Bij->j; /* From the CSR representation; points to the beginning of each row. */ 661472f72bSBarry Smith PetscInt i, j,rmax = 0,*icols, *ailen = Aij->ilen, *bilen = Bij->ilen; 676873f782SBarry Smith PetscScalar *aa = Aij->a,*ba = Bij->a,*acols,*array; 685fa1c062SBarry Smith PetscErrorCode ierr; 695fa1c062SBarry Smith 705fa1c062SBarry Smith PetscFunctionBegin; 711472f72bSBarry Smith /* determine the row with the most columns */ 721472f72bSBarry Smith for (i=0; i<m; i++) { 731472f72bSBarry Smith rmax = PetscMax(rmax,ailen[i]+bilen[i]); 741472f72bSBarry Smith } 751472f72bSBarry Smith crl->nz = Aij->nz+Bij->nz; 76d0f46423SBarry Smith crl->m = A->rmap->n; 771472f72bSBarry Smith crl->rmax = rmax; 785fa1c062SBarry Smith ierr = PetscMalloc2(rmax*m,PetscScalar,&crl->acols,rmax*m,PetscInt,&crl->icols);CHKERRQ(ierr); 795fa1c062SBarry Smith acols = crl->acols; 805fa1c062SBarry Smith icols = crl->icols; 815fa1c062SBarry Smith for (i=0; i<m; i++) { 821472f72bSBarry Smith for (j=0; j<ailen[i]; j++) { 835fa1c062SBarry Smith acols[j*m+i] = *aa++; 845fa1c062SBarry Smith icols[j*m+i] = *aj++; 855fa1c062SBarry Smith } 861472f72bSBarry Smith for (;j<ailen[i]+bilen[i]; j++) { 871472f72bSBarry Smith acols[j*m+i] = *ba++; 8811285404SBarry Smith icols[j*m+i] = nd + *bj++; 891472f72bSBarry Smith } 905fa1c062SBarry Smith for (;j<rmax; j++) { /* empty column entries */ 915fa1c062SBarry Smith acols[j*m+i] = 0.0; 925fa1c062SBarry Smith icols[j*m+i] = (j) ? icols[(j-1)*m+i] : 0; /* handle case where row is EMPTY */ 935fa1c062SBarry Smith } 945fa1c062SBarry Smith } 95ae15b995SBarry Smith ierr = PetscInfo1(A,"Percentage of 0's introduced for vectorized multiply %g\n",1.0-((double)(crl->nz))/((double)(rmax*m))); 961472f72bSBarry Smith 97d0f46423SBarry Smith ierr = PetscMalloc((a->B->cmap->n+nd)*sizeof(PetscScalar),&array);CHKERRQ(ierr); 98483e0693SBarry Smith /* xwork array is actually B->n+nd long, but we define xwork this length so can copy into it */ 997adad957SLisandro Dalcin ierr = VecCreateMPIWithArray(((PetscObject)A)->comm,nd,PETSC_DECIDE,array,&crl->xwork);CHKERRQ(ierr); 100d0f46423SBarry Smith ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,a->B->cmap->n,array+nd,&crl->fwork);CHKERRQ(ierr); 1019222b4acSBarry Smith crl->array = array; 10211285404SBarry Smith crl->xscat = a->Mvctx; 1035fa1c062SBarry Smith PetscFunctionReturn(0); 1045fa1c062SBarry Smith } 1055fa1c062SBarry Smith 1065fa1c062SBarry Smith #undef __FUNCT__ 1071472f72bSBarry Smith #define __FUNCT__ "MatAssemblyEnd_MPICRL" 1081472f72bSBarry Smith PetscErrorCode MatAssemblyEnd_MPICRL(Mat A, MatAssemblyType mode) 1095fa1c062SBarry Smith { 1105fa1c062SBarry Smith PetscErrorCode ierr; 1111472f72bSBarry Smith Mat_CRL *crl = (Mat_CRL*) A->spptr; 1121472f72bSBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1131472f72bSBarry Smith Mat_SeqAIJ *Aij = (Mat_SeqAIJ*)(a->A->data), *Bij = (Mat_SeqAIJ*)(a->A->data); 1145fa1c062SBarry Smith 1155fa1c062SBarry Smith PetscFunctionBegin; 1161472f72bSBarry Smith if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1175fa1c062SBarry Smith 1181472f72bSBarry Smith /* Since a MATMPICRL matrix is really just a MATMPIAIJ with some 1191472f72bSBarry Smith * extra information, call the AssemblyEnd routine for a MATMPIAIJ. 1201472f72bSBarry Smith * I'm not sure if this is the best way to do this, but it avoids 1211472f72bSBarry Smith * a lot of code duplication. 1221472f72bSBarry Smith * I also note that currently MATMPICRL doesn't know anything about 1231472f72bSBarry Smith * the Mat_CompressedRow data structure that MPIAIJ now uses when there 1241472f72bSBarry Smith * are many zero rows. If the MPIAIJ assembly end routine decides to use 1251472f72bSBarry Smith * this, this may break things. (Don't know... haven't looked at it.) */ 1261472f72bSBarry Smith Aij->inode.use = PETSC_FALSE; 1271472f72bSBarry Smith Bij->inode.use = PETSC_FALSE; 1281472f72bSBarry Smith (*crl->AssemblyEnd)(A, mode); 1295fa1c062SBarry Smith 1301472f72bSBarry Smith /* Now calculate the permutation and grouping information. */ 1311472f72bSBarry Smith ierr = MPICRL_create_crl(A);CHKERRQ(ierr); 1325fa1c062SBarry Smith PetscFunctionReturn(0); 1335fa1c062SBarry Smith } 1345fa1c062SBarry Smith 1351472f72bSBarry Smith extern PetscErrorCode MatMult_CRL(Mat,Vec,Vec); 1361472f72bSBarry Smith extern PetscErrorCode MatDuplicate_CRL(Mat,MatDuplicateOption,Mat*); 1375fa1c062SBarry Smith 1381472f72bSBarry Smith /* MatConvert_MPIAIJ_MPICRL converts a MPIAIJ matrix into a 1391472f72bSBarry Smith * MPICRL matrix. This routine is called by the MatCreate_MPICRL() 1401472f72bSBarry Smith * routine, but can also be used to convert an assembled MPIAIJ matrix 1411472f72bSBarry Smith * into a MPICRL one. */ 1425fa1c062SBarry Smith EXTERN_C_BEGIN 1435fa1c062SBarry Smith #undef __FUNCT__ 1441472f72bSBarry Smith #define __FUNCT__ "MatConvert_MPIAIJ_MPICRL" 1458cf70c4bSSatish Balay PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat A,const MatType type,MatReuse reuse,Mat *newmat) 1465fa1c062SBarry Smith { 1475fa1c062SBarry Smith PetscErrorCode ierr; 1485fa1c062SBarry Smith Mat B = *newmat; 1491472f72bSBarry Smith Mat_CRL *crl; 1505fa1c062SBarry Smith 1515fa1c062SBarry Smith PetscFunctionBegin; 1525fa1c062SBarry Smith if (reuse == MAT_INITIAL_MATRIX) { 1535fa1c062SBarry Smith ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 1545fa1c062SBarry Smith } 1555fa1c062SBarry Smith 15638f2d2fdSLisandro Dalcin ierr = PetscNewLog(B,Mat_CRL,&crl);CHKERRQ(ierr); 1575fa1c062SBarry Smith B->spptr = (void *) crl; 1585fa1c062SBarry Smith 1591472f72bSBarry Smith crl->AssemblyEnd = A->ops->assemblyend; 1601472f72bSBarry Smith crl->MatDestroy = A->ops->destroy; 1611472f72bSBarry Smith crl->MatDuplicate = A->ops->duplicate; 1625fa1c062SBarry Smith 16317667f90SBarry Smith /* Set function pointers for methods that we inherit from AIJ but override. */ 1641472f72bSBarry Smith B->ops->duplicate = MatDuplicate_CRL; 1651472f72bSBarry Smith B->ops->assemblyend = MatAssemblyEnd_MPICRL; 1661472f72bSBarry Smith B->ops->destroy = MatDestroy_MPICRL; 1671472f72bSBarry Smith B->ops->mult = MatMult_CRL; 1685fa1c062SBarry Smith 1695fa1c062SBarry Smith /* If A has already been assembled, compute the permutation. */ 170*4eeb8337SBarry Smith if (A->assembled) { 1711472f72bSBarry Smith ierr = MPICRL_create_crl(B);CHKERRQ(ierr); 1725fa1c062SBarry Smith } 1731472f72bSBarry Smith ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPICRL);CHKERRQ(ierr); 1745fa1c062SBarry Smith *newmat = B; 1755fa1c062SBarry Smith PetscFunctionReturn(0); 1765fa1c062SBarry Smith } 1775fa1c062SBarry Smith EXTERN_C_END 1785fa1c062SBarry Smith 1795fa1c062SBarry Smith 1805fa1c062SBarry Smith #undef __FUNCT__ 1811472f72bSBarry Smith #define __FUNCT__ "MatCreateMPICRL" 1825fa1c062SBarry Smith /*@C 1831472f72bSBarry Smith MatCreateMPICRL - Creates a sparse matrix of type MPICRL. 1845fa1c062SBarry Smith This type inherits from AIJ, but stores some additional 1855fa1c062SBarry Smith information that is used to allow better vectorization of 1865fa1c062SBarry Smith the matrix-vector product. At the cost of increased storage, the AIJ formatted 1875fa1c062SBarry Smith matrix can be copied to a format in which pieces of the matrix are 1885fa1c062SBarry Smith stored in ELLPACK format, allowing the vectorized matrix multiply 1895fa1c062SBarry Smith routine to use stride-1 memory accesses. As with the AIJ type, it is 1905fa1c062SBarry Smith important to preallocate matrix storage in order to get good assembly 1915fa1c062SBarry Smith performance. 1925fa1c062SBarry Smith 1935fa1c062SBarry Smith Collective on MPI_Comm 1945fa1c062SBarry Smith 1955fa1c062SBarry Smith Input Parameters: 1965fa1c062SBarry Smith + comm - MPI communicator, set to PETSC_COMM_SELF 1975fa1c062SBarry Smith . m - number of rows 1985fa1c062SBarry Smith . n - number of columns 1995fa1c062SBarry Smith . nz - number of nonzeros per row (same for all rows) 2005fa1c062SBarry Smith - nnz - array containing the number of nonzeros in the various rows 2015fa1c062SBarry Smith (possibly different for each row) or PETSC_NULL 2025fa1c062SBarry Smith 2035fa1c062SBarry Smith Output Parameter: 2045fa1c062SBarry Smith . A - the matrix 2055fa1c062SBarry Smith 2065fa1c062SBarry Smith Notes: 2075fa1c062SBarry Smith If nnz is given then nz is ignored 2085fa1c062SBarry Smith 2095fa1c062SBarry Smith Level: intermediate 2105fa1c062SBarry Smith 2115fa1c062SBarry Smith .keywords: matrix, cray, sparse, parallel 2125fa1c062SBarry Smith 2135fa1c062SBarry Smith .seealso: MatCreate(), MatCreateMPICSRPERM(), MatSetValues() 2145fa1c062SBarry Smith @*/ 2151472f72bSBarry Smith PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPICRL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],PetscInt onz,const PetscInt onnz[],Mat *A) 2165fa1c062SBarry Smith { 2175fa1c062SBarry Smith PetscErrorCode ierr; 2185fa1c062SBarry Smith 2195fa1c062SBarry Smith PetscFunctionBegin; 2205fa1c062SBarry Smith ierr = MatCreate(comm,A);CHKERRQ(ierr); 2215fa1c062SBarry Smith ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 2221472f72bSBarry Smith ierr = MatSetType(*A,MATMPICRL);CHKERRQ(ierr); 2231472f72bSBarry Smith ierr = MatMPIAIJSetPreallocation_MPIAIJ(*A,nz,(PetscInt*)nnz,onz,(PetscInt*)onnz);CHKERRQ(ierr); 2245fa1c062SBarry Smith PetscFunctionReturn(0); 2255fa1c062SBarry Smith } 2265fa1c062SBarry Smith 2275fa1c062SBarry Smith 2285fa1c062SBarry Smith EXTERN_C_BEGIN 2295fa1c062SBarry Smith #undef __FUNCT__ 2301472f72bSBarry Smith #define __FUNCT__ "MatCreate_MPICRL" 2311472f72bSBarry Smith PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPICRL(Mat A) 2325fa1c062SBarry Smith { 2335fa1c062SBarry Smith PetscErrorCode ierr; 2345fa1c062SBarry Smith 2355fa1c062SBarry Smith PetscFunctionBegin; 2361472f72bSBarry Smith ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 2371472f72bSBarry Smith ierr = MatConvert_MPIAIJ_MPICRL(A,MATMPICRL,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 2385fa1c062SBarry Smith PetscFunctionReturn(0); 2395fa1c062SBarry Smith } 2405fa1c062SBarry Smith EXTERN_C_END 2415fa1c062SBarry Smith 242