15fa1c062SBarry Smith #define PETSCMAT_DLL 25fa1c062SBarry Smith 35fa1c062SBarry Smith /* 41472f72bSBarry Smith Defines a matrix-vector product for the MATMPIAIJCRL matrix class. 51472f72bSBarry Smith This class is derived from the MATMPIAIJ class and retains the 65fa1c062SBarry Smith compressed row storage (aka Yale sparse matrix format) but augments 75fa1c062SBarry Smith it with a column oriented storage that is more efficient for 85fa1c062SBarry Smith matrix vector products on Vector machines. 95fa1c062SBarry Smith 105fa1c062SBarry Smith CRL stands for constant row length (that is the same number of columns 115fa1c062SBarry Smith is kept (padded with zeros) for each row of the sparse matrix. 121472f72bSBarry Smith 131472f72bSBarry Smith See src/mat/impls/aij/seq/crl/crl.c for the sequential version 145fa1c062SBarry Smith */ 155fa1c062SBarry Smith 167c4f633dSBarry Smith #include "../src/mat/impls/aij/mpi/mpiaij.h" 177c4f633dSBarry Smith #include "../src/mat/impls/aij/seq/crl/crl.h" 185fa1c062SBarry Smith 19*4723c594SBarry Smith extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 20*4723c594SBarry Smith 215fa1c062SBarry Smith #undef __FUNCT__ 221472f72bSBarry Smith #define __FUNCT__ "MatDestroy_MPICRL" 231472f72bSBarry Smith PetscErrorCode MatDestroy_MPICRL(Mat A) 245fa1c062SBarry Smith { 255fa1c062SBarry Smith PetscErrorCode ierr; 261472f72bSBarry Smith Mat_CRL *crl = (Mat_CRL *) A->spptr; 275fa1c062SBarry Smith 281472f72bSBarry Smith /* Free everything in the Mat_CRL data structure. */ 295fa1c062SBarry Smith ierr = PetscFree2(crl->acols,crl->icols);CHKERRQ(ierr); 309222b4acSBarry Smith if (crl->fwork) { 319222b4acSBarry Smith ierr = VecDestroy(crl->fwork);CHKERRQ(ierr); 329222b4acSBarry Smith } 339222b4acSBarry Smith if (crl->xwork) { 349222b4acSBarry Smith ierr = VecDestroy(crl->xwork);CHKERRQ(ierr); 359222b4acSBarry Smith } 369222b4acSBarry Smith ierr = PetscFree(crl->array);CHKERRQ(ierr); 375fa1c062SBarry Smith ierr = PetscFree(crl);CHKERRQ(ierr); 38c7c224ccSLisandro Dalcin A->spptr = 0; 395fa1c062SBarry Smith 401472f72bSBarry Smith ierr = PetscObjectChangeTypeName( (PetscObject)A, MATMPIAIJ);CHKERRQ(ierr); 41*4723c594SBarry Smith ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 425fa1c062SBarry Smith PetscFunctionReturn(0); 435fa1c062SBarry Smith } 445fa1c062SBarry Smith 455fa1c062SBarry Smith #undef __FUNCT__ 461472f72bSBarry Smith #define __FUNCT__ "MPICRL_create_crl" 471472f72bSBarry Smith PetscErrorCode MPICRL_create_crl(Mat A) 485fa1c062SBarry Smith { 491472f72bSBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *)(A)->data; 5011285404SBarry Smith Mat_SeqAIJ *Aij = (Mat_SeqAIJ*)(a->A->data), *Bij = (Mat_SeqAIJ*)(a->B->data); 511472f72bSBarry Smith Mat_CRL *crl = (Mat_CRL*) A->spptr; 52d0f46423SBarry Smith PetscInt m = A->rmap->n; /* Number of rows in the matrix. */ 53d0f46423SBarry Smith PetscInt nd = a->A->cmap->n; /* number of columns in diagonal portion */ 541472f72bSBarry Smith PetscInt *aj = Aij->j,*bj = Bij->j; /* From the CSR representation; points to the beginning of each row. */ 551472f72bSBarry Smith PetscInt i, j,rmax = 0,*icols, *ailen = Aij->ilen, *bilen = Bij->ilen; 566873f782SBarry Smith PetscScalar *aa = Aij->a,*ba = Bij->a,*acols,*array; 575fa1c062SBarry Smith PetscErrorCode ierr; 585fa1c062SBarry Smith 595fa1c062SBarry Smith PetscFunctionBegin; 601472f72bSBarry Smith /* determine the row with the most columns */ 611472f72bSBarry Smith for (i=0; i<m; i++) { 621472f72bSBarry Smith rmax = PetscMax(rmax,ailen[i]+bilen[i]); 631472f72bSBarry Smith } 641472f72bSBarry Smith crl->nz = Aij->nz+Bij->nz; 65d0f46423SBarry Smith crl->m = A->rmap->n; 661472f72bSBarry Smith crl->rmax = rmax; 675fa1c062SBarry Smith ierr = PetscMalloc2(rmax*m,PetscScalar,&crl->acols,rmax*m,PetscInt,&crl->icols);CHKERRQ(ierr); 685fa1c062SBarry Smith acols = crl->acols; 695fa1c062SBarry Smith icols = crl->icols; 705fa1c062SBarry Smith for (i=0; i<m; i++) { 711472f72bSBarry Smith for (j=0; j<ailen[i]; j++) { 725fa1c062SBarry Smith acols[j*m+i] = *aa++; 735fa1c062SBarry Smith icols[j*m+i] = *aj++; 745fa1c062SBarry Smith } 751472f72bSBarry Smith for (;j<ailen[i]+bilen[i]; j++) { 761472f72bSBarry Smith acols[j*m+i] = *ba++; 7711285404SBarry Smith icols[j*m+i] = nd + *bj++; 781472f72bSBarry Smith } 795fa1c062SBarry Smith for (;j<rmax; j++) { /* empty column entries */ 805fa1c062SBarry Smith acols[j*m+i] = 0.0; 815fa1c062SBarry Smith icols[j*m+i] = (j) ? icols[(j-1)*m+i] : 0; /* handle case where row is EMPTY */ 825fa1c062SBarry Smith } 835fa1c062SBarry Smith } 84ae15b995SBarry Smith ierr = PetscInfo1(A,"Percentage of 0's introduced for vectorized multiply %g\n",1.0-((double)(crl->nz))/((double)(rmax*m))); 851472f72bSBarry Smith 86d0f46423SBarry Smith ierr = PetscMalloc((a->B->cmap->n+nd)*sizeof(PetscScalar),&array);CHKERRQ(ierr); 87483e0693SBarry Smith /* xwork array is actually B->n+nd long, but we define xwork this length so can copy into it */ 887adad957SLisandro Dalcin ierr = VecCreateMPIWithArray(((PetscObject)A)->comm,nd,PETSC_DECIDE,array,&crl->xwork);CHKERRQ(ierr); 89d0f46423SBarry Smith ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,a->B->cmap->n,array+nd,&crl->fwork);CHKERRQ(ierr); 909222b4acSBarry Smith crl->array = array; 9111285404SBarry Smith crl->xscat = a->Mvctx; 925fa1c062SBarry Smith PetscFunctionReturn(0); 935fa1c062SBarry Smith } 945fa1c062SBarry Smith 95*4723c594SBarry Smith extern PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat,MatAssemblyType); 96*4723c594SBarry Smith 975fa1c062SBarry Smith #undef __FUNCT__ 981472f72bSBarry Smith #define __FUNCT__ "MatAssemblyEnd_MPICRL" 991472f72bSBarry Smith PetscErrorCode MatAssemblyEnd_MPICRL(Mat A, MatAssemblyType mode) 1005fa1c062SBarry Smith { 1015fa1c062SBarry Smith PetscErrorCode ierr; 1021472f72bSBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1031472f72bSBarry Smith Mat_SeqAIJ *Aij = (Mat_SeqAIJ*)(a->A->data), *Bij = (Mat_SeqAIJ*)(a->A->data); 1045fa1c062SBarry Smith 1055fa1c062SBarry Smith PetscFunctionBegin; 1061472f72bSBarry Smith Aij->inode.use = PETSC_FALSE; 1071472f72bSBarry Smith Bij->inode.use = PETSC_FALSE; 108*4723c594SBarry Smith ierr = MatAssemblyEnd_MPIAIJ(A,mode);CHKERRQ(ierr); 109*4723c594SBarry Smith if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1105fa1c062SBarry Smith 1111472f72bSBarry Smith /* Now calculate the permutation and grouping information. */ 1121472f72bSBarry Smith ierr = MPICRL_create_crl(A);CHKERRQ(ierr); 1135fa1c062SBarry Smith PetscFunctionReturn(0); 1145fa1c062SBarry Smith } 1155fa1c062SBarry Smith 1161472f72bSBarry Smith extern PetscErrorCode MatMult_CRL(Mat,Vec,Vec); 1171472f72bSBarry Smith extern PetscErrorCode MatDuplicate_CRL(Mat,MatDuplicateOption,Mat*); 1185fa1c062SBarry Smith 1191472f72bSBarry Smith /* MatConvert_MPIAIJ_MPICRL converts a MPIAIJ matrix into a 1201472f72bSBarry Smith * MPICRL matrix. This routine is called by the MatCreate_MPICRL() 1211472f72bSBarry Smith * routine, but can also be used to convert an assembled MPIAIJ matrix 1221472f72bSBarry Smith * into a MPICRL one. */ 1235fa1c062SBarry Smith EXTERN_C_BEGIN 1245fa1c062SBarry Smith #undef __FUNCT__ 1251472f72bSBarry Smith #define __FUNCT__ "MatConvert_MPIAIJ_MPICRL" 1268cf70c4bSSatish Balay PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat A,const MatType type,MatReuse reuse,Mat *newmat) 1275fa1c062SBarry Smith { 1285fa1c062SBarry Smith PetscErrorCode ierr; 1295fa1c062SBarry Smith Mat B = *newmat; 1301472f72bSBarry Smith Mat_CRL *crl; 1315fa1c062SBarry Smith 1325fa1c062SBarry Smith PetscFunctionBegin; 1335fa1c062SBarry Smith if (reuse == MAT_INITIAL_MATRIX) { 1345fa1c062SBarry Smith ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 1355fa1c062SBarry Smith } 1365fa1c062SBarry Smith 13738f2d2fdSLisandro Dalcin ierr = PetscNewLog(B,Mat_CRL,&crl);CHKERRQ(ierr); 1385fa1c062SBarry Smith B->spptr = (void *) crl; 1395fa1c062SBarry Smith 14017667f90SBarry Smith /* Set function pointers for methods that we inherit from AIJ but override. */ 1411472f72bSBarry Smith B->ops->duplicate = MatDuplicate_CRL; 1421472f72bSBarry Smith B->ops->assemblyend = MatAssemblyEnd_MPICRL; 1431472f72bSBarry Smith B->ops->destroy = MatDestroy_MPICRL; 1441472f72bSBarry Smith B->ops->mult = MatMult_CRL; 1455fa1c062SBarry Smith 1465fa1c062SBarry Smith /* If A has already been assembled, compute the permutation. */ 1474eeb8337SBarry Smith if (A->assembled) { 1481472f72bSBarry Smith ierr = MPICRL_create_crl(B);CHKERRQ(ierr); 1495fa1c062SBarry Smith } 1501472f72bSBarry Smith ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPICRL);CHKERRQ(ierr); 1515fa1c062SBarry Smith *newmat = B; 1525fa1c062SBarry Smith PetscFunctionReturn(0); 1535fa1c062SBarry Smith } 1545fa1c062SBarry Smith EXTERN_C_END 1555fa1c062SBarry Smith 1565fa1c062SBarry Smith 1575fa1c062SBarry Smith #undef __FUNCT__ 1581472f72bSBarry Smith #define __FUNCT__ "MatCreateMPICRL" 1595fa1c062SBarry Smith /*@C 1601472f72bSBarry Smith MatCreateMPICRL - Creates a sparse matrix of type MPICRL. 1615fa1c062SBarry Smith This type inherits from AIJ, but stores some additional 1625fa1c062SBarry Smith information that is used to allow better vectorization of 1635fa1c062SBarry Smith the matrix-vector product. At the cost of increased storage, the AIJ formatted 1645fa1c062SBarry Smith matrix can be copied to a format in which pieces of the matrix are 1655fa1c062SBarry Smith stored in ELLPACK format, allowing the vectorized matrix multiply 1665fa1c062SBarry Smith routine to use stride-1 memory accesses. As with the AIJ type, it is 1675fa1c062SBarry Smith important to preallocate matrix storage in order to get good assembly 1685fa1c062SBarry Smith performance. 1695fa1c062SBarry Smith 1705fa1c062SBarry Smith Collective on MPI_Comm 1715fa1c062SBarry Smith 1725fa1c062SBarry Smith Input Parameters: 1735fa1c062SBarry Smith + comm - MPI communicator, set to PETSC_COMM_SELF 1745fa1c062SBarry Smith . m - number of rows 1755fa1c062SBarry Smith . n - number of columns 1765fa1c062SBarry Smith . nz - number of nonzeros per row (same for all rows) 1775fa1c062SBarry Smith - nnz - array containing the number of nonzeros in the various rows 1785fa1c062SBarry Smith (possibly different for each row) or PETSC_NULL 1795fa1c062SBarry Smith 1805fa1c062SBarry Smith Output Parameter: 1815fa1c062SBarry Smith . A - the matrix 1825fa1c062SBarry Smith 1835fa1c062SBarry Smith Notes: 1845fa1c062SBarry Smith If nnz is given then nz is ignored 1855fa1c062SBarry Smith 1865fa1c062SBarry Smith Level: intermediate 1875fa1c062SBarry Smith 1885fa1c062SBarry Smith .keywords: matrix, cray, sparse, parallel 1895fa1c062SBarry Smith 1905fa1c062SBarry Smith .seealso: MatCreate(), MatCreateMPICSRPERM(), MatSetValues() 1915fa1c062SBarry Smith @*/ 1921472f72bSBarry Smith PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPICRL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],PetscInt onz,const PetscInt onnz[],Mat *A) 1935fa1c062SBarry Smith { 1945fa1c062SBarry Smith PetscErrorCode ierr; 1955fa1c062SBarry Smith 1965fa1c062SBarry Smith PetscFunctionBegin; 1975fa1c062SBarry Smith ierr = MatCreate(comm,A);CHKERRQ(ierr); 1985fa1c062SBarry Smith ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 1991472f72bSBarry Smith ierr = MatSetType(*A,MATMPICRL);CHKERRQ(ierr); 2001472f72bSBarry Smith ierr = MatMPIAIJSetPreallocation_MPIAIJ(*A,nz,(PetscInt*)nnz,onz,(PetscInt*)onnz);CHKERRQ(ierr); 2015fa1c062SBarry Smith PetscFunctionReturn(0); 2025fa1c062SBarry Smith } 2035fa1c062SBarry Smith 2045fa1c062SBarry Smith 2055fa1c062SBarry Smith EXTERN_C_BEGIN 2065fa1c062SBarry Smith #undef __FUNCT__ 2071472f72bSBarry Smith #define __FUNCT__ "MatCreate_MPICRL" 2081472f72bSBarry Smith PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPICRL(Mat A) 2095fa1c062SBarry Smith { 2105fa1c062SBarry Smith PetscErrorCode ierr; 2115fa1c062SBarry Smith 2125fa1c062SBarry Smith PetscFunctionBegin; 2131472f72bSBarry Smith ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 2141472f72bSBarry Smith ierr = MatConvert_MPIAIJ_MPICRL(A,MATMPICRL,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 2155fa1c062SBarry Smith PetscFunctionReturn(0); 2165fa1c062SBarry Smith } 2175fa1c062SBarry Smith EXTERN_C_END 2185fa1c062SBarry Smith 219