15fa1c062SBarry Smith #define PETSCMAT_DLL 25fa1c062SBarry Smith 35fa1c062SBarry Smith /* 41472f72bSBarry Smith Defines a matrix-vector product for the MATMPIAIJCRL matrix class. 51472f72bSBarry Smith This class is derived from the MATMPIAIJ class and retains the 65fa1c062SBarry Smith compressed row storage (aka Yale sparse matrix format) but augments 75fa1c062SBarry Smith it with a column oriented storage that is more efficient for 85fa1c062SBarry Smith matrix vector products on Vector machines. 95fa1c062SBarry Smith 105fa1c062SBarry Smith CRL stands for constant row length (that is the same number of columns 115fa1c062SBarry Smith is kept (padded with zeros) for each row of the sparse matrix. 121472f72bSBarry Smith 131472f72bSBarry Smith See src/mat/impls/aij/seq/crl/crl.c for the sequential version 145fa1c062SBarry Smith */ 155fa1c062SBarry Smith 167c4f633dSBarry Smith #include "../src/mat/impls/aij/mpi/mpiaij.h" 177c4f633dSBarry Smith #include "../src/mat/impls/aij/seq/crl/crl.h" 185fa1c062SBarry Smith 194723c594SBarry Smith extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 204723c594SBarry Smith 215fa1c062SBarry Smith #undef __FUNCT__ 225a11e1b2SBarry Smith #define __FUNCT__ "MatDestroy_MPIAIJCRL" 235a11e1b2SBarry Smith PetscErrorCode MatDestroy_MPIAIJCRL(Mat A) 245fa1c062SBarry Smith { 255fa1c062SBarry Smith PetscErrorCode ierr; 265a11e1b2SBarry Smith Mat_AIJCRL *aijcrl = (Mat_AIJCRL *) A->spptr; 275fa1c062SBarry Smith 285a11e1b2SBarry Smith /* Free everything in the Mat_AIJCRL data structure. */ 295a11e1b2SBarry Smith ierr = PetscFree2(aijcrl->acols,aijcrl->icols);CHKERRQ(ierr); 305a11e1b2SBarry Smith if (aijcrl->fwork) { 315a11e1b2SBarry Smith ierr = VecDestroy(aijcrl->fwork);CHKERRQ(ierr); 329222b4acSBarry Smith } 335a11e1b2SBarry Smith if (aijcrl->xwork) { 345a11e1b2SBarry Smith ierr = VecDestroy(aijcrl->xwork);CHKERRQ(ierr); 359222b4acSBarry Smith } 365a11e1b2SBarry Smith ierr = PetscFree(aijcrl->array);CHKERRQ(ierr); 37*c31cb41cSBarry Smith ierr = PetscFree(A->spptr);CHKERRQ(ierr); 385fa1c062SBarry Smith 391472f72bSBarry Smith ierr = PetscObjectChangeTypeName( (PetscObject)A, MATMPIAIJ);CHKERRQ(ierr); 404723c594SBarry Smith ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 415fa1c062SBarry Smith PetscFunctionReturn(0); 425fa1c062SBarry Smith } 435fa1c062SBarry Smith 445fa1c062SBarry Smith #undef __FUNCT__ 455a11e1b2SBarry Smith #define __FUNCT__ "MPIAIJCRL_create_aijcrl" 465a11e1b2SBarry Smith PetscErrorCode MPIAIJCRL_create_aijcrl(Mat A) 475fa1c062SBarry Smith { 481472f72bSBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *)(A)->data; 4911285404SBarry Smith Mat_SeqAIJ *Aij = (Mat_SeqAIJ*)(a->A->data), *Bij = (Mat_SeqAIJ*)(a->B->data); 505a11e1b2SBarry Smith Mat_AIJCRL *aijcrl = (Mat_AIJCRL*) A->spptr; 51d0f46423SBarry Smith PetscInt m = A->rmap->n; /* Number of rows in the matrix. */ 52d0f46423SBarry Smith PetscInt nd = a->A->cmap->n; /* number of columns in diagonal portion */ 531472f72bSBarry Smith PetscInt *aj = Aij->j,*bj = Bij->j; /* From the CSR representation; points to the beginning of each row. */ 541472f72bSBarry Smith PetscInt i, j,rmax = 0,*icols, *ailen = Aij->ilen, *bilen = Bij->ilen; 556873f782SBarry Smith PetscScalar *aa = Aij->a,*ba = Bij->a,*acols,*array; 565fa1c062SBarry Smith PetscErrorCode ierr; 575fa1c062SBarry Smith 585fa1c062SBarry Smith PetscFunctionBegin; 591472f72bSBarry Smith /* determine the row with the most columns */ 601472f72bSBarry Smith for (i=0; i<m; i++) { 611472f72bSBarry Smith rmax = PetscMax(rmax,ailen[i]+bilen[i]); 621472f72bSBarry Smith } 635a11e1b2SBarry Smith aijcrl->nz = Aij->nz+Bij->nz; 645a11e1b2SBarry Smith aijcrl->m = A->rmap->n; 655a11e1b2SBarry Smith aijcrl->rmax = rmax; 665a11e1b2SBarry Smith ierr = PetscFree2(aijcrl->acols,aijcrl->icols);CHKERRQ(ierr); 675a11e1b2SBarry Smith ierr = PetscMalloc2(rmax*m,PetscScalar,&aijcrl->acols,rmax*m,PetscInt,&aijcrl->icols);CHKERRQ(ierr); 685a11e1b2SBarry Smith acols = aijcrl->acols; 695a11e1b2SBarry Smith icols = aijcrl->icols; 705fa1c062SBarry Smith for (i=0; i<m; i++) { 711472f72bSBarry Smith for (j=0; j<ailen[i]; j++) { 725fa1c062SBarry Smith acols[j*m+i] = *aa++; 735fa1c062SBarry Smith icols[j*m+i] = *aj++; 745fa1c062SBarry Smith } 751472f72bSBarry Smith for (;j<ailen[i]+bilen[i]; j++) { 761472f72bSBarry Smith acols[j*m+i] = *ba++; 7711285404SBarry Smith icols[j*m+i] = nd + *bj++; 781472f72bSBarry Smith } 795fa1c062SBarry Smith for (;j<rmax; j++) { /* empty column entries */ 805fa1c062SBarry Smith acols[j*m+i] = 0.0; 815fa1c062SBarry Smith icols[j*m+i] = (j) ? icols[(j-1)*m+i] : 0; /* handle case where row is EMPTY */ 825fa1c062SBarry Smith } 835fa1c062SBarry Smith } 845a11e1b2SBarry Smith ierr = PetscInfo1(A,"Percentage of 0's introduced for vectorized multiply %g\n",1.0-((double)(aijcrl->nz))/((double)(rmax*m))); 851472f72bSBarry Smith 865a11e1b2SBarry Smith ierr = PetscFree(aijcrl->array);CHKERRQ(ierr); 87d0f46423SBarry Smith ierr = PetscMalloc((a->B->cmap->n+nd)*sizeof(PetscScalar),&array);CHKERRQ(ierr); 88483e0693SBarry Smith /* xwork array is actually B->n+nd long, but we define xwork this length so can copy into it */ 895a11e1b2SBarry Smith if (aijcrl->xwork) {ierr = VecDestroy(aijcrl->xwork);CHKERRQ(ierr);} 905a11e1b2SBarry Smith ierr = VecCreateMPIWithArray(((PetscObject)A)->comm,nd,PETSC_DECIDE,array,&aijcrl->xwork);CHKERRQ(ierr); 915a11e1b2SBarry Smith if (aijcrl->fwork) {ierr = VecDestroy(aijcrl->fwork);CHKERRQ(ierr);} 925a11e1b2SBarry Smith ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,a->B->cmap->n,array+nd,&aijcrl->fwork);CHKERRQ(ierr); 935a11e1b2SBarry Smith aijcrl->array = array; 945a11e1b2SBarry Smith aijcrl->xscat = a->Mvctx; 955fa1c062SBarry Smith PetscFunctionReturn(0); 965fa1c062SBarry Smith } 975fa1c062SBarry Smith 984723c594SBarry Smith extern PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat,MatAssemblyType); 994723c594SBarry Smith 1005fa1c062SBarry Smith #undef __FUNCT__ 1015a11e1b2SBarry Smith #define __FUNCT__ "MatAssemblyEnd_MPIAIJCRL" 1025a11e1b2SBarry Smith PetscErrorCode MatAssemblyEnd_MPIAIJCRL(Mat A, MatAssemblyType mode) 1035fa1c062SBarry Smith { 1045fa1c062SBarry Smith PetscErrorCode ierr; 1051472f72bSBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1061472f72bSBarry Smith Mat_SeqAIJ *Aij = (Mat_SeqAIJ*)(a->A->data), *Bij = (Mat_SeqAIJ*)(a->A->data); 1075fa1c062SBarry Smith 1085fa1c062SBarry Smith PetscFunctionBegin; 1091472f72bSBarry Smith Aij->inode.use = PETSC_FALSE; 1101472f72bSBarry Smith Bij->inode.use = PETSC_FALSE; 1114723c594SBarry Smith ierr = MatAssemblyEnd_MPIAIJ(A,mode);CHKERRQ(ierr); 1124723c594SBarry Smith if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1135fa1c062SBarry Smith 1141472f72bSBarry Smith /* Now calculate the permutation and grouping information. */ 1155a11e1b2SBarry Smith ierr = MPIAIJCRL_create_aijcrl(A);CHKERRQ(ierr); 1165fa1c062SBarry Smith PetscFunctionReturn(0); 1175fa1c062SBarry Smith } 1185fa1c062SBarry Smith 1195a11e1b2SBarry Smith extern PetscErrorCode MatMult_AIJCRL(Mat,Vec,Vec); 1205a11e1b2SBarry Smith extern PetscErrorCode MatDuplicate_AIJCRL(Mat,MatDuplicateOption,Mat*); 1215fa1c062SBarry Smith 1225a11e1b2SBarry Smith /* MatConvert_MPIAIJ_MPIAIJCRL converts a MPIAIJ matrix into a 1235a11e1b2SBarry Smith * MPIAIJCRL matrix. This routine is called by the MatCreate_MPIAIJCRL() 1241472f72bSBarry Smith * routine, but can also be used to convert an assembled MPIAIJ matrix 1255a11e1b2SBarry Smith * into a MPIAIJCRL one. */ 1265fa1c062SBarry Smith EXTERN_C_BEGIN 1275fa1c062SBarry Smith #undef __FUNCT__ 1285a11e1b2SBarry Smith #define __FUNCT__ "MatConvert_MPIAIJ_MPIAIJCRL" 1297087cfbeSBarry Smith PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat A,const MatType type,MatReuse reuse,Mat *newmat) 1305fa1c062SBarry Smith { 1315fa1c062SBarry Smith PetscErrorCode ierr; 1325fa1c062SBarry Smith Mat B = *newmat; 1335a11e1b2SBarry Smith Mat_AIJCRL *aijcrl; 1345fa1c062SBarry Smith 1355fa1c062SBarry Smith PetscFunctionBegin; 1365fa1c062SBarry Smith if (reuse == MAT_INITIAL_MATRIX) { 1375fa1c062SBarry Smith ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 1385fa1c062SBarry Smith } 1395fa1c062SBarry Smith 1405a11e1b2SBarry Smith ierr = PetscNewLog(B,Mat_AIJCRL,&aijcrl);CHKERRQ(ierr); 1415a11e1b2SBarry Smith B->spptr = (void *) aijcrl; 1425fa1c062SBarry Smith 14317667f90SBarry Smith /* Set function pointers for methods that we inherit from AIJ but override. */ 1445a11e1b2SBarry Smith B->ops->duplicate = MatDuplicate_AIJCRL; 1455a11e1b2SBarry Smith B->ops->assemblyend = MatAssemblyEnd_MPIAIJCRL; 1465a11e1b2SBarry Smith B->ops->destroy = MatDestroy_MPIAIJCRL; 1475a11e1b2SBarry Smith B->ops->mult = MatMult_AIJCRL; 1485fa1c062SBarry Smith 1495fa1c062SBarry Smith /* If A has already been assembled, compute the permutation. */ 1504eeb8337SBarry Smith if (A->assembled) { 1515a11e1b2SBarry Smith ierr = MPIAIJCRL_create_aijcrl(B);CHKERRQ(ierr); 1525fa1c062SBarry Smith } 1535a11e1b2SBarry Smith ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJCRL);CHKERRQ(ierr); 1545fa1c062SBarry Smith *newmat = B; 1555fa1c062SBarry Smith PetscFunctionReturn(0); 1565fa1c062SBarry Smith } 1575fa1c062SBarry Smith EXTERN_C_END 1585fa1c062SBarry Smith 1595fa1c062SBarry Smith 1605fa1c062SBarry Smith #undef __FUNCT__ 1615a11e1b2SBarry Smith #define __FUNCT__ "MatCreateMPIAIJCRL" 1625fa1c062SBarry Smith /*@C 1635a11e1b2SBarry Smith MatCreateMPIAIJCRL - Creates a sparse matrix of type MPIAIJCRL. 1645fa1c062SBarry Smith This type inherits from AIJ, but stores some additional 1655fa1c062SBarry Smith information that is used to allow better vectorization of 1665fa1c062SBarry Smith the matrix-vector product. At the cost of increased storage, the AIJ formatted 1675fa1c062SBarry Smith matrix can be copied to a format in which pieces of the matrix are 1685fa1c062SBarry Smith stored in ELLPACK format, allowing the vectorized matrix multiply 1695fa1c062SBarry Smith routine to use stride-1 memory accesses. As with the AIJ type, it is 1705fa1c062SBarry Smith important to preallocate matrix storage in order to get good assembly 1715fa1c062SBarry Smith performance. 1725fa1c062SBarry Smith 1735fa1c062SBarry Smith Collective on MPI_Comm 1745fa1c062SBarry Smith 1755fa1c062SBarry Smith Input Parameters: 1765fa1c062SBarry Smith + comm - MPI communicator, set to PETSC_COMM_SELF 1775fa1c062SBarry Smith . m - number of rows 1785fa1c062SBarry Smith . n - number of columns 1795fa1c062SBarry Smith . nz - number of nonzeros per row (same for all rows) 1805fa1c062SBarry Smith - nnz - array containing the number of nonzeros in the various rows 1815fa1c062SBarry Smith (possibly different for each row) or PETSC_NULL 1825fa1c062SBarry Smith 1835fa1c062SBarry Smith Output Parameter: 1845fa1c062SBarry Smith . A - the matrix 1855fa1c062SBarry Smith 1865fa1c062SBarry Smith Notes: 1875fa1c062SBarry Smith If nnz is given then nz is ignored 1885fa1c062SBarry Smith 1895fa1c062SBarry Smith Level: intermediate 1905fa1c062SBarry Smith 1915fa1c062SBarry Smith .keywords: matrix, cray, sparse, parallel 1925fa1c062SBarry Smith 1935a11e1b2SBarry Smith .seealso: MatCreate(), MatCreateMPIAIJPERM(), MatSetValues() 1945fa1c062SBarry Smith @*/ 1957087cfbeSBarry Smith PetscErrorCode MatCreateMPIAIJCRL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],PetscInt onz,const PetscInt onnz[],Mat *A) 1965fa1c062SBarry Smith { 1975fa1c062SBarry Smith PetscErrorCode ierr; 1985fa1c062SBarry Smith 1995fa1c062SBarry Smith PetscFunctionBegin; 2005fa1c062SBarry Smith ierr = MatCreate(comm,A);CHKERRQ(ierr); 2015fa1c062SBarry Smith ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 2025a11e1b2SBarry Smith ierr = MatSetType(*A,MATMPIAIJCRL);CHKERRQ(ierr); 2031472f72bSBarry Smith ierr = MatMPIAIJSetPreallocation_MPIAIJ(*A,nz,(PetscInt*)nnz,onz,(PetscInt*)onnz);CHKERRQ(ierr); 2045fa1c062SBarry Smith PetscFunctionReturn(0); 2055fa1c062SBarry Smith } 2065fa1c062SBarry Smith 2075fa1c062SBarry Smith 2085fa1c062SBarry Smith EXTERN_C_BEGIN 2095fa1c062SBarry Smith #undef __FUNCT__ 2105a11e1b2SBarry Smith #define __FUNCT__ "MatCreate_MPIAIJCRL" 2117087cfbeSBarry Smith PetscErrorCode MatCreate_MPIAIJCRL(Mat A) 2125fa1c062SBarry Smith { 2135fa1c062SBarry Smith PetscErrorCode ierr; 2145fa1c062SBarry Smith 2155fa1c062SBarry Smith PetscFunctionBegin; 2161472f72bSBarry Smith ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 2175a11e1b2SBarry Smith ierr = MatConvert_MPIAIJ_MPIAIJCRL(A,MATMPIAIJCRL,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 2185fa1c062SBarry Smith PetscFunctionReturn(0); 2195fa1c062SBarry Smith } 2205fa1c062SBarry Smith EXTERN_C_END 2215fa1c062SBarry Smith 222