15fa1c062SBarry Smith 25fa1c062SBarry Smith /* 31472f72bSBarry Smith Defines a matrix-vector product for the MATMPIAIJCRL matrix class. 41472f72bSBarry Smith This class is derived from the MATMPIAIJ class and retains the 55fa1c062SBarry Smith compressed row storage (aka Yale sparse matrix format) but augments 65fa1c062SBarry Smith it with a column oriented storage that is more efficient for 75fa1c062SBarry Smith matrix vector products on Vector machines. 85fa1c062SBarry Smith 95fa1c062SBarry Smith CRL stands for constant row length (that is the same number of columns 105fa1c062SBarry Smith is kept (padded with zeros) for each row of the sparse matrix. 111472f72bSBarry Smith 121472f72bSBarry Smith See src/mat/impls/aij/seq/crl/crl.c for the sequential version 135fa1c062SBarry Smith */ 145fa1c062SBarry Smith 15c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h> 16c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/crl/crl.h> 175fa1c062SBarry Smith 184723c594SBarry Smith extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 194723c594SBarry Smith 205fa1c062SBarry Smith #undef __FUNCT__ 215a11e1b2SBarry Smith #define __FUNCT__ "MatDestroy_MPIAIJCRL" 225a11e1b2SBarry Smith PetscErrorCode MatDestroy_MPIAIJCRL(Mat A) 235fa1c062SBarry Smith { 245fa1c062SBarry Smith PetscErrorCode ierr; 255a11e1b2SBarry Smith Mat_AIJCRL *aijcrl = (Mat_AIJCRL *) A->spptr; 265fa1c062SBarry Smith 275a11e1b2SBarry Smith /* Free everything in the Mat_AIJCRL data structure. */ 28bf0cc555SLisandro Dalcin if (aijcrl) { 295a11e1b2SBarry Smith ierr = PetscFree2(aijcrl->acols,aijcrl->icols);CHKERRQ(ierr); 306bf464f9SBarry Smith ierr = VecDestroy(&aijcrl->fwork);CHKERRQ(ierr); 316bf464f9SBarry Smith ierr = VecDestroy(&aijcrl->xwork);CHKERRQ(ierr); 325a11e1b2SBarry Smith ierr = PetscFree(aijcrl->array);CHKERRQ(ierr); 33bf0cc555SLisandro Dalcin } 34c31cb41cSBarry Smith ierr = PetscFree(A->spptr);CHKERRQ(ierr); 355fa1c062SBarry Smith 361472f72bSBarry Smith ierr = PetscObjectChangeTypeName( (PetscObject)A, MATMPIAIJ);CHKERRQ(ierr); 374723c594SBarry Smith ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 385fa1c062SBarry Smith PetscFunctionReturn(0); 395fa1c062SBarry Smith } 405fa1c062SBarry Smith 415fa1c062SBarry Smith #undef __FUNCT__ 4296b95a6bSBarry Smith #define __FUNCT__ "MatMPIAIJCRL_create_aijcrl" 4396b95a6bSBarry Smith PetscErrorCode MatMPIAIJCRL_create_aijcrl(Mat A) 445fa1c062SBarry Smith { 451472f72bSBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *)(A)->data; 4611285404SBarry Smith Mat_SeqAIJ *Aij = (Mat_SeqAIJ*)(a->A->data), *Bij = (Mat_SeqAIJ*)(a->B->data); 475a11e1b2SBarry Smith Mat_AIJCRL *aijcrl = (Mat_AIJCRL*) A->spptr; 48d0f46423SBarry Smith PetscInt m = A->rmap->n; /* Number of rows in the matrix. */ 49d0f46423SBarry Smith PetscInt nd = a->A->cmap->n; /* number of columns in diagonal portion */ 501472f72bSBarry Smith PetscInt *aj = Aij->j,*bj = Bij->j; /* From the CSR representation; points to the beginning of each row. */ 511472f72bSBarry Smith PetscInt i, j,rmax = 0,*icols, *ailen = Aij->ilen, *bilen = Bij->ilen; 526873f782SBarry Smith PetscScalar *aa = Aij->a,*ba = Bij->a,*acols,*array; 535fa1c062SBarry Smith PetscErrorCode ierr; 545fa1c062SBarry Smith 555fa1c062SBarry Smith PetscFunctionBegin; 561472f72bSBarry Smith /* determine the row with the most columns */ 571472f72bSBarry Smith for (i=0; i<m; i++) { 581472f72bSBarry Smith rmax = PetscMax(rmax,ailen[i]+bilen[i]); 591472f72bSBarry Smith } 605a11e1b2SBarry Smith aijcrl->nz = Aij->nz+Bij->nz; 615a11e1b2SBarry Smith aijcrl->m = A->rmap->n; 625a11e1b2SBarry Smith aijcrl->rmax = rmax; 635a11e1b2SBarry Smith ierr = PetscFree2(aijcrl->acols,aijcrl->icols);CHKERRQ(ierr); 645a11e1b2SBarry Smith ierr = PetscMalloc2(rmax*m,PetscScalar,&aijcrl->acols,rmax*m,PetscInt,&aijcrl->icols);CHKERRQ(ierr); 655a11e1b2SBarry Smith acols = aijcrl->acols; 665a11e1b2SBarry Smith icols = aijcrl->icols; 675fa1c062SBarry Smith for (i=0; i<m; i++) { 681472f72bSBarry Smith for (j=0; j<ailen[i]; j++) { 695fa1c062SBarry Smith acols[j*m+i] = *aa++; 705fa1c062SBarry Smith icols[j*m+i] = *aj++; 715fa1c062SBarry Smith } 721472f72bSBarry Smith for (;j<ailen[i]+bilen[i]; j++) { 731472f72bSBarry Smith acols[j*m+i] = *ba++; 7411285404SBarry Smith icols[j*m+i] = nd + *bj++; 751472f72bSBarry Smith } 765fa1c062SBarry Smith for (;j<rmax; j++) { /* empty column entries */ 775fa1c062SBarry Smith acols[j*m+i] = 0.0; 785fa1c062SBarry Smith icols[j*m+i] = (j) ? icols[(j-1)*m+i] : 0; /* handle case where row is EMPTY */ 795fa1c062SBarry Smith } 805fa1c062SBarry Smith } 815a11e1b2SBarry Smith ierr = PetscInfo1(A,"Percentage of 0's introduced for vectorized multiply %g\n",1.0-((double)(aijcrl->nz))/((double)(rmax*m))); 821472f72bSBarry Smith 835a11e1b2SBarry Smith ierr = PetscFree(aijcrl->array);CHKERRQ(ierr); 84d0f46423SBarry Smith ierr = PetscMalloc((a->B->cmap->n+nd)*sizeof(PetscScalar),&array);CHKERRQ(ierr); 85483e0693SBarry Smith /* xwork array is actually B->n+nd long, but we define xwork this length so can copy into it */ 866bf464f9SBarry Smith ierr = VecDestroy(&aijcrl->xwork);CHKERRQ(ierr); 87778a2246SBarry Smith ierr = VecCreateMPIWithArray(((PetscObject)A)->comm,1,nd,PETSC_DECIDE,array,&aijcrl->xwork);CHKERRQ(ierr); 886bf464f9SBarry Smith ierr = VecDestroy(&aijcrl->fwork);CHKERRQ(ierr); 89778a2246SBarry Smith ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,a->B->cmap->n,array+nd,&aijcrl->fwork);CHKERRQ(ierr); 905a11e1b2SBarry Smith aijcrl->array = array; 915a11e1b2SBarry Smith aijcrl->xscat = a->Mvctx; 925fa1c062SBarry Smith PetscFunctionReturn(0); 935fa1c062SBarry Smith } 945fa1c062SBarry Smith 954723c594SBarry Smith extern PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat,MatAssemblyType); 964723c594SBarry Smith 975fa1c062SBarry Smith #undef __FUNCT__ 985a11e1b2SBarry Smith #define __FUNCT__ "MatAssemblyEnd_MPIAIJCRL" 995a11e1b2SBarry Smith PetscErrorCode MatAssemblyEnd_MPIAIJCRL(Mat A, MatAssemblyType mode) 1005fa1c062SBarry Smith { 1015fa1c062SBarry Smith PetscErrorCode ierr; 1021472f72bSBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1031472f72bSBarry Smith Mat_SeqAIJ *Aij = (Mat_SeqAIJ*)(a->A->data), *Bij = (Mat_SeqAIJ*)(a->A->data); 1045fa1c062SBarry Smith 1055fa1c062SBarry Smith PetscFunctionBegin; 1061472f72bSBarry Smith Aij->inode.use = PETSC_FALSE; 1071472f72bSBarry Smith Bij->inode.use = PETSC_FALSE; 1084723c594SBarry Smith ierr = MatAssemblyEnd_MPIAIJ(A,mode);CHKERRQ(ierr); 1094723c594SBarry Smith if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1105fa1c062SBarry Smith 1111472f72bSBarry Smith /* Now calculate the permutation and grouping information. */ 11296b95a6bSBarry Smith ierr = MatMPIAIJCRL_create_aijcrl(A);CHKERRQ(ierr); 1135fa1c062SBarry Smith PetscFunctionReturn(0); 1145fa1c062SBarry Smith } 1155fa1c062SBarry Smith 1165a11e1b2SBarry Smith extern PetscErrorCode MatMult_AIJCRL(Mat,Vec,Vec); 1175a11e1b2SBarry Smith extern PetscErrorCode MatDuplicate_AIJCRL(Mat,MatDuplicateOption,Mat*); 1185fa1c062SBarry Smith 1195a11e1b2SBarry Smith /* MatConvert_MPIAIJ_MPIAIJCRL converts a MPIAIJ matrix into a 1205a11e1b2SBarry Smith * MPIAIJCRL matrix. This routine is called by the MatCreate_MPIAIJCRL() 1211472f72bSBarry Smith * routine, but can also be used to convert an assembled MPIAIJ matrix 1225a11e1b2SBarry Smith * into a MPIAIJCRL one. */ 1235fa1c062SBarry Smith EXTERN_C_BEGIN 1245fa1c062SBarry Smith #undef __FUNCT__ 1255a11e1b2SBarry Smith #define __FUNCT__ "MatConvert_MPIAIJ_MPIAIJCRL" 126*19fd82e9SBarry Smith PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 1275fa1c062SBarry Smith { 1285fa1c062SBarry Smith PetscErrorCode ierr; 1295fa1c062SBarry Smith Mat B = *newmat; 1305a11e1b2SBarry Smith Mat_AIJCRL *aijcrl; 1315fa1c062SBarry Smith 1325fa1c062SBarry Smith PetscFunctionBegin; 1335fa1c062SBarry Smith if (reuse == MAT_INITIAL_MATRIX) { 1345fa1c062SBarry Smith ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 1355fa1c062SBarry Smith } 1365fa1c062SBarry Smith 1375a11e1b2SBarry Smith ierr = PetscNewLog(B,Mat_AIJCRL,&aijcrl);CHKERRQ(ierr); 1385a11e1b2SBarry Smith B->spptr = (void *) aijcrl; 1395fa1c062SBarry Smith 14017667f90SBarry Smith /* Set function pointers for methods that we inherit from AIJ but override. */ 1415a11e1b2SBarry Smith B->ops->duplicate = MatDuplicate_AIJCRL; 1425a11e1b2SBarry Smith B->ops->assemblyend = MatAssemblyEnd_MPIAIJCRL; 1435a11e1b2SBarry Smith B->ops->destroy = MatDestroy_MPIAIJCRL; 1445a11e1b2SBarry Smith B->ops->mult = MatMult_AIJCRL; 1455fa1c062SBarry Smith 1465fa1c062SBarry Smith /* If A has already been assembled, compute the permutation. */ 1474eeb8337SBarry Smith if (A->assembled) { 14896b95a6bSBarry Smith ierr = MatMPIAIJCRL_create_aijcrl(B);CHKERRQ(ierr); 1495fa1c062SBarry Smith } 1505a11e1b2SBarry Smith ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJCRL);CHKERRQ(ierr); 1515fa1c062SBarry Smith *newmat = B; 1525fa1c062SBarry Smith PetscFunctionReturn(0); 1535fa1c062SBarry Smith } 1545fa1c062SBarry Smith EXTERN_C_END 1555fa1c062SBarry Smith 1565fa1c062SBarry Smith 1575fa1c062SBarry Smith #undef __FUNCT__ 1585a11e1b2SBarry Smith #define __FUNCT__ "MatCreateMPIAIJCRL" 1595fa1c062SBarry Smith /*@C 1605a11e1b2SBarry Smith MatCreateMPIAIJCRL - Creates a sparse matrix of type MPIAIJCRL. 1615fa1c062SBarry Smith This type inherits from AIJ, but stores some additional 1625fa1c062SBarry Smith information that is used to allow better vectorization of 1635fa1c062SBarry Smith the matrix-vector product. At the cost of increased storage, the AIJ formatted 1645fa1c062SBarry Smith matrix can be copied to a format in which pieces of the matrix are 1655fa1c062SBarry Smith stored in ELLPACK format, allowing the vectorized matrix multiply 1665fa1c062SBarry Smith routine to use stride-1 memory accesses. As with the AIJ type, it is 1675fa1c062SBarry Smith important to preallocate matrix storage in order to get good assembly 1685fa1c062SBarry Smith performance. 1695fa1c062SBarry Smith 1705fa1c062SBarry Smith Collective on MPI_Comm 1715fa1c062SBarry Smith 1725fa1c062SBarry Smith Input Parameters: 1735fa1c062SBarry Smith + comm - MPI communicator, set to PETSC_COMM_SELF 1745fa1c062SBarry Smith . m - number of rows 1755fa1c062SBarry Smith . n - number of columns 1765fa1c062SBarry Smith . nz - number of nonzeros per row (same for all rows) 1775fa1c062SBarry Smith - nnz - array containing the number of nonzeros in the various rows 1785fa1c062SBarry Smith (possibly different for each row) or PETSC_NULL 1795fa1c062SBarry Smith 1805fa1c062SBarry Smith Output Parameter: 1815fa1c062SBarry Smith . A - the matrix 1825fa1c062SBarry Smith 1835fa1c062SBarry Smith Notes: 1845fa1c062SBarry Smith If nnz is given then nz is ignored 1855fa1c062SBarry Smith 1865fa1c062SBarry Smith Level: intermediate 1875fa1c062SBarry Smith 1885fa1c062SBarry Smith .keywords: matrix, cray, sparse, parallel 1895fa1c062SBarry Smith 1905a11e1b2SBarry Smith .seealso: MatCreate(), MatCreateMPIAIJPERM(), MatSetValues() 1915fa1c062SBarry Smith @*/ 1927087cfbeSBarry Smith PetscErrorCode MatCreateMPIAIJCRL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],PetscInt onz,const PetscInt onnz[],Mat *A) 1935fa1c062SBarry Smith { 1945fa1c062SBarry Smith PetscErrorCode ierr; 1955fa1c062SBarry Smith 1965fa1c062SBarry Smith PetscFunctionBegin; 1975fa1c062SBarry Smith ierr = MatCreate(comm,A);CHKERRQ(ierr); 1985fa1c062SBarry Smith ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 1995a11e1b2SBarry Smith ierr = MatSetType(*A,MATMPIAIJCRL);CHKERRQ(ierr); 2001472f72bSBarry Smith ierr = MatMPIAIJSetPreallocation_MPIAIJ(*A,nz,(PetscInt*)nnz,onz,(PetscInt*)onnz);CHKERRQ(ierr); 2015fa1c062SBarry Smith PetscFunctionReturn(0); 2025fa1c062SBarry Smith } 2035fa1c062SBarry Smith 2045fa1c062SBarry Smith 2055fa1c062SBarry Smith EXTERN_C_BEGIN 2065fa1c062SBarry Smith #undef __FUNCT__ 2075a11e1b2SBarry Smith #define __FUNCT__ "MatCreate_MPIAIJCRL" 2087087cfbeSBarry Smith PetscErrorCode MatCreate_MPIAIJCRL(Mat A) 2095fa1c062SBarry Smith { 2105fa1c062SBarry Smith PetscErrorCode ierr; 2115fa1c062SBarry Smith 2125fa1c062SBarry Smith PetscFunctionBegin; 2131472f72bSBarry Smith ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 2145a11e1b2SBarry Smith ierr = MatConvert_MPIAIJ_MPIAIJCRL(A,MATMPIAIJCRL,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 2155fa1c062SBarry Smith PetscFunctionReturn(0); 2165fa1c062SBarry Smith } 2175fa1c062SBarry Smith EXTERN_C_END 2185fa1c062SBarry Smith 219