15fa1c062SBarry Smith 25fa1c062SBarry Smith /* 31472f72bSBarry Smith Defines a matrix-vector product for the MATMPIAIJCRL matrix class. 41472f72bSBarry Smith This class is derived from the MATMPIAIJ class and retains the 55fa1c062SBarry Smith compressed row storage (aka Yale sparse matrix format) but augments 65fa1c062SBarry Smith it with a column oriented storage that is more efficient for 75fa1c062SBarry Smith matrix vector products on Vector machines. 85fa1c062SBarry Smith 95fa1c062SBarry Smith CRL stands for constant row length (that is the same number of columns 105fa1c062SBarry Smith is kept (padded with zeros) for each row of the sparse matrix. 111472f72bSBarry Smith 121472f72bSBarry Smith See src/mat/impls/aij/seq/crl/crl.c for the sequential version 135fa1c062SBarry Smith */ 145fa1c062SBarry Smith 15c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h> 16c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/crl/crl.h> 175fa1c062SBarry Smith 185a11e1b2SBarry Smith PetscErrorCode MatDestroy_MPIAIJCRL(Mat A) 195fa1c062SBarry Smith { 205a11e1b2SBarry Smith Mat_AIJCRL *aijcrl = (Mat_AIJCRL*) A->spptr; 215fa1c062SBarry Smith 22362febeeSStefano Zampini PetscFunctionBegin; 23bf0cc555SLisandro Dalcin if (aijcrl) { 24*9566063dSJacob Faibussowitsch PetscCall(PetscFree2(aijcrl->acols,aijcrl->icols)); 25*9566063dSJacob Faibussowitsch PetscCall(VecDestroy(&aijcrl->fwork)); 26*9566063dSJacob Faibussowitsch PetscCall(VecDestroy(&aijcrl->xwork)); 27*9566063dSJacob Faibussowitsch PetscCall(PetscFree(aijcrl->array)); 28bf0cc555SLisandro Dalcin } 29*9566063dSJacob Faibussowitsch PetscCall(PetscFree(A->spptr)); 305fa1c062SBarry Smith 31*9566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIAIJ)); 32*9566063dSJacob Faibussowitsch PetscCall(MatDestroy_MPIAIJ(A)); 335fa1c062SBarry Smith PetscFunctionReturn(0); 345fa1c062SBarry Smith } 355fa1c062SBarry Smith 3696b95a6bSBarry Smith PetscErrorCode MatMPIAIJCRL_create_aijcrl(Mat A) 375fa1c062SBarry Smith { 381472f72bSBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*)(A)->data; 3911285404SBarry Smith Mat_SeqAIJ *Aij = (Mat_SeqAIJ*)(a->A->data), *Bij = (Mat_SeqAIJ*)(a->B->data); 405a11e1b2SBarry Smith Mat_AIJCRL *aijcrl = (Mat_AIJCRL*) A->spptr; 41d0f46423SBarry Smith PetscInt m = A->rmap->n; /* Number of rows in the matrix. */ 42d0f46423SBarry Smith PetscInt nd = a->A->cmap->n; /* number of columns in diagonal portion */ 431472f72bSBarry Smith PetscInt *aj = Aij->j,*bj = Bij->j; /* From the CSR representation; points to the beginning of each row. */ 441472f72bSBarry Smith PetscInt i, j,rmax = 0,*icols, *ailen = Aij->ilen, *bilen = Bij->ilen; 456873f782SBarry Smith PetscScalar *aa = Aij->a,*ba = Bij->a,*acols,*array; 465fa1c062SBarry Smith 475fa1c062SBarry Smith PetscFunctionBegin; 481472f72bSBarry Smith /* determine the row with the most columns */ 491472f72bSBarry Smith for (i=0; i<m; i++) { 501472f72bSBarry Smith rmax = PetscMax(rmax,ailen[i]+bilen[i]); 511472f72bSBarry Smith } 525a11e1b2SBarry Smith aijcrl->nz = Aij->nz+Bij->nz; 535a11e1b2SBarry Smith aijcrl->m = A->rmap->n; 545a11e1b2SBarry Smith aijcrl->rmax = rmax; 552205254eSKarl Rupp 56*9566063dSJacob Faibussowitsch PetscCall(PetscFree2(aijcrl->acols,aijcrl->icols)); 57*9566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(rmax*m,&aijcrl->acols,rmax*m,&aijcrl->icols)); 585a11e1b2SBarry Smith acols = aijcrl->acols; 595a11e1b2SBarry Smith icols = aijcrl->icols; 605fa1c062SBarry Smith for (i=0; i<m; i++) { 611472f72bSBarry Smith for (j=0; j<ailen[i]; j++) { 625fa1c062SBarry Smith acols[j*m+i] = *aa++; 635fa1c062SBarry Smith icols[j*m+i] = *aj++; 645fa1c062SBarry Smith } 651472f72bSBarry Smith for (; j<ailen[i]+bilen[i]; j++) { 661472f72bSBarry Smith acols[j*m+i] = *ba++; 6711285404SBarry Smith icols[j*m+i] = nd + *bj++; 681472f72bSBarry Smith } 695fa1c062SBarry Smith for (; j<rmax; j++) { /* empty column entries */ 705fa1c062SBarry Smith acols[j*m+i] = 0.0; 715fa1c062SBarry Smith icols[j*m+i] = (j) ? icols[(j-1)*m+i] : 0; /* handle case where row is EMPTY */ 725fa1c062SBarry Smith } 735fa1c062SBarry Smith } 74*9566063dSJacob Faibussowitsch PetscCall(PetscInfo(A,"Percentage of 0's introduced for vectorized multiply %g\n",1.0-((double)(aijcrl->nz))/((double)(rmax*m)))); 751472f72bSBarry Smith 76*9566063dSJacob Faibussowitsch PetscCall(PetscFree(aijcrl->array)); 77*9566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(a->B->cmap->n+nd,&array)); 78483e0693SBarry Smith /* xwork array is actually B->n+nd long, but we define xwork this length so can copy into it */ 79*9566063dSJacob Faibussowitsch PetscCall(VecDestroy(&aijcrl->xwork)); 80*9566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),1,nd,PETSC_DECIDE,array,&aijcrl->xwork)); 81*9566063dSJacob Faibussowitsch PetscCall(VecDestroy(&aijcrl->fwork)); 82*9566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,a->B->cmap->n,array+nd,&aijcrl->fwork)); 832205254eSKarl Rupp 845a11e1b2SBarry Smith aijcrl->array = array; 855a11e1b2SBarry Smith aijcrl->xscat = a->Mvctx; 865fa1c062SBarry Smith PetscFunctionReturn(0); 875fa1c062SBarry Smith } 885fa1c062SBarry Smith 895a11e1b2SBarry Smith PetscErrorCode MatAssemblyEnd_MPIAIJCRL(Mat A, MatAssemblyType mode) 905fa1c062SBarry Smith { 911472f72bSBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 921472f72bSBarry Smith Mat_SeqAIJ *Aij = (Mat_SeqAIJ*)(a->A->data), *Bij = (Mat_SeqAIJ*)(a->A->data); 935fa1c062SBarry Smith 945fa1c062SBarry Smith PetscFunctionBegin; 951472f72bSBarry Smith Aij->inode.use = PETSC_FALSE; 961472f72bSBarry Smith Bij->inode.use = PETSC_FALSE; 972205254eSKarl Rupp 98*9566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd_MPIAIJ(A,mode)); 994723c594SBarry Smith if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1005fa1c062SBarry Smith 1011472f72bSBarry Smith /* Now calculate the permutation and grouping information. */ 102*9566063dSJacob Faibussowitsch PetscCall(MatMPIAIJCRL_create_aijcrl(A)); 1035fa1c062SBarry Smith PetscFunctionReturn(0); 1045fa1c062SBarry Smith } 1055fa1c062SBarry Smith 1065a11e1b2SBarry Smith extern PetscErrorCode MatMult_AIJCRL(Mat,Vec,Vec); 1075a11e1b2SBarry Smith extern PetscErrorCode MatDuplicate_AIJCRL(Mat,MatDuplicateOption,Mat*); 1085fa1c062SBarry Smith 1095a11e1b2SBarry Smith /* MatConvert_MPIAIJ_MPIAIJCRL converts a MPIAIJ matrix into a 1105a11e1b2SBarry Smith * MPIAIJCRL matrix. This routine is called by the MatCreate_MPIAIJCRL() 1111472f72bSBarry Smith * routine, but can also be used to convert an assembled MPIAIJ matrix 1125a11e1b2SBarry Smith * into a MPIAIJCRL one. */ 113b2573a8aSBarry Smith 114cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 1155fa1c062SBarry Smith { 1165fa1c062SBarry Smith Mat B = *newmat; 1175a11e1b2SBarry Smith Mat_AIJCRL *aijcrl; 1185fa1c062SBarry Smith 1195fa1c062SBarry Smith PetscFunctionBegin; 1205fa1c062SBarry Smith if (reuse == MAT_INITIAL_MATRIX) { 121*9566063dSJacob Faibussowitsch PetscCall(MatDuplicate(A,MAT_COPY_VALUES,&B)); 1225fa1c062SBarry Smith } 1235fa1c062SBarry Smith 124*9566063dSJacob Faibussowitsch PetscCall(PetscNewLog(B,&aijcrl)); 1255a11e1b2SBarry Smith B->spptr = (void*) aijcrl; 1265fa1c062SBarry Smith 12717667f90SBarry Smith /* Set function pointers for methods that we inherit from AIJ but override. */ 1285a11e1b2SBarry Smith B->ops->duplicate = MatDuplicate_AIJCRL; 1295a11e1b2SBarry Smith B->ops->assemblyend = MatAssemblyEnd_MPIAIJCRL; 1305a11e1b2SBarry Smith B->ops->destroy = MatDestroy_MPIAIJCRL; 1315a11e1b2SBarry Smith B->ops->mult = MatMult_AIJCRL; 1325fa1c062SBarry Smith 1335fa1c062SBarry Smith /* If A has already been assembled, compute the permutation. */ 1344eeb8337SBarry Smith if (A->assembled) { 135*9566063dSJacob Faibussowitsch PetscCall(MatMPIAIJCRL_create_aijcrl(B)); 1365fa1c062SBarry Smith } 137*9566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJCRL)); 1385fa1c062SBarry Smith *newmat = B; 1395fa1c062SBarry Smith PetscFunctionReturn(0); 1405fa1c062SBarry Smith } 1415fa1c062SBarry Smith 1425fa1c062SBarry Smith /*@C 1435a11e1b2SBarry Smith MatCreateMPIAIJCRL - Creates a sparse matrix of type MPIAIJCRL. 1445fa1c062SBarry Smith This type inherits from AIJ, but stores some additional 1455fa1c062SBarry Smith information that is used to allow better vectorization of 1465fa1c062SBarry Smith the matrix-vector product. At the cost of increased storage, the AIJ formatted 1475fa1c062SBarry Smith matrix can be copied to a format in which pieces of the matrix are 1485fa1c062SBarry Smith stored in ELLPACK format, allowing the vectorized matrix multiply 1495fa1c062SBarry Smith routine to use stride-1 memory accesses. As with the AIJ type, it is 1505fa1c062SBarry Smith important to preallocate matrix storage in order to get good assembly 1515fa1c062SBarry Smith performance. 1525fa1c062SBarry Smith 153d083f849SBarry Smith Collective 1545fa1c062SBarry Smith 1555fa1c062SBarry Smith Input Parameters: 1565fa1c062SBarry Smith + comm - MPI communicator, set to PETSC_COMM_SELF 1575fa1c062SBarry Smith . m - number of rows 1585fa1c062SBarry Smith . n - number of columns 1595fa1c062SBarry Smith . nz - number of nonzeros per row (same for all rows) 1605fa1c062SBarry Smith - nnz - array containing the number of nonzeros in the various rows 1610298fd71SBarry Smith (possibly different for each row) or NULL 1625fa1c062SBarry Smith 1635fa1c062SBarry Smith Output Parameter: 1645fa1c062SBarry Smith . A - the matrix 1655fa1c062SBarry Smith 1665fa1c062SBarry Smith Notes: 1675fa1c062SBarry Smith If nnz is given then nz is ignored 1685fa1c062SBarry Smith 1695fa1c062SBarry Smith Level: intermediate 1705fa1c062SBarry Smith 1715a11e1b2SBarry Smith .seealso: MatCreate(), MatCreateMPIAIJPERM(), MatSetValues() 1725fa1c062SBarry Smith @*/ 1737087cfbeSBarry Smith PetscErrorCode MatCreateMPIAIJCRL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],PetscInt onz,const PetscInt onnz[],Mat *A) 1745fa1c062SBarry Smith { 1755fa1c062SBarry Smith PetscFunctionBegin; 176*9566063dSJacob Faibussowitsch PetscCall(MatCreate(comm,A)); 177*9566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*A,m,n,m,n)); 178*9566063dSJacob Faibussowitsch PetscCall(MatSetType(*A,MATMPIAIJCRL)); 179*9566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation_MPIAIJ(*A,nz,(PetscInt*)nnz,onz,(PetscInt*)onnz)); 1805fa1c062SBarry Smith PetscFunctionReturn(0); 1815fa1c062SBarry Smith } 1825fa1c062SBarry Smith 1838cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJCRL(Mat A) 1845fa1c062SBarry Smith { 1855fa1c062SBarry Smith PetscFunctionBegin; 186*9566063dSJacob Faibussowitsch PetscCall(MatSetType(A,MATMPIAIJ)); 187*9566063dSJacob Faibussowitsch PetscCall(MatConvert_MPIAIJ_MPIAIJCRL(A,MATMPIAIJCRL,MAT_INPLACE_MATRIX,&A)); 1885fa1c062SBarry Smith PetscFunctionReturn(0); 1895fa1c062SBarry Smith } 190