xref: /petsc/src/mat/impls/aij/mpi/crl/mcrl.c (revision 5a11e1b2b5531745063fa49d13371cfa0d0b861d)
15fa1c062SBarry Smith #define PETSCMAT_DLL
25fa1c062SBarry Smith 
35fa1c062SBarry Smith /*
41472f72bSBarry Smith   Defines a matrix-vector product for the MATMPIAIJCRL matrix class.
51472f72bSBarry Smith   This class is derived from the MATMPIAIJ class and retains the
65fa1c062SBarry Smith   compressed row storage (aka Yale sparse matrix format) but augments
75fa1c062SBarry Smith   it with a column oriented storage that is more efficient for
85fa1c062SBarry Smith   matrix vector products on Vector machines.
95fa1c062SBarry Smith 
105fa1c062SBarry Smith   CRL stands for constant row length (that is the same number of columns
115fa1c062SBarry Smith   is kept (padded with zeros) for each row of the sparse matrix.
121472f72bSBarry Smith 
131472f72bSBarry Smith    See src/mat/impls/aij/seq/crl/crl.c for the sequential version
145fa1c062SBarry Smith */
155fa1c062SBarry Smith 
167c4f633dSBarry Smith #include "../src/mat/impls/aij/mpi/mpiaij.h"
177c4f633dSBarry Smith #include "../src/mat/impls/aij/seq/crl/crl.h"
185fa1c062SBarry Smith 
194723c594SBarry Smith extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
204723c594SBarry Smith 
215fa1c062SBarry Smith #undef __FUNCT__
22*5a11e1b2SBarry Smith #define __FUNCT__ "MatDestroy_MPIAIJCRL"
23*5a11e1b2SBarry Smith PetscErrorCode MatDestroy_MPIAIJCRL(Mat A)
245fa1c062SBarry Smith {
255fa1c062SBarry Smith   PetscErrorCode ierr;
26*5a11e1b2SBarry Smith   Mat_AIJCRL     *aijcrl = (Mat_AIJCRL *) A->spptr;
275fa1c062SBarry Smith 
28*5a11e1b2SBarry Smith   /* Free everything in the Mat_AIJCRL data structure. */
29*5a11e1b2SBarry Smith   ierr = PetscFree2(aijcrl->acols,aijcrl->icols);CHKERRQ(ierr);
30*5a11e1b2SBarry Smith   if (aijcrl->fwork) {
31*5a11e1b2SBarry Smith     ierr = VecDestroy(aijcrl->fwork);CHKERRQ(ierr);
329222b4acSBarry Smith   }
33*5a11e1b2SBarry Smith   if (aijcrl->xwork) {
34*5a11e1b2SBarry Smith     ierr = VecDestroy(aijcrl->xwork);CHKERRQ(ierr);
359222b4acSBarry Smith   }
36*5a11e1b2SBarry Smith   ierr = PetscFree(aijcrl->array);CHKERRQ(ierr);
37*5a11e1b2SBarry Smith   ierr = PetscFree(aijcrl);CHKERRQ(ierr);
38c7c224ccSLisandro Dalcin   A->spptr = 0;
395fa1c062SBarry Smith 
401472f72bSBarry Smith   ierr = PetscObjectChangeTypeName( (PetscObject)A, MATMPIAIJ);CHKERRQ(ierr);
414723c594SBarry Smith   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
425fa1c062SBarry Smith   PetscFunctionReturn(0);
435fa1c062SBarry Smith }
445fa1c062SBarry Smith 
455fa1c062SBarry Smith #undef __FUNCT__
46*5a11e1b2SBarry Smith #define __FUNCT__ "MPIAIJCRL_create_aijcrl"
47*5a11e1b2SBarry Smith PetscErrorCode MPIAIJCRL_create_aijcrl(Mat A)
485fa1c062SBarry Smith {
491472f72bSBarry Smith   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)(A)->data;
5011285404SBarry Smith   Mat_SeqAIJ     *Aij = (Mat_SeqAIJ*)(a->A->data), *Bij = (Mat_SeqAIJ*)(a->B->data);
51*5a11e1b2SBarry Smith   Mat_AIJCRL     *aijcrl = (Mat_AIJCRL*) A->spptr;
52d0f46423SBarry Smith   PetscInt       m = A->rmap->n;  /* Number of rows in the matrix. */
53d0f46423SBarry Smith   PetscInt       nd = a->A->cmap->n; /* number of columns in diagonal portion */
541472f72bSBarry Smith   PetscInt       *aj = Aij->j,*bj = Bij->j;  /* From the CSR representation; points to the beginning  of each row. */
551472f72bSBarry Smith   PetscInt       i, j,rmax = 0,*icols, *ailen = Aij->ilen, *bilen = Bij->ilen;
566873f782SBarry Smith   PetscScalar    *aa = Aij->a,*ba = Bij->a,*acols,*array;
575fa1c062SBarry Smith   PetscErrorCode ierr;
585fa1c062SBarry Smith 
595fa1c062SBarry Smith   PetscFunctionBegin;
601472f72bSBarry Smith   /* determine the row with the most columns */
611472f72bSBarry Smith   for (i=0; i<m; i++) {
621472f72bSBarry Smith     rmax = PetscMax(rmax,ailen[i]+bilen[i]);
631472f72bSBarry Smith   }
64*5a11e1b2SBarry Smith   aijcrl->nz   = Aij->nz+Bij->nz;
65*5a11e1b2SBarry Smith   aijcrl->m    = A->rmap->n;
66*5a11e1b2SBarry Smith   aijcrl->rmax = rmax;
67*5a11e1b2SBarry Smith   ierr = PetscFree2(aijcrl->acols,aijcrl->icols);CHKERRQ(ierr);
68*5a11e1b2SBarry Smith   ierr = PetscMalloc2(rmax*m,PetscScalar,&aijcrl->acols,rmax*m,PetscInt,&aijcrl->icols);CHKERRQ(ierr);
69*5a11e1b2SBarry Smith   acols = aijcrl->acols;
70*5a11e1b2SBarry Smith   icols = aijcrl->icols;
715fa1c062SBarry Smith   for (i=0; i<m; i++) {
721472f72bSBarry Smith     for (j=0; j<ailen[i]; j++) {
735fa1c062SBarry Smith       acols[j*m+i] = *aa++;
745fa1c062SBarry Smith       icols[j*m+i] = *aj++;
755fa1c062SBarry Smith     }
761472f72bSBarry Smith     for (;j<ailen[i]+bilen[i]; j++) {
771472f72bSBarry Smith       acols[j*m+i] = *ba++;
7811285404SBarry Smith       icols[j*m+i] = nd + *bj++;
791472f72bSBarry Smith     }
805fa1c062SBarry Smith     for (;j<rmax; j++) { /* empty column entries */
815fa1c062SBarry Smith       acols[j*m+i] = 0.0;
825fa1c062SBarry Smith       icols[j*m+i] = (j) ? icols[(j-1)*m+i] : 0;  /* handle case where row is EMPTY */
835fa1c062SBarry Smith     }
845fa1c062SBarry Smith   }
85*5a11e1b2SBarry Smith   ierr = PetscInfo1(A,"Percentage of 0's introduced for vectorized multiply %g\n",1.0-((double)(aijcrl->nz))/((double)(rmax*m)));
861472f72bSBarry Smith 
87*5a11e1b2SBarry Smith   ierr = PetscFree(aijcrl->array);CHKERRQ(ierr);
88d0f46423SBarry Smith   ierr = PetscMalloc((a->B->cmap->n+nd)*sizeof(PetscScalar),&array);CHKERRQ(ierr);
89483e0693SBarry Smith   /* xwork array is actually B->n+nd long, but we define xwork this length so can copy into it */
90*5a11e1b2SBarry Smith   if (aijcrl->xwork) {ierr = VecDestroy(aijcrl->xwork);CHKERRQ(ierr);}
91*5a11e1b2SBarry Smith   ierr = VecCreateMPIWithArray(((PetscObject)A)->comm,nd,PETSC_DECIDE,array,&aijcrl->xwork);CHKERRQ(ierr);
92*5a11e1b2SBarry Smith   if (aijcrl->fwork) {ierr = VecDestroy(aijcrl->fwork);CHKERRQ(ierr);}
93*5a11e1b2SBarry Smith   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,a->B->cmap->n,array+nd,&aijcrl->fwork);CHKERRQ(ierr);
94*5a11e1b2SBarry Smith   aijcrl->array = array;
95*5a11e1b2SBarry Smith   aijcrl->xscat = a->Mvctx;
965fa1c062SBarry Smith   PetscFunctionReturn(0);
975fa1c062SBarry Smith }
985fa1c062SBarry Smith 
994723c594SBarry Smith extern PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat,MatAssemblyType);
1004723c594SBarry Smith 
1015fa1c062SBarry Smith #undef __FUNCT__
102*5a11e1b2SBarry Smith #define __FUNCT__ "MatAssemblyEnd_MPIAIJCRL"
103*5a11e1b2SBarry Smith PetscErrorCode MatAssemblyEnd_MPIAIJCRL(Mat A, MatAssemblyType mode)
1045fa1c062SBarry Smith {
1055fa1c062SBarry Smith   PetscErrorCode ierr;
1061472f72bSBarry Smith   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1071472f72bSBarry Smith   Mat_SeqAIJ     *Aij = (Mat_SeqAIJ*)(a->A->data), *Bij = (Mat_SeqAIJ*)(a->A->data);
1085fa1c062SBarry Smith 
1095fa1c062SBarry Smith   PetscFunctionBegin;
1101472f72bSBarry Smith   Aij->inode.use = PETSC_FALSE;
1111472f72bSBarry Smith   Bij->inode.use = PETSC_FALSE;
1124723c594SBarry Smith   ierr = MatAssemblyEnd_MPIAIJ(A,mode);CHKERRQ(ierr);
1134723c594SBarry Smith   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1145fa1c062SBarry Smith 
1151472f72bSBarry Smith   /* Now calculate the permutation and grouping information. */
116*5a11e1b2SBarry Smith   ierr = MPIAIJCRL_create_aijcrl(A);CHKERRQ(ierr);
1175fa1c062SBarry Smith   PetscFunctionReturn(0);
1185fa1c062SBarry Smith }
1195fa1c062SBarry Smith 
120*5a11e1b2SBarry Smith extern PetscErrorCode MatMult_AIJCRL(Mat,Vec,Vec);
121*5a11e1b2SBarry Smith extern PetscErrorCode MatDuplicate_AIJCRL(Mat,MatDuplicateOption,Mat*);
1225fa1c062SBarry Smith 
123*5a11e1b2SBarry Smith /* MatConvert_MPIAIJ_MPIAIJCRL converts a MPIAIJ matrix into a
124*5a11e1b2SBarry Smith  * MPIAIJCRL matrix.  This routine is called by the MatCreate_MPIAIJCRL()
1251472f72bSBarry Smith  * routine, but can also be used to convert an assembled MPIAIJ matrix
126*5a11e1b2SBarry Smith  * into a MPIAIJCRL one. */
1275fa1c062SBarry Smith EXTERN_C_BEGIN
1285fa1c062SBarry Smith #undef __FUNCT__
129*5a11e1b2SBarry Smith #define __FUNCT__ "MatConvert_MPIAIJ_MPIAIJCRL"
130*5a11e1b2SBarry Smith PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJCRL(Mat A,const MatType type,MatReuse reuse,Mat *newmat)
1315fa1c062SBarry Smith {
1325fa1c062SBarry Smith   PetscErrorCode ierr;
1335fa1c062SBarry Smith   Mat            B = *newmat;
134*5a11e1b2SBarry Smith   Mat_AIJCRL     *aijcrl;
1355fa1c062SBarry Smith 
1365fa1c062SBarry Smith   PetscFunctionBegin;
1375fa1c062SBarry Smith   if (reuse == MAT_INITIAL_MATRIX) {
1385fa1c062SBarry Smith     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
1395fa1c062SBarry Smith   }
1405fa1c062SBarry Smith 
141*5a11e1b2SBarry Smith   ierr = PetscNewLog(B,Mat_AIJCRL,&aijcrl);CHKERRQ(ierr);
142*5a11e1b2SBarry Smith   B->spptr = (void *) aijcrl;
1435fa1c062SBarry Smith 
14417667f90SBarry Smith   /* Set function pointers for methods that we inherit from AIJ but override. */
145*5a11e1b2SBarry Smith   B->ops->duplicate   = MatDuplicate_AIJCRL;
146*5a11e1b2SBarry Smith   B->ops->assemblyend = MatAssemblyEnd_MPIAIJCRL;
147*5a11e1b2SBarry Smith   B->ops->destroy     = MatDestroy_MPIAIJCRL;
148*5a11e1b2SBarry Smith   B->ops->mult        = MatMult_AIJCRL;
1495fa1c062SBarry Smith 
1505fa1c062SBarry Smith   /* If A has already been assembled, compute the permutation. */
1514eeb8337SBarry Smith   if (A->assembled) {
152*5a11e1b2SBarry Smith     ierr = MPIAIJCRL_create_aijcrl(B);CHKERRQ(ierr);
1535fa1c062SBarry Smith   }
154*5a11e1b2SBarry Smith   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJCRL);CHKERRQ(ierr);
1555fa1c062SBarry Smith   *newmat = B;
1565fa1c062SBarry Smith   PetscFunctionReturn(0);
1575fa1c062SBarry Smith }
1585fa1c062SBarry Smith EXTERN_C_END
1595fa1c062SBarry Smith 
1605fa1c062SBarry Smith 
1615fa1c062SBarry Smith #undef __FUNCT__
162*5a11e1b2SBarry Smith #define __FUNCT__ "MatCreateMPIAIJCRL"
1635fa1c062SBarry Smith /*@C
164*5a11e1b2SBarry Smith    MatCreateMPIAIJCRL - Creates a sparse matrix of type MPIAIJCRL.
1655fa1c062SBarry Smith    This type inherits from AIJ, but stores some additional
1665fa1c062SBarry Smith    information that is used to allow better vectorization of
1675fa1c062SBarry Smith    the matrix-vector product. At the cost of increased storage, the AIJ formatted
1685fa1c062SBarry Smith    matrix can be copied to a format in which pieces of the matrix are
1695fa1c062SBarry Smith    stored in ELLPACK format, allowing the vectorized matrix multiply
1705fa1c062SBarry Smith    routine to use stride-1 memory accesses.  As with the AIJ type, it is
1715fa1c062SBarry Smith    important to preallocate matrix storage in order to get good assembly
1725fa1c062SBarry Smith    performance.
1735fa1c062SBarry Smith 
1745fa1c062SBarry Smith    Collective on MPI_Comm
1755fa1c062SBarry Smith 
1765fa1c062SBarry Smith    Input Parameters:
1775fa1c062SBarry Smith +  comm - MPI communicator, set to PETSC_COMM_SELF
1785fa1c062SBarry Smith .  m - number of rows
1795fa1c062SBarry Smith .  n - number of columns
1805fa1c062SBarry Smith .  nz - number of nonzeros per row (same for all rows)
1815fa1c062SBarry Smith -  nnz - array containing the number of nonzeros in the various rows
1825fa1c062SBarry Smith          (possibly different for each row) or PETSC_NULL
1835fa1c062SBarry Smith 
1845fa1c062SBarry Smith    Output Parameter:
1855fa1c062SBarry Smith .  A - the matrix
1865fa1c062SBarry Smith 
1875fa1c062SBarry Smith    Notes:
1885fa1c062SBarry Smith    If nnz is given then nz is ignored
1895fa1c062SBarry Smith 
1905fa1c062SBarry Smith    Level: intermediate
1915fa1c062SBarry Smith 
1925fa1c062SBarry Smith .keywords: matrix, cray, sparse, parallel
1935fa1c062SBarry Smith 
194*5a11e1b2SBarry Smith .seealso: MatCreate(), MatCreateMPIAIJPERM(), MatSetValues()
1955fa1c062SBarry Smith @*/
196*5a11e1b2SBarry Smith PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJCRL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],PetscInt onz,const PetscInt onnz[],Mat *A)
1975fa1c062SBarry Smith {
1985fa1c062SBarry Smith   PetscErrorCode ierr;
1995fa1c062SBarry Smith 
2005fa1c062SBarry Smith   PetscFunctionBegin;
2015fa1c062SBarry Smith   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2025fa1c062SBarry Smith   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
203*5a11e1b2SBarry Smith   ierr = MatSetType(*A,MATMPIAIJCRL);CHKERRQ(ierr);
2041472f72bSBarry Smith   ierr = MatMPIAIJSetPreallocation_MPIAIJ(*A,nz,(PetscInt*)nnz,onz,(PetscInt*)onnz);CHKERRQ(ierr);
2055fa1c062SBarry Smith   PetscFunctionReturn(0);
2065fa1c062SBarry Smith }
2075fa1c062SBarry Smith 
2085fa1c062SBarry Smith 
2095fa1c062SBarry Smith EXTERN_C_BEGIN
2105fa1c062SBarry Smith #undef __FUNCT__
211*5a11e1b2SBarry Smith #define __FUNCT__ "MatCreate_MPIAIJCRL"
212*5a11e1b2SBarry Smith PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJCRL(Mat A)
2135fa1c062SBarry Smith {
2145fa1c062SBarry Smith   PetscErrorCode ierr;
2155fa1c062SBarry Smith 
2165fa1c062SBarry Smith   PetscFunctionBegin;
2171472f72bSBarry Smith   ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
218*5a11e1b2SBarry Smith   ierr = MatConvert_MPIAIJ_MPIAIJCRL(A,MATMPIAIJCRL,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
2195fa1c062SBarry Smith   PetscFunctionReturn(0);
2205fa1c062SBarry Smith }
2215fa1c062SBarry Smith EXTERN_C_END
2225fa1c062SBarry Smith 
223