xref: /petsc/src/mat/impls/aij/mpi/crl/mcrl.c (revision c6db04a5321582041def2b1e244c75985478b3ef)
15fa1c062SBarry Smith 
25fa1c062SBarry Smith /*
31472f72bSBarry Smith   Defines a matrix-vector product for the MATMPIAIJCRL matrix class.
41472f72bSBarry Smith   This class is derived from the MATMPIAIJ class and retains the
55fa1c062SBarry Smith   compressed row storage (aka Yale sparse matrix format) but augments
65fa1c062SBarry Smith   it with a column oriented storage that is more efficient for
75fa1c062SBarry Smith   matrix vector products on Vector machines.
85fa1c062SBarry Smith 
95fa1c062SBarry Smith   CRL stands for constant row length (that is the same number of columns
105fa1c062SBarry Smith   is kept (padded with zeros) for each row of the sparse matrix.
111472f72bSBarry Smith 
121472f72bSBarry Smith    See src/mat/impls/aij/seq/crl/crl.c for the sequential version
135fa1c062SBarry Smith */
145fa1c062SBarry Smith 
15*c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h>
16*c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/crl/crl.h>
175fa1c062SBarry Smith 
184723c594SBarry Smith extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
194723c594SBarry Smith 
205fa1c062SBarry Smith #undef __FUNCT__
215a11e1b2SBarry Smith #define __FUNCT__ "MatDestroy_MPIAIJCRL"
225a11e1b2SBarry Smith PetscErrorCode MatDestroy_MPIAIJCRL(Mat A)
235fa1c062SBarry Smith {
245fa1c062SBarry Smith   PetscErrorCode ierr;
255a11e1b2SBarry Smith   Mat_AIJCRL     *aijcrl = (Mat_AIJCRL *) A->spptr;
265fa1c062SBarry Smith 
275a11e1b2SBarry Smith   /* Free everything in the Mat_AIJCRL data structure. */
285a11e1b2SBarry Smith   ierr = PetscFree2(aijcrl->acols,aijcrl->icols);CHKERRQ(ierr);
295a11e1b2SBarry Smith   if (aijcrl->fwork) {
305a11e1b2SBarry Smith     ierr = VecDestroy(aijcrl->fwork);CHKERRQ(ierr);
319222b4acSBarry Smith   }
325a11e1b2SBarry Smith   if (aijcrl->xwork) {
335a11e1b2SBarry Smith     ierr = VecDestroy(aijcrl->xwork);CHKERRQ(ierr);
349222b4acSBarry Smith   }
355a11e1b2SBarry Smith   ierr = PetscFree(aijcrl->array);CHKERRQ(ierr);
36c31cb41cSBarry Smith   ierr = PetscFree(A->spptr);CHKERRQ(ierr);
375fa1c062SBarry Smith 
381472f72bSBarry Smith   ierr = PetscObjectChangeTypeName( (PetscObject)A, MATMPIAIJ);CHKERRQ(ierr);
394723c594SBarry Smith   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
405fa1c062SBarry Smith   PetscFunctionReturn(0);
415fa1c062SBarry Smith }
425fa1c062SBarry Smith 
435fa1c062SBarry Smith #undef __FUNCT__
445a11e1b2SBarry Smith #define __FUNCT__ "MPIAIJCRL_create_aijcrl"
455a11e1b2SBarry Smith PetscErrorCode MPIAIJCRL_create_aijcrl(Mat A)
465fa1c062SBarry Smith {
471472f72bSBarry Smith   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)(A)->data;
4811285404SBarry Smith   Mat_SeqAIJ     *Aij = (Mat_SeqAIJ*)(a->A->data), *Bij = (Mat_SeqAIJ*)(a->B->data);
495a11e1b2SBarry Smith   Mat_AIJCRL     *aijcrl = (Mat_AIJCRL*) A->spptr;
50d0f46423SBarry Smith   PetscInt       m = A->rmap->n;  /* Number of rows in the matrix. */
51d0f46423SBarry Smith   PetscInt       nd = a->A->cmap->n; /* number of columns in diagonal portion */
521472f72bSBarry Smith   PetscInt       *aj = Aij->j,*bj = Bij->j;  /* From the CSR representation; points to the beginning  of each row. */
531472f72bSBarry Smith   PetscInt       i, j,rmax = 0,*icols, *ailen = Aij->ilen, *bilen = Bij->ilen;
546873f782SBarry Smith   PetscScalar    *aa = Aij->a,*ba = Bij->a,*acols,*array;
555fa1c062SBarry Smith   PetscErrorCode ierr;
565fa1c062SBarry Smith 
575fa1c062SBarry Smith   PetscFunctionBegin;
581472f72bSBarry Smith   /* determine the row with the most columns */
591472f72bSBarry Smith   for (i=0; i<m; i++) {
601472f72bSBarry Smith     rmax = PetscMax(rmax,ailen[i]+bilen[i]);
611472f72bSBarry Smith   }
625a11e1b2SBarry Smith   aijcrl->nz   = Aij->nz+Bij->nz;
635a11e1b2SBarry Smith   aijcrl->m    = A->rmap->n;
645a11e1b2SBarry Smith   aijcrl->rmax = rmax;
655a11e1b2SBarry Smith   ierr = PetscFree2(aijcrl->acols,aijcrl->icols);CHKERRQ(ierr);
665a11e1b2SBarry Smith   ierr = PetscMalloc2(rmax*m,PetscScalar,&aijcrl->acols,rmax*m,PetscInt,&aijcrl->icols);CHKERRQ(ierr);
675a11e1b2SBarry Smith   acols = aijcrl->acols;
685a11e1b2SBarry Smith   icols = aijcrl->icols;
695fa1c062SBarry Smith   for (i=0; i<m; i++) {
701472f72bSBarry Smith     for (j=0; j<ailen[i]; j++) {
715fa1c062SBarry Smith       acols[j*m+i] = *aa++;
725fa1c062SBarry Smith       icols[j*m+i] = *aj++;
735fa1c062SBarry Smith     }
741472f72bSBarry Smith     for (;j<ailen[i]+bilen[i]; j++) {
751472f72bSBarry Smith       acols[j*m+i] = *ba++;
7611285404SBarry Smith       icols[j*m+i] = nd + *bj++;
771472f72bSBarry Smith     }
785fa1c062SBarry Smith     for (;j<rmax; j++) { /* empty column entries */
795fa1c062SBarry Smith       acols[j*m+i] = 0.0;
805fa1c062SBarry Smith       icols[j*m+i] = (j) ? icols[(j-1)*m+i] : 0;  /* handle case where row is EMPTY */
815fa1c062SBarry Smith     }
825fa1c062SBarry Smith   }
835a11e1b2SBarry Smith   ierr = PetscInfo1(A,"Percentage of 0's introduced for vectorized multiply %g\n",1.0-((double)(aijcrl->nz))/((double)(rmax*m)));
841472f72bSBarry Smith 
855a11e1b2SBarry Smith   ierr = PetscFree(aijcrl->array);CHKERRQ(ierr);
86d0f46423SBarry Smith   ierr = PetscMalloc((a->B->cmap->n+nd)*sizeof(PetscScalar),&array);CHKERRQ(ierr);
87483e0693SBarry Smith   /* xwork array is actually B->n+nd long, but we define xwork this length so can copy into it */
885a11e1b2SBarry Smith   if (aijcrl->xwork) {ierr = VecDestroy(aijcrl->xwork);CHKERRQ(ierr);}
895a11e1b2SBarry Smith   ierr = VecCreateMPIWithArray(((PetscObject)A)->comm,nd,PETSC_DECIDE,array,&aijcrl->xwork);CHKERRQ(ierr);
905a11e1b2SBarry Smith   if (aijcrl->fwork) {ierr = VecDestroy(aijcrl->fwork);CHKERRQ(ierr);}
915a11e1b2SBarry Smith   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,a->B->cmap->n,array+nd,&aijcrl->fwork);CHKERRQ(ierr);
925a11e1b2SBarry Smith   aijcrl->array = array;
935a11e1b2SBarry Smith   aijcrl->xscat = a->Mvctx;
945fa1c062SBarry Smith   PetscFunctionReturn(0);
955fa1c062SBarry Smith }
965fa1c062SBarry Smith 
974723c594SBarry Smith extern PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat,MatAssemblyType);
984723c594SBarry Smith 
995fa1c062SBarry Smith #undef __FUNCT__
1005a11e1b2SBarry Smith #define __FUNCT__ "MatAssemblyEnd_MPIAIJCRL"
1015a11e1b2SBarry Smith PetscErrorCode MatAssemblyEnd_MPIAIJCRL(Mat A, MatAssemblyType mode)
1025fa1c062SBarry Smith {
1035fa1c062SBarry Smith   PetscErrorCode ierr;
1041472f72bSBarry Smith   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1051472f72bSBarry Smith   Mat_SeqAIJ     *Aij = (Mat_SeqAIJ*)(a->A->data), *Bij = (Mat_SeqAIJ*)(a->A->data);
1065fa1c062SBarry Smith 
1075fa1c062SBarry Smith   PetscFunctionBegin;
1081472f72bSBarry Smith   Aij->inode.use = PETSC_FALSE;
1091472f72bSBarry Smith   Bij->inode.use = PETSC_FALSE;
1104723c594SBarry Smith   ierr = MatAssemblyEnd_MPIAIJ(A,mode);CHKERRQ(ierr);
1114723c594SBarry Smith   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1125fa1c062SBarry Smith 
1131472f72bSBarry Smith   /* Now calculate the permutation and grouping information. */
1145a11e1b2SBarry Smith   ierr = MPIAIJCRL_create_aijcrl(A);CHKERRQ(ierr);
1155fa1c062SBarry Smith   PetscFunctionReturn(0);
1165fa1c062SBarry Smith }
1175fa1c062SBarry Smith 
1185a11e1b2SBarry Smith extern PetscErrorCode MatMult_AIJCRL(Mat,Vec,Vec);
1195a11e1b2SBarry Smith extern PetscErrorCode MatDuplicate_AIJCRL(Mat,MatDuplicateOption,Mat*);
1205fa1c062SBarry Smith 
1215a11e1b2SBarry Smith /* MatConvert_MPIAIJ_MPIAIJCRL converts a MPIAIJ matrix into a
1225a11e1b2SBarry Smith  * MPIAIJCRL matrix.  This routine is called by the MatCreate_MPIAIJCRL()
1231472f72bSBarry Smith  * routine, but can also be used to convert an assembled MPIAIJ matrix
1245a11e1b2SBarry Smith  * into a MPIAIJCRL one. */
1255fa1c062SBarry Smith EXTERN_C_BEGIN
1265fa1c062SBarry Smith #undef __FUNCT__
1275a11e1b2SBarry Smith #define __FUNCT__ "MatConvert_MPIAIJ_MPIAIJCRL"
1287087cfbeSBarry Smith PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat A,const MatType type,MatReuse reuse,Mat *newmat)
1295fa1c062SBarry Smith {
1305fa1c062SBarry Smith   PetscErrorCode ierr;
1315fa1c062SBarry Smith   Mat            B = *newmat;
1325a11e1b2SBarry Smith   Mat_AIJCRL     *aijcrl;
1335fa1c062SBarry Smith 
1345fa1c062SBarry Smith   PetscFunctionBegin;
1355fa1c062SBarry Smith   if (reuse == MAT_INITIAL_MATRIX) {
1365fa1c062SBarry Smith     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
1375fa1c062SBarry Smith   }
1385fa1c062SBarry Smith 
1395a11e1b2SBarry Smith   ierr = PetscNewLog(B,Mat_AIJCRL,&aijcrl);CHKERRQ(ierr);
1405a11e1b2SBarry Smith   B->spptr = (void *) aijcrl;
1415fa1c062SBarry Smith 
14217667f90SBarry Smith   /* Set function pointers for methods that we inherit from AIJ but override. */
1435a11e1b2SBarry Smith   B->ops->duplicate   = MatDuplicate_AIJCRL;
1445a11e1b2SBarry Smith   B->ops->assemblyend = MatAssemblyEnd_MPIAIJCRL;
1455a11e1b2SBarry Smith   B->ops->destroy     = MatDestroy_MPIAIJCRL;
1465a11e1b2SBarry Smith   B->ops->mult        = MatMult_AIJCRL;
1475fa1c062SBarry Smith 
1485fa1c062SBarry Smith   /* If A has already been assembled, compute the permutation. */
1494eeb8337SBarry Smith   if (A->assembled) {
1505a11e1b2SBarry Smith     ierr = MPIAIJCRL_create_aijcrl(B);CHKERRQ(ierr);
1515fa1c062SBarry Smith   }
1525a11e1b2SBarry Smith   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJCRL);CHKERRQ(ierr);
1535fa1c062SBarry Smith   *newmat = B;
1545fa1c062SBarry Smith   PetscFunctionReturn(0);
1555fa1c062SBarry Smith }
1565fa1c062SBarry Smith EXTERN_C_END
1575fa1c062SBarry Smith 
1585fa1c062SBarry Smith 
1595fa1c062SBarry Smith #undef __FUNCT__
1605a11e1b2SBarry Smith #define __FUNCT__ "MatCreateMPIAIJCRL"
1615fa1c062SBarry Smith /*@C
1625a11e1b2SBarry Smith    MatCreateMPIAIJCRL - Creates a sparse matrix of type MPIAIJCRL.
1635fa1c062SBarry Smith    This type inherits from AIJ, but stores some additional
1645fa1c062SBarry Smith    information that is used to allow better vectorization of
1655fa1c062SBarry Smith    the matrix-vector product. At the cost of increased storage, the AIJ formatted
1665fa1c062SBarry Smith    matrix can be copied to a format in which pieces of the matrix are
1675fa1c062SBarry Smith    stored in ELLPACK format, allowing the vectorized matrix multiply
1685fa1c062SBarry Smith    routine to use stride-1 memory accesses.  As with the AIJ type, it is
1695fa1c062SBarry Smith    important to preallocate matrix storage in order to get good assembly
1705fa1c062SBarry Smith    performance.
1715fa1c062SBarry Smith 
1725fa1c062SBarry Smith    Collective on MPI_Comm
1735fa1c062SBarry Smith 
1745fa1c062SBarry Smith    Input Parameters:
1755fa1c062SBarry Smith +  comm - MPI communicator, set to PETSC_COMM_SELF
1765fa1c062SBarry Smith .  m - number of rows
1775fa1c062SBarry Smith .  n - number of columns
1785fa1c062SBarry Smith .  nz - number of nonzeros per row (same for all rows)
1795fa1c062SBarry Smith -  nnz - array containing the number of nonzeros in the various rows
1805fa1c062SBarry Smith          (possibly different for each row) or PETSC_NULL
1815fa1c062SBarry Smith 
1825fa1c062SBarry Smith    Output Parameter:
1835fa1c062SBarry Smith .  A - the matrix
1845fa1c062SBarry Smith 
1855fa1c062SBarry Smith    Notes:
1865fa1c062SBarry Smith    If nnz is given then nz is ignored
1875fa1c062SBarry Smith 
1885fa1c062SBarry Smith    Level: intermediate
1895fa1c062SBarry Smith 
1905fa1c062SBarry Smith .keywords: matrix, cray, sparse, parallel
1915fa1c062SBarry Smith 
1925a11e1b2SBarry Smith .seealso: MatCreate(), MatCreateMPIAIJPERM(), MatSetValues()
1935fa1c062SBarry Smith @*/
1947087cfbeSBarry Smith PetscErrorCode  MatCreateMPIAIJCRL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],PetscInt onz,const PetscInt onnz[],Mat *A)
1955fa1c062SBarry Smith {
1965fa1c062SBarry Smith   PetscErrorCode ierr;
1975fa1c062SBarry Smith 
1985fa1c062SBarry Smith   PetscFunctionBegin;
1995fa1c062SBarry Smith   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2005fa1c062SBarry Smith   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
2015a11e1b2SBarry Smith   ierr = MatSetType(*A,MATMPIAIJCRL);CHKERRQ(ierr);
2021472f72bSBarry Smith   ierr = MatMPIAIJSetPreallocation_MPIAIJ(*A,nz,(PetscInt*)nnz,onz,(PetscInt*)onnz);CHKERRQ(ierr);
2035fa1c062SBarry Smith   PetscFunctionReturn(0);
2045fa1c062SBarry Smith }
2055fa1c062SBarry Smith 
2065fa1c062SBarry Smith 
2075fa1c062SBarry Smith EXTERN_C_BEGIN
2085fa1c062SBarry Smith #undef __FUNCT__
2095a11e1b2SBarry Smith #define __FUNCT__ "MatCreate_MPIAIJCRL"
2107087cfbeSBarry Smith PetscErrorCode  MatCreate_MPIAIJCRL(Mat A)
2115fa1c062SBarry Smith {
2125fa1c062SBarry Smith   PetscErrorCode ierr;
2135fa1c062SBarry Smith 
2145fa1c062SBarry Smith   PetscFunctionBegin;
2151472f72bSBarry Smith   ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
2165a11e1b2SBarry Smith   ierr = MatConvert_MPIAIJ_MPIAIJCRL(A,MATMPIAIJCRL,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
2175fa1c062SBarry Smith   PetscFunctionReturn(0);
2185fa1c062SBarry Smith }
2195fa1c062SBarry Smith EXTERN_C_END
2205fa1c062SBarry Smith 
221