xref: /petsc/src/mat/impls/aij/seq/crl/crl.c (revision 4de1be5d4e8a7e05f57e7ce97d410e55b26b7781)
181824310SBarry Smith 
281824310SBarry Smith /*
381824310SBarry Smith   Defines a matrix-vector product for the MATSEQAIJCRL matrix class.
481824310SBarry Smith   This class is derived from the MATSEQAIJ class and retains the
581824310SBarry Smith   compressed row storage (aka Yale sparse matrix format) but augments
681824310SBarry Smith   it with a column oriented storage that is more efficient for
781824310SBarry Smith   matrix vector products on Vector machines.
881824310SBarry Smith 
981824310SBarry Smith   CRL stands for constant row length (that is the same number of columns
1081824310SBarry Smith   is kept (padded with zeros) for each row of the sparse matrix.
1181824310SBarry Smith */
12c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/crl/crl.h>
1381824310SBarry Smith 
1481824310SBarry Smith #undef __FUNCT__
155a11e1b2SBarry Smith #define __FUNCT__ "MatDestroy_SeqAIJCRL"
165a11e1b2SBarry Smith PetscErrorCode MatDestroy_SeqAIJCRL(Mat A)
1781824310SBarry Smith {
1881824310SBarry Smith   PetscErrorCode ierr;
195a11e1b2SBarry Smith   Mat_AIJCRL     *aijcrl = (Mat_AIJCRL *) A->spptr;
2081824310SBarry Smith 
215a11e1b2SBarry Smith   /* Free everything in the Mat_AIJCRL data structure. */
22bf0cc555SLisandro Dalcin   if (aijcrl) {
235a11e1b2SBarry Smith     ierr = PetscFree2(aijcrl->acols,aijcrl->icols);CHKERRQ(ierr);
24bf0cc555SLisandro Dalcin   }
25c31cb41cSBarry Smith   ierr = PetscFree(A->spptr);CHKERRQ(ierr);
2681824310SBarry Smith   ierr = PetscObjectChangeTypeName( (PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
274723c594SBarry Smith   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
2881824310SBarry Smith   PetscFunctionReturn(0);
2981824310SBarry Smith }
3081824310SBarry Smith 
315a11e1b2SBarry Smith PetscErrorCode MatDuplicate_AIJCRL(Mat A, MatDuplicateOption op, Mat *M)
3281824310SBarry Smith {
3381824310SBarry Smith   PetscFunctionBegin;
345a11e1b2SBarry Smith   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot duplicate AIJCRL matrices yet");
3581824310SBarry Smith   PetscFunctionReturn(0);
3681824310SBarry Smith }
3781824310SBarry Smith 
3881824310SBarry Smith #undef __FUNCT__
395a11e1b2SBarry Smith #define __FUNCT__ "SeqAIJCRL_create_aijcrl"
405a11e1b2SBarry Smith PetscErrorCode SeqAIJCRL_create_aijcrl(Mat A)
4181824310SBarry Smith {
4281824310SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)(A)->data;
435a11e1b2SBarry Smith   Mat_AIJCRL     *aijcrl = (Mat_AIJCRL*) A->spptr;
44d0f46423SBarry Smith   PetscInt       m = A->rmap->n;  /* Number of rows in the matrix. */
4581824310SBarry Smith   PetscInt       *aj = a->j;  /* From the CSR representation; points to the beginning  of each row. */
4681824310SBarry Smith   PetscInt       i, j,rmax = a->rmax,*icols, *ilen = a->ilen;
47dd6ea824SBarry Smith   MatScalar      *aa = a->a;
48dd6ea824SBarry Smith   PetscScalar    *acols;
4981824310SBarry Smith   PetscErrorCode ierr;
5081824310SBarry Smith 
5181824310SBarry Smith   PetscFunctionBegin;
525a11e1b2SBarry Smith   aijcrl->nz   = a->nz;
535a11e1b2SBarry Smith   aijcrl->m    = A->rmap->n;
545a11e1b2SBarry Smith   aijcrl->rmax = rmax;
555a11e1b2SBarry Smith   ierr = PetscFree2(aijcrl->acols,aijcrl->icols);CHKERRQ(ierr);
565a11e1b2SBarry Smith   ierr = PetscMalloc2(rmax*m,PetscScalar,&aijcrl->acols,rmax*m,PetscInt,&aijcrl->icols);CHKERRQ(ierr);
575a11e1b2SBarry Smith   acols = aijcrl->acols;
585a11e1b2SBarry Smith   icols = aijcrl->icols;
5981824310SBarry Smith   for (i=0; i<m; i++) {
6081824310SBarry Smith     for (j=0; j<ilen[i]; j++) {
6181824310SBarry Smith       acols[j*m+i] = *aa++;
6281824310SBarry Smith       icols[j*m+i] = *aj++;
6381824310SBarry Smith     }
6481824310SBarry Smith     for (;j<rmax; j++) { /* empty column entries */
6581824310SBarry Smith       acols[j*m+i] = 0.0;
6681824310SBarry Smith       icols[j*m+i] = (j) ? icols[(j-1)*m+i] : 0;  /* handle case where row is EMPTY */
6781824310SBarry Smith     }
6881824310SBarry Smith   }
69*4de1be5dSLisandro Dalcin   ierr = PetscInfo2(A,"Percentage of 0's introduced for vectorized multiply %g. Rmax= %D\n",1.0-((double)a->nz)/((double)(rmax*m)),rmax);
7081824310SBarry Smith   PetscFunctionReturn(0);
7181824310SBarry Smith }
7281824310SBarry Smith 
734723c594SBarry Smith extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
744723c594SBarry Smith 
7581824310SBarry Smith #undef __FUNCT__
765a11e1b2SBarry Smith #define __FUNCT__ "MatAssemblyEnd_SeqAIJCRL"
775a11e1b2SBarry Smith PetscErrorCode MatAssemblyEnd_SeqAIJCRL(Mat A, MatAssemblyType mode)
7881824310SBarry Smith {
7981824310SBarry Smith   PetscErrorCode ierr;
8081824310SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
8181824310SBarry Smith 
82c02b24c5SBarry Smith   PetscFunctionBegin;
8381824310SBarry Smith   a->inode.use = PETSC_FALSE;
844723c594SBarry Smith   ierr = MatAssemblyEnd_SeqAIJ(A,mode);CHKERRQ(ierr);
854723c594SBarry Smith   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
8681824310SBarry Smith 
8781824310SBarry Smith   /* Now calculate the permutation and grouping information. */
885a11e1b2SBarry Smith   ierr = SeqAIJCRL_create_aijcrl(A);CHKERRQ(ierr);
8981824310SBarry Smith   PetscFunctionReturn(0);
9081824310SBarry Smith }
9181824310SBarry Smith 
92c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/crl/ftn-kernels/fmultcrl.h>
935aeacdfdSBarry Smith 
9481824310SBarry Smith #undef __FUNCT__
955a11e1b2SBarry Smith #define __FUNCT__ "MatMult_AIJCRL"
964723c594SBarry Smith /*
975a11e1b2SBarry Smith     Shared by both sequential and parallel versions of CRL matrix: MATMPIAIJCRL and MATSEQAIJCRL
984723c594SBarry Smith     - the scatter is used only in the parallel version
994723c594SBarry Smith 
1004723c594SBarry Smith */
1015a11e1b2SBarry Smith PetscErrorCode MatMult_AIJCRL(Mat A,Vec xx,Vec yy)
10281824310SBarry Smith {
1035a11e1b2SBarry Smith   Mat_AIJCRL     *aijcrl = (Mat_AIJCRL*) A->spptr;
1045a11e1b2SBarry Smith   PetscInt       m = aijcrl->m;  /* Number of rows in the matrix. */
1055a11e1b2SBarry Smith   PetscInt       rmax = aijcrl->rmax,*icols = aijcrl->icols;
1065a11e1b2SBarry Smith   PetscScalar    *acols = aijcrl->acols;
10781824310SBarry Smith   PetscErrorCode ierr;
10881824310SBarry Smith   PetscScalar    *x,*y;
10981824310SBarry Smith #if !defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL)
11081824310SBarry Smith   PetscInt       i,j,ii;
11181824310SBarry Smith #endif
11281824310SBarry Smith 
11381824310SBarry Smith 
11481824310SBarry Smith #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
11581824310SBarry Smith #pragma disjoint(*x,*y,*aa)
11681824310SBarry Smith #endif
11781824310SBarry Smith 
11881824310SBarry Smith   PetscFunctionBegin;
1195a11e1b2SBarry Smith   if (aijcrl->xscat) {
1205a11e1b2SBarry Smith     ierr = VecCopy(xx,aijcrl->xwork);CHKERRQ(ierr);
121c02b24c5SBarry Smith     /* get remote values needed for local part of multiply */
1225a11e1b2SBarry Smith     ierr = VecScatterBegin(aijcrl->xscat,xx,aijcrl->fwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1235a11e1b2SBarry Smith     ierr = VecScatterEnd(aijcrl->xscat,xx,aijcrl->fwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1245a11e1b2SBarry Smith     xx = aijcrl->xwork;
125c02b24c5SBarry Smith   };
126c02b24c5SBarry Smith 
12781824310SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
12881824310SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
12981824310SBarry Smith 
13081824310SBarry Smith #if defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL)
13181824310SBarry Smith   fortranmultcrl_(&m,&rmax,x,y,icols,acols);
13281824310SBarry Smith #else
13381824310SBarry Smith 
1344be65460SBarry Smith   /* first column */
1354be65460SBarry Smith   for (j=0; j<m; j++) {
1364be65460SBarry Smith     y[j] = acols[j]*x[icols[j]];
1374be65460SBarry Smith   }
1384be65460SBarry Smith 
1394be65460SBarry Smith   /* other columns */
140b409243cSBarry Smith #if defined(PETSC_HAVE_CRAY_VECTOR)
14181824310SBarry Smith #pragma _CRI preferstream
14281824310SBarry Smith #endif
1434be65460SBarry Smith   for (i=1; i<rmax; i++) {
14481824310SBarry Smith     ii = i*m;
145b409243cSBarry Smith #if defined(PETSC_HAVE_CRAY_VECTOR)
14681824310SBarry Smith #pragma _CRI prefervector
14781824310SBarry Smith #endif
14881824310SBarry Smith     for (j=0; j<m; j++) {
14981824310SBarry Smith       y[j] = y[j] + acols[ii+j]*x[icols[ii+j]];
15081824310SBarry Smith     }
15181824310SBarry Smith   }
152b409243cSBarry Smith #if defined(PETSC_HAVE_CRAY_VECTOR)
15381824310SBarry Smith #pragma _CRI ivdep
15481824310SBarry Smith #endif
15581824310SBarry Smith 
15681824310SBarry Smith #endif
1575a11e1b2SBarry Smith   ierr = PetscLogFlops(2.0*aijcrl->nz - m);CHKERRQ(ierr);
15881824310SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
15981824310SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
16081824310SBarry Smith   PetscFunctionReturn(0);
16181824310SBarry Smith }
16281824310SBarry Smith 
16381824310SBarry Smith 
1645a11e1b2SBarry Smith /* MatConvert_SeqAIJ_SeqAIJCRL converts a SeqAIJ matrix into a
1655a11e1b2SBarry Smith  * SeqAIJCRL matrix.  This routine is called by the MatCreate_SeqAIJCRL()
16681824310SBarry Smith  * routine, but can also be used to convert an assembled SeqAIJ matrix
1675a11e1b2SBarry Smith  * into a SeqAIJCRL one. */
16881824310SBarry Smith EXTERN_C_BEGIN
16981824310SBarry Smith #undef __FUNCT__
1705a11e1b2SBarry Smith #define __FUNCT__ "MatConvert_SeqAIJ_SeqAIJCRL"
1717087cfbeSBarry Smith PetscErrorCode  MatConvert_SeqAIJ_SeqAIJCRL(Mat A,const MatType type,MatReuse reuse,Mat *newmat)
17281824310SBarry Smith {
17381824310SBarry Smith   PetscErrorCode ierr;
17481824310SBarry Smith   Mat            B = *newmat;
1755a11e1b2SBarry Smith   Mat_AIJCRL     *aijcrl;
17681824310SBarry Smith 
17781824310SBarry Smith   PetscFunctionBegin;
17881824310SBarry Smith   if (reuse == MAT_INITIAL_MATRIX) {
17981824310SBarry Smith     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
18081824310SBarry Smith   }
18181824310SBarry Smith 
1825a11e1b2SBarry Smith   ierr = PetscNewLog(B,Mat_AIJCRL,&aijcrl);CHKERRQ(ierr);
1835a11e1b2SBarry Smith   B->spptr = (void *) aijcrl;
18481824310SBarry Smith 
18517667f90SBarry Smith   /* Set function pointers for methods that we inherit from AIJ but override. */
1865a11e1b2SBarry Smith   B->ops->duplicate   = MatDuplicate_AIJCRL;
1875a11e1b2SBarry Smith   B->ops->assemblyend = MatAssemblyEnd_SeqAIJCRL;
1885a11e1b2SBarry Smith   B->ops->destroy     = MatDestroy_SeqAIJCRL;
1895a11e1b2SBarry Smith   B->ops->mult        = MatMult_AIJCRL;
19081824310SBarry Smith 
19181824310SBarry Smith   /* If A has already been assembled, compute the permutation. */
1924eeb8337SBarry Smith   if (A->assembled) {
1935a11e1b2SBarry Smith     ierr = SeqAIJCRL_create_aijcrl(B);CHKERRQ(ierr);
19481824310SBarry Smith   }
1955a11e1b2SBarry Smith   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJCRL);CHKERRQ(ierr);
19681824310SBarry Smith   *newmat = B;
19781824310SBarry Smith   PetscFunctionReturn(0);
19881824310SBarry Smith }
19981824310SBarry Smith EXTERN_C_END
20081824310SBarry Smith 
20181824310SBarry Smith 
20281824310SBarry Smith #undef __FUNCT__
2035a11e1b2SBarry Smith #define __FUNCT__ "MatCreateSeqAIJCRL"
20481824310SBarry Smith /*@C
2055a11e1b2SBarry Smith    MatCreateSeqAIJCRL - Creates a sparse matrix of type SEQAIJCRL.
20681824310SBarry Smith    This type inherits from AIJ, but stores some additional
20781824310SBarry Smith    information that is used to allow better vectorization of
20881824310SBarry Smith    the matrix-vector product. At the cost of increased storage, the AIJ formatted
20981824310SBarry Smith    matrix can be copied to a format in which pieces of the matrix are
21081824310SBarry Smith    stored in ELLPACK format, allowing the vectorized matrix multiply
21181824310SBarry Smith    routine to use stride-1 memory accesses.  As with the AIJ type, it is
21281824310SBarry Smith    important to preallocate matrix storage in order to get good assembly
21381824310SBarry Smith    performance.
21481824310SBarry Smith 
21581824310SBarry Smith    Collective on MPI_Comm
21681824310SBarry Smith 
21781824310SBarry Smith    Input Parameters:
21881824310SBarry Smith +  comm - MPI communicator, set to PETSC_COMM_SELF
21981824310SBarry Smith .  m - number of rows
22081824310SBarry Smith .  n - number of columns
22181824310SBarry Smith .  nz - number of nonzeros per row (same for all rows)
22281824310SBarry Smith -  nnz - array containing the number of nonzeros in the various rows
22381824310SBarry Smith          (possibly different for each row) or PETSC_NULL
22481824310SBarry Smith 
22581824310SBarry Smith    Output Parameter:
22681824310SBarry Smith .  A - the matrix
22781824310SBarry Smith 
22881824310SBarry Smith    Notes:
22981824310SBarry Smith    If nnz is given then nz is ignored
23081824310SBarry Smith 
23181824310SBarry Smith    Level: intermediate
23281824310SBarry Smith 
23381824310SBarry Smith .keywords: matrix, cray, sparse, parallel
23481824310SBarry Smith 
2355a11e1b2SBarry Smith .seealso: MatCreate(), MatCreateMPIAIJPERM(), MatSetValues()
23681824310SBarry Smith @*/
2377087cfbeSBarry Smith PetscErrorCode  MatCreateSeqAIJCRL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
23881824310SBarry Smith {
23981824310SBarry Smith   PetscErrorCode ierr;
24081824310SBarry Smith 
24181824310SBarry Smith   PetscFunctionBegin;
24281824310SBarry Smith   ierr = MatCreate(comm,A);CHKERRQ(ierr);
24381824310SBarry Smith   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
2445a11e1b2SBarry Smith   ierr = MatSetType(*A,MATSEQAIJCRL);CHKERRQ(ierr);
245d28bb7d2SJed Brown   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
24681824310SBarry Smith   PetscFunctionReturn(0);
24781824310SBarry Smith }
24881824310SBarry Smith 
24981824310SBarry Smith 
25081824310SBarry Smith EXTERN_C_BEGIN
25181824310SBarry Smith #undef __FUNCT__
2525a11e1b2SBarry Smith #define __FUNCT__ "MatCreate_SeqAIJCRL"
2537087cfbeSBarry Smith PetscErrorCode  MatCreate_SeqAIJCRL(Mat A)
25481824310SBarry Smith {
25581824310SBarry Smith   PetscErrorCode ierr;
25681824310SBarry Smith 
25781824310SBarry Smith   PetscFunctionBegin;
25881824310SBarry Smith   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
2595a11e1b2SBarry Smith   ierr = MatConvert_SeqAIJ_SeqAIJCRL(A,MATSEQAIJCRL,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
26081824310SBarry Smith   PetscFunctionReturn(0);
26181824310SBarry Smith }
26281824310SBarry Smith EXTERN_C_END
26381824310SBarry Smith 
264