xref: /petsc/src/mat/impls/aij/seq/crl/crl.c (revision c6db04a5321582041def2b1e244c75985478b3ef)
181824310SBarry Smith 
281824310SBarry Smith /*
381824310SBarry Smith   Defines a matrix-vector product for the MATSEQAIJCRL matrix class.
481824310SBarry Smith   This class is derived from the MATSEQAIJ class and retains the
581824310SBarry Smith   compressed row storage (aka Yale sparse matrix format) but augments
681824310SBarry Smith   it with a column oriented storage that is more efficient for
781824310SBarry Smith   matrix vector products on Vector machines.
881824310SBarry Smith 
981824310SBarry Smith   CRL stands for constant row length (that is the same number of columns
1081824310SBarry Smith   is kept (padded with zeros) for each row of the sparse matrix.
1181824310SBarry Smith */
12*c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/crl/crl.h>
1381824310SBarry Smith 
1481824310SBarry Smith #undef __FUNCT__
155a11e1b2SBarry Smith #define __FUNCT__ "MatDestroy_SeqAIJCRL"
165a11e1b2SBarry Smith PetscErrorCode MatDestroy_SeqAIJCRL(Mat A)
1781824310SBarry Smith {
1881824310SBarry Smith   PetscErrorCode ierr;
195a11e1b2SBarry Smith   Mat_AIJCRL     *aijcrl = (Mat_AIJCRL *) A->spptr;
2081824310SBarry Smith 
215a11e1b2SBarry Smith   /* Free everything in the Mat_AIJCRL data structure. */
225a11e1b2SBarry Smith   ierr = PetscFree2(aijcrl->acols,aijcrl->icols);CHKERRQ(ierr);
23c31cb41cSBarry Smith   ierr = PetscFree(A->spptr);CHKERRQ(ierr);
2405b42c5fSBarry Smith 
2581824310SBarry Smith   ierr = PetscObjectChangeTypeName( (PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
264723c594SBarry Smith   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
2781824310SBarry Smith   PetscFunctionReturn(0);
2881824310SBarry Smith }
2981824310SBarry Smith 
305a11e1b2SBarry Smith PetscErrorCode MatDuplicate_AIJCRL(Mat A, MatDuplicateOption op, Mat *M)
3181824310SBarry Smith {
3281824310SBarry Smith   PetscFunctionBegin;
335a11e1b2SBarry Smith   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot duplicate AIJCRL matrices yet");
3481824310SBarry Smith   PetscFunctionReturn(0);
3581824310SBarry Smith }
3681824310SBarry Smith 
3781824310SBarry Smith #undef __FUNCT__
385a11e1b2SBarry Smith #define __FUNCT__ "SeqAIJCRL_create_aijcrl"
395a11e1b2SBarry Smith PetscErrorCode SeqAIJCRL_create_aijcrl(Mat A)
4081824310SBarry Smith {
4181824310SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)(A)->data;
425a11e1b2SBarry Smith   Mat_AIJCRL     *aijcrl = (Mat_AIJCRL*) A->spptr;
43d0f46423SBarry Smith   PetscInt       m = A->rmap->n;  /* Number of rows in the matrix. */
4481824310SBarry Smith   PetscInt       *aj = a->j;  /* From the CSR representation; points to the beginning  of each row. */
4581824310SBarry Smith   PetscInt       i, j,rmax = a->rmax,*icols, *ilen = a->ilen;
46dd6ea824SBarry Smith   MatScalar      *aa = a->a;
47dd6ea824SBarry Smith   PetscScalar    *acols;
4881824310SBarry Smith   PetscErrorCode ierr;
4981824310SBarry Smith 
5081824310SBarry Smith   PetscFunctionBegin;
515a11e1b2SBarry Smith   aijcrl->nz   = a->nz;
525a11e1b2SBarry Smith   aijcrl->m    = A->rmap->n;
535a11e1b2SBarry Smith   aijcrl->rmax = rmax;
545a11e1b2SBarry Smith   ierr = PetscFree2(aijcrl->acols,aijcrl->icols);CHKERRQ(ierr);
555a11e1b2SBarry Smith   ierr = PetscMalloc2(rmax*m,PetscScalar,&aijcrl->acols,rmax*m,PetscInt,&aijcrl->icols);CHKERRQ(ierr);
565a11e1b2SBarry Smith   acols = aijcrl->acols;
575a11e1b2SBarry Smith   icols = aijcrl->icols;
5881824310SBarry Smith   for (i=0; i<m; i++) {
5981824310SBarry Smith     for (j=0; j<ilen[i]; j++) {
6081824310SBarry Smith       acols[j*m+i] = *aa++;
6181824310SBarry Smith       icols[j*m+i] = *aj++;
6281824310SBarry Smith     }
6381824310SBarry Smith     for (;j<rmax; j++) { /* empty column entries */
6481824310SBarry Smith       acols[j*m+i] = 0.0;
6581824310SBarry Smith       icols[j*m+i] = (j) ? icols[(j-1)*m+i] : 0;  /* handle case where row is EMPTY */
6681824310SBarry Smith     }
6781824310SBarry Smith   }
68ae15b995SBarry Smith   ierr = PetscInfo2(A,"Percentage of 0's introduced for vectorized multiply %G. Rmax= %D\n",1.0-((double)a->nz)/((double)(rmax*m)),rmax);
6981824310SBarry Smith   PetscFunctionReturn(0);
7081824310SBarry Smith }
7181824310SBarry Smith 
724723c594SBarry Smith extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
734723c594SBarry Smith 
7481824310SBarry Smith #undef __FUNCT__
755a11e1b2SBarry Smith #define __FUNCT__ "MatAssemblyEnd_SeqAIJCRL"
765a11e1b2SBarry Smith PetscErrorCode MatAssemblyEnd_SeqAIJCRL(Mat A, MatAssemblyType mode)
7781824310SBarry Smith {
7881824310SBarry Smith   PetscErrorCode ierr;
7981824310SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
8081824310SBarry Smith 
81c02b24c5SBarry Smith   PetscFunctionBegin;
8281824310SBarry Smith   a->inode.use = PETSC_FALSE;
834723c594SBarry Smith   ierr = MatAssemblyEnd_SeqAIJ(A,mode);CHKERRQ(ierr);
844723c594SBarry Smith   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
8581824310SBarry Smith 
8681824310SBarry Smith   /* Now calculate the permutation and grouping information. */
875a11e1b2SBarry Smith   ierr = SeqAIJCRL_create_aijcrl(A);CHKERRQ(ierr);
8881824310SBarry Smith   PetscFunctionReturn(0);
8981824310SBarry Smith }
9081824310SBarry Smith 
91*c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/crl/ftn-kernels/fmultcrl.h>
925aeacdfdSBarry Smith 
9381824310SBarry Smith #undef __FUNCT__
945a11e1b2SBarry Smith #define __FUNCT__ "MatMult_AIJCRL"
954723c594SBarry Smith /*
965a11e1b2SBarry Smith     Shared by both sequential and parallel versions of CRL matrix: MATMPIAIJCRL and MATSEQAIJCRL
974723c594SBarry Smith     - the scatter is used only in the parallel version
984723c594SBarry Smith 
994723c594SBarry Smith */
1005a11e1b2SBarry Smith PetscErrorCode MatMult_AIJCRL(Mat A,Vec xx,Vec yy)
10181824310SBarry Smith {
1025a11e1b2SBarry Smith   Mat_AIJCRL     *aijcrl = (Mat_AIJCRL*) A->spptr;
1035a11e1b2SBarry Smith   PetscInt       m = aijcrl->m;  /* Number of rows in the matrix. */
1045a11e1b2SBarry Smith   PetscInt       rmax = aijcrl->rmax,*icols = aijcrl->icols;
1055a11e1b2SBarry Smith   PetscScalar    *acols = aijcrl->acols;
10681824310SBarry Smith   PetscErrorCode ierr;
10781824310SBarry Smith   PetscScalar    *x,*y;
10881824310SBarry Smith #if !defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL)
10981824310SBarry Smith   PetscInt       i,j,ii;
11081824310SBarry Smith #endif
11181824310SBarry Smith 
11281824310SBarry Smith 
11381824310SBarry Smith #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
11481824310SBarry Smith #pragma disjoint(*x,*y,*aa)
11581824310SBarry Smith #endif
11681824310SBarry Smith 
11781824310SBarry Smith   PetscFunctionBegin;
1185a11e1b2SBarry Smith   if (aijcrl->xscat) {
1195a11e1b2SBarry Smith     ierr = VecCopy(xx,aijcrl->xwork);CHKERRQ(ierr);
120c02b24c5SBarry Smith     /* get remote values needed for local part of multiply */
1215a11e1b2SBarry Smith     ierr = VecScatterBegin(aijcrl->xscat,xx,aijcrl->fwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1225a11e1b2SBarry Smith     ierr = VecScatterEnd(aijcrl->xscat,xx,aijcrl->fwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1235a11e1b2SBarry Smith     xx = aijcrl->xwork;
124c02b24c5SBarry Smith   };
125c02b24c5SBarry Smith 
12681824310SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
12781824310SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
12881824310SBarry Smith 
12981824310SBarry Smith #if defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL)
13081824310SBarry Smith   fortranmultcrl_(&m,&rmax,x,y,icols,acols);
13181824310SBarry Smith #else
13281824310SBarry Smith 
1334be65460SBarry Smith   /* first column */
1344be65460SBarry Smith   for (j=0; j<m; j++) {
1354be65460SBarry Smith     y[j] = acols[j]*x[icols[j]];
1364be65460SBarry Smith   }
1374be65460SBarry Smith 
1384be65460SBarry Smith   /* other columns */
139b409243cSBarry Smith #if defined(PETSC_HAVE_CRAY_VECTOR)
14081824310SBarry Smith #pragma _CRI preferstream
14181824310SBarry Smith #endif
1424be65460SBarry Smith   for (i=1; i<rmax; i++) {
14381824310SBarry Smith     ii = i*m;
144b409243cSBarry Smith #if defined(PETSC_HAVE_CRAY_VECTOR)
14581824310SBarry Smith #pragma _CRI prefervector
14681824310SBarry Smith #endif
14781824310SBarry Smith     for (j=0; j<m; j++) {
14881824310SBarry Smith       y[j] = y[j] + acols[ii+j]*x[icols[ii+j]];
14981824310SBarry Smith     }
15081824310SBarry Smith   }
151b409243cSBarry Smith #if defined(PETSC_HAVE_CRAY_VECTOR)
15281824310SBarry Smith #pragma _CRI ivdep
15381824310SBarry Smith #endif
15481824310SBarry Smith 
15581824310SBarry Smith #endif
1565a11e1b2SBarry Smith   ierr = PetscLogFlops(2.0*aijcrl->nz - m);CHKERRQ(ierr);
15781824310SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
15881824310SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
15981824310SBarry Smith   PetscFunctionReturn(0);
16081824310SBarry Smith }
16181824310SBarry Smith 
16281824310SBarry Smith 
1635a11e1b2SBarry Smith /* MatConvert_SeqAIJ_SeqAIJCRL converts a SeqAIJ matrix into a
1645a11e1b2SBarry Smith  * SeqAIJCRL matrix.  This routine is called by the MatCreate_SeqAIJCRL()
16581824310SBarry Smith  * routine, but can also be used to convert an assembled SeqAIJ matrix
1665a11e1b2SBarry Smith  * into a SeqAIJCRL one. */
16781824310SBarry Smith EXTERN_C_BEGIN
16881824310SBarry Smith #undef __FUNCT__
1695a11e1b2SBarry Smith #define __FUNCT__ "MatConvert_SeqAIJ_SeqAIJCRL"
1707087cfbeSBarry Smith PetscErrorCode  MatConvert_SeqAIJ_SeqAIJCRL(Mat A,const MatType type,MatReuse reuse,Mat *newmat)
17181824310SBarry Smith {
17281824310SBarry Smith   PetscErrorCode ierr;
17381824310SBarry Smith   Mat            B = *newmat;
1745a11e1b2SBarry Smith   Mat_AIJCRL     *aijcrl;
17581824310SBarry Smith 
17681824310SBarry Smith   PetscFunctionBegin;
17781824310SBarry Smith   if (reuse == MAT_INITIAL_MATRIX) {
17881824310SBarry Smith     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
17981824310SBarry Smith   }
18081824310SBarry Smith 
1815a11e1b2SBarry Smith   ierr = PetscNewLog(B,Mat_AIJCRL,&aijcrl);CHKERRQ(ierr);
1825a11e1b2SBarry Smith   B->spptr = (void *) aijcrl;
18381824310SBarry Smith 
18417667f90SBarry Smith   /* Set function pointers for methods that we inherit from AIJ but override. */
1855a11e1b2SBarry Smith   B->ops->duplicate   = MatDuplicate_AIJCRL;
1865a11e1b2SBarry Smith   B->ops->assemblyend = MatAssemblyEnd_SeqAIJCRL;
1875a11e1b2SBarry Smith   B->ops->destroy     = MatDestroy_SeqAIJCRL;
1885a11e1b2SBarry Smith   B->ops->mult        = MatMult_AIJCRL;
18981824310SBarry Smith 
19081824310SBarry Smith   /* If A has already been assembled, compute the permutation. */
1914eeb8337SBarry Smith   if (A->assembled) {
1925a11e1b2SBarry Smith     ierr = SeqAIJCRL_create_aijcrl(B);CHKERRQ(ierr);
19381824310SBarry Smith   }
1945a11e1b2SBarry Smith   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJCRL);CHKERRQ(ierr);
19581824310SBarry Smith   *newmat = B;
19681824310SBarry Smith   PetscFunctionReturn(0);
19781824310SBarry Smith }
19881824310SBarry Smith EXTERN_C_END
19981824310SBarry Smith 
20081824310SBarry Smith 
20181824310SBarry Smith #undef __FUNCT__
2025a11e1b2SBarry Smith #define __FUNCT__ "MatCreateSeqAIJCRL"
20381824310SBarry Smith /*@C
2045a11e1b2SBarry Smith    MatCreateSeqAIJCRL - Creates a sparse matrix of type SEQAIJCRL.
20581824310SBarry Smith    This type inherits from AIJ, but stores some additional
20681824310SBarry Smith    information that is used to allow better vectorization of
20781824310SBarry Smith    the matrix-vector product. At the cost of increased storage, the AIJ formatted
20881824310SBarry Smith    matrix can be copied to a format in which pieces of the matrix are
20981824310SBarry Smith    stored in ELLPACK format, allowing the vectorized matrix multiply
21081824310SBarry Smith    routine to use stride-1 memory accesses.  As with the AIJ type, it is
21181824310SBarry Smith    important to preallocate matrix storage in order to get good assembly
21281824310SBarry Smith    performance.
21381824310SBarry Smith 
21481824310SBarry Smith    Collective on MPI_Comm
21581824310SBarry Smith 
21681824310SBarry Smith    Input Parameters:
21781824310SBarry Smith +  comm - MPI communicator, set to PETSC_COMM_SELF
21881824310SBarry Smith .  m - number of rows
21981824310SBarry Smith .  n - number of columns
22081824310SBarry Smith .  nz - number of nonzeros per row (same for all rows)
22181824310SBarry Smith -  nnz - array containing the number of nonzeros in the various rows
22281824310SBarry Smith          (possibly different for each row) or PETSC_NULL
22381824310SBarry Smith 
22481824310SBarry Smith    Output Parameter:
22581824310SBarry Smith .  A - the matrix
22681824310SBarry Smith 
22781824310SBarry Smith    Notes:
22881824310SBarry Smith    If nnz is given then nz is ignored
22981824310SBarry Smith 
23081824310SBarry Smith    Level: intermediate
23181824310SBarry Smith 
23281824310SBarry Smith .keywords: matrix, cray, sparse, parallel
23381824310SBarry Smith 
2345a11e1b2SBarry Smith .seealso: MatCreate(), MatCreateMPIAIJPERM(), MatSetValues()
23581824310SBarry Smith @*/
2367087cfbeSBarry Smith PetscErrorCode  MatCreateSeqAIJCRL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
23781824310SBarry Smith {
23881824310SBarry Smith   PetscErrorCode ierr;
23981824310SBarry Smith 
24081824310SBarry Smith   PetscFunctionBegin;
24181824310SBarry Smith   ierr = MatCreate(comm,A);CHKERRQ(ierr);
24281824310SBarry Smith   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
2435a11e1b2SBarry Smith   ierr = MatSetType(*A,MATSEQAIJCRL);CHKERRQ(ierr);
244d28bb7d2SJed Brown   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
24581824310SBarry Smith   PetscFunctionReturn(0);
24681824310SBarry Smith }
24781824310SBarry Smith 
24881824310SBarry Smith 
24981824310SBarry Smith EXTERN_C_BEGIN
25081824310SBarry Smith #undef __FUNCT__
2515a11e1b2SBarry Smith #define __FUNCT__ "MatCreate_SeqAIJCRL"
2527087cfbeSBarry Smith PetscErrorCode  MatCreate_SeqAIJCRL(Mat A)
25381824310SBarry Smith {
25481824310SBarry Smith   PetscErrorCode ierr;
25581824310SBarry Smith 
25681824310SBarry Smith   PetscFunctionBegin;
25781824310SBarry Smith   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
2585a11e1b2SBarry Smith   ierr = MatConvert_SeqAIJ_SeqAIJCRL(A,MATSEQAIJCRL,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
25981824310SBarry Smith   PetscFunctionReturn(0);
26081824310SBarry Smith }
26181824310SBarry Smith EXTERN_C_END
26281824310SBarry Smith 
263