xref: /petsc/src/mat/impls/aij/seq/crl/crl.c (revision b409243cf0294ec25802c2c4bc7303c5e78df9e5)
181824310SBarry Smith #define PETSCMAT_DLL
281824310SBarry Smith 
381824310SBarry Smith /*
481824310SBarry Smith   Defines a matrix-vector product for the MATSEQAIJCRL matrix class.
581824310SBarry Smith   This class is derived from the MATSEQAIJ class and retains the
681824310SBarry Smith   compressed row storage (aka Yale sparse matrix format) but augments
781824310SBarry Smith   it with a column oriented storage that is more efficient for
881824310SBarry Smith   matrix vector products on Vector machines.
981824310SBarry Smith 
1081824310SBarry Smith   CRL stands for constant row length (that is the same number of columns
1181824310SBarry Smith   is kept (padded with zeros) for each row of the sparse matrix.
1281824310SBarry Smith */
137c4f633dSBarry Smith #include "../src/mat/impls/aij/seq/crl/crl.h"
1481824310SBarry Smith 
1581824310SBarry Smith #undef __FUNCT__
165a11e1b2SBarry Smith #define __FUNCT__ "MatDestroy_SeqAIJCRL"
175a11e1b2SBarry Smith PetscErrorCode MatDestroy_SeqAIJCRL(Mat A)
1881824310SBarry Smith {
1981824310SBarry Smith   PetscErrorCode ierr;
205a11e1b2SBarry Smith   Mat_AIJCRL     *aijcrl = (Mat_AIJCRL *) A->spptr;
2181824310SBarry Smith 
225a11e1b2SBarry Smith   /* Free everything in the Mat_AIJCRL data structure. */
235a11e1b2SBarry Smith   ierr = PetscFree2(aijcrl->acols,aijcrl->icols);CHKERRQ(ierr);
245a11e1b2SBarry Smith   ierr = PetscFree(aijcrl);CHKERRQ(ierr);
256397e4c7SLisandro Dalcin   A->spptr = 0;
2605b42c5fSBarry Smith 
2781824310SBarry Smith   ierr = PetscObjectChangeTypeName( (PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
284723c594SBarry Smith   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
2981824310SBarry Smith   PetscFunctionReturn(0);
3081824310SBarry Smith }
3181824310SBarry Smith 
325a11e1b2SBarry Smith PetscErrorCode MatDuplicate_AIJCRL(Mat A, MatDuplicateOption op, Mat *M)
3381824310SBarry Smith {
3481824310SBarry Smith   PetscFunctionBegin;
355a11e1b2SBarry Smith   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot duplicate AIJCRL matrices yet");
3681824310SBarry Smith   PetscFunctionReturn(0);
3781824310SBarry Smith }
3881824310SBarry Smith 
3981824310SBarry Smith #undef __FUNCT__
405a11e1b2SBarry Smith #define __FUNCT__ "SeqAIJCRL_create_aijcrl"
415a11e1b2SBarry Smith PetscErrorCode SeqAIJCRL_create_aijcrl(Mat A)
4281824310SBarry Smith {
4381824310SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)(A)->data;
445a11e1b2SBarry Smith   Mat_AIJCRL     *aijcrl = (Mat_AIJCRL*) A->spptr;
45d0f46423SBarry Smith   PetscInt       m = A->rmap->n;  /* Number of rows in the matrix. */
4681824310SBarry Smith   PetscInt       *aj = a->j;  /* From the CSR representation; points to the beginning  of each row. */
4781824310SBarry Smith   PetscInt       i, j,rmax = a->rmax,*icols, *ilen = a->ilen;
48dd6ea824SBarry Smith   MatScalar      *aa = a->a;
49dd6ea824SBarry Smith   PetscScalar    *acols;
5081824310SBarry Smith   PetscErrorCode ierr;
5181824310SBarry Smith 
5281824310SBarry Smith   PetscFunctionBegin;
535a11e1b2SBarry Smith   aijcrl->nz   = a->nz;
545a11e1b2SBarry Smith   aijcrl->m    = A->rmap->n;
555a11e1b2SBarry Smith   aijcrl->rmax = rmax;
565a11e1b2SBarry Smith   ierr = PetscFree2(aijcrl->acols,aijcrl->icols);CHKERRQ(ierr);
575a11e1b2SBarry Smith   ierr = PetscMalloc2(rmax*m,PetscScalar,&aijcrl->acols,rmax*m,PetscInt,&aijcrl->icols);CHKERRQ(ierr);
585a11e1b2SBarry Smith   acols = aijcrl->acols;
595a11e1b2SBarry Smith   icols = aijcrl->icols;
6081824310SBarry Smith   for (i=0; i<m; i++) {
6181824310SBarry Smith     for (j=0; j<ilen[i]; j++) {
6281824310SBarry Smith       acols[j*m+i] = *aa++;
6381824310SBarry Smith       icols[j*m+i] = *aj++;
6481824310SBarry Smith     }
6581824310SBarry Smith     for (;j<rmax; j++) { /* empty column entries */
6681824310SBarry Smith       acols[j*m+i] = 0.0;
6781824310SBarry Smith       icols[j*m+i] = (j) ? icols[(j-1)*m+i] : 0;  /* handle case where row is EMPTY */
6881824310SBarry Smith     }
6981824310SBarry Smith   }
70ae15b995SBarry Smith   ierr = PetscInfo2(A,"Percentage of 0's introduced for vectorized multiply %G. Rmax= %D\n",1.0-((double)a->nz)/((double)(rmax*m)),rmax);
7181824310SBarry Smith   PetscFunctionReturn(0);
7281824310SBarry Smith }
7381824310SBarry Smith 
744723c594SBarry Smith extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
754723c594SBarry Smith 
7681824310SBarry Smith #undef __FUNCT__
775a11e1b2SBarry Smith #define __FUNCT__ "MatAssemblyEnd_SeqAIJCRL"
785a11e1b2SBarry Smith PetscErrorCode MatAssemblyEnd_SeqAIJCRL(Mat A, MatAssemblyType mode)
7981824310SBarry Smith {
8081824310SBarry Smith   PetscErrorCode ierr;
8181824310SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
8281824310SBarry Smith 
83c02b24c5SBarry Smith   PetscFunctionBegin;
8481824310SBarry Smith   a->inode.use = PETSC_FALSE;
854723c594SBarry Smith   ierr = MatAssemblyEnd_SeqAIJ(A,mode);CHKERRQ(ierr);
864723c594SBarry Smith   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
8781824310SBarry Smith 
8881824310SBarry Smith   /* Now calculate the permutation and grouping information. */
895a11e1b2SBarry Smith   ierr = SeqAIJCRL_create_aijcrl(A);CHKERRQ(ierr);
9081824310SBarry Smith   PetscFunctionReturn(0);
9181824310SBarry Smith }
9281824310SBarry Smith 
93a4005a5dSBarry Smith #include "../src/mat/impls/aij/seq/crl/ftn-kernels/fmultcrl.h"
945aeacdfdSBarry Smith 
9581824310SBarry Smith #undef __FUNCT__
965a11e1b2SBarry Smith #define __FUNCT__ "MatMult_AIJCRL"
974723c594SBarry Smith /*
985a11e1b2SBarry Smith     Shared by both sequential and parallel versions of CRL matrix: MATMPIAIJCRL and MATSEQAIJCRL
994723c594SBarry Smith     - the scatter is used only in the parallel version
1004723c594SBarry Smith 
1014723c594SBarry Smith */
1025a11e1b2SBarry Smith PetscErrorCode MatMult_AIJCRL(Mat A,Vec xx,Vec yy)
10381824310SBarry Smith {
1045a11e1b2SBarry Smith   Mat_AIJCRL     *aijcrl = (Mat_AIJCRL*) A->spptr;
1055a11e1b2SBarry Smith   PetscInt       m = aijcrl->m;  /* Number of rows in the matrix. */
1065a11e1b2SBarry Smith   PetscInt       rmax = aijcrl->rmax,*icols = aijcrl->icols;
1075a11e1b2SBarry Smith   PetscScalar    *acols = aijcrl->acols;
10881824310SBarry Smith   PetscErrorCode ierr;
10981824310SBarry Smith   PetscScalar    *x,*y;
11081824310SBarry Smith #if !defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL)
11181824310SBarry Smith   PetscInt       i,j,ii;
11281824310SBarry Smith #endif
11381824310SBarry Smith 
11481824310SBarry Smith 
11581824310SBarry Smith #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
11681824310SBarry Smith #pragma disjoint(*x,*y,*aa)
11781824310SBarry Smith #endif
11881824310SBarry Smith 
11981824310SBarry Smith   PetscFunctionBegin;
1205a11e1b2SBarry Smith   if (aijcrl->xscat) {
1215a11e1b2SBarry Smith     ierr = VecCopy(xx,aijcrl->xwork);CHKERRQ(ierr);
122c02b24c5SBarry Smith     /* get remote values needed for local part of multiply */
1235a11e1b2SBarry Smith     ierr = VecScatterBegin(aijcrl->xscat,xx,aijcrl->fwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1245a11e1b2SBarry Smith     ierr = VecScatterEnd(aijcrl->xscat,xx,aijcrl->fwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1255a11e1b2SBarry Smith     xx = aijcrl->xwork;
126c02b24c5SBarry Smith   };
127c02b24c5SBarry Smith 
12881824310SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
12981824310SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
13081824310SBarry Smith 
13181824310SBarry Smith #if defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL)
13281824310SBarry Smith   fortranmultcrl_(&m,&rmax,x,y,icols,acols);
13381824310SBarry Smith #else
13481824310SBarry Smith 
1354be65460SBarry Smith   /* first column */
1364be65460SBarry Smith   for (j=0; j<m; j++) {
1374be65460SBarry Smith     y[j] = acols[j]*x[icols[j]];
1384be65460SBarry Smith   }
1394be65460SBarry Smith 
1404be65460SBarry Smith   /* other columns */
141*b409243cSBarry Smith #if defined(PETSC_HAVE_CRAY_VECTOR)
14281824310SBarry Smith #pragma _CRI preferstream
14381824310SBarry Smith #endif
1444be65460SBarry Smith   for (i=1; i<rmax; i++) {
14581824310SBarry Smith     ii = i*m;
146*b409243cSBarry Smith #if defined(PETSC_HAVE_CRAY_VECTOR)
14781824310SBarry Smith #pragma _CRI prefervector
14881824310SBarry Smith #endif
14981824310SBarry Smith     for (j=0; j<m; j++) {
15081824310SBarry Smith       y[j] = y[j] + acols[ii+j]*x[icols[ii+j]];
15181824310SBarry Smith     }
15281824310SBarry Smith   }
153*b409243cSBarry Smith #if defined(PETSC_HAVE_CRAY_VECTOR)
15481824310SBarry Smith #pragma _CRI ivdep
15581824310SBarry Smith #endif
15681824310SBarry Smith 
15781824310SBarry Smith #endif
1585a11e1b2SBarry Smith   ierr = PetscLogFlops(2.0*aijcrl->nz - m);CHKERRQ(ierr);
15981824310SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
16081824310SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
16181824310SBarry Smith   PetscFunctionReturn(0);
16281824310SBarry Smith }
16381824310SBarry Smith 
16481824310SBarry Smith 
1655a11e1b2SBarry Smith /* MatConvert_SeqAIJ_SeqAIJCRL converts a SeqAIJ matrix into a
1665a11e1b2SBarry Smith  * SeqAIJCRL matrix.  This routine is called by the MatCreate_SeqAIJCRL()
16781824310SBarry Smith  * routine, but can also be used to convert an assembled SeqAIJ matrix
1685a11e1b2SBarry Smith  * into a SeqAIJCRL one. */
16981824310SBarry Smith EXTERN_C_BEGIN
17081824310SBarry Smith #undef __FUNCT__
1715a11e1b2SBarry Smith #define __FUNCT__ "MatConvert_SeqAIJ_SeqAIJCRL"
1727087cfbeSBarry Smith PetscErrorCode  MatConvert_SeqAIJ_SeqAIJCRL(Mat A,const MatType type,MatReuse reuse,Mat *newmat)
17381824310SBarry Smith {
17481824310SBarry Smith   PetscErrorCode ierr;
17581824310SBarry Smith   Mat            B = *newmat;
1765a11e1b2SBarry Smith   Mat_AIJCRL     *aijcrl;
17781824310SBarry Smith 
17881824310SBarry Smith   PetscFunctionBegin;
17981824310SBarry Smith   if (reuse == MAT_INITIAL_MATRIX) {
18081824310SBarry Smith     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
18181824310SBarry Smith   }
18281824310SBarry Smith 
1835a11e1b2SBarry Smith   ierr = PetscNewLog(B,Mat_AIJCRL,&aijcrl);CHKERRQ(ierr);
1845a11e1b2SBarry Smith   B->spptr = (void *) aijcrl;
18581824310SBarry Smith 
18617667f90SBarry Smith   /* Set function pointers for methods that we inherit from AIJ but override. */
1875a11e1b2SBarry Smith   B->ops->duplicate   = MatDuplicate_AIJCRL;
1885a11e1b2SBarry Smith   B->ops->assemblyend = MatAssemblyEnd_SeqAIJCRL;
1895a11e1b2SBarry Smith   B->ops->destroy     = MatDestroy_SeqAIJCRL;
1905a11e1b2SBarry Smith   B->ops->mult        = MatMult_AIJCRL;
19181824310SBarry Smith 
19281824310SBarry Smith   /* If A has already been assembled, compute the permutation. */
1934eeb8337SBarry Smith   if (A->assembled) {
1945a11e1b2SBarry Smith     ierr = SeqAIJCRL_create_aijcrl(B);CHKERRQ(ierr);
19581824310SBarry Smith   }
1965a11e1b2SBarry Smith   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJCRL);CHKERRQ(ierr);
19781824310SBarry Smith   *newmat = B;
19881824310SBarry Smith   PetscFunctionReturn(0);
19981824310SBarry Smith }
20081824310SBarry Smith EXTERN_C_END
20181824310SBarry Smith 
20281824310SBarry Smith 
20381824310SBarry Smith #undef __FUNCT__
2045a11e1b2SBarry Smith #define __FUNCT__ "MatCreateSeqAIJCRL"
20581824310SBarry Smith /*@C
2065a11e1b2SBarry Smith    MatCreateSeqAIJCRL - Creates a sparse matrix of type SEQAIJCRL.
20781824310SBarry Smith    This type inherits from AIJ, but stores some additional
20881824310SBarry Smith    information that is used to allow better vectorization of
20981824310SBarry Smith    the matrix-vector product. At the cost of increased storage, the AIJ formatted
21081824310SBarry Smith    matrix can be copied to a format in which pieces of the matrix are
21181824310SBarry Smith    stored in ELLPACK format, allowing the vectorized matrix multiply
21281824310SBarry Smith    routine to use stride-1 memory accesses.  As with the AIJ type, it is
21381824310SBarry Smith    important to preallocate matrix storage in order to get good assembly
21481824310SBarry Smith    performance.
21581824310SBarry Smith 
21681824310SBarry Smith    Collective on MPI_Comm
21781824310SBarry Smith 
21881824310SBarry Smith    Input Parameters:
21981824310SBarry Smith +  comm - MPI communicator, set to PETSC_COMM_SELF
22081824310SBarry Smith .  m - number of rows
22181824310SBarry Smith .  n - number of columns
22281824310SBarry Smith .  nz - number of nonzeros per row (same for all rows)
22381824310SBarry Smith -  nnz - array containing the number of nonzeros in the various rows
22481824310SBarry Smith          (possibly different for each row) or PETSC_NULL
22581824310SBarry Smith 
22681824310SBarry Smith    Output Parameter:
22781824310SBarry Smith .  A - the matrix
22881824310SBarry Smith 
22981824310SBarry Smith    Notes:
23081824310SBarry Smith    If nnz is given then nz is ignored
23181824310SBarry Smith 
23281824310SBarry Smith    Level: intermediate
23381824310SBarry Smith 
23481824310SBarry Smith .keywords: matrix, cray, sparse, parallel
23581824310SBarry Smith 
2365a11e1b2SBarry Smith .seealso: MatCreate(), MatCreateMPIAIJPERM(), MatSetValues()
23781824310SBarry Smith @*/
2387087cfbeSBarry Smith PetscErrorCode  MatCreateSeqAIJCRL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
23981824310SBarry Smith {
24081824310SBarry Smith   PetscErrorCode ierr;
24181824310SBarry Smith 
24281824310SBarry Smith   PetscFunctionBegin;
24381824310SBarry Smith   ierr = MatCreate(comm,A);CHKERRQ(ierr);
24481824310SBarry Smith   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
2455a11e1b2SBarry Smith   ierr = MatSetType(*A,MATSEQAIJCRL);CHKERRQ(ierr);
246d28bb7d2SJed Brown   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
24781824310SBarry Smith   PetscFunctionReturn(0);
24881824310SBarry Smith }
24981824310SBarry Smith 
25081824310SBarry Smith 
25181824310SBarry Smith EXTERN_C_BEGIN
25281824310SBarry Smith #undef __FUNCT__
2535a11e1b2SBarry Smith #define __FUNCT__ "MatCreate_SeqAIJCRL"
2547087cfbeSBarry Smith PetscErrorCode  MatCreate_SeqAIJCRL(Mat A)
25581824310SBarry Smith {
25681824310SBarry Smith   PetscErrorCode ierr;
25781824310SBarry Smith 
25881824310SBarry Smith   PetscFunctionBegin;
25981824310SBarry Smith   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
2605a11e1b2SBarry Smith   ierr = MatConvert_SeqAIJ_SeqAIJCRL(A,MATSEQAIJCRL,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
26181824310SBarry Smith   PetscFunctionReturn(0);
26281824310SBarry Smith }
26381824310SBarry Smith EXTERN_C_END
26481824310SBarry Smith 
265