1*81824310SBarry Smith #define PETSCMAT_DLL 2*81824310SBarry Smith 3*81824310SBarry Smith /* 4*81824310SBarry Smith Defines a matrix-vector product for the MATSEQAIJCRL matrix class. 5*81824310SBarry Smith This class is derived from the MATSEQAIJ class and retains the 6*81824310SBarry Smith compressed row storage (aka Yale sparse matrix format) but augments 7*81824310SBarry Smith it with a column oriented storage that is more efficient for 8*81824310SBarry Smith matrix vector products on Vector machines. 9*81824310SBarry Smith 10*81824310SBarry Smith CRL stands for constant row length (that is the same number of columns 11*81824310SBarry Smith is kept (padded with zeros) for each row of the sparse matrix. 12*81824310SBarry Smith */ 13*81824310SBarry Smith 14*81824310SBarry Smith #include "src/mat/impls/aij/seq/aij.h" 15*81824310SBarry Smith 16*81824310SBarry Smith typedef struct { 17*81824310SBarry Smith PetscInt ncols; /* number of columns in each row */ 18*81824310SBarry Smith PetscInt *icols; /* columns of nonzeros, stored one column at a time */ 19*81824310SBarry Smith PetscScalar *acols; /* values of nonzeros, stored as icols */ 20*81824310SBarry Smith 21*81824310SBarry Smith /* We need to keep a pointer to MatAssemblyEnd_SeqAIJ because we 22*81824310SBarry Smith * actually want to call this function from within the 23*81824310SBarry Smith * MatAssemblyEnd_SeqCRL function. Similarly, we also need 24*81824310SBarry Smith * MatDestroy_SeqAIJ and MatDuplicate_SeqAIJ. */ 25*81824310SBarry Smith PetscErrorCode (*AssemblyEnd_SeqAIJ)(Mat,MatAssemblyType); 26*81824310SBarry Smith PetscErrorCode (*MatDestroy_SeqAIJ)(Mat); 27*81824310SBarry Smith PetscErrorCode (*MatDuplicate_SeqAIJ)(Mat,MatDuplicateOption,Mat*); 28*81824310SBarry Smith } Mat_SeqCRL; 29*81824310SBarry Smith 30*81824310SBarry Smith #undef __FUNCT__ 31*81824310SBarry Smith #define __FUNCT__ "MatDestroy_SeqCRL" 32*81824310SBarry Smith PetscErrorCode MatDestroy_SeqCRL(Mat A) 33*81824310SBarry Smith { 34*81824310SBarry Smith PetscErrorCode ierr; 35*81824310SBarry Smith Mat_SeqCRL *crl = (Mat_SeqCRL *) A->spptr; 36*81824310SBarry Smith 37*81824310SBarry Smith /* We are going to convert A back into a SEQAIJ matrix, since we are 38*81824310SBarry Smith * eventually going to use MatDestroy_SeqAIJ() to destroy everything 39*81824310SBarry Smith * that is not specific to CRL. 40*81824310SBarry Smith * In preparation for this, reset the operations pointers in A to 41*81824310SBarry Smith * their SeqAIJ versions. */ 42*81824310SBarry Smith A->ops->assemblyend = crl->AssemblyEnd_SeqAIJ; 43*81824310SBarry Smith A->ops->destroy = crl->MatDestroy_SeqAIJ; 44*81824310SBarry Smith A->ops->duplicate = crl->MatDuplicate_SeqAIJ; 45*81824310SBarry Smith 46*81824310SBarry Smith /* Free everything in the Mat_SeqCRL data structure. */ 47*81824310SBarry Smith if (crl->icols) { 48*81824310SBarry Smith ierr = PetscFree2(crl->acols,crl->icols);CHKERRQ(ierr); 49*81824310SBarry Smith } 50*81824310SBarry Smith /* Free the Mat_SeqCRL struct itself. */ 51*81824310SBarry Smith ierr = PetscFree(crl);CHKERRQ(ierr); 52*81824310SBarry Smith 53*81824310SBarry Smith /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 54*81824310SBarry Smith * to destroy everything that remains. */ 55*81824310SBarry Smith ierr = PetscObjectChangeTypeName( (PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 56*81824310SBarry Smith /* Note that I don't call MatSetType(). I believe this is because that 57*81824310SBarry Smith * is only to be called when *building* a matrix. */ 58*81824310SBarry Smith ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 59*81824310SBarry Smith PetscFunctionReturn(0); 60*81824310SBarry Smith } 61*81824310SBarry Smith 62*81824310SBarry Smith PetscErrorCode MatDuplicate_SeqCRL(Mat A, MatDuplicateOption op, Mat *M) 63*81824310SBarry Smith { 64*81824310SBarry Smith PetscErrorCode ierr; 65*81824310SBarry Smith Mat_SeqCRL *crl = (Mat_SeqCRL *) A->spptr; 66*81824310SBarry Smith 67*81824310SBarry Smith PetscFunctionBegin; 68*81824310SBarry Smith ierr = (*crl->MatDuplicate_SeqAIJ)(A,op,M);CHKERRQ(ierr); 69*81824310SBarry Smith SETERRQ(PETSC_ERR_SUP,"Cannot duplicate CRL matrices yet"); 70*81824310SBarry Smith PetscFunctionReturn(0); 71*81824310SBarry Smith } 72*81824310SBarry Smith 73*81824310SBarry Smith #undef __FUNCT__ 74*81824310SBarry Smith #define __FUNCT__ "SeqCRL_create_crl" 75*81824310SBarry Smith PetscErrorCode SeqCRL_create_crl(Mat A) 76*81824310SBarry Smith { 77*81824310SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)(A)->data; 78*81824310SBarry Smith Mat_SeqCRL *crl = (Mat_SeqCRL*) A->spptr; 79*81824310SBarry Smith PetscInt m = A->m; /* Number of rows in the matrix. */ 80*81824310SBarry Smith PetscInt *aj = a->j; /* From the CSR representation; points to the beginning of each row. */ 81*81824310SBarry Smith PetscInt i, j,rmax = a->rmax,*icols, *ilen = a->ilen; 82*81824310SBarry Smith PetscScalar *aa = a->a,*acols; 83*81824310SBarry Smith PetscErrorCode ierr; 84*81824310SBarry Smith 85*81824310SBarry Smith PetscFunctionBegin; 86*81824310SBarry Smith ierr = PetscMalloc2(rmax*m,PetscScalar,&crl->acols,rmax*m,PetscInt,&crl->icols);CHKERRQ(ierr); 87*81824310SBarry Smith acols = crl->acols; 88*81824310SBarry Smith icols = crl->icols; 89*81824310SBarry Smith for (i=0; i<m; i++) { 90*81824310SBarry Smith for (j=0; j<ilen[i]; j++) { 91*81824310SBarry Smith acols[j*m+i] = *aa++; 92*81824310SBarry Smith icols[j*m+i] = *aj++; 93*81824310SBarry Smith } 94*81824310SBarry Smith for (;j<rmax; j++) { /* empty column entries */ 95*81824310SBarry Smith acols[j*m+i] = 0.0; 96*81824310SBarry Smith icols[j*m+i] = (j) ? icols[(j-1)*m+i] : 0; /* handle case where row is EMPTY */ 97*81824310SBarry Smith } 98*81824310SBarry Smith } 99*81824310SBarry Smith ierr = PetscLogInfo((A,"SeqCRL_create_crl: Percentage of 0's introduced for vectorized multiply %g\n",1.0-((double)a->nz)/((double)(rmax*m)))); 100*81824310SBarry Smith PetscFunctionReturn(0); 101*81824310SBarry Smith } 102*81824310SBarry Smith 103*81824310SBarry Smith #undef __FUNCT__ 104*81824310SBarry Smith #define __FUNCT__ "MatAssemblyEnd_SeqCRL" 105*81824310SBarry Smith PetscErrorCode MatAssemblyEnd_SeqCRL(Mat A, MatAssemblyType mode) 106*81824310SBarry Smith { 107*81824310SBarry Smith PetscErrorCode ierr; 108*81824310SBarry Smith Mat_SeqCRL *crl = (Mat_SeqCRL*) A->spptr; 109*81824310SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 110*81824310SBarry Smith 111*81824310SBarry Smith if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 112*81824310SBarry Smith 113*81824310SBarry Smith /* Since a MATSEQCRL matrix is really just a MATSEQAIJ with some 114*81824310SBarry Smith * extra information, call the AssemblyEnd routine for a MATSEQAIJ. 115*81824310SBarry Smith * I'm not sure if this is the best way to do this, but it avoids 116*81824310SBarry Smith * a lot of code duplication. 117*81824310SBarry Smith * I also note that currently MATSEQCRL doesn't know anything about 118*81824310SBarry Smith * the Mat_CompressedRow data structure that SeqAIJ now uses when there 119*81824310SBarry Smith * are many zero rows. If the SeqAIJ assembly end routine decides to use 120*81824310SBarry Smith * this, this may break things. (Don't know... haven't looked at it.) */ 121*81824310SBarry Smith a->inode.use = PETSC_FALSE; 122*81824310SBarry Smith (*crl->AssemblyEnd_SeqAIJ)(A, mode); 123*81824310SBarry Smith 124*81824310SBarry Smith /* Now calculate the permutation and grouping information. */ 125*81824310SBarry Smith ierr = SeqCRL_create_crl(A);CHKERRQ(ierr); 126*81824310SBarry Smith PetscFunctionReturn(0); 127*81824310SBarry Smith } 128*81824310SBarry Smith 129*81824310SBarry Smith #undef __FUNCT__ 130*81824310SBarry Smith #define __FUNCT__ "MatMult_SeqCRL" 131*81824310SBarry Smith PetscErrorCode MatMult_SeqCRL(Mat A,Vec xx,Vec yy) 132*81824310SBarry Smith { 133*81824310SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)(A)->data; 134*81824310SBarry Smith Mat_SeqCRL *crl = (Mat_SeqCRL*) A->spptr; 135*81824310SBarry Smith PetscInt m = A->m; /* Number of rows in the matrix. */ 136*81824310SBarry Smith PetscInt rmax = a->rmax,*icols = crl->icols; 137*81824310SBarry Smith PetscScalar *acols = crl->acols; 138*81824310SBarry Smith PetscErrorCode ierr; 139*81824310SBarry Smith PetscScalar *x,*y; 140*81824310SBarry Smith #if !defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL) 141*81824310SBarry Smith PetscInt i,j,ii; 142*81824310SBarry Smith #endif 143*81824310SBarry Smith 144*81824310SBarry Smith 145*81824310SBarry Smith #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 146*81824310SBarry Smith #pragma disjoint(*x,*y,*aa) 147*81824310SBarry Smith #endif 148*81824310SBarry Smith 149*81824310SBarry Smith PetscFunctionBegin; 150*81824310SBarry Smith ierr = VecZeroEntries(yy);CHKERRQ(ierr); 151*81824310SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 152*81824310SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 153*81824310SBarry Smith 154*81824310SBarry Smith 155*81824310SBarry Smith #define PETSC_USE_FORTRAN_KERNEL_MULTCRL 156*81824310SBarry Smith #if defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL) 157*81824310SBarry Smith fortranmultcrl_(&m,&rmax,x,y,icols,acols); 158*81824310SBarry Smith #else 159*81824310SBarry Smith 160*81824310SBarry Smith #if defined(PETSC_HAVE_CRAYC) 161*81824310SBarry Smith #pragma _CRI preferstream 162*81824310SBarry Smith #endif 163*81824310SBarry Smith for (i=0; i<rmax; i++) { 164*81824310SBarry Smith ii = i*m; 165*81824310SBarry Smith #if defined(PETSC_HAVE_CRAYC) 166*81824310SBarry Smith #pragma _CRI prefervector 167*81824310SBarry Smith #endif 168*81824310SBarry Smith for (j=0; j<m; j++) { 169*81824310SBarry Smith y[j] = y[j] + acols[ii+j]*x[icols[ii+j]]; 170*81824310SBarry Smith } 171*81824310SBarry Smith } 172*81824310SBarry Smith #if defined(PETSC_HAVE_CRAYC) 173*81824310SBarry Smith #pragma _CRI ivdep 174*81824310SBarry Smith #endif 175*81824310SBarry Smith 176*81824310SBarry Smith #endif 177*81824310SBarry Smith ierr = PetscLogFlops(2*a->nz - m);CHKERRQ(ierr); 178*81824310SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 179*81824310SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 180*81824310SBarry Smith PetscFunctionReturn(0); 181*81824310SBarry Smith } 182*81824310SBarry Smith 183*81824310SBarry Smith 184*81824310SBarry Smith /* MatConvert_SeqAIJ_SeqCRL converts a SeqAIJ matrix into a 185*81824310SBarry Smith * SeqCRL matrix. This routine is called by the MatCreate_SeqCRL() 186*81824310SBarry Smith * routine, but can also be used to convert an assembled SeqAIJ matrix 187*81824310SBarry Smith * into a SeqCRL one. */ 188*81824310SBarry Smith EXTERN_C_BEGIN 189*81824310SBarry Smith #undef __FUNCT__ 190*81824310SBarry Smith #define __FUNCT__ "MatConvert_SeqAIJ_SeqCRL" 191*81824310SBarry Smith PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqAIJ_SeqCRL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 192*81824310SBarry Smith { 193*81824310SBarry Smith /* This routine is only called to convert to MATSEQCRL 194*81824310SBarry Smith * from MATSEQAIJ, so we can ignore 'MatType Type'. */ 195*81824310SBarry Smith PetscErrorCode ierr; 196*81824310SBarry Smith Mat B = *newmat; 197*81824310SBarry Smith Mat_SeqCRL *crl; 198*81824310SBarry Smith 199*81824310SBarry Smith PetscFunctionBegin; 200*81824310SBarry Smith if (reuse == MAT_INITIAL_MATRIX) { 201*81824310SBarry Smith ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 202*81824310SBarry Smith } 203*81824310SBarry Smith 204*81824310SBarry Smith ierr = PetscNew(Mat_SeqCRL,&crl);CHKERRQ(ierr); 205*81824310SBarry Smith B->spptr = (void *) crl; 206*81824310SBarry Smith 207*81824310SBarry Smith /* Save a pointer to the original SeqAIJ assembly end routine, because we 208*81824310SBarry Smith * will want to use it later in the CRL assembly end routine. 209*81824310SBarry Smith * Also, save a pointer to the original SeqAIJ Destroy routine, because we 210*81824310SBarry Smith * will want to use it in the CRL destroy routine. */ 211*81824310SBarry Smith crl->AssemblyEnd_SeqAIJ = A->ops->assemblyend; 212*81824310SBarry Smith crl->MatDestroy_SeqAIJ = A->ops->destroy; 213*81824310SBarry Smith crl->MatDuplicate_SeqAIJ = A->ops->duplicate; 214*81824310SBarry Smith 215*81824310SBarry Smith /* Set function pointers for methods that we inherit from AIJ but 216*81824310SBarry Smith * override. */ 217*81824310SBarry Smith B->ops->duplicate = MatDuplicate_SeqCRL; 218*81824310SBarry Smith B->ops->assemblyend = MatAssemblyEnd_SeqCRL; 219*81824310SBarry Smith B->ops->destroy = MatDestroy_SeqCRL; 220*81824310SBarry Smith B->ops->mult = MatMult_SeqCRL; 221*81824310SBarry Smith 222*81824310SBarry Smith /* If A has already been assembled, compute the permutation. */ 223*81824310SBarry Smith if (A->assembled == PETSC_TRUE) { 224*81824310SBarry Smith ierr = SeqCRL_create_crl(B);CHKERRQ(ierr); 225*81824310SBarry Smith } 226*81824310SBarry Smith ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQCRL);CHKERRQ(ierr); 227*81824310SBarry Smith *newmat = B; 228*81824310SBarry Smith PetscFunctionReturn(0); 229*81824310SBarry Smith } 230*81824310SBarry Smith EXTERN_C_END 231*81824310SBarry Smith 232*81824310SBarry Smith 233*81824310SBarry Smith #undef __FUNCT__ 234*81824310SBarry Smith #define __FUNCT__ "MatCreateSeqCRL" 235*81824310SBarry Smith /*@C 236*81824310SBarry Smith MatCreateSeqCRL - Creates a sparse matrix of type SEQCRL. 237*81824310SBarry Smith This type inherits from AIJ, but stores some additional 238*81824310SBarry Smith information that is used to allow better vectorization of 239*81824310SBarry Smith the matrix-vector product. At the cost of increased storage, the AIJ formatted 240*81824310SBarry Smith matrix can be copied to a format in which pieces of the matrix are 241*81824310SBarry Smith stored in ELLPACK format, allowing the vectorized matrix multiply 242*81824310SBarry Smith routine to use stride-1 memory accesses. As with the AIJ type, it is 243*81824310SBarry Smith important to preallocate matrix storage in order to get good assembly 244*81824310SBarry Smith performance. 245*81824310SBarry Smith 246*81824310SBarry Smith Collective on MPI_Comm 247*81824310SBarry Smith 248*81824310SBarry Smith Input Parameters: 249*81824310SBarry Smith + comm - MPI communicator, set to PETSC_COMM_SELF 250*81824310SBarry Smith . m - number of rows 251*81824310SBarry Smith . n - number of columns 252*81824310SBarry Smith . nz - number of nonzeros per row (same for all rows) 253*81824310SBarry Smith - nnz - array containing the number of nonzeros in the various rows 254*81824310SBarry Smith (possibly different for each row) or PETSC_NULL 255*81824310SBarry Smith 256*81824310SBarry Smith Output Parameter: 257*81824310SBarry Smith . A - the matrix 258*81824310SBarry Smith 259*81824310SBarry Smith Notes: 260*81824310SBarry Smith If nnz is given then nz is ignored 261*81824310SBarry Smith 262*81824310SBarry Smith Level: intermediate 263*81824310SBarry Smith 264*81824310SBarry Smith .keywords: matrix, cray, sparse, parallel 265*81824310SBarry Smith 266*81824310SBarry Smith .seealso: MatCreate(), MatCreateMPICSRPERM(), MatSetValues() 267*81824310SBarry Smith @*/ 268*81824310SBarry Smith PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqCRL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 269*81824310SBarry Smith { 270*81824310SBarry Smith PetscErrorCode ierr; 271*81824310SBarry Smith 272*81824310SBarry Smith PetscFunctionBegin; 273*81824310SBarry Smith ierr = MatCreate(comm,A);CHKERRQ(ierr); 274*81824310SBarry Smith ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 275*81824310SBarry Smith ierr = MatSetType(*A,MATSEQCRL);CHKERRQ(ierr); 276*81824310SBarry Smith ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);CHKERRQ(ierr); 277*81824310SBarry Smith PetscFunctionReturn(0); 278*81824310SBarry Smith } 279*81824310SBarry Smith 280*81824310SBarry Smith 281*81824310SBarry Smith EXTERN_C_BEGIN 282*81824310SBarry Smith #undef __FUNCT__ 283*81824310SBarry Smith #define __FUNCT__ "MatCreate_SeqCRL" 284*81824310SBarry Smith PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqCRL(Mat A) 285*81824310SBarry Smith { 286*81824310SBarry Smith PetscErrorCode ierr; 287*81824310SBarry Smith 288*81824310SBarry Smith PetscFunctionBegin; 289*81824310SBarry Smith /* Change the type name before calling MatSetType() to force proper construction of SeqAIJ 290*81824310SBarry Smith and MATSEQCRL types. */ 291*81824310SBarry Smith ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQCRL);CHKERRQ(ierr); 292*81824310SBarry Smith ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 293*81824310SBarry Smith ierr = MatConvert_SeqAIJ_SeqCRL(A,MATSEQCRL,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 294*81824310SBarry Smith PetscFunctionReturn(0); 295*81824310SBarry Smith } 296*81824310SBarry Smith EXTERN_C_END 297*81824310SBarry Smith 298