1*5fa1c062SBarry Smith #define PETSCMAT_DLL 2*5fa1c062SBarry Smith 3*5fa1c062SBarry Smith /* 4*5fa1c062SBarry Smith Defines a matrix-vector product for the MATSEQAIJCRL matrix class. 5*5fa1c062SBarry Smith This class is derived from the MATSEQAIJ class and retains the 6*5fa1c062SBarry Smith compressed row storage (aka Yale sparse matrix format) but augments 7*5fa1c062SBarry Smith it with a column oriented storage that is more efficient for 8*5fa1c062SBarry Smith matrix vector products on Vector machines. 9*5fa1c062SBarry Smith 10*5fa1c062SBarry Smith CRL stands for constant row length (that is the same number of columns 11*5fa1c062SBarry Smith is kept (padded with zeros) for each row of the sparse matrix. 12*5fa1c062SBarry Smith */ 13*5fa1c062SBarry Smith 14*5fa1c062SBarry Smith #include "src/mat/impls/aij/seq/aij.h" 15*5fa1c062SBarry Smith 16*5fa1c062SBarry Smith typedef struct { 17*5fa1c062SBarry Smith PetscInt ncols; /* number of columns in each row */ 18*5fa1c062SBarry Smith PetscInt *icols; /* columns of nonzeros, stored one column at a time */ 19*5fa1c062SBarry Smith PetscScalar *acols; /* values of nonzeros, stored as icols */ 20*5fa1c062SBarry Smith 21*5fa1c062SBarry Smith /* We need to keep a pointer to MatAssemblyEnd_SeqAIJ because we 22*5fa1c062SBarry Smith * actually want to call this function from within the 23*5fa1c062SBarry Smith * MatAssemblyEnd_SeqCRL function. Similarly, we also need 24*5fa1c062SBarry Smith * MatDestroy_SeqAIJ and MatDuplicate_SeqAIJ. */ 25*5fa1c062SBarry Smith PetscErrorCode (*AssemblyEnd_SeqAIJ)(Mat,MatAssemblyType); 26*5fa1c062SBarry Smith PetscErrorCode (*MatDestroy_SeqAIJ)(Mat); 27*5fa1c062SBarry Smith PetscErrorCode (*MatDuplicate_SeqAIJ)(Mat,MatDuplicateOption,Mat*); 28*5fa1c062SBarry Smith } Mat_SeqCRL; 29*5fa1c062SBarry Smith 30*5fa1c062SBarry Smith #undef __FUNCT__ 31*5fa1c062SBarry Smith #define __FUNCT__ "MatDestroy_SeqCRL" 32*5fa1c062SBarry Smith PetscErrorCode MatDestroy_SeqCRL(Mat A) 33*5fa1c062SBarry Smith { 34*5fa1c062SBarry Smith PetscErrorCode ierr; 35*5fa1c062SBarry Smith Mat_SeqCRL *crl = (Mat_SeqCRL *) A->spptr; 36*5fa1c062SBarry Smith 37*5fa1c062SBarry Smith /* We are going to convert A back into a SEQAIJ matrix, since we are 38*5fa1c062SBarry Smith * eventually going to use MatDestroy_SeqAIJ() to destroy everything 39*5fa1c062SBarry Smith * that is not specific to CRL. 40*5fa1c062SBarry Smith * In preparation for this, reset the operations pointers in A to 41*5fa1c062SBarry Smith * their SeqAIJ versions. */ 42*5fa1c062SBarry Smith A->ops->assemblyend = crl->AssemblyEnd_SeqAIJ; 43*5fa1c062SBarry Smith A->ops->destroy = crl->MatDestroy_SeqAIJ; 44*5fa1c062SBarry Smith A->ops->duplicate = crl->MatDuplicate_SeqAIJ; 45*5fa1c062SBarry Smith 46*5fa1c062SBarry Smith /* Free everything in the Mat_SeqCRL data structure. */ 47*5fa1c062SBarry Smith if (crl->icols) { 48*5fa1c062SBarry Smith ierr = PetscFree2(crl->acols,crl->icols);CHKERRQ(ierr); 49*5fa1c062SBarry Smith } 50*5fa1c062SBarry Smith /* Free the Mat_SeqCRL struct itself. */ 51*5fa1c062SBarry Smith ierr = PetscFree(crl);CHKERRQ(ierr); 52*5fa1c062SBarry Smith 53*5fa1c062SBarry Smith /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 54*5fa1c062SBarry Smith * to destroy everything that remains. */ 55*5fa1c062SBarry Smith ierr = PetscObjectChangeTypeName( (PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 56*5fa1c062SBarry Smith /* Note that I don't call MatSetType(). I believe this is because that 57*5fa1c062SBarry Smith * is only to be called when *building* a matrix. */ 58*5fa1c062SBarry Smith ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 59*5fa1c062SBarry Smith PetscFunctionReturn(0); 60*5fa1c062SBarry Smith } 61*5fa1c062SBarry Smith 62*5fa1c062SBarry Smith PetscErrorCode MatDuplicate_SeqCRL(Mat A, MatDuplicateOption op, Mat *M) 63*5fa1c062SBarry Smith { 64*5fa1c062SBarry Smith PetscErrorCode ierr; 65*5fa1c062SBarry Smith Mat_SeqCRL *crl = (Mat_SeqCRL *) A->spptr; 66*5fa1c062SBarry Smith 67*5fa1c062SBarry Smith PetscFunctionBegin; 68*5fa1c062SBarry Smith ierr = (*crl->MatDuplicate_SeqAIJ)(A,op,M);CHKERRQ(ierr); 69*5fa1c062SBarry Smith SETERRQ(PETSC_ERR_SUP,"Cannot duplicate CRL matrices yet"); 70*5fa1c062SBarry Smith PetscFunctionReturn(0); 71*5fa1c062SBarry Smith } 72*5fa1c062SBarry Smith 73*5fa1c062SBarry Smith #undef __FUNCT__ 74*5fa1c062SBarry Smith #define __FUNCT__ "SeqCRL_create_crl" 75*5fa1c062SBarry Smith PetscErrorCode SeqCRL_create_crl(Mat A) 76*5fa1c062SBarry Smith { 77*5fa1c062SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)(A)->data; 78*5fa1c062SBarry Smith Mat_SeqCRL *crl = (Mat_SeqCRL*) A->spptr; 79*5fa1c062SBarry Smith PetscInt m = A->m; /* Number of rows in the matrix. */ 80*5fa1c062SBarry Smith PetscInt *aj = a->j; /* From the CSR representation; points to the beginning of each row. */ 81*5fa1c062SBarry Smith PetscInt i, j,rmax = a->rmax,*icols, *ilen = a->ilen; 82*5fa1c062SBarry Smith PetscScalar *aa = a->a,*acols; 83*5fa1c062SBarry Smith PetscErrorCode ierr; 84*5fa1c062SBarry Smith 85*5fa1c062SBarry Smith PetscFunctionBegin; 86*5fa1c062SBarry Smith ierr = PetscMalloc2(rmax*m,PetscScalar,&crl->acols,rmax*m,PetscInt,&crl->icols);CHKERRQ(ierr); 87*5fa1c062SBarry Smith acols = crl->acols; 88*5fa1c062SBarry Smith icols = crl->icols; 89*5fa1c062SBarry Smith for (i=0; i<m; i++) { 90*5fa1c062SBarry Smith for (j=0; j<ilen[i]; j++) { 91*5fa1c062SBarry Smith acols[j*m+i] = *aa++; 92*5fa1c062SBarry Smith icols[j*m+i] = *aj++; 93*5fa1c062SBarry Smith } 94*5fa1c062SBarry Smith for (;j<rmax; j++) { /* empty column entries */ 95*5fa1c062SBarry Smith acols[j*m+i] = 0.0; 96*5fa1c062SBarry Smith icols[j*m+i] = (j) ? icols[(j-1)*m+i] : 0; /* handle case where row is EMPTY */ 97*5fa1c062SBarry Smith } 98*5fa1c062SBarry Smith } 99*5fa1c062SBarry Smith ierr = PetscLogInfo((A,"SeqCRL_create_crl: Percentage of 0's introduced for vectorized multiply %g\n",1.0-((double)a->nz)/((double)(rmax*m)))); 100*5fa1c062SBarry Smith PetscFunctionReturn(0); 101*5fa1c062SBarry Smith } 102*5fa1c062SBarry Smith 103*5fa1c062SBarry Smith #undef __FUNCT__ 104*5fa1c062SBarry Smith #define __FUNCT__ "MatAssemblyEnd_SeqCRL" 105*5fa1c062SBarry Smith PetscErrorCode MatAssemblyEnd_SeqCRL(Mat A, MatAssemblyType mode) 106*5fa1c062SBarry Smith { 107*5fa1c062SBarry Smith PetscErrorCode ierr; 108*5fa1c062SBarry Smith Mat_SeqCRL *crl = (Mat_SeqCRL*) A->spptr; 109*5fa1c062SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 110*5fa1c062SBarry Smith 111*5fa1c062SBarry Smith if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 112*5fa1c062SBarry Smith 113*5fa1c062SBarry Smith /* Since a MATSEQCRL matrix is really just a MATSEQAIJ with some 114*5fa1c062SBarry Smith * extra information, call the AssemblyEnd routine for a MATSEQAIJ. 115*5fa1c062SBarry Smith * I'm not sure if this is the best way to do this, but it avoids 116*5fa1c062SBarry Smith * a lot of code duplication. 117*5fa1c062SBarry Smith * I also note that currently MATSEQCRL doesn't know anything about 118*5fa1c062SBarry Smith * the Mat_CompressedRow data structure that SeqAIJ now uses when there 119*5fa1c062SBarry Smith * are many zero rows. If the SeqAIJ assembly end routine decides to use 120*5fa1c062SBarry Smith * this, this may break things. (Don't know... haven't looked at it.) */ 121*5fa1c062SBarry Smith a->inode.use = PETSC_FALSE; 122*5fa1c062SBarry Smith (*crl->AssemblyEnd_SeqAIJ)(A, mode); 123*5fa1c062SBarry Smith 124*5fa1c062SBarry Smith /* Now calculate the permutation and grouping information. */ 125*5fa1c062SBarry Smith ierr = SeqCRL_create_crl(A);CHKERRQ(ierr); 126*5fa1c062SBarry Smith PetscFunctionReturn(0); 127*5fa1c062SBarry Smith } 128*5fa1c062SBarry Smith 129*5fa1c062SBarry Smith #include "src/inline/dot.h" 130*5fa1c062SBarry Smith 131*5fa1c062SBarry Smith #undef __FUNCT__ 132*5fa1c062SBarry Smith #define __FUNCT__ "MatMult_SeqCRL" 133*5fa1c062SBarry Smith PetscErrorCode MatMult_SeqCRL(Mat A,Vec xx,Vec yy) 134*5fa1c062SBarry Smith { 135*5fa1c062SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)(A)->data; 136*5fa1c062SBarry Smith Mat_SeqCRL *crl = (Mat_SeqCRL*) A->spptr; 137*5fa1c062SBarry Smith PetscInt m = A->m; /* Number of rows in the matrix. */ 138*5fa1c062SBarry Smith PetscInt rmax = a->rmax,*icols = crl->icols; 139*5fa1c062SBarry Smith PetscScalar *acols = crl->acols; 140*5fa1c062SBarry Smith PetscErrorCode ierr; 141*5fa1c062SBarry Smith PetscScalar *x,*y; 142*5fa1c062SBarry Smith #if !defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL) 143*5fa1c062SBarry Smith PetscInt i,j,ii; 144*5fa1c062SBarry Smith #endif 145*5fa1c062SBarry Smith 146*5fa1c062SBarry Smith 147*5fa1c062SBarry Smith #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 148*5fa1c062SBarry Smith #pragma disjoint(*x,*y,*aa) 149*5fa1c062SBarry Smith #endif 150*5fa1c062SBarry Smith 151*5fa1c062SBarry Smith PetscFunctionBegin; 152*5fa1c062SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 153*5fa1c062SBarry Smith ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 154*5fa1c062SBarry Smith 155*5fa1c062SBarry Smith #if defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL) 156*5fa1c062SBarry Smith fortranmultcrl_(&m,&rmax,x,y,icols,acols); 157*5fa1c062SBarry Smith #else 158*5fa1c062SBarry Smith 159*5fa1c062SBarry Smith /* first column */ 160*5fa1c062SBarry Smith for (j=0; j<m; j++) { 161*5fa1c062SBarry Smith y[j] = acols[j]*x[icols[j]]; 162*5fa1c062SBarry Smith } 163*5fa1c062SBarry Smith 164*5fa1c062SBarry Smith /* other columns */ 165*5fa1c062SBarry Smith #if defined(PETSC_HAVE_CRAYC) 166*5fa1c062SBarry Smith #pragma _CRI preferstream 167*5fa1c062SBarry Smith #endif 168*5fa1c062SBarry Smith for (i=1; i<rmax; i++) { 169*5fa1c062SBarry Smith ii = i*m; 170*5fa1c062SBarry Smith #if defined(PETSC_HAVE_CRAYC) 171*5fa1c062SBarry Smith #pragma _CRI prefervector 172*5fa1c062SBarry Smith #endif 173*5fa1c062SBarry Smith for (j=0; j<m; j++) { 174*5fa1c062SBarry Smith y[j] = y[j] + acols[ii+j]*x[icols[ii+j]]; 175*5fa1c062SBarry Smith } 176*5fa1c062SBarry Smith } 177*5fa1c062SBarry Smith #if defined(PETSC_HAVE_CRAYC) 178*5fa1c062SBarry Smith #pragma _CRI ivdep 179*5fa1c062SBarry Smith #endif 180*5fa1c062SBarry Smith 181*5fa1c062SBarry Smith #endif 182*5fa1c062SBarry Smith ierr = PetscLogFlops(2*a->nz - m);CHKERRQ(ierr); 183*5fa1c062SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 184*5fa1c062SBarry Smith ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 185*5fa1c062SBarry Smith PetscFunctionReturn(0); 186*5fa1c062SBarry Smith } 187*5fa1c062SBarry Smith 188*5fa1c062SBarry Smith 189*5fa1c062SBarry Smith /* MatConvert_SeqAIJ_SeqCRL converts a SeqAIJ matrix into a 190*5fa1c062SBarry Smith * SeqCRL matrix. This routine is called by the MatCreate_SeqCRL() 191*5fa1c062SBarry Smith * routine, but can also be used to convert an assembled SeqAIJ matrix 192*5fa1c062SBarry Smith * into a SeqCRL one. */ 193*5fa1c062SBarry Smith EXTERN_C_BEGIN 194*5fa1c062SBarry Smith #undef __FUNCT__ 195*5fa1c062SBarry Smith #define __FUNCT__ "MatConvert_SeqAIJ_SeqCRL" 196*5fa1c062SBarry Smith PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqAIJ_SeqCRL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 197*5fa1c062SBarry Smith { 198*5fa1c062SBarry Smith /* This routine is only called to convert to MATSEQCRL 199*5fa1c062SBarry Smith * from MATSEQAIJ, so we can ignore 'MatType Type'. */ 200*5fa1c062SBarry Smith PetscErrorCode ierr; 201*5fa1c062SBarry Smith Mat B = *newmat; 202*5fa1c062SBarry Smith Mat_SeqCRL *crl; 203*5fa1c062SBarry Smith 204*5fa1c062SBarry Smith PetscFunctionBegin; 205*5fa1c062SBarry Smith if (reuse == MAT_INITIAL_MATRIX) { 206*5fa1c062SBarry Smith ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 207*5fa1c062SBarry Smith } 208*5fa1c062SBarry Smith 209*5fa1c062SBarry Smith ierr = PetscNew(Mat_SeqCRL,&crl);CHKERRQ(ierr); 210*5fa1c062SBarry Smith B->spptr = (void *) crl; 211*5fa1c062SBarry Smith 212*5fa1c062SBarry Smith /* Save a pointer to the original SeqAIJ assembly end routine, because we 213*5fa1c062SBarry Smith * will want to use it later in the CRL assembly end routine. 214*5fa1c062SBarry Smith * Also, save a pointer to the original SeqAIJ Destroy routine, because we 215*5fa1c062SBarry Smith * will want to use it in the CRL destroy routine. */ 216*5fa1c062SBarry Smith crl->AssemblyEnd_SeqAIJ = A->ops->assemblyend; 217*5fa1c062SBarry Smith crl->MatDestroy_SeqAIJ = A->ops->destroy; 218*5fa1c062SBarry Smith crl->MatDuplicate_SeqAIJ = A->ops->duplicate; 219*5fa1c062SBarry Smith 220*5fa1c062SBarry Smith /* Set function pointers for methods that we inherit from AIJ but 221*5fa1c062SBarry Smith * override. */ 222*5fa1c062SBarry Smith B->ops->duplicate = MatDuplicate_SeqCRL; 223*5fa1c062SBarry Smith B->ops->assemblyend = MatAssemblyEnd_SeqCRL; 224*5fa1c062SBarry Smith B->ops->destroy = MatDestroy_SeqCRL; 225*5fa1c062SBarry Smith B->ops->mult = MatMult_SeqCRL; 226*5fa1c062SBarry Smith 227*5fa1c062SBarry Smith /* If A has already been assembled, compute the permutation. */ 228*5fa1c062SBarry Smith if (A->assembled == PETSC_TRUE) { 229*5fa1c062SBarry Smith ierr = SeqCRL_create_crl(B);CHKERRQ(ierr); 230*5fa1c062SBarry Smith } 231*5fa1c062SBarry Smith ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQCRL);CHKERRQ(ierr); 232*5fa1c062SBarry Smith *newmat = B; 233*5fa1c062SBarry Smith PetscFunctionReturn(0); 234*5fa1c062SBarry Smith } 235*5fa1c062SBarry Smith EXTERN_C_END 236*5fa1c062SBarry Smith 237*5fa1c062SBarry Smith 238*5fa1c062SBarry Smith #undef __FUNCT__ 239*5fa1c062SBarry Smith #define __FUNCT__ "MatCreateSeqCRL" 240*5fa1c062SBarry Smith /*@C 241*5fa1c062SBarry Smith MatCreateSeqCRL - Creates a sparse matrix of type SEQCRL. 242*5fa1c062SBarry Smith This type inherits from AIJ, but stores some additional 243*5fa1c062SBarry Smith information that is used to allow better vectorization of 244*5fa1c062SBarry Smith the matrix-vector product. At the cost of increased storage, the AIJ formatted 245*5fa1c062SBarry Smith matrix can be copied to a format in which pieces of the matrix are 246*5fa1c062SBarry Smith stored in ELLPACK format, allowing the vectorized matrix multiply 247*5fa1c062SBarry Smith routine to use stride-1 memory accesses. As with the AIJ type, it is 248*5fa1c062SBarry Smith important to preallocate matrix storage in order to get good assembly 249*5fa1c062SBarry Smith performance. 250*5fa1c062SBarry Smith 251*5fa1c062SBarry Smith Collective on MPI_Comm 252*5fa1c062SBarry Smith 253*5fa1c062SBarry Smith Input Parameters: 254*5fa1c062SBarry Smith + comm - MPI communicator, set to PETSC_COMM_SELF 255*5fa1c062SBarry Smith . m - number of rows 256*5fa1c062SBarry Smith . n - number of columns 257*5fa1c062SBarry Smith . nz - number of nonzeros per row (same for all rows) 258*5fa1c062SBarry Smith - nnz - array containing the number of nonzeros in the various rows 259*5fa1c062SBarry Smith (possibly different for each row) or PETSC_NULL 260*5fa1c062SBarry Smith 261*5fa1c062SBarry Smith Output Parameter: 262*5fa1c062SBarry Smith . A - the matrix 263*5fa1c062SBarry Smith 264*5fa1c062SBarry Smith Notes: 265*5fa1c062SBarry Smith If nnz is given then nz is ignored 266*5fa1c062SBarry Smith 267*5fa1c062SBarry Smith Level: intermediate 268*5fa1c062SBarry Smith 269*5fa1c062SBarry Smith .keywords: matrix, cray, sparse, parallel 270*5fa1c062SBarry Smith 271*5fa1c062SBarry Smith .seealso: MatCreate(), MatCreateMPICSRPERM(), MatSetValues() 272*5fa1c062SBarry Smith @*/ 273*5fa1c062SBarry Smith PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqCRL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 274*5fa1c062SBarry Smith { 275*5fa1c062SBarry Smith PetscErrorCode ierr; 276*5fa1c062SBarry Smith 277*5fa1c062SBarry Smith PetscFunctionBegin; 278*5fa1c062SBarry Smith ierr = MatCreate(comm,A);CHKERRQ(ierr); 279*5fa1c062SBarry Smith ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 280*5fa1c062SBarry Smith ierr = MatSetType(*A,MATSEQCRL);CHKERRQ(ierr); 281*5fa1c062SBarry Smith ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);CHKERRQ(ierr); 282*5fa1c062SBarry Smith PetscFunctionReturn(0); 283*5fa1c062SBarry Smith } 284*5fa1c062SBarry Smith 285*5fa1c062SBarry Smith 286*5fa1c062SBarry Smith EXTERN_C_BEGIN 287*5fa1c062SBarry Smith #undef __FUNCT__ 288*5fa1c062SBarry Smith #define __FUNCT__ "MatCreate_SeqCRL" 289*5fa1c062SBarry Smith PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqCRL(Mat A) 290*5fa1c062SBarry Smith { 291*5fa1c062SBarry Smith PetscErrorCode ierr; 292*5fa1c062SBarry Smith 293*5fa1c062SBarry Smith PetscFunctionBegin; 294*5fa1c062SBarry Smith /* Change the type name before calling MatSetType() to force proper construction of SeqAIJ 295*5fa1c062SBarry Smith and MATSEQCRL types. */ 296*5fa1c062SBarry Smith ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQCRL);CHKERRQ(ierr); 297*5fa1c062SBarry Smith ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 298*5fa1c062SBarry Smith ierr = MatConvert_SeqAIJ_SeqCRL(A,MATSEQCRL,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 299*5fa1c062SBarry Smith PetscFunctionReturn(0); 300*5fa1c062SBarry Smith } 301*5fa1c062SBarry Smith EXTERN_C_END 302*5fa1c062SBarry Smith 303