1*938d9b04SRichard Tran Mills 2*938d9b04SRichard Tran Mills /* 3*938d9b04SRichard Tran Mills Defines basic operations for the MATSEQAIJPERM matrix class. 4*938d9b04SRichard Tran Mills This class is derived from the MATSEQAIJ class and retains the 5*938d9b04SRichard Tran Mills compressed row storage (aka Yale sparse matrix format) but augments 6*938d9b04SRichard Tran Mills it with some permutation information that enables some operations 7*938d9b04SRichard Tran Mills to be more vectorizable. A physically rearranged copy of the matrix 8*938d9b04SRichard Tran Mills may be stored if the user desires. 9*938d9b04SRichard Tran Mills 10*938d9b04SRichard Tran Mills Eventually a variety of permutations may be supported. 11*938d9b04SRichard Tran Mills */ 12*938d9b04SRichard Tran Mills 13*938d9b04SRichard Tran Mills #include <../src/mat/impls/aij/seq/aij.h> 14*938d9b04SRichard Tran Mills 15*938d9b04SRichard Tran Mills #define NDIM 512 16*938d9b04SRichard Tran Mills /* NDIM specifies how many rows at a time we should work with when 17*938d9b04SRichard Tran Mills * performing the vectorized mat-vec. This depends on various factors 18*938d9b04SRichard Tran Mills * such as vector register length, etc., and I really need to add a 19*938d9b04SRichard Tran Mills * way for the user (or the library) to tune this. I'm setting it to 20*938d9b04SRichard Tran Mills * 512 for now since that is what Ed D'Azevedo was using in his Fortran 21*938d9b04SRichard Tran Mills * routines. */ 22*938d9b04SRichard Tran Mills 23*938d9b04SRichard Tran Mills typedef struct { 24*938d9b04SRichard Tran Mills PetscObjectState nonzerostate; /* used to determine if the nonzero structure has changed and hence the permutations need updating */ 25*938d9b04SRichard Tran Mills 26*938d9b04SRichard Tran Mills PetscInt ngroup; 27*938d9b04SRichard Tran Mills PetscInt *xgroup; 28*938d9b04SRichard Tran Mills /* Denotes where groups of rows with same number of nonzeros 29*938d9b04SRichard Tran Mills * begin and end, i.e., xgroup[i] gives us the position in iperm[] 30*938d9b04SRichard Tran Mills * where the ith group begins. */ 31*938d9b04SRichard Tran Mills 32*938d9b04SRichard Tran Mills PetscInt *nzgroup; /* how many nonzeros each row that is a member of group i has. */ 33*938d9b04SRichard Tran Mills PetscInt *iperm; /* The permutation vector. */ 34*938d9b04SRichard Tran Mills 35*938d9b04SRichard Tran Mills /* Some of this stuff is for Ed's recursive triangular solve. 36*938d9b04SRichard Tran Mills * I'm not sure what I need yet. */ 37*938d9b04SRichard Tran Mills PetscInt blocksize; 38*938d9b04SRichard Tran Mills PetscInt nstep; 39*938d9b04SRichard Tran Mills PetscInt *jstart_list; 40*938d9b04SRichard Tran Mills PetscInt *jend_list; 41*938d9b04SRichard Tran Mills PetscInt *action_list; 42*938d9b04SRichard Tran Mills PetscInt *ngroup_list; 43*938d9b04SRichard Tran Mills PetscInt **ipointer_list; 44*938d9b04SRichard Tran Mills PetscInt **xgroup_list; 45*938d9b04SRichard Tran Mills PetscInt **nzgroup_list; 46*938d9b04SRichard Tran Mills PetscInt **iperm_list; 47*938d9b04SRichard Tran Mills } Mat_SeqAIJPERM; 48*938d9b04SRichard Tran Mills 49*938d9b04SRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJPERM_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 50*938d9b04SRichard Tran Mills { 51*938d9b04SRichard Tran Mills /* This routine is only called to convert a MATAIJPERM to its base PETSc type, */ 52*938d9b04SRichard Tran Mills /* so we will ignore 'MatType type'. */ 53*938d9b04SRichard Tran Mills PetscErrorCode ierr; 54*938d9b04SRichard Tran Mills Mat B = *newmat; 55*938d9b04SRichard Tran Mills Mat_SeqAIJPERM *aijperm=(Mat_SeqAIJPERM*)A->spptr; 56*938d9b04SRichard Tran Mills 57*938d9b04SRichard Tran Mills PetscFunctionBegin; 58*938d9b04SRichard Tran Mills if (reuse == MAT_INITIAL_MATRIX) { 59*938d9b04SRichard Tran Mills ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 60*938d9b04SRichard Tran Mills aijperm=(Mat_SeqAIJPERM*)B->spptr; 61*938d9b04SRichard Tran Mills } 62*938d9b04SRichard Tran Mills 63*938d9b04SRichard Tran Mills /* Reset the original function pointers. */ 64*938d9b04SRichard Tran Mills B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 65*938d9b04SRichard Tran Mills B->ops->destroy = MatDestroy_SeqAIJ; 66*938d9b04SRichard Tran Mills B->ops->duplicate = MatDuplicate_SeqAIJ; 67*938d9b04SRichard Tran Mills B->ops->mult = MatMult_SeqAIJ; 68*938d9b04SRichard Tran Mills B->ops->multadd = MatMultAdd_SeqAIJ; 69*938d9b04SRichard Tran Mills 70*938d9b04SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijperm_seqaij_C",NULL);CHKERRQ(ierr); 71*938d9b04SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijperm_C",NULL);CHKERRQ(ierr); 72*938d9b04SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijperm_C",NULL);CHKERRQ(ierr); 73*938d9b04SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijperm_C",NULL);CHKERRQ(ierr); 74*938d9b04SRichard Tran Mills 75*938d9b04SRichard Tran Mills /* Free everything in the Mat_SeqAIJPERM data structure.*/ 76*938d9b04SRichard Tran Mills ierr = PetscFree(aijperm->xgroup);CHKERRQ(ierr); 77*938d9b04SRichard Tran Mills ierr = PetscFree(aijperm->nzgroup);CHKERRQ(ierr); 78*938d9b04SRichard Tran Mills ierr = PetscFree(aijperm->iperm);CHKERRQ(ierr); 79*938d9b04SRichard Tran Mills ierr = PetscFree(B->spptr);CHKERRQ(ierr); 80*938d9b04SRichard Tran Mills 81*938d9b04SRichard Tran Mills /* Change the type of B to MATSEQAIJ. */ 82*938d9b04SRichard Tran Mills ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 83*938d9b04SRichard Tran Mills 84*938d9b04SRichard Tran Mills *newmat = B; 85*938d9b04SRichard Tran Mills PetscFunctionReturn(0); 86*938d9b04SRichard Tran Mills } 87*938d9b04SRichard Tran Mills 88*938d9b04SRichard Tran Mills PetscErrorCode MatDestroy_SeqAIJPERM(Mat A) 89*938d9b04SRichard Tran Mills { 90*938d9b04SRichard Tran Mills PetscErrorCode ierr; 91*938d9b04SRichard Tran Mills Mat_SeqAIJPERM *aijperm = (Mat_SeqAIJPERM*) A->spptr; 92*938d9b04SRichard Tran Mills 93*938d9b04SRichard Tran Mills PetscFunctionBegin; 94*938d9b04SRichard Tran Mills if (aijperm) { 95*938d9b04SRichard Tran Mills /* If MatHeaderMerge() was used then this SeqAIJPERM matrix will not have a spprt. */ 96*938d9b04SRichard Tran Mills ierr = PetscFree(aijperm->xgroup);CHKERRQ(ierr); 97*938d9b04SRichard Tran Mills ierr = PetscFree(aijperm->nzgroup);CHKERRQ(ierr); 98*938d9b04SRichard Tran Mills ierr = PetscFree(aijperm->iperm);CHKERRQ(ierr); 99*938d9b04SRichard Tran Mills ierr = PetscFree(A->spptr);CHKERRQ(ierr); 100*938d9b04SRichard Tran Mills } 101*938d9b04SRichard Tran Mills /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 102*938d9b04SRichard Tran Mills * to destroy everything that remains. */ 103*938d9b04SRichard Tran Mills ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 104*938d9b04SRichard Tran Mills /* Note that I don't call MatSetType(). I believe this is because that 105*938d9b04SRichard Tran Mills * is only to be called when *building* a matrix. I could be wrong, but 106*938d9b04SRichard Tran Mills * that is how things work for the SuperLU matrix class. */ 107*938d9b04SRichard Tran Mills ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 108*938d9b04SRichard Tran Mills PetscFunctionReturn(0); 109*938d9b04SRichard Tran Mills } 110*938d9b04SRichard Tran Mills 111*938d9b04SRichard Tran Mills PetscErrorCode MatDuplicate_SeqAIJPERM(Mat A, MatDuplicateOption op, Mat *M) 112*938d9b04SRichard Tran Mills { 113*938d9b04SRichard Tran Mills PetscErrorCode ierr; 114*938d9b04SRichard Tran Mills Mat_SeqAIJPERM *aijperm = (Mat_SeqAIJPERM*) A->spptr; 115*938d9b04SRichard Tran Mills Mat_SeqAIJPERM *aijperm_dest; 116*938d9b04SRichard Tran Mills PetscBool perm; 117*938d9b04SRichard Tran Mills 118*938d9b04SRichard Tran Mills PetscFunctionBegin; 119*938d9b04SRichard Tran Mills ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 120*938d9b04SRichard Tran Mills ierr = PetscObjectTypeCompare((PetscObject)*M,MATSEQAIJPERM,&perm);CHKERRQ(ierr); 121*938d9b04SRichard Tran Mills if (perm) { 122*938d9b04SRichard Tran Mills aijperm_dest = (Mat_SeqAIJPERM *) (*M)->spptr; 123*938d9b04SRichard Tran Mills ierr = PetscFree(aijperm_dest->xgroup);CHKERRQ(ierr); 124*938d9b04SRichard Tran Mills ierr = PetscFree(aijperm_dest->nzgroup);CHKERRQ(ierr); 125*938d9b04SRichard Tran Mills ierr = PetscFree(aijperm_dest->iperm);CHKERRQ(ierr); 126*938d9b04SRichard Tran Mills } else { 127*938d9b04SRichard Tran Mills ierr = PetscNewLog(*M,&aijperm_dest);CHKERRQ(ierr); 128*938d9b04SRichard Tran Mills (*M)->spptr = (void*) aijperm_dest; 129*938d9b04SRichard Tran Mills ierr = PetscObjectChangeTypeName((PetscObject)*M,MATSEQAIJPERM);CHKERRQ(ierr); 130*938d9b04SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)*M,"MatConvert_seqaijperm_seqaij_C",MatConvert_SeqAIJPERM_SeqAIJ);CHKERRQ(ierr); 131*938d9b04SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)*M,"MatMatMult_seqdense_seqaijperm_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr); 132*938d9b04SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)*M,"MatMatMultSymbolic_seqdense_seqaijperm_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr); 133*938d9b04SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)*M,"MatMatMultNumeric_seqdense_seqaijperm_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr); 134*938d9b04SRichard Tran Mills } 135*938d9b04SRichard Tran Mills ierr = PetscMemcpy(aijperm_dest,aijperm,sizeof(Mat_SeqAIJPERM));CHKERRQ(ierr); 136*938d9b04SRichard Tran Mills /* Allocate space for, and copy the grouping and permutation info. 137*938d9b04SRichard Tran Mills * I note that when the groups are initially determined in 138*938d9b04SRichard Tran Mills * MatSeqAIJPERM_create_perm, xgroup and nzgroup may be sized larger than 139*938d9b04SRichard Tran Mills * necessary. But at this point, we know how large they need to be, and 140*938d9b04SRichard Tran Mills * allocate only the necessary amount of memory. So the duplicated matrix 141*938d9b04SRichard Tran Mills * may actually use slightly less storage than the original! */ 142*938d9b04SRichard Tran Mills ierr = PetscMalloc1(A->rmap->n, &aijperm_dest->iperm);CHKERRQ(ierr); 143*938d9b04SRichard Tran Mills ierr = PetscMalloc1(aijperm->ngroup+1, &aijperm_dest->xgroup);CHKERRQ(ierr); 144*938d9b04SRichard Tran Mills ierr = PetscMalloc1(aijperm->ngroup, &aijperm_dest->nzgroup);CHKERRQ(ierr); 145*938d9b04SRichard Tran Mills ierr = PetscMemcpy(aijperm_dest->iperm,aijperm->iperm,sizeof(PetscInt)*A->rmap->n);CHKERRQ(ierr); 146*938d9b04SRichard Tran Mills ierr = PetscMemcpy(aijperm_dest->xgroup,aijperm->xgroup,sizeof(PetscInt)*(aijperm->ngroup+1));CHKERRQ(ierr); 147*938d9b04SRichard Tran Mills ierr = PetscMemcpy(aijperm_dest->nzgroup,aijperm->nzgroup,sizeof(PetscInt)*aijperm->ngroup);CHKERRQ(ierr); 148*938d9b04SRichard Tran Mills PetscFunctionReturn(0); 149*938d9b04SRichard Tran Mills } 150*938d9b04SRichard Tran Mills 151*938d9b04SRichard Tran Mills PetscErrorCode MatSeqAIJPERM_create_perm(Mat A) 152*938d9b04SRichard Tran Mills { 153*938d9b04SRichard Tran Mills PetscErrorCode ierr; 154*938d9b04SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)(A)->data; 155*938d9b04SRichard Tran Mills Mat_SeqAIJPERM *aijperm = (Mat_SeqAIJPERM*) A->spptr; 156*938d9b04SRichard Tran Mills PetscInt m; /* Number of rows in the matrix. */ 157*938d9b04SRichard Tran Mills PetscInt *ia; /* From the CSR representation; points to the beginning of each row. */ 158*938d9b04SRichard Tran Mills PetscInt maxnz; /* Maximum number of nonzeros in any row. */ 159*938d9b04SRichard Tran Mills PetscInt *rows_in_bucket; 160*938d9b04SRichard Tran Mills /* To construct the permutation, we sort each row into one of maxnz 161*938d9b04SRichard Tran Mills * buckets based on how many nonzeros are in the row. */ 162*938d9b04SRichard Tran Mills PetscInt nz; 163*938d9b04SRichard Tran Mills PetscInt *nz_in_row; /* the number of nonzero elements in row k. */ 164*938d9b04SRichard Tran Mills PetscInt *ipnz; 165*938d9b04SRichard Tran Mills /* When constructing the iperm permutation vector, 166*938d9b04SRichard Tran Mills * ipnz[nz] is used to point to the next place in the permutation vector 167*938d9b04SRichard Tran Mills * that a row with nz nonzero elements should be placed.*/ 168*938d9b04SRichard Tran Mills PetscInt i, ngroup, istart, ipos; 169*938d9b04SRichard Tran Mills 170*938d9b04SRichard Tran Mills PetscFunctionBegin; 171*938d9b04SRichard Tran Mills if (aijperm->nonzerostate == A->nonzerostate) PetscFunctionReturn(0); /* permutation exists and matches current nonzero structure */ 172*938d9b04SRichard Tran Mills aijperm->nonzerostate = A->nonzerostate; 173*938d9b04SRichard Tran Mills /* Free anything previously put in the Mat_SeqAIJPERM data structure. */ 174*938d9b04SRichard Tran Mills ierr = PetscFree(aijperm->xgroup);CHKERRQ(ierr); 175*938d9b04SRichard Tran Mills ierr = PetscFree(aijperm->nzgroup);CHKERRQ(ierr); 176*938d9b04SRichard Tran Mills ierr = PetscFree(aijperm->iperm);CHKERRQ(ierr); 177*938d9b04SRichard Tran Mills 178*938d9b04SRichard Tran Mills m = A->rmap->n; 179*938d9b04SRichard Tran Mills ia = a->i; 180*938d9b04SRichard Tran Mills 181*938d9b04SRichard Tran Mills /* Allocate the arrays that will hold the permutation vector. */ 182*938d9b04SRichard Tran Mills ierr = PetscMalloc1(m, &aijperm->iperm);CHKERRQ(ierr); 183*938d9b04SRichard Tran Mills 184*938d9b04SRichard Tran Mills /* Allocate some temporary work arrays that will be used in 185*938d9b04SRichard Tran Mills * calculating the permuation vector and groupings. */ 186*938d9b04SRichard Tran Mills ierr = PetscMalloc1(m, &nz_in_row);CHKERRQ(ierr); 187*938d9b04SRichard Tran Mills 188*938d9b04SRichard Tran Mills /* Now actually figure out the permutation and grouping. */ 189*938d9b04SRichard Tran Mills 190*938d9b04SRichard Tran Mills /* First pass: Determine number of nonzeros in each row, maximum 191*938d9b04SRichard Tran Mills * number of nonzeros in any row, and how many rows fall into each 192*938d9b04SRichard Tran Mills * "bucket" of rows with same number of nonzeros. */ 193*938d9b04SRichard Tran Mills maxnz = 0; 194*938d9b04SRichard Tran Mills for (i=0; i<m; i++) { 195*938d9b04SRichard Tran Mills nz_in_row[i] = ia[i+1]-ia[i]; 196*938d9b04SRichard Tran Mills if (nz_in_row[i] > maxnz) maxnz = nz_in_row[i]; 197*938d9b04SRichard Tran Mills } 198*938d9b04SRichard Tran Mills ierr = PetscMalloc1(PetscMax(maxnz,m)+1, &rows_in_bucket);CHKERRQ(ierr); 199*938d9b04SRichard Tran Mills ierr = PetscMalloc1(PetscMax(maxnz,m)+1, &ipnz);CHKERRQ(ierr); 200*938d9b04SRichard Tran Mills 201*938d9b04SRichard Tran Mills for (i=0; i<=maxnz; i++) { 202*938d9b04SRichard Tran Mills rows_in_bucket[i] = 0; 203*938d9b04SRichard Tran Mills } 204*938d9b04SRichard Tran Mills for (i=0; i<m; i++) { 205*938d9b04SRichard Tran Mills nz = nz_in_row[i]; 206*938d9b04SRichard Tran Mills rows_in_bucket[nz]++; 207*938d9b04SRichard Tran Mills } 208*938d9b04SRichard Tran Mills 209*938d9b04SRichard Tran Mills /* Allocate space for the grouping info. There will be at most (maxnz + 1) 210*938d9b04SRichard Tran Mills * groups. (It is maxnz + 1 instead of simply maxnz because there may be 211*938d9b04SRichard Tran Mills * rows with no nonzero elements.) If there are (maxnz + 1) groups, 212*938d9b04SRichard Tran Mills * then xgroup[] must consist of (maxnz + 2) elements, since the last 213*938d9b04SRichard Tran Mills * element of xgroup will tell us where the (maxnz + 1)th group ends. 214*938d9b04SRichard Tran Mills * We allocate space for the maximum number of groups; 215*938d9b04SRichard Tran Mills * that is potentially a little wasteful, but not too much so. 216*938d9b04SRichard Tran Mills * Perhaps I should fix it later. */ 217*938d9b04SRichard Tran Mills ierr = PetscMalloc1(maxnz+2, &aijperm->xgroup);CHKERRQ(ierr); 218*938d9b04SRichard Tran Mills ierr = PetscMalloc1(maxnz+1, &aijperm->nzgroup);CHKERRQ(ierr); 219*938d9b04SRichard Tran Mills 220*938d9b04SRichard Tran Mills /* Second pass. Look at what is in the buckets and create the groupings. 221*938d9b04SRichard Tran Mills * Note that it is OK to have a group of rows with no non-zero values. */ 222*938d9b04SRichard Tran Mills ngroup = 0; 223*938d9b04SRichard Tran Mills istart = 0; 224*938d9b04SRichard Tran Mills for (i=0; i<=maxnz; i++) { 225*938d9b04SRichard Tran Mills if (rows_in_bucket[i] > 0) { 226*938d9b04SRichard Tran Mills aijperm->nzgroup[ngroup] = i; 227*938d9b04SRichard Tran Mills aijperm->xgroup[ngroup] = istart; 228*938d9b04SRichard Tran Mills ngroup++; 229*938d9b04SRichard Tran Mills istart += rows_in_bucket[i]; 230*938d9b04SRichard Tran Mills } 231*938d9b04SRichard Tran Mills } 232*938d9b04SRichard Tran Mills 233*938d9b04SRichard Tran Mills aijperm->xgroup[ngroup] = istart; 234*938d9b04SRichard Tran Mills aijperm->ngroup = ngroup; 235*938d9b04SRichard Tran Mills 236*938d9b04SRichard Tran Mills /* Now fill in the permutation vector iperm. */ 237*938d9b04SRichard Tran Mills ipnz[0] = 0; 238*938d9b04SRichard Tran Mills for (i=0; i<maxnz; i++) { 239*938d9b04SRichard Tran Mills ipnz[i+1] = ipnz[i] + rows_in_bucket[i]; 240*938d9b04SRichard Tran Mills } 241*938d9b04SRichard Tran Mills 242*938d9b04SRichard Tran Mills for (i=0; i<m; i++) { 243*938d9b04SRichard Tran Mills nz = nz_in_row[i]; 244*938d9b04SRichard Tran Mills ipos = ipnz[nz]; 245*938d9b04SRichard Tran Mills aijperm->iperm[ipos] = i; 246*938d9b04SRichard Tran Mills ipnz[nz]++; 247*938d9b04SRichard Tran Mills } 248*938d9b04SRichard Tran Mills 249*938d9b04SRichard Tran Mills /* Clean up temporary work arrays. */ 250*938d9b04SRichard Tran Mills ierr = PetscFree(rows_in_bucket);CHKERRQ(ierr); 251*938d9b04SRichard Tran Mills ierr = PetscFree(ipnz);CHKERRQ(ierr); 252*938d9b04SRichard Tran Mills ierr = PetscFree(nz_in_row);CHKERRQ(ierr); 253*938d9b04SRichard Tran Mills PetscFunctionReturn(0); 254*938d9b04SRichard Tran Mills } 255*938d9b04SRichard Tran Mills 256*938d9b04SRichard Tran Mills 257*938d9b04SRichard Tran Mills PetscErrorCode MatAssemblyEnd_SeqAIJPERM(Mat A, MatAssemblyType mode) 258*938d9b04SRichard Tran Mills { 259*938d9b04SRichard Tran Mills PetscErrorCode ierr; 260*938d9b04SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 261*938d9b04SRichard Tran Mills 262*938d9b04SRichard Tran Mills PetscFunctionBegin; 263*938d9b04SRichard Tran Mills if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 264*938d9b04SRichard Tran Mills 265*938d9b04SRichard Tran Mills /* Since a MATSEQAIJPERM matrix is really just a MATSEQAIJ with some 266*938d9b04SRichard Tran Mills * extra information, call the AssemblyEnd routine for a MATSEQAIJ. 267*938d9b04SRichard Tran Mills * I'm not sure if this is the best way to do this, but it avoids 268*938d9b04SRichard Tran Mills * a lot of code duplication. 269*938d9b04SRichard Tran Mills * I also note that currently MATSEQAIJPERM doesn't know anything about 270*938d9b04SRichard Tran Mills * the Mat_CompressedRow data structure that SeqAIJ now uses when there 271*938d9b04SRichard Tran Mills * are many zero rows. If the SeqAIJ assembly end routine decides to use 272*938d9b04SRichard Tran Mills * this, this may break things. (Don't know... haven't looked at it.) */ 273*938d9b04SRichard Tran Mills a->inode.use = PETSC_FALSE; 274*938d9b04SRichard Tran Mills ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 275*938d9b04SRichard Tran Mills 276*938d9b04SRichard Tran Mills /* Now calculate the permutation and grouping information. */ 277*938d9b04SRichard Tran Mills ierr = MatSeqAIJPERM_create_perm(A);CHKERRQ(ierr); 278*938d9b04SRichard Tran Mills PetscFunctionReturn(0); 279*938d9b04SRichard Tran Mills } 280*938d9b04SRichard Tran Mills 281*938d9b04SRichard Tran Mills PetscErrorCode MatMult_SeqAIJPERM(Mat A,Vec xx,Vec yy) 282*938d9b04SRichard Tran Mills { 283*938d9b04SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 284*938d9b04SRichard Tran Mills const PetscScalar *x; 285*938d9b04SRichard Tran Mills PetscScalar *y; 286*938d9b04SRichard Tran Mills const MatScalar *aa; 287*938d9b04SRichard Tran Mills PetscErrorCode ierr; 288*938d9b04SRichard Tran Mills const PetscInt *aj,*ai; 289*938d9b04SRichard Tran Mills #if !(defined(PETSC_USE_FORTRAN_KERNEL_MULTAIJPERM) && defined(notworking)) 290*938d9b04SRichard Tran Mills PetscInt i,j; 291*938d9b04SRichard Tran Mills #endif 292*938d9b04SRichard Tran Mills 293*938d9b04SRichard Tran Mills /* Variables that don't appear in MatMult_SeqAIJ. */ 294*938d9b04SRichard Tran Mills Mat_SeqAIJPERM *aijperm = (Mat_SeqAIJPERM*) A->spptr; 295*938d9b04SRichard Tran Mills PetscInt *iperm; /* Points to the permutation vector. */ 296*938d9b04SRichard Tran Mills PetscInt *xgroup; 297*938d9b04SRichard Tran Mills /* Denotes where groups of rows with same number of nonzeros 298*938d9b04SRichard Tran Mills * begin and end in iperm. */ 299*938d9b04SRichard Tran Mills PetscInt *nzgroup; 300*938d9b04SRichard Tran Mills PetscInt ngroup; 301*938d9b04SRichard Tran Mills PetscInt igroup; 302*938d9b04SRichard Tran Mills PetscInt jstart,jend; 303*938d9b04SRichard Tran Mills /* jstart is used in loops to denote the position in iperm where a 304*938d9b04SRichard Tran Mills * group starts; jend denotes the position where it ends. 305*938d9b04SRichard Tran Mills * (jend + 1 is where the next group starts.) */ 306*938d9b04SRichard Tran Mills PetscInt iold,nz; 307*938d9b04SRichard Tran Mills PetscInt istart,iend,isize; 308*938d9b04SRichard Tran Mills PetscInt ipos; 309*938d9b04SRichard Tran Mills PetscScalar yp[NDIM]; 310*938d9b04SRichard Tran Mills PetscInt ip[NDIM]; /* yp[] and ip[] are treated as vector "registers" for performing the mat-vec. */ 311*938d9b04SRichard Tran Mills 312*938d9b04SRichard Tran Mills #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 313*938d9b04SRichard Tran Mills #pragma disjoint(*x,*y,*aa) 314*938d9b04SRichard Tran Mills #endif 315*938d9b04SRichard Tran Mills 316*938d9b04SRichard Tran Mills PetscFunctionBegin; 317*938d9b04SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 318*938d9b04SRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 319*938d9b04SRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 320*938d9b04SRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 321*938d9b04SRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 322*938d9b04SRichard Tran Mills 323*938d9b04SRichard Tran Mills /* Get the info we need about the permutations and groupings. */ 324*938d9b04SRichard Tran Mills iperm = aijperm->iperm; 325*938d9b04SRichard Tran Mills ngroup = aijperm->ngroup; 326*938d9b04SRichard Tran Mills xgroup = aijperm->xgroup; 327*938d9b04SRichard Tran Mills nzgroup = aijperm->nzgroup; 328*938d9b04SRichard Tran Mills 329*938d9b04SRichard Tran Mills #if defined(PETSC_USE_FORTRAN_KERNEL_MULTAIJPERM) && defined(notworking) 330*938d9b04SRichard Tran Mills fortranmultaijperm_(&m,x,ii,aj,aa,y); 331*938d9b04SRichard Tran Mills #else 332*938d9b04SRichard Tran Mills 333*938d9b04SRichard Tran Mills for (igroup=0; igroup<ngroup; igroup++) { 334*938d9b04SRichard Tran Mills jstart = xgroup[igroup]; 335*938d9b04SRichard Tran Mills jend = xgroup[igroup+1] - 1; 336*938d9b04SRichard Tran Mills nz = nzgroup[igroup]; 337*938d9b04SRichard Tran Mills 338*938d9b04SRichard Tran Mills /* Handle the special cases where the number of nonzeros per row 339*938d9b04SRichard Tran Mills * in the group is either 0 or 1. */ 340*938d9b04SRichard Tran Mills if (nz == 0) { 341*938d9b04SRichard Tran Mills for (i=jstart; i<=jend; i++) { 342*938d9b04SRichard Tran Mills y[iperm[i]] = 0.0; 343*938d9b04SRichard Tran Mills } 344*938d9b04SRichard Tran Mills } else if (nz == 1) { 345*938d9b04SRichard Tran Mills for (i=jstart; i<=jend; i++) { 346*938d9b04SRichard Tran Mills iold = iperm[i]; 347*938d9b04SRichard Tran Mills ipos = ai[iold]; 348*938d9b04SRichard Tran Mills y[iold] = aa[ipos] * x[aj[ipos]]; 349*938d9b04SRichard Tran Mills } 350*938d9b04SRichard Tran Mills } else { 351*938d9b04SRichard Tran Mills 352*938d9b04SRichard Tran Mills /* We work our way through the current group in chunks of NDIM rows 353*938d9b04SRichard Tran Mills * at a time. */ 354*938d9b04SRichard Tran Mills 355*938d9b04SRichard Tran Mills for (istart=jstart; istart<=jend; istart+=NDIM) { 356*938d9b04SRichard Tran Mills /* Figure out where the chunk of 'isize' rows ends in iperm. 357*938d9b04SRichard Tran Mills * 'isize may of course be less than NDIM for the last chunk. */ 358*938d9b04SRichard Tran Mills iend = istart + (NDIM - 1); 359*938d9b04SRichard Tran Mills 360*938d9b04SRichard Tran Mills if (iend > jend) iend = jend; 361*938d9b04SRichard Tran Mills 362*938d9b04SRichard Tran Mills isize = iend - istart + 1; 363*938d9b04SRichard Tran Mills 364*938d9b04SRichard Tran Mills /* Initialize the yp[] array that will be used to hold part of 365*938d9b04SRichard Tran Mills * the permuted results vector, and figure out where in aa each 366*938d9b04SRichard Tran Mills * row of the chunk will begin. */ 367*938d9b04SRichard Tran Mills for (i=0; i<isize; i++) { 368*938d9b04SRichard Tran Mills iold = iperm[istart + i]; 369*938d9b04SRichard Tran Mills /* iold is a row number from the matrix A *before* reordering. */ 370*938d9b04SRichard Tran Mills ip[i] = ai[iold]; 371*938d9b04SRichard Tran Mills /* ip[i] tells us where the ith row of the chunk begins in aa. */ 372*938d9b04SRichard Tran Mills yp[i] = (PetscScalar) 0.0; 373*938d9b04SRichard Tran Mills } 374*938d9b04SRichard Tran Mills 375*938d9b04SRichard Tran Mills /* If the number of zeros per row exceeds the number of rows in 376*938d9b04SRichard Tran Mills * the chunk, we should vectorize along nz, that is, perform the 377*938d9b04SRichard Tran Mills * mat-vec one row at a time as in the usual CSR case. */ 378*938d9b04SRichard Tran Mills if (nz > isize) { 379*938d9b04SRichard Tran Mills #if defined(PETSC_HAVE_CRAY_VECTOR) 380*938d9b04SRichard Tran Mills #pragma _CRI preferstream 381*938d9b04SRichard Tran Mills #endif 382*938d9b04SRichard Tran Mills for (i=0; i<isize; i++) { 383*938d9b04SRichard Tran Mills #if defined(PETSC_HAVE_CRAY_VECTOR) 384*938d9b04SRichard Tran Mills #pragma _CRI prefervector 385*938d9b04SRichard Tran Mills #endif 386*938d9b04SRichard Tran Mills for (j=0; j<nz; j++) { 387*938d9b04SRichard Tran Mills ipos = ip[i] + j; 388*938d9b04SRichard Tran Mills yp[i] += aa[ipos] * x[aj[ipos]]; 389*938d9b04SRichard Tran Mills } 390*938d9b04SRichard Tran Mills } 391*938d9b04SRichard Tran Mills } else { 392*938d9b04SRichard Tran Mills /* Otherwise, there are enough rows in the chunk to make it 393*938d9b04SRichard Tran Mills * worthwhile to vectorize across the rows, that is, to do the 394*938d9b04SRichard Tran Mills * matvec by operating with "columns" of the chunk. */ 395*938d9b04SRichard Tran Mills for (j=0; j<nz; j++) { 396*938d9b04SRichard Tran Mills for (i=0; i<isize; i++) { 397*938d9b04SRichard Tran Mills ipos = ip[i] + j; 398*938d9b04SRichard Tran Mills yp[i] += aa[ipos] * x[aj[ipos]]; 399*938d9b04SRichard Tran Mills } 400*938d9b04SRichard Tran Mills } 401*938d9b04SRichard Tran Mills } 402*938d9b04SRichard Tran Mills 403*938d9b04SRichard Tran Mills #if defined(PETSC_HAVE_CRAY_VECTOR) 404*938d9b04SRichard Tran Mills #pragma _CRI ivdep 405*938d9b04SRichard Tran Mills #endif 406*938d9b04SRichard Tran Mills /* Put results from yp[] into non-permuted result vector y. */ 407*938d9b04SRichard Tran Mills for (i=0; i<isize; i++) { 408*938d9b04SRichard Tran Mills y[iperm[istart+i]] = yp[i]; 409*938d9b04SRichard Tran Mills } 410*938d9b04SRichard Tran Mills } /* End processing chunk of isize rows of a group. */ 411*938d9b04SRichard Tran Mills } /* End handling matvec for chunk with nz > 1. */ 412*938d9b04SRichard Tran Mills } /* End loop over igroup. */ 413*938d9b04SRichard Tran Mills #endif 414*938d9b04SRichard Tran Mills ierr = PetscLogFlops(PetscMax(2.0*a->nz - A->rmap->n,0));CHKERRQ(ierr); 415*938d9b04SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 416*938d9b04SRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 417*938d9b04SRichard Tran Mills PetscFunctionReturn(0); 418*938d9b04SRichard Tran Mills } 419*938d9b04SRichard Tran Mills 420*938d9b04SRichard Tran Mills 421*938d9b04SRichard Tran Mills /* MatMultAdd_SeqAIJPERM() calculates yy = ww + A * xx. 422*938d9b04SRichard Tran Mills * Note that the names I used to designate the vectors differs from that 423*938d9b04SRichard Tran Mills * used in MatMultAdd_SeqAIJ(). I did this to keep my notation consistent 424*938d9b04SRichard Tran Mills * with the MatMult_SeqAIJPERM() routine, which is very similar to this one. */ 425*938d9b04SRichard Tran Mills /* 426*938d9b04SRichard Tran Mills I hate having virtually identical code for the mult and the multadd!!! 427*938d9b04SRichard Tran Mills */ 428*938d9b04SRichard Tran Mills PetscErrorCode MatMultAdd_SeqAIJPERM(Mat A,Vec xx,Vec ww,Vec yy) 429*938d9b04SRichard Tran Mills { 430*938d9b04SRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 431*938d9b04SRichard Tran Mills const PetscScalar *x; 432*938d9b04SRichard Tran Mills PetscScalar *y,*w; 433*938d9b04SRichard Tran Mills const MatScalar *aa; 434*938d9b04SRichard Tran Mills PetscErrorCode ierr; 435*938d9b04SRichard Tran Mills const PetscInt *aj,*ai; 436*938d9b04SRichard Tran Mills #if !defined(PETSC_USE_FORTRAN_KERNEL_MULTADDAIJPERM) 437*938d9b04SRichard Tran Mills PetscInt i,j; 438*938d9b04SRichard Tran Mills #endif 439*938d9b04SRichard Tran Mills 440*938d9b04SRichard Tran Mills /* Variables that don't appear in MatMultAdd_SeqAIJ. */ 441*938d9b04SRichard Tran Mills Mat_SeqAIJPERM * aijperm; 442*938d9b04SRichard Tran Mills PetscInt *iperm; /* Points to the permutation vector. */ 443*938d9b04SRichard Tran Mills PetscInt *xgroup; 444*938d9b04SRichard Tran Mills /* Denotes where groups of rows with same number of nonzeros 445*938d9b04SRichard Tran Mills * begin and end in iperm. */ 446*938d9b04SRichard Tran Mills PetscInt *nzgroup; 447*938d9b04SRichard Tran Mills PetscInt ngroup; 448*938d9b04SRichard Tran Mills PetscInt igroup; 449*938d9b04SRichard Tran Mills PetscInt jstart,jend; 450*938d9b04SRichard Tran Mills /* jstart is used in loops to denote the position in iperm where a 451*938d9b04SRichard Tran Mills * group starts; jend denotes the position where it ends. 452*938d9b04SRichard Tran Mills * (jend + 1 is where the next group starts.) */ 453*938d9b04SRichard Tran Mills PetscInt iold,nz; 454*938d9b04SRichard Tran Mills PetscInt istart,iend,isize; 455*938d9b04SRichard Tran Mills PetscInt ipos; 456*938d9b04SRichard Tran Mills PetscScalar yp[NDIM]; 457*938d9b04SRichard Tran Mills PetscInt ip[NDIM]; 458*938d9b04SRichard Tran Mills /* yp[] and ip[] are treated as vector "registers" for performing 459*938d9b04SRichard Tran Mills * the mat-vec. */ 460*938d9b04SRichard Tran Mills 461*938d9b04SRichard Tran Mills #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 462*938d9b04SRichard Tran Mills #pragma disjoint(*x,*y,*aa) 463*938d9b04SRichard Tran Mills #endif 464*938d9b04SRichard Tran Mills 465*938d9b04SRichard Tran Mills PetscFunctionBegin; 466*938d9b04SRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 467*938d9b04SRichard Tran Mills ierr = VecGetArrayPair(yy,ww,&y,&w);CHKERRQ(ierr); 468*938d9b04SRichard Tran Mills 469*938d9b04SRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 470*938d9b04SRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 471*938d9b04SRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 472*938d9b04SRichard Tran Mills 473*938d9b04SRichard Tran Mills /* Get the info we need about the permutations and groupings. */ 474*938d9b04SRichard Tran Mills aijperm = (Mat_SeqAIJPERM*) A->spptr; 475*938d9b04SRichard Tran Mills iperm = aijperm->iperm; 476*938d9b04SRichard Tran Mills ngroup = aijperm->ngroup; 477*938d9b04SRichard Tran Mills xgroup = aijperm->xgroup; 478*938d9b04SRichard Tran Mills nzgroup = aijperm->nzgroup; 479*938d9b04SRichard Tran Mills 480*938d9b04SRichard Tran Mills #if defined(PETSC_USE_FORTRAN_KERNEL_MULTADDAIJPERM) 481*938d9b04SRichard Tran Mills fortranmultaddaijperm_(&m,x,ii,aj,aa,y,w); 482*938d9b04SRichard Tran Mills #else 483*938d9b04SRichard Tran Mills 484*938d9b04SRichard Tran Mills for (igroup=0; igroup<ngroup; igroup++) { 485*938d9b04SRichard Tran Mills jstart = xgroup[igroup]; 486*938d9b04SRichard Tran Mills jend = xgroup[igroup+1] - 1; 487*938d9b04SRichard Tran Mills 488*938d9b04SRichard Tran Mills nz = nzgroup[igroup]; 489*938d9b04SRichard Tran Mills 490*938d9b04SRichard Tran Mills /* Handle the special cases where the number of nonzeros per row 491*938d9b04SRichard Tran Mills * in the group is either 0 or 1. */ 492*938d9b04SRichard Tran Mills if (nz == 0) { 493*938d9b04SRichard Tran Mills for (i=jstart; i<=jend; i++) { 494*938d9b04SRichard Tran Mills iold = iperm[i]; 495*938d9b04SRichard Tran Mills y[iold] = w[iold]; 496*938d9b04SRichard Tran Mills } 497*938d9b04SRichard Tran Mills } 498*938d9b04SRichard Tran Mills else if (nz == 1) { 499*938d9b04SRichard Tran Mills for (i=jstart; i<=jend; i++) { 500*938d9b04SRichard Tran Mills iold = iperm[i]; 501*938d9b04SRichard Tran Mills ipos = ai[iold]; 502*938d9b04SRichard Tran Mills y[iold] = w[iold] + aa[ipos] * x[aj[ipos]]; 503*938d9b04SRichard Tran Mills } 504*938d9b04SRichard Tran Mills } 505*938d9b04SRichard Tran Mills /* For the general case: */ 506*938d9b04SRichard Tran Mills else { 507*938d9b04SRichard Tran Mills 508*938d9b04SRichard Tran Mills /* We work our way through the current group in chunks of NDIM rows 509*938d9b04SRichard Tran Mills * at a time. */ 510*938d9b04SRichard Tran Mills 511*938d9b04SRichard Tran Mills for (istart=jstart; istart<=jend; istart+=NDIM) { 512*938d9b04SRichard Tran Mills /* Figure out where the chunk of 'isize' rows ends in iperm. 513*938d9b04SRichard Tran Mills * 'isize may of course be less than NDIM for the last chunk. */ 514*938d9b04SRichard Tran Mills iend = istart + (NDIM - 1); 515*938d9b04SRichard Tran Mills if (iend > jend) iend = jend; 516*938d9b04SRichard Tran Mills isize = iend - istart + 1; 517*938d9b04SRichard Tran Mills 518*938d9b04SRichard Tran Mills /* Initialize the yp[] array that will be used to hold part of 519*938d9b04SRichard Tran Mills * the permuted results vector, and figure out where in aa each 520*938d9b04SRichard Tran Mills * row of the chunk will begin. */ 521*938d9b04SRichard Tran Mills for (i=0; i<isize; i++) { 522*938d9b04SRichard Tran Mills iold = iperm[istart + i]; 523*938d9b04SRichard Tran Mills /* iold is a row number from the matrix A *before* reordering. */ 524*938d9b04SRichard Tran Mills ip[i] = ai[iold]; 525*938d9b04SRichard Tran Mills /* ip[i] tells us where the ith row of the chunk begins in aa. */ 526*938d9b04SRichard Tran Mills yp[i] = w[iold]; 527*938d9b04SRichard Tran Mills } 528*938d9b04SRichard Tran Mills 529*938d9b04SRichard Tran Mills /* If the number of zeros per row exceeds the number of rows in 530*938d9b04SRichard Tran Mills * the chunk, we should vectorize along nz, that is, perform the 531*938d9b04SRichard Tran Mills * mat-vec one row at a time as in the usual CSR case. */ 532*938d9b04SRichard Tran Mills if (nz > isize) { 533*938d9b04SRichard Tran Mills #if defined(PETSC_HAVE_CRAY_VECTOR) 534*938d9b04SRichard Tran Mills #pragma _CRI preferstream 535*938d9b04SRichard Tran Mills #endif 536*938d9b04SRichard Tran Mills for (i=0; i<isize; i++) { 537*938d9b04SRichard Tran Mills #if defined(PETSC_HAVE_CRAY_VECTOR) 538*938d9b04SRichard Tran Mills #pragma _CRI prefervector 539*938d9b04SRichard Tran Mills #endif 540*938d9b04SRichard Tran Mills for (j=0; j<nz; j++) { 541*938d9b04SRichard Tran Mills ipos = ip[i] + j; 542*938d9b04SRichard Tran Mills yp[i] += aa[ipos] * x[aj[ipos]]; 543*938d9b04SRichard Tran Mills } 544*938d9b04SRichard Tran Mills } 545*938d9b04SRichard Tran Mills } 546*938d9b04SRichard Tran Mills /* Otherwise, there are enough rows in the chunk to make it 547*938d9b04SRichard Tran Mills * worthwhile to vectorize across the rows, that is, to do the 548*938d9b04SRichard Tran Mills * matvec by operating with "columns" of the chunk. */ 549*938d9b04SRichard Tran Mills else { 550*938d9b04SRichard Tran Mills for (j=0; j<nz; j++) { 551*938d9b04SRichard Tran Mills for (i=0; i<isize; i++) { 552*938d9b04SRichard Tran Mills ipos = ip[i] + j; 553*938d9b04SRichard Tran Mills yp[i] += aa[ipos] * x[aj[ipos]]; 554*938d9b04SRichard Tran Mills } 555*938d9b04SRichard Tran Mills } 556*938d9b04SRichard Tran Mills } 557*938d9b04SRichard Tran Mills 558*938d9b04SRichard Tran Mills #if defined(PETSC_HAVE_CRAY_VECTOR) 559*938d9b04SRichard Tran Mills #pragma _CRI ivdep 560*938d9b04SRichard Tran Mills #endif 561*938d9b04SRichard Tran Mills /* Put results from yp[] into non-permuted result vector y. */ 562*938d9b04SRichard Tran Mills for (i=0; i<isize; i++) { 563*938d9b04SRichard Tran Mills y[iperm[istart+i]] = yp[i]; 564*938d9b04SRichard Tran Mills } 565*938d9b04SRichard Tran Mills } /* End processing chunk of isize rows of a group. */ 566*938d9b04SRichard Tran Mills 567*938d9b04SRichard Tran Mills } /* End handling matvec for chunk with nz > 1. */ 568*938d9b04SRichard Tran Mills } /* End loop over igroup. */ 569*938d9b04SRichard Tran Mills 570*938d9b04SRichard Tran Mills #endif 571*938d9b04SRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 572*938d9b04SRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 573*938d9b04SRichard Tran Mills ierr = VecRestoreArrayPair(yy,ww,&y,&w);CHKERRQ(ierr); 574*938d9b04SRichard Tran Mills PetscFunctionReturn(0); 575*938d9b04SRichard Tran Mills } 576*938d9b04SRichard Tran Mills 577*938d9b04SRichard Tran Mills 578*938d9b04SRichard Tran Mills /* MatConvert_SeqAIJ_SeqAIJPERM converts a SeqAIJ matrix into a 579*938d9b04SRichard Tran Mills * SeqAIJPERM matrix. This routine is called by the MatCreate_SeqAIJPERM() 580*938d9b04SRichard Tran Mills * routine, but can also be used to convert an assembled SeqAIJ matrix 581*938d9b04SRichard Tran Mills * into a SeqAIJPERM one. */ 582*938d9b04SRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJPERM(Mat A,MatType type,MatReuse reuse,Mat *newmat) 583*938d9b04SRichard Tran Mills { 584*938d9b04SRichard Tran Mills PetscErrorCode ierr; 585*938d9b04SRichard Tran Mills Mat B = *newmat; 586*938d9b04SRichard Tran Mills Mat_SeqAIJPERM *aijperm; 587*938d9b04SRichard Tran Mills PetscBool sametype; 588*938d9b04SRichard Tran Mills 589*938d9b04SRichard Tran Mills PetscFunctionBegin; 590*938d9b04SRichard Tran Mills if (reuse == MAT_INITIAL_MATRIX) { 591*938d9b04SRichard Tran Mills ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 592*938d9b04SRichard Tran Mills } 593*938d9b04SRichard Tran Mills ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 594*938d9b04SRichard Tran Mills if (sametype) PetscFunctionReturn(0); 595*938d9b04SRichard Tran Mills 596*938d9b04SRichard Tran Mills ierr = PetscNewLog(B,&aijperm);CHKERRQ(ierr); 597*938d9b04SRichard Tran Mills B->spptr = (void*) aijperm; 598*938d9b04SRichard Tran Mills 599*938d9b04SRichard Tran Mills /* Set function pointers for methods that we inherit from AIJ but override. */ 600*938d9b04SRichard Tran Mills B->ops->duplicate = MatDuplicate_SeqAIJPERM; 601*938d9b04SRichard Tran Mills B->ops->assemblyend = MatAssemblyEnd_SeqAIJPERM; 602*938d9b04SRichard Tran Mills B->ops->destroy = MatDestroy_SeqAIJPERM; 603*938d9b04SRichard Tran Mills B->ops->mult = MatMult_SeqAIJPERM; 604*938d9b04SRichard Tran Mills B->ops->multadd = MatMultAdd_SeqAIJPERM; 605*938d9b04SRichard Tran Mills 606*938d9b04SRichard Tran Mills aijperm->nonzerostate = -1; /* this will trigger the generation of the permutation information the first time through MatAssembly()*/ 607*938d9b04SRichard Tran Mills /* If A has already been assembled, compute the permutation. */ 608*938d9b04SRichard Tran Mills if (A->assembled) { 609*938d9b04SRichard Tran Mills ierr = MatSeqAIJPERM_create_perm(B);CHKERRQ(ierr); 610*938d9b04SRichard Tran Mills } 611*938d9b04SRichard Tran Mills 612*938d9b04SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijperm_seqaij_C",MatConvert_SeqAIJPERM_SeqAIJ);CHKERRQ(ierr); 613*938d9b04SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijperm_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr); 614*938d9b04SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijperm_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr); 615*938d9b04SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijperm_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr); 616*938d9b04SRichard Tran Mills 617*938d9b04SRichard Tran Mills ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJPERM);CHKERRQ(ierr); 618*938d9b04SRichard Tran Mills *newmat = B; 619*938d9b04SRichard Tran Mills PetscFunctionReturn(0); 620*938d9b04SRichard Tran Mills } 621*938d9b04SRichard Tran Mills 622*938d9b04SRichard Tran Mills /*@C 623*938d9b04SRichard Tran Mills MatCreateSeqAIJPERM - Creates a sparse matrix of type SEQAIJPERM. 624*938d9b04SRichard Tran Mills This type inherits from AIJ, but calculates some additional permutation 625*938d9b04SRichard Tran Mills information that is used to allow better vectorization of some 626*938d9b04SRichard Tran Mills operations. At the cost of increased storage, the AIJ formatted 627*938d9b04SRichard Tran Mills matrix can be copied to a format in which pieces of the matrix are 628*938d9b04SRichard Tran Mills stored in ELLPACK format, allowing the vectorized matrix multiply 629*938d9b04SRichard Tran Mills routine to use stride-1 memory accesses. As with the AIJ type, it is 630*938d9b04SRichard Tran Mills important to preallocate matrix storage in order to get good assembly 631*938d9b04SRichard Tran Mills performance. 632*938d9b04SRichard Tran Mills 633*938d9b04SRichard Tran Mills Collective on MPI_Comm 634*938d9b04SRichard Tran Mills 635*938d9b04SRichard Tran Mills Input Parameters: 636*938d9b04SRichard Tran Mills + comm - MPI communicator, set to PETSC_COMM_SELF 637*938d9b04SRichard Tran Mills . m - number of rows 638*938d9b04SRichard Tran Mills . n - number of columns 639*938d9b04SRichard Tran Mills . nz - number of nonzeros per row (same for all rows) 640*938d9b04SRichard Tran Mills - nnz - array containing the number of nonzeros in the various rows 641*938d9b04SRichard Tran Mills (possibly different for each row) or NULL 642*938d9b04SRichard Tran Mills 643*938d9b04SRichard Tran Mills Output Parameter: 644*938d9b04SRichard Tran Mills . A - the matrix 645*938d9b04SRichard Tran Mills 646*938d9b04SRichard Tran Mills Notes: 647*938d9b04SRichard Tran Mills If nnz is given then nz is ignored 648*938d9b04SRichard Tran Mills 649*938d9b04SRichard Tran Mills Level: intermediate 650*938d9b04SRichard Tran Mills 651*938d9b04SRichard Tran Mills .keywords: matrix, cray, sparse, parallel 652*938d9b04SRichard Tran Mills 653*938d9b04SRichard Tran Mills .seealso: MatCreate(), MatCreateMPIAIJPERM(), MatSetValues() 654*938d9b04SRichard Tran Mills @*/ 655*938d9b04SRichard Tran Mills PetscErrorCode MatCreateSeqAIJPERM(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 656*938d9b04SRichard Tran Mills { 657*938d9b04SRichard Tran Mills PetscErrorCode ierr; 658*938d9b04SRichard Tran Mills 659*938d9b04SRichard Tran Mills PetscFunctionBegin; 660*938d9b04SRichard Tran Mills ierr = MatCreate(comm,A);CHKERRQ(ierr); 661*938d9b04SRichard Tran Mills ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 662*938d9b04SRichard Tran Mills ierr = MatSetType(*A,MATSEQAIJPERM);CHKERRQ(ierr); 663*938d9b04SRichard Tran Mills ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 664*938d9b04SRichard Tran Mills PetscFunctionReturn(0); 665*938d9b04SRichard Tran Mills } 666*938d9b04SRichard Tran Mills 667*938d9b04SRichard Tran Mills PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJPERM(Mat A) 668*938d9b04SRichard Tran Mills { 669*938d9b04SRichard Tran Mills PetscErrorCode ierr; 670*938d9b04SRichard Tran Mills 671*938d9b04SRichard Tran Mills PetscFunctionBegin; 672*938d9b04SRichard Tran Mills ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 673*938d9b04SRichard Tran Mills ierr = MatConvert_SeqAIJ_SeqAIJPERM(A,MATSEQAIJPERM,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 674*938d9b04SRichard Tran Mills PetscFunctionReturn(0); 675*938d9b04SRichard Tran Mills } 676*938d9b04SRichard Tran Mills 677