1 2 #include <../src/mat/impls/aij/seq/aij.h> 3 4 #undef __FUNCT__ 5 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ_den2sp" 6 /* 7 This routine optimizes MatFDColoringCreate_SeqAIJ() by using den2sp array 8 */ 9 PetscErrorCode MatFDColoringCreate_SeqAIJ_den2sp(Mat mat,ISColoring iscoloring,MatFDColoring c) 10 { 11 PetscErrorCode ierr; 12 PetscInt i,n,nrows,N,j,k,m,ncols,col; 13 const PetscInt *is,*rows,*ci,*cj; 14 PetscInt nis=iscoloring->n,*rowhit,*columnsforrow,bs,*spidx,*spidxhit,nz; 15 IS *isa; 16 PetscBool flg1; 17 Mat_SeqAIJ *csp = (Mat_SeqAIJ*)mat->data; 18 19 PetscFunctionBegin; 20 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 21 22 /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1. 23 SeqBAIJ calls this routine, thus check it */ 24 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 25 ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr); 26 if (!flg1) { 27 bs = 1; /* only bs=1 is supported for non SEQBAIJ matrix */ 28 } 29 N = mat->cmap->N/bs; 30 c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 31 c->N = mat->cmap->N/bs; 32 c->m = mat->rmap->N/bs; 33 c->rstart = 0; 34 35 c->ncolors = nis; 36 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 37 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 38 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 39 ierr = PetscMalloc(3*csp->nz*sizeof(PetscInt*),&c->rowcolden2sp3);CHKERRQ(ierr); 40 41 ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 42 43 ierr = PetscMalloc3(c->m,PetscInt,&rowhit,N,PetscInt,&columnsforrow,c->m,PetscInt,&spidxhit);CHKERRQ(ierr); 44 ierr = PetscMemzero(rowhit,c->m*sizeof(PetscInt));CHKERRQ(ierr); 45 46 nz = 0; 47 for (i=0; i<nis; i++) { /* loop over colors */ 48 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 49 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 50 51 c->ncolumns[i] = n; 52 if (n) { 53 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 54 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 55 } else { 56 c->columns[i] = 0; 57 } 58 59 /* fast, crude version requires O(N*N) work */ 60 nrows = 0; 61 for (j=0; j<n; j++) { /* loop over columns */ 62 col = is[j]; 63 rows = cj + ci[col]; 64 m = ci[col+1] - ci[col]; 65 for (k=0; k<m; k++) { /* loop over columns marking them in rowhit */ 66 rowhit[*rows] = col + 1; 67 spidxhit[*rows++] = spidx[ci[col] + k]; 68 nrows++; 69 } 70 } 71 c->nrows[i] = nrows; /* total num of rows for this color */ 72 73 nrows = 0; 74 for (j=0; j<N; j++) { /* loop over rows */ 75 if (rowhit[j]) { 76 c->rowcolden2sp3[nz++] = j; /* row index */ 77 c->rowcolden2sp3[nz++] = rowhit[j] - 1; /* column index */ 78 c->rowcolden2sp3[nz++] = spidxhit[j]; /* den2sp index */ 79 rowhit[j] = 0.0; /* zero rowhit for reuse */ 80 } 81 } 82 ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 83 } 84 if (nz/3 != csp->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"nz %d != mat->nz %d\n",nz/3,csp->nz); 85 86 ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 87 ierr = PetscFree3(rowhit,columnsforrow,spidxhit);CHKERRQ(ierr); 88 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 89 90 c->ctype = IS_COLORING_GHOSTED; 91 mat->ops->fdcoloringapply = MatFDColoringApply_SeqAIJ; 92 ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr); 93 PetscFunctionReturn(0); 94 } 95 96 /* --------------------------------------------------------------- */ 97 98 #undef __FUNCT__ 99 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ" 100 /* 101 This routine is shared by AIJ and BAIJ matrices, since it operators only on the nonzero structure of the elements or blocks. 102 This is why it has the ugly code with the MatGetBlockSize() 103 */ 104 PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 105 { 106 PetscErrorCode ierr; 107 PetscInt i,n,nrows,N,j,k,m,ncols,col; 108 const PetscInt *is,*rows,*ci,*cj; 109 PetscInt nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1; 110 IS *isa; 111 PetscBool done,flg = PETSC_FALSE; 112 PetscBool flg1; 113 PetscBool new_impl=PETSC_FALSE; 114 115 PetscFunctionBegin; 116 ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr); 117 if (new_impl){ 118 ierr = MatFDColoringCreate_SeqAIJ_den2sp(mat,iscoloring,c);CHKERRQ(ierr); 119 PetscFunctionReturn(0); 120 } 121 if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();"); 122 123 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 124 /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1. 125 SeqBAIJ calls this routine, thus check it */ 126 ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr); 127 if (flg1) { 128 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 129 } 130 131 N = mat->cmap->N/bs; 132 c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 133 c->N = mat->cmap->N/bs; 134 c->m = mat->rmap->N/bs; 135 c->rstart = 0; 136 137 c->ncolors = nis; 138 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 139 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 140 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 141 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 142 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 143 144 ierr = MatGetColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 145 if (!done) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name); 146 147 /* 148 Temporary option to allow for debugging/testing 149 */ 150 ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr); 151 152 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 153 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 154 155 for (i=0; i<nis; i++) { 156 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 157 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 158 159 c->ncolumns[i] = n; 160 if (n) { 161 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 162 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 163 } else { 164 c->columns[i] = 0; 165 } 166 167 if (!flg) { /* ------------------------------------------------------------------------------*/ 168 /* fast, crude version requires O(N*N) work */ 169 ierr = PetscMemzero(rowhit,N*sizeof(PetscInt));CHKERRQ(ierr); 170 /* loop over columns*/ 171 for (j=0; j<n; j++) { 172 col = is[j]; 173 rows = cj + ci[col]; 174 m = ci[col+1] - ci[col]; 175 /* loop over columns marking them in rowhit */ 176 for (k=0; k<m; k++) { 177 rowhit[*rows++] = col + 1; 178 } 179 } 180 /* count the number of hits */ 181 nrows = 0; 182 for (j=0; j<N; j++) { 183 if (rowhit[j]) nrows++; 184 } 185 c->nrows[i] = nrows; 186 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 187 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 188 nrows = 0; 189 for (j=0; j<N; j++) { 190 if (rowhit[j]) { 191 c->rows[i][nrows] = j; 192 c->columnsforrow[i][nrows] = rowhit[j] - 1; 193 nrows++; 194 } 195 } 196 } else { /*-------------------------------------------------------------------------------*/ 197 /* slow version, using rowhit as a linked list */ 198 PetscInt currentcol,fm,mfm; 199 rowhit[N] = N; 200 nrows = 0; 201 /* loop over columns */ 202 for (j=0; j<n; j++) { 203 col = is[j]; 204 rows = cj + ci[col]; 205 m = ci[col+1] - ci[col]; 206 /* loop over columns marking them in rowhit */ 207 fm = N; /* fm points to first entry in linked list */ 208 for (k=0; k<m; k++) { 209 currentcol = *rows++; 210 /* is it already in the list? */ 211 do { 212 mfm = fm; 213 fm = rowhit[fm]; 214 } while (fm < currentcol); 215 /* not in list so add it */ 216 if (fm != currentcol) { 217 nrows++; 218 columnsforrow[currentcol] = col; 219 /* next three lines insert new entry into linked list */ 220 rowhit[mfm] = currentcol; 221 rowhit[currentcol] = fm; 222 fm = currentcol; 223 /* fm points to present position in list since we know the columns are sorted */ 224 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Detected invalid coloring"); 225 } 226 } 227 c->nrows[i] = nrows; 228 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 229 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 230 /* now store the linked list of rows into c->rows[i] */ 231 nrows = 0; 232 fm = rowhit[N]; 233 do { 234 c->rows[i][nrows] = fm; 235 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 236 fm = rowhit[fm]; 237 } while (fm < N); 238 } /* ---------------------------------------------------------------------------------------*/ 239 ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 240 } 241 ierr = MatRestoreColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 242 243 ierr = PetscFree(rowhit);CHKERRQ(ierr); 244 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 245 246 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 247 /* 248 see the version for MPIAIJ 249 */ 250 ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr); 251 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 252 for (k=0; k<c->ncolors; k++) { 253 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 254 for (l=0; l<c->nrows[k]; l++) { 255 col = c->columnsforrow[k][l]; 256 257 c->vscaleforrow[k][l] = col; 258 } 259 } 260 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 261 PetscFunctionReturn(0); 262 } 263 264