1 2 #include <../src/mat/impls/aij/seq/aij.h> 3 4 #undef __FUNCT__ 5 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ_den2sp" 6 /* 7 This routine optimizes MatFDColoringCreate_SeqAIJ() by using den2sp array 8 */ 9 PetscErrorCode MatFDColoringCreate_SeqAIJ_den2sp(Mat mat,ISColoring iscoloring,MatFDColoring c) 10 { 11 PetscErrorCode ierr; 12 PetscInt i,n,nrows,N,j,k,m,ncols,col; 13 const PetscInt *is,*rows,*ci,*cj; 14 PetscInt nis=iscoloring->n,*rowhit,*columnsforrow,bs=1,*spidx,*spidxhit,*den2sp,*den2sp_i; 15 IS *isa; 16 PetscBool flg1; 17 Mat_SeqAIJ *csp = (Mat_SeqAIJ*)mat->data; 18 PetscInt nz_den=0; 19 20 PetscFunctionBegin; 21 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 22 /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1. 23 SeqBAIJ calls this routine, thus check it */ 24 ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr); 25 if (flg1) { 26 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 27 } 28 29 N = mat->cmap->N/bs; 30 c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 31 c->N = mat->cmap->N/bs; 32 c->m = mat->rmap->N/bs; 33 c->rstart = 0; 34 35 c->ncolors = nis; 36 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 37 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 38 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 39 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 40 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 41 ierr = PetscMalloc((csp->nz+1)*sizeof(PetscInt),&den2sp);CHKERRQ(ierr); 42 den2sp_i = den2sp; 43 ierr = PetscMalloc2(csp->nz,PetscInt,&c->colorforrow,csp->nz,PetscInt,&c->colorforcolumn);CHKERRQ(ierr); 44 ierr = PetscMalloc(3*csp->nz*sizeof(PetscInt*),&c->rowcolden2sp3);CHKERRQ(ierr); 45 46 ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 47 48 ierr = PetscMalloc3(c->m,PetscInt,&rowhit,N,PetscInt,&columnsforrow,c->m,PetscInt,&spidxhit);CHKERRQ(ierr); 49 ierr = PetscMemzero(rowhit,c->m*sizeof(PetscInt));CHKERRQ(ierr); 50 51 for (i=0; i<nis; i++) { /* loop over colors */ 52 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 53 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 54 55 c->ncolumns[i] = n; 56 if (n) { 57 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 58 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 59 } else { 60 c->columns[i] = 0; 61 } 62 63 /* fast, crude version requires O(N*N) work */ 64 nrows = 0; 65 for (j=0; j<n; j++) { /* loop over columns */ 66 col = is[j]; 67 rows = cj + ci[col]; 68 m = ci[col+1] - ci[col]; 69 for (k=0; k<m; k++) { /* loop over columns marking them in rowhit */ 70 rowhit[*rows] = col + 1; 71 spidxhit[*rows++] = spidx[ci[col] + k]; 72 nrows++; 73 } 74 } 75 c->nrows[i] = nrows; /* total num of rows for this color */ 76 77 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 78 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 79 nrows = 0; 80 for (j=0; j<N; j++) { /* loop over rows */ 81 if (rowhit[j]) { 82 c->rows[i][nrows] = j; /* row index */ 83 c->columnsforrow[i][nrows] = rowhit[j] - 1; /* column index */ 84 den2sp_i[nrows++] = spidxhit[j]; 85 86 c->colorforrow[nz_den] = j; /* row index */ 87 c->colorforcolumn[nz_den] = rowhit[j] - 1; /* column index */ 88 89 c->rowcolden2sp3[3*nz_den] = j; /* row index */ 90 c->rowcolden2sp3[3*nz_den+1] = rowhit[j] - 1; /* column index */ 91 c->rowcolden2sp3[3*nz_den+2] = spidxhit[j]; 92 93 nz_den++; 94 rowhit[j] = 0.0; /* zero rowhit for reuse */ 95 } 96 } 97 den2sp_i += nrows; 98 ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 99 } 100 printf("nz_den %d, csp->nz %d\n",nz_den,csp->nz); 101 102 ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 103 ierr = PetscFree3(rowhit,columnsforrow,spidxhit);CHKERRQ(ierr); 104 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 105 106 c->den2sp = den2sp; 107 c->ctype = IS_COLORING_GHOSTED; 108 mat->ops->fdcoloringapply = MatFDColoringApply_SeqAIJ; 109 PetscFunctionReturn(0); 110 } 111 112 /* --------------------------------------------------------------- */ 113 114 #undef __FUNCT__ 115 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ" 116 /* 117 This routine is shared by AIJ and BAIJ matrices, since it operators only on the nonzero structure of the elements or blocks. 118 This is why it has the ugly code with the MatGetBlockSize() 119 */ 120 PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 121 { 122 PetscErrorCode ierr; 123 PetscInt i,n,nrows,N,j,k,m,ncols,col; 124 const PetscInt *is,*rows,*ci,*cj; 125 PetscInt nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1; 126 IS *isa; 127 PetscBool done,flg = PETSC_FALSE; 128 PetscBool flg1; 129 PetscBool new_impl=PETSC_FALSE; 130 131 PetscFunctionBegin; 132 ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr); 133 if (new_impl){ 134 ierr = MatFDColoringCreate_SeqAIJ_den2sp(mat,iscoloring,c);CHKERRQ(ierr); 135 PetscFunctionReturn(0); 136 } 137 if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();"); 138 139 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 140 /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1. 141 SeqBAIJ calls this routine, thus check it */ 142 ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr); 143 if (flg1) { 144 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 145 } 146 147 N = mat->cmap->N/bs; 148 c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 149 c->N = mat->cmap->N/bs; 150 c->m = mat->rmap->N/bs; 151 c->rstart = 0; 152 153 c->ncolors = nis; 154 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 155 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 156 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 157 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 158 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 159 160 ierr = MatGetColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 161 if (!done) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name); 162 163 /* 164 Temporary option to allow for debugging/testing 165 */ 166 ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr); 167 168 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 169 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 170 171 for (i=0; i<nis; i++) { 172 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 173 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 174 175 c->ncolumns[i] = n; 176 if (n) { 177 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 178 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 179 } else { 180 c->columns[i] = 0; 181 } 182 183 if (!flg) { /* ------------------------------------------------------------------------------*/ 184 /* fast, crude version requires O(N*N) work */ 185 ierr = PetscMemzero(rowhit,N*sizeof(PetscInt));CHKERRQ(ierr); 186 /* loop over columns*/ 187 for (j=0; j<n; j++) { 188 col = is[j]; 189 rows = cj + ci[col]; 190 m = ci[col+1] - ci[col]; 191 /* loop over columns marking them in rowhit */ 192 for (k=0; k<m; k++) { 193 rowhit[*rows++] = col + 1; 194 } 195 } 196 /* count the number of hits */ 197 nrows = 0; 198 for (j=0; j<N; j++) { 199 if (rowhit[j]) nrows++; 200 } 201 c->nrows[i] = nrows; 202 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 203 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 204 nrows = 0; 205 for (j=0; j<N; j++) { 206 if (rowhit[j]) { 207 c->rows[i][nrows] = j; 208 c->columnsforrow[i][nrows] = rowhit[j] - 1; 209 nrows++; 210 } 211 } 212 } else { /*-------------------------------------------------------------------------------*/ 213 /* slow version, using rowhit as a linked list */ 214 PetscInt currentcol,fm,mfm; 215 rowhit[N] = N; 216 nrows = 0; 217 /* loop over columns */ 218 for (j=0; j<n; j++) { 219 col = is[j]; 220 rows = cj + ci[col]; 221 m = ci[col+1] - ci[col]; 222 /* loop over columns marking them in rowhit */ 223 fm = N; /* fm points to first entry in linked list */ 224 for (k=0; k<m; k++) { 225 currentcol = *rows++; 226 /* is it already in the list? */ 227 do { 228 mfm = fm; 229 fm = rowhit[fm]; 230 } while (fm < currentcol); 231 /* not in list so add it */ 232 if (fm != currentcol) { 233 nrows++; 234 columnsforrow[currentcol] = col; 235 /* next three lines insert new entry into linked list */ 236 rowhit[mfm] = currentcol; 237 rowhit[currentcol] = fm; 238 fm = currentcol; 239 /* fm points to present position in list since we know the columns are sorted */ 240 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Detected invalid coloring"); 241 } 242 } 243 c->nrows[i] = nrows; 244 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 245 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 246 /* now store the linked list of rows into c->rows[i] */ 247 nrows = 0; 248 fm = rowhit[N]; 249 do { 250 c->rows[i][nrows] = fm; 251 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 252 fm = rowhit[fm]; 253 } while (fm < N); 254 } /* ---------------------------------------------------------------------------------------*/ 255 ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 256 } 257 ierr = MatRestoreColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 258 259 ierr = PetscFree(rowhit);CHKERRQ(ierr); 260 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 261 262 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 263 /* 264 see the version for MPIAIJ 265 */ 266 ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr); 267 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 268 for (k=0; k<c->ncolors; k++) { 269 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 270 for (l=0; l<c->nrows[k]; l++) { 271 col = c->columnsforrow[k][l]; 272 273 c->vscaleforrow[k][l] = col; 274 } 275 } 276 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 277 PetscFunctionReturn(0); 278 } 279 280