1 2 #include <../src/mat/impls/aij/seq/aij.h> 3 4 #undef __FUNCT__ 5 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ_den2sp" 6 /* 7 This routine optimizes MatFDColoringCreate_SeqAIJ() by using den2sp array 8 */ 9 PetscErrorCode MatFDColoringCreate_SeqAIJ_den2sp(Mat mat,ISColoring iscoloring,MatFDColoring c) 10 { 11 PetscErrorCode ierr; 12 PetscInt i,n,nrows,N,j,k,m,ncols,col; 13 const PetscInt *is,*rows,*ci,*cj; 14 PetscInt nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1,*spidx,*spidxhit,*den2sp,*den2sp_i; 15 IS *isa; 16 PetscBool flg1,flg2; 17 Mat_SeqAIJ *csp = (Mat_SeqAIJ*)mat->data; 18 19 PetscFunctionBegin; 20 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 21 /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1 */ 22 ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr); 23 ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIBAIJ,&flg2);CHKERRQ(ierr); 24 if (flg1 || flg2) { 25 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 26 } 27 28 N = mat->cmap->N/bs; 29 c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 30 c->N = mat->cmap->N/bs; 31 c->m = mat->rmap->N/bs; 32 c->rstart = 0; 33 34 c->ncolors = nis; 35 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 36 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 37 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 38 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 39 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 40 ierr = PetscMalloc((csp->nz+1)*sizeof(PetscInt),&den2sp);CHKERRQ(ierr); 41 den2sp_i = den2sp; 42 43 ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 44 45 ierr = PetscMalloc((c->m+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 46 ierr = PetscMemzero(rowhit,c->m*sizeof(PetscInt));CHKERRQ(ierr); 47 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); // N=mat->cmap->N/bs or c->M ? 48 ierr = PetscMalloc((c->m+1)*sizeof(PetscInt),&spidxhit);CHKERRQ(ierr); 49 50 for (i=0; i<nis; i++) { /* loop over colors */ 51 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 52 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 53 54 c->ncolumns[i] = n; 55 if (n) { 56 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 57 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 58 } else { 59 c->columns[i] = 0; 60 } 61 62 /* fast, crude version requires O(N*N) work */ 63 for (j=0; j<n; j++) { /* loop over columns */ 64 col = is[j]; 65 rows = cj + ci[col]; 66 m = ci[col+1] - ci[col]; 67 for (k=0; k<m; k++) { /* loop over columns marking them in rowhit */ 68 rowhit[*rows] = col + 1; 69 spidxhit[*rows] = spidx[ci[col] + k]; 70 rows++; 71 } 72 } 73 /* get num of rows by counting the number of hits */ 74 nrows = 0; 75 for (j=0; j<N; j++) { 76 if (rowhit[j]) nrows++; 77 } 78 c->nrows[i] = nrows; 79 80 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 81 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 82 nrows = 0; 83 for (j=0; j<N; j++) { /* loop over rows */ 84 if (rowhit[j]) { 85 c->rows[i][nrows] = j; /* row index */ 86 c->columnsforrow[i][nrows] = rowhit[j] - 1; /* column index */ 87 den2sp_i[nrows++] = spidxhit[j]; 88 rowhit[j] = 0.0; /* zero rowhit for reuse */ 89 } 90 } 91 den2sp_i += nrows; 92 ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 93 } 94 ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 95 96 ierr = PetscFree(rowhit);CHKERRQ(ierr); 97 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 98 ierr = PetscFree(spidxhit);CHKERRQ(ierr); 99 100 /* Optimize by adding the vscale, and scaleforrow[][] fields -- needed for seqaij??? */ 101 /* 102 see the version for MPIAIJ 103 */ 104 ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr); 105 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 106 for (k=0; k<c->ncolors; k++) { 107 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 108 for (l=0; l<c->nrows[k]; l++) { 109 col = c->columnsforrow[k][l]; 110 111 c->vscaleforrow[k][l] = col; 112 } 113 } 114 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 115 c->den2sp = den2sp; 116 mat->ops->fdcoloringapply = MatFDColoringApply_SeqAIJ; 117 PetscFunctionReturn(0); 118 } 119 120 /* --------------------------------------------------------------- */ 121 122 #undef __FUNCT__ 123 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ" 124 /* 125 This routine is shared by AIJ and BAIJ matrices, since it operators only on the nonzero structure of the elements or blocks. 126 This is why it has the ugly code with the MatGetBlockSize() 127 */ 128 PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 129 { 130 PetscErrorCode ierr; 131 PetscInt i,n,nrows,N,j,k,m,ncols,col; 132 const PetscInt *is,*rows,*ci,*cj; 133 PetscInt nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1; 134 IS *isa; 135 PetscBool done,flg = PETSC_FALSE; 136 PetscBool flg1,flg2; 137 PetscBool new_impl=PETSC_FALSE; 138 139 PetscFunctionBegin; 140 ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr); 141 if (new_impl){ 142 ierr = MatFDColoringCreate_SeqAIJ_den2sp(mat,iscoloring,c);CHKERRQ(ierr); 143 PetscFunctionReturn(0); 144 } 145 if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();"); 146 147 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 148 /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1 */ 149 ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr); 150 ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIBAIJ,&flg2);CHKERRQ(ierr); 151 if (flg1 || flg2) { 152 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 153 } 154 155 N = mat->cmap->N/bs; 156 c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 157 c->N = mat->cmap->N/bs; 158 c->m = mat->rmap->N/bs; 159 c->rstart = 0; 160 161 c->ncolors = nis; 162 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 163 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 164 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 165 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 166 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 167 168 ierr = MatGetColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 169 if (!done) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name); 170 171 /* 172 Temporary option to allow for debugging/testing 173 */ 174 ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr); 175 176 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 177 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 178 179 for (i=0; i<nis; i++) { 180 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 181 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 182 183 c->ncolumns[i] = n; 184 if (n) { 185 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 186 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 187 } else { 188 c->columns[i] = 0; 189 } 190 191 if (!flg) { /* ------------------------------------------------------------------------------*/ 192 /* fast, crude version requires O(N*N) work */ 193 ierr = PetscMemzero(rowhit,N*sizeof(PetscInt));CHKERRQ(ierr); 194 /* loop over columns*/ 195 for (j=0; j<n; j++) { 196 col = is[j]; 197 rows = cj + ci[col]; 198 m = ci[col+1] - ci[col]; 199 /* loop over columns marking them in rowhit */ 200 for (k=0; k<m; k++) { 201 rowhit[*rows++] = col + 1; 202 } 203 } 204 /* count the number of hits */ 205 nrows = 0; 206 for (j=0; j<N; j++) { 207 if (rowhit[j]) nrows++; 208 } 209 c->nrows[i] = nrows; 210 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 211 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 212 nrows = 0; 213 for (j=0; j<N; j++) { 214 if (rowhit[j]) { 215 c->rows[i][nrows] = j; 216 c->columnsforrow[i][nrows] = rowhit[j] - 1; 217 nrows++; 218 } 219 } 220 } else { /*-------------------------------------------------------------------------------*/ 221 /* slow version, using rowhit as a linked list */ 222 PetscInt currentcol,fm,mfm; 223 rowhit[N] = N; 224 nrows = 0; 225 /* loop over columns */ 226 for (j=0; j<n; j++) { 227 col = is[j]; 228 rows = cj + ci[col]; 229 m = ci[col+1] - ci[col]; 230 /* loop over columns marking them in rowhit */ 231 fm = N; /* fm points to first entry in linked list */ 232 for (k=0; k<m; k++) { 233 currentcol = *rows++; 234 /* is it already in the list? */ 235 do { 236 mfm = fm; 237 fm = rowhit[fm]; 238 } while (fm < currentcol); 239 /* not in list so add it */ 240 if (fm != currentcol) { 241 nrows++; 242 columnsforrow[currentcol] = col; 243 /* next three lines insert new entry into linked list */ 244 rowhit[mfm] = currentcol; 245 rowhit[currentcol] = fm; 246 fm = currentcol; 247 /* fm points to present position in list since we know the columns are sorted */ 248 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Detected invalid coloring"); 249 } 250 } 251 c->nrows[i] = nrows; 252 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 253 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 254 /* now store the linked list of rows into c->rows[i] */ 255 nrows = 0; 256 fm = rowhit[N]; 257 do { 258 c->rows[i][nrows] = fm; 259 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 260 fm = rowhit[fm]; 261 } while (fm < N); 262 } /* ---------------------------------------------------------------------------------------*/ 263 ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 264 } 265 ierr = MatRestoreColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 266 267 ierr = PetscFree(rowhit);CHKERRQ(ierr); 268 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 269 270 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 271 /* 272 see the version for MPIAIJ 273 */ 274 ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr); 275 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 276 for (k=0; k<c->ncolors; k++) { 277 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 278 for (l=0; l<c->nrows[k]; l++) { 279 col = c->columnsforrow[k][l]; 280 281 c->vscaleforrow[k][l] = col; 282 } 283 } 284 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 285 PetscFunctionReturn(0); 286 } 287 288