1 2 #include <../src/mat/impls/aij/seq/aij.h> 3 4 #undef __FUNCT__ 5 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ_den2sp" 6 /* 7 This routine optimizes MatFDColoringCreate_SeqAIJ() by using den2sp array 8 */ 9 PetscErrorCode MatFDColoringCreate_SeqAIJ_den2sp(Mat mat,ISColoring iscoloring,MatFDColoring c) 10 { 11 PetscErrorCode ierr; 12 PetscInt i,n,nrows,N,j,k,m,ncols,col; 13 const PetscInt *is,*rows,*ci,*cj; 14 PetscInt nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1,*spidx,*spidxhit,*den2sp,*den2sp_i; 15 IS *isa; 16 PetscBool flg1,flg2; 17 Mat_SeqAIJ *csp = (Mat_SeqAIJ*)mat->data; 18 19 PetscFunctionBegin; 20 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 21 /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1 */ 22 ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr); 23 ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIBAIJ,&flg2);CHKERRQ(ierr); 24 if (flg1 || flg2) { 25 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 26 } 27 28 N = mat->cmap->N/bs; 29 c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 30 c->N = mat->cmap->N/bs; 31 c->m = mat->rmap->N/bs; 32 c->rstart = 0; 33 34 c->ncolors = nis; 35 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 36 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 37 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 38 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 39 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 40 ierr = PetscMalloc((csp->nz+1)*sizeof(PetscInt),&den2sp);CHKERRQ(ierr); 41 den2sp_i = den2sp; 42 43 ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 44 45 ierr = PetscMalloc((c->m+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 46 ierr = PetscMemzero(rowhit,c->m*sizeof(PetscInt));CHKERRQ(ierr); 47 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); // N=mat->cmap->N/bs or c->M ? 48 ierr = PetscMalloc((c->m+1)*sizeof(PetscInt),&spidxhit);CHKERRQ(ierr); 49 50 for (i=0; i<nis; i++) { /* loop over colors */ 51 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 52 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 53 54 c->ncolumns[i] = n; 55 if (n) { 56 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 57 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 58 } else { 59 c->columns[i] = 0; 60 } 61 62 /* fast, crude version requires O(N*N) work */ 63 for (j=0; j<n; j++) { /* loop over columns */ 64 col = is[j]; 65 rows = cj + ci[col]; 66 m = ci[col+1] - ci[col]; 67 for (k=0; k<m; k++) { /* loop over columns marking them in rowhit */ 68 rowhit[*rows] = col + 1; 69 spidxhit[*rows] = spidx[ci[col] + k]; 70 rows++; 71 } 72 } 73 /* get num of rows by counting the number of hits */ 74 nrows = 0; 75 for (j=0; j<N; j++) { 76 if (rowhit[j]) nrows++; 77 } 78 c->nrows[i] = nrows; 79 80 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 81 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 82 nrows = 0; 83 for (j=0; j<N; j++) { /* loop over rows */ 84 if (rowhit[j]) { 85 c->rows[i][nrows] = j; /* row index */ 86 c->columnsforrow[i][nrows] = rowhit[j] - 1; /* column index */ 87 den2sp_i[nrows++] = spidxhit[j]; 88 rowhit[j] = 0.0; /* zero rowhit for reuse */ 89 } 90 } 91 den2sp_i += nrows; 92 ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 93 } 94 ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 95 96 ierr = PetscFree(rowhit);CHKERRQ(ierr); 97 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 98 ierr = PetscFree(spidxhit);CHKERRQ(ierr); 99 100 /* Optimize by adding the vscale, and scaleforrow[][] fields -- needed for seqaij??? */ 101 /* 102 see the version for MPIAIJ 103 */ 104 ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr); 105 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 106 for (k=0; k<c->ncolors; k++) { 107 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 108 for (l=0; l<c->nrows[k]; l++) { 109 col = c->columnsforrow[k][l]; 110 111 c->vscaleforrow[k][l] = col; 112 } 113 } 114 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 115 c->den2sp = den2sp; 116 c->ctype = IS_COLORING_GHOSTED; 117 mat->ops->fdcoloringapply = MatFDColoringApply_SeqAIJ; 118 PetscFunctionReturn(0); 119 } 120 121 /* --------------------------------------------------------------- */ 122 123 #undef __FUNCT__ 124 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ" 125 /* 126 This routine is shared by AIJ and BAIJ matrices, since it operators only on the nonzero structure of the elements or blocks. 127 This is why it has the ugly code with the MatGetBlockSize() 128 */ 129 PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 130 { 131 PetscErrorCode ierr; 132 PetscInt i,n,nrows,N,j,k,m,ncols,col; 133 const PetscInt *is,*rows,*ci,*cj; 134 PetscInt nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1; 135 IS *isa; 136 PetscBool done,flg = PETSC_FALSE; 137 PetscBool flg1,flg2; 138 PetscBool new_impl=PETSC_FALSE; 139 140 PetscFunctionBegin; 141 ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr); 142 if (new_impl){ 143 ierr = MatFDColoringCreate_SeqAIJ_den2sp(mat,iscoloring,c);CHKERRQ(ierr); 144 PetscFunctionReturn(0); 145 } 146 if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();"); 147 148 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 149 /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1 */ 150 ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr); 151 ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIBAIJ,&flg2);CHKERRQ(ierr); 152 if (flg1 || flg2) { 153 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 154 } 155 156 N = mat->cmap->N/bs; 157 c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 158 c->N = mat->cmap->N/bs; 159 c->m = mat->rmap->N/bs; 160 c->rstart = 0; 161 162 c->ncolors = nis; 163 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 164 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 165 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 166 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 167 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 168 169 ierr = MatGetColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 170 if (!done) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name); 171 172 /* 173 Temporary option to allow for debugging/testing 174 */ 175 ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr); 176 177 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 178 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 179 180 for (i=0; i<nis; i++) { 181 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 182 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 183 184 c->ncolumns[i] = n; 185 if (n) { 186 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 187 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 188 } else { 189 c->columns[i] = 0; 190 } 191 192 if (!flg) { /* ------------------------------------------------------------------------------*/ 193 /* fast, crude version requires O(N*N) work */ 194 ierr = PetscMemzero(rowhit,N*sizeof(PetscInt));CHKERRQ(ierr); 195 /* loop over columns*/ 196 for (j=0; j<n; j++) { 197 col = is[j]; 198 rows = cj + ci[col]; 199 m = ci[col+1] - ci[col]; 200 /* loop over columns marking them in rowhit */ 201 for (k=0; k<m; k++) { 202 rowhit[*rows++] = col + 1; 203 } 204 } 205 /* count the number of hits */ 206 nrows = 0; 207 for (j=0; j<N; j++) { 208 if (rowhit[j]) nrows++; 209 } 210 c->nrows[i] = nrows; 211 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 212 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 213 nrows = 0; 214 for (j=0; j<N; j++) { 215 if (rowhit[j]) { 216 c->rows[i][nrows] = j; 217 c->columnsforrow[i][nrows] = rowhit[j] - 1; 218 nrows++; 219 } 220 } 221 } else { /*-------------------------------------------------------------------------------*/ 222 /* slow version, using rowhit as a linked list */ 223 PetscInt currentcol,fm,mfm; 224 rowhit[N] = N; 225 nrows = 0; 226 /* loop over columns */ 227 for (j=0; j<n; j++) { 228 col = is[j]; 229 rows = cj + ci[col]; 230 m = ci[col+1] - ci[col]; 231 /* loop over columns marking them in rowhit */ 232 fm = N; /* fm points to first entry in linked list */ 233 for (k=0; k<m; k++) { 234 currentcol = *rows++; 235 /* is it already in the list? */ 236 do { 237 mfm = fm; 238 fm = rowhit[fm]; 239 } while (fm < currentcol); 240 /* not in list so add it */ 241 if (fm != currentcol) { 242 nrows++; 243 columnsforrow[currentcol] = col; 244 /* next three lines insert new entry into linked list */ 245 rowhit[mfm] = currentcol; 246 rowhit[currentcol] = fm; 247 fm = currentcol; 248 /* fm points to present position in list since we know the columns are sorted */ 249 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Detected invalid coloring"); 250 } 251 } 252 c->nrows[i] = nrows; 253 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 254 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 255 /* now store the linked list of rows into c->rows[i] */ 256 nrows = 0; 257 fm = rowhit[N]; 258 do { 259 c->rows[i][nrows] = fm; 260 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 261 fm = rowhit[fm]; 262 } while (fm < N); 263 } /* ---------------------------------------------------------------------------------------*/ 264 ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 265 } 266 ierr = MatRestoreColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 267 268 ierr = PetscFree(rowhit);CHKERRQ(ierr); 269 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 270 271 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 272 /* 273 see the version for MPIAIJ 274 */ 275 ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr); 276 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 277 for (k=0; k<c->ncolors; k++) { 278 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 279 for (l=0; l<c->nrows[k]; l++) { 280 col = c->columnsforrow[k][l]; 281 282 c->vscaleforrow[k][l] = col; 283 } 284 } 285 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 286 PetscFunctionReturn(0); 287 } 288 289