1 2 #include <../src/mat/impls/aij/seq/aij.h> 3 4 #undef __FUNCT__ 5 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ_den2sp" 6 /* 7 This routine optimizes MatFDColoringCreate_SeqAIJ() by using den2sp array 8 */ 9 PetscErrorCode MatFDColoringCreate_SeqAIJ_den2sp(Mat mat,ISColoring iscoloring,MatFDColoring c) 10 { 11 PetscErrorCode ierr; 12 PetscInt i,n,nrows,N,j,k,m,ncols,col; 13 const PetscInt *is,*rows,*ci,*cj; 14 PetscInt nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1,*spidx,*spidxhit,*den2sp,*den2sp_i; 15 IS *isa; 16 PetscBool flg1,flg2; 17 Mat_SeqAIJ *csp = (Mat_SeqAIJ*)mat->data; 18 19 PetscFunctionBegin; 20 if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();"); 21 22 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 23 /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1 */ 24 ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr); 25 ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIBAIJ,&flg2);CHKERRQ(ierr); 26 if (flg1 || flg2) { 27 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 28 } 29 30 N = mat->cmap->N/bs; 31 c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 32 c->N = mat->cmap->N/bs; 33 c->m = mat->rmap->N/bs; 34 c->rstart = 0; 35 36 c->ncolors = nis; 37 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 38 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 39 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 40 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 41 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 42 ierr = PetscMalloc((csp->nz+1)*sizeof(PetscInt),&den2sp);CHKERRQ(ierr); 43 den2sp_i = den2sp; 44 45 ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 46 47 ierr = PetscMalloc((c->m+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 48 ierr = PetscMemzero(rowhit,c->m*sizeof(PetscInt));CHKERRQ(ierr); 49 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); // N=mat->cmap->N/bs or c->M ? 50 ierr = PetscMalloc((c->m+1)*sizeof(PetscInt),&spidxhit);CHKERRQ(ierr); 51 52 for (i=0; i<nis; i++) { /* loop over colors */ 53 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 54 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 55 56 c->ncolumns[i] = n; 57 if (n) { 58 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 59 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 60 } else { 61 c->columns[i] = 0; 62 } 63 64 /* fast, crude version requires O(N*N) work */ 65 for (j=0; j<n; j++) { /* loop over columns */ 66 col = is[j]; 67 rows = cj + ci[col]; 68 m = ci[col+1] - ci[col]; 69 for (k=0; k<m; k++) { /* loop over columns marking them in rowhit */ 70 rowhit[*rows] = col + 1; 71 spidxhit[*rows] = spidx[ci[col] + k]; 72 rows++; 73 } 74 } 75 /* get num of rows by counting the number of hits */ 76 nrows = 0; 77 for (j=0; j<N; j++) { 78 if (rowhit[j]) nrows++; 79 } 80 c->nrows[i] = nrows; 81 82 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 83 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 84 nrows = 0; 85 for (j=0; j<N; j++) { /* loop over rows */ 86 if (rowhit[j]) { 87 c->rows[i][nrows] = j; /* row index */ 88 c->columnsforrow[i][nrows] = rowhit[j] - 1; /* column index */ 89 den2sp_i[nrows++] = spidxhit[j]; 90 rowhit[j] = 0.0; /* zero rowhit for reuse */ 91 } 92 } 93 den2sp_i += nrows; 94 ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 95 } 96 ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 97 98 ierr = PetscFree(rowhit);CHKERRQ(ierr); 99 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 100 ierr = PetscFree(spidxhit);CHKERRQ(ierr); 101 102 /* Optimize by adding the vscale, and scaleforrow[][] fields -- needed for seqaij??? */ 103 /* 104 see the version for MPIAIJ 105 */ 106 ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr); 107 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 108 for (k=0; k<c->ncolors; k++) { 109 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 110 for (l=0; l<c->nrows[k]; l++) { 111 col = c->columnsforrow[k][l]; 112 113 c->vscaleforrow[k][l] = col; 114 } 115 } 116 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 117 c->den2sp = den2sp; 118 mat->ops->fdcoloringapply = MatFDColoringApply_SeqAIJ; 119 PetscFunctionReturn(0); 120 } 121 122 /* --------------------------------------------------------------- */ 123 124 #undef __FUNCT__ 125 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ" 126 /* 127 This routine is shared by AIJ and BAIJ matrices, since it operators only on the nonzero structure of the elements or blocks. 128 This is why it has the ugly code with the MatGetBlockSize() 129 */ 130 PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 131 { 132 PetscErrorCode ierr; 133 PetscInt i,n,nrows,N,j,k,m,ncols,col; 134 const PetscInt *is,*rows,*ci,*cj; 135 PetscInt nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1; 136 IS *isa; 137 PetscBool done,flg = PETSC_FALSE; 138 PetscBool flg1,flg2; 139 PetscBool new_impl=PETSC_FALSE; 140 141 PetscFunctionBegin; 142 ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr); 143 if (new_impl){ 144 ierr = MatFDColoringCreate_SeqAIJ_den2sp(mat,iscoloring,c);CHKERRQ(ierr); 145 PetscFunctionReturn(0); 146 } 147 if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();"); 148 149 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 150 /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1 */ 151 ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr); 152 ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIBAIJ,&flg2);CHKERRQ(ierr); 153 if (flg1 || flg2) { 154 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 155 } 156 157 N = mat->cmap->N/bs; 158 c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 159 c->N = mat->cmap->N/bs; 160 c->m = mat->rmap->N/bs; 161 c->rstart = 0; 162 163 c->ncolors = nis; 164 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 165 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 166 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 167 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 168 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 169 170 ierr = MatGetColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 171 if (!done) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name); 172 173 /* 174 Temporary option to allow for debugging/testing 175 */ 176 ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr); 177 178 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 179 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 180 181 for (i=0; i<nis; i++) { 182 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 183 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 184 185 c->ncolumns[i] = n; 186 if (n) { 187 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 188 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 189 } else { 190 c->columns[i] = 0; 191 } 192 193 if (!flg) { /* ------------------------------------------------------------------------------*/ 194 /* fast, crude version requires O(N*N) work */ 195 ierr = PetscMemzero(rowhit,N*sizeof(PetscInt));CHKERRQ(ierr); 196 /* loop over columns*/ 197 for (j=0; j<n; j++) { 198 col = is[j]; 199 rows = cj + ci[col]; 200 m = ci[col+1] - ci[col]; 201 /* loop over columns marking them in rowhit */ 202 for (k=0; k<m; k++) { 203 rowhit[*rows++] = col + 1; 204 } 205 } 206 /* count the number of hits */ 207 nrows = 0; 208 for (j=0; j<N; j++) { 209 if (rowhit[j]) nrows++; 210 } 211 c->nrows[i] = nrows; 212 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 213 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 214 nrows = 0; 215 for (j=0; j<N; j++) { 216 if (rowhit[j]) { 217 c->rows[i][nrows] = j; 218 c->columnsforrow[i][nrows] = rowhit[j] - 1; 219 nrows++; 220 } 221 } 222 } else { /*-------------------------------------------------------------------------------*/ 223 /* slow version, using rowhit as a linked list */ 224 PetscInt currentcol,fm,mfm; 225 rowhit[N] = N; 226 nrows = 0; 227 /* loop over columns */ 228 for (j=0; j<n; j++) { 229 col = is[j]; 230 rows = cj + ci[col]; 231 m = ci[col+1] - ci[col]; 232 /* loop over columns marking them in rowhit */ 233 fm = N; /* fm points to first entry in linked list */ 234 for (k=0; k<m; k++) { 235 currentcol = *rows++; 236 /* is it already in the list? */ 237 do { 238 mfm = fm; 239 fm = rowhit[fm]; 240 } while (fm < currentcol); 241 /* not in list so add it */ 242 if (fm != currentcol) { 243 nrows++; 244 columnsforrow[currentcol] = col; 245 /* next three lines insert new entry into linked list */ 246 rowhit[mfm] = currentcol; 247 rowhit[currentcol] = fm; 248 fm = currentcol; 249 /* fm points to present position in list since we know the columns are sorted */ 250 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Detected invalid coloring"); 251 } 252 } 253 c->nrows[i] = nrows; 254 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 255 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 256 /* now store the linked list of rows into c->rows[i] */ 257 nrows = 0; 258 fm = rowhit[N]; 259 do { 260 c->rows[i][nrows] = fm; 261 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 262 fm = rowhit[fm]; 263 } while (fm < N); 264 } /* ---------------------------------------------------------------------------------------*/ 265 ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 266 } 267 ierr = MatRestoreColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 268 269 ierr = PetscFree(rowhit);CHKERRQ(ierr); 270 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 271 272 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 273 /* 274 see the version for MPIAIJ 275 */ 276 ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr); 277 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 278 for (k=0; k<c->ncolors; k++) { 279 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 280 for (l=0; l<c->nrows[k]; l++) { 281 col = c->columnsforrow[k][l]; 282 283 c->vscaleforrow[k][l] = col; 284 } 285 } 286 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 287 PetscFunctionReturn(0); 288 } 289 290