1 2 #include <../src/mat/impls/aij/seq/aij.h> 3 4 #undef __FUNCT__ 5 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ_den2sp" 6 /* 7 This routine optimizes MatFDColoringCreate_SeqAIJ() by using den2sp array 8 */ 9 PetscErrorCode MatFDColoringCreate_SeqAIJ_den2sp(Mat mat,ISColoring iscoloring,MatFDColoring c) 10 { 11 PetscErrorCode ierr; 12 PetscInt i,n,nrows,N,j,k,m,ncols,col; 13 const PetscInt *is,*rows,*ci,*cj; 14 PetscInt nis=iscoloring->n,*rowhit,*columnsforrow,bs=1,*spidx,*spidxhit,*den2sp,*den2sp_i; 15 IS *isa; 16 PetscBool flg1; 17 Mat_SeqAIJ *csp = (Mat_SeqAIJ*)mat->data; 18 19 PetscFunctionBegin; 20 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 21 /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1. 22 SeqBAIJ calls this routine, thus check it */ 23 ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr); 24 if (flg1) { 25 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 26 } 27 28 N = mat->cmap->N/bs; 29 c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 30 c->N = mat->cmap->N/bs; 31 c->m = mat->rmap->N/bs; 32 c->rstart = 0; 33 34 c->ncolors = nis; 35 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 36 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 37 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 38 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 39 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 40 ierr = PetscMalloc((csp->nz+1)*sizeof(PetscInt),&den2sp);CHKERRQ(ierr); 41 den2sp_i = den2sp; 42 43 ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 44 45 ierr = PetscMalloc3(c->m,PetscInt,&rowhit,N,PetscInt,&columnsforrow,c->m,PetscInt,&spidxhit);CHKERRQ(ierr); 46 ierr = PetscMemzero(rowhit,c->m*sizeof(PetscInt));CHKERRQ(ierr); 47 48 for (i=0; i<nis; i++) { /* loop over colors */ 49 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 50 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 51 52 c->ncolumns[i] = n; 53 if (n) { 54 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 55 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 56 } else { 57 c->columns[i] = 0; 58 } 59 60 /* fast, crude version requires O(N*N) work */ 61 nrows = 0; 62 for (j=0; j<n; j++) { /* loop over columns */ 63 col = is[j]; 64 rows = cj + ci[col]; 65 m = ci[col+1] - ci[col]; 66 for (k=0; k<m; k++) { /* loop over columns marking them in rowhit */ 67 rowhit[*rows] = col + 1; 68 spidxhit[*rows++] = spidx[ci[col] + k]; 69 nrows++; 70 } 71 } 72 c->nrows[i] = nrows; /* total num of rows for this color */ 73 74 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 75 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 76 nrows = 0; 77 for (j=0; j<N; j++) { /* loop over rows */ 78 if (rowhit[j]) { 79 c->rows[i][nrows] = j; /* row index */ 80 c->columnsforrow[i][nrows] = rowhit[j] - 1; /* column index */ 81 den2sp_i[nrows++] = spidxhit[j]; 82 rowhit[j] = 0.0; /* zero rowhit for reuse */ 83 } 84 } 85 den2sp_i += nrows; 86 ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 87 } 88 ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 89 ierr = PetscFree3(rowhit,columnsforrow,spidxhit);CHKERRQ(ierr); 90 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 91 92 c->den2sp = den2sp; 93 c->ctype = IS_COLORING_GHOSTED; 94 mat->ops->fdcoloringapply = MatFDColoringApply_SeqAIJ; 95 PetscFunctionReturn(0); 96 } 97 98 /* --------------------------------------------------------------- */ 99 100 #undef __FUNCT__ 101 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ" 102 /* 103 This routine is shared by AIJ and BAIJ matrices, since it operators only on the nonzero structure of the elements or blocks. 104 This is why it has the ugly code with the MatGetBlockSize() 105 */ 106 PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 107 { 108 PetscErrorCode ierr; 109 PetscInt i,n,nrows,N,j,k,m,ncols,col; 110 const PetscInt *is,*rows,*ci,*cj; 111 PetscInt nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1; 112 IS *isa; 113 PetscBool done,flg = PETSC_FALSE; 114 PetscBool flg1; 115 PetscBool new_impl=PETSC_FALSE; 116 117 PetscFunctionBegin; 118 ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr); 119 if (new_impl){ 120 ierr = MatFDColoringCreate_SeqAIJ_den2sp(mat,iscoloring,c);CHKERRQ(ierr); 121 PetscFunctionReturn(0); 122 } 123 if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();"); 124 125 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 126 /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1. 127 SeqBAIJ calls this routine, thus check it */ 128 ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr); 129 if (flg1) { 130 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 131 } 132 133 N = mat->cmap->N/bs; 134 c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 135 c->N = mat->cmap->N/bs; 136 c->m = mat->rmap->N/bs; 137 c->rstart = 0; 138 139 c->ncolors = nis; 140 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 141 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 142 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 143 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 144 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 145 146 ierr = MatGetColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 147 if (!done) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name); 148 149 /* 150 Temporary option to allow for debugging/testing 151 */ 152 ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr); 153 154 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 155 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 156 157 for (i=0; i<nis; i++) { 158 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 159 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 160 161 c->ncolumns[i] = n; 162 if (n) { 163 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 164 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 165 } else { 166 c->columns[i] = 0; 167 } 168 169 if (!flg) { /* ------------------------------------------------------------------------------*/ 170 /* fast, crude version requires O(N*N) work */ 171 ierr = PetscMemzero(rowhit,N*sizeof(PetscInt));CHKERRQ(ierr); 172 /* loop over columns*/ 173 for (j=0; j<n; j++) { 174 col = is[j]; 175 rows = cj + ci[col]; 176 m = ci[col+1] - ci[col]; 177 /* loop over columns marking them in rowhit */ 178 for (k=0; k<m; k++) { 179 rowhit[*rows++] = col + 1; 180 } 181 } 182 /* count the number of hits */ 183 nrows = 0; 184 for (j=0; j<N; j++) { 185 if (rowhit[j]) nrows++; 186 } 187 c->nrows[i] = nrows; 188 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 189 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 190 nrows = 0; 191 for (j=0; j<N; j++) { 192 if (rowhit[j]) { 193 c->rows[i][nrows] = j; 194 c->columnsforrow[i][nrows] = rowhit[j] - 1; 195 nrows++; 196 } 197 } 198 } else { /*-------------------------------------------------------------------------------*/ 199 /* slow version, using rowhit as a linked list */ 200 PetscInt currentcol,fm,mfm; 201 rowhit[N] = N; 202 nrows = 0; 203 /* loop over columns */ 204 for (j=0; j<n; j++) { 205 col = is[j]; 206 rows = cj + ci[col]; 207 m = ci[col+1] - ci[col]; 208 /* loop over columns marking them in rowhit */ 209 fm = N; /* fm points to first entry in linked list */ 210 for (k=0; k<m; k++) { 211 currentcol = *rows++; 212 /* is it already in the list? */ 213 do { 214 mfm = fm; 215 fm = rowhit[fm]; 216 } while (fm < currentcol); 217 /* not in list so add it */ 218 if (fm != currentcol) { 219 nrows++; 220 columnsforrow[currentcol] = col; 221 /* next three lines insert new entry into linked list */ 222 rowhit[mfm] = currentcol; 223 rowhit[currentcol] = fm; 224 fm = currentcol; 225 /* fm points to present position in list since we know the columns are sorted */ 226 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Detected invalid coloring"); 227 } 228 } 229 c->nrows[i] = nrows; 230 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 231 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 232 /* now store the linked list of rows into c->rows[i] */ 233 nrows = 0; 234 fm = rowhit[N]; 235 do { 236 c->rows[i][nrows] = fm; 237 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 238 fm = rowhit[fm]; 239 } while (fm < N); 240 } /* ---------------------------------------------------------------------------------------*/ 241 ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 242 } 243 ierr = MatRestoreColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 244 245 ierr = PetscFree(rowhit);CHKERRQ(ierr); 246 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 247 248 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 249 /* 250 see the version for MPIAIJ 251 */ 252 ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr); 253 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 254 for (k=0; k<c->ncolors; k++) { 255 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 256 for (l=0; l<c->nrows[k]; l++) { 257 col = c->columnsforrow[k][l]; 258 259 c->vscaleforrow[k][l] = col; 260 } 261 } 262 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 263 PetscFunctionReturn(0); 264 } 265 266