1 2 #include <../src/mat/impls/aij/seq/aij.h> 3 4 #undef __FUNCT__ 5 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ_den2sp" 6 /* 7 This routine optimizes MatFDColoringCreate_SeqAIJ() by using den2sp array 8 */ 9 PetscErrorCode MatFDColoringCreate_SeqAIJ_den2sp(Mat mat,ISColoring iscoloring,MatFDColoring c) 10 { 11 PetscErrorCode ierr; 12 PetscInt i,n,nrows,N,j,k,m,ncols,col; 13 const PetscInt *is,*rows,*ci,*cj; 14 PetscInt nis=iscoloring->n,*rowhit,*columnsforrow,bs=1,*spidx,*spidxhit,nz; 15 IS *isa; 16 PetscBool flg1; 17 Mat_SeqAIJ *csp = (Mat_SeqAIJ*)mat->data; 18 19 PetscFunctionBegin; 20 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 21 /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1. 22 SeqBAIJ calls this routine, thus check it */ 23 ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr); 24 if (flg1) { 25 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 26 } 27 28 N = mat->cmap->N/bs; 29 c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 30 c->N = mat->cmap->N/bs; 31 c->m = mat->rmap->N/bs; 32 c->rstart = 0; 33 34 c->ncolors = nis; 35 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 36 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 37 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 38 ierr = PetscMalloc(3*csp->nz*sizeof(PetscInt*),&c->rowcolden2sp3);CHKERRQ(ierr); 39 40 ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 41 42 ierr = PetscMalloc3(c->m,PetscInt,&rowhit,N,PetscInt,&columnsforrow,c->m,PetscInt,&spidxhit);CHKERRQ(ierr); 43 ierr = PetscMemzero(rowhit,c->m*sizeof(PetscInt));CHKERRQ(ierr); 44 45 nz = 0; 46 for (i=0; i<nis; i++) { /* loop over colors */ 47 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 48 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 49 50 c->ncolumns[i] = n; 51 if (n) { 52 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 53 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 54 } else { 55 c->columns[i] = 0; 56 } 57 58 /* fast, crude version requires O(N*N) work */ 59 nrows = 0; 60 for (j=0; j<n; j++) { /* loop over columns */ 61 col = is[j]; 62 rows = cj + ci[col]; 63 m = ci[col+1] - ci[col]; 64 for (k=0; k<m; k++) { /* loop over columns marking them in rowhit */ 65 rowhit[*rows] = col + 1; 66 spidxhit[*rows++] = spidx[ci[col] + k]; 67 nrows++; 68 } 69 } 70 c->nrows[i] = nrows; /* total num of rows for this color */ 71 72 nrows = 0; 73 for (j=0; j<N; j++) { /* loop over rows */ 74 if (rowhit[j]) { 75 c->rowcolden2sp3[nz++] = j; /* row index */ 76 c->rowcolden2sp3[nz++] = rowhit[j] - 1; /* column index */ 77 c->rowcolden2sp3[nz++] = spidxhit[j]; /* den2sp index */ 78 rowhit[j] = 0.0; /* zero rowhit for reuse */ 79 } 80 } 81 ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 82 } 83 if (nz/3 != csp->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"nz %d != mat->nz %d\n",nz/3,csp->nz); 84 85 ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 86 ierr = PetscFree3(rowhit,columnsforrow,spidxhit);CHKERRQ(ierr); 87 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 88 89 c->ctype = IS_COLORING_GHOSTED; 90 mat->ops->fdcoloringapply = MatFDColoringApply_SeqAIJ; 91 PetscFunctionReturn(0); 92 } 93 94 /* --------------------------------------------------------------- */ 95 96 #undef __FUNCT__ 97 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ" 98 /* 99 This routine is shared by AIJ and BAIJ matrices, since it operators only on the nonzero structure of the elements or blocks. 100 This is why it has the ugly code with the MatGetBlockSize() 101 */ 102 PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 103 { 104 PetscErrorCode ierr; 105 PetscInt i,n,nrows,N,j,k,m,ncols,col; 106 const PetscInt *is,*rows,*ci,*cj; 107 PetscInt nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1; 108 IS *isa; 109 PetscBool done,flg = PETSC_FALSE; 110 PetscBool flg1; 111 PetscBool new_impl=PETSC_FALSE; 112 113 PetscFunctionBegin; 114 ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr); 115 if (new_impl){ 116 ierr = MatFDColoringCreate_SeqAIJ_den2sp(mat,iscoloring,c);CHKERRQ(ierr); 117 PetscFunctionReturn(0); 118 } 119 if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();"); 120 121 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 122 /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1. 123 SeqBAIJ calls this routine, thus check it */ 124 ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr); 125 if (flg1) { 126 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 127 } 128 129 N = mat->cmap->N/bs; 130 c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 131 c->N = mat->cmap->N/bs; 132 c->m = mat->rmap->N/bs; 133 c->rstart = 0; 134 135 c->ncolors = nis; 136 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 137 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 138 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 139 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 140 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 141 142 ierr = MatGetColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 143 if (!done) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name); 144 145 /* 146 Temporary option to allow for debugging/testing 147 */ 148 ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr); 149 150 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 151 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 152 153 for (i=0; i<nis; i++) { 154 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 155 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 156 157 c->ncolumns[i] = n; 158 if (n) { 159 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 160 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 161 } else { 162 c->columns[i] = 0; 163 } 164 165 if (!flg) { /* ------------------------------------------------------------------------------*/ 166 /* fast, crude version requires O(N*N) work */ 167 ierr = PetscMemzero(rowhit,N*sizeof(PetscInt));CHKERRQ(ierr); 168 /* loop over columns*/ 169 for (j=0; j<n; j++) { 170 col = is[j]; 171 rows = cj + ci[col]; 172 m = ci[col+1] - ci[col]; 173 /* loop over columns marking them in rowhit */ 174 for (k=0; k<m; k++) { 175 rowhit[*rows++] = col + 1; 176 } 177 } 178 /* count the number of hits */ 179 nrows = 0; 180 for (j=0; j<N; j++) { 181 if (rowhit[j]) nrows++; 182 } 183 c->nrows[i] = nrows; 184 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 185 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 186 nrows = 0; 187 for (j=0; j<N; j++) { 188 if (rowhit[j]) { 189 c->rows[i][nrows] = j; 190 c->columnsforrow[i][nrows] = rowhit[j] - 1; 191 nrows++; 192 } 193 } 194 } else { /*-------------------------------------------------------------------------------*/ 195 /* slow version, using rowhit as a linked list */ 196 PetscInt currentcol,fm,mfm; 197 rowhit[N] = N; 198 nrows = 0; 199 /* loop over columns */ 200 for (j=0; j<n; j++) { 201 col = is[j]; 202 rows = cj + ci[col]; 203 m = ci[col+1] - ci[col]; 204 /* loop over columns marking them in rowhit */ 205 fm = N; /* fm points to first entry in linked list */ 206 for (k=0; k<m; k++) { 207 currentcol = *rows++; 208 /* is it already in the list? */ 209 do { 210 mfm = fm; 211 fm = rowhit[fm]; 212 } while (fm < currentcol); 213 /* not in list so add it */ 214 if (fm != currentcol) { 215 nrows++; 216 columnsforrow[currentcol] = col; 217 /* next three lines insert new entry into linked list */ 218 rowhit[mfm] = currentcol; 219 rowhit[currentcol] = fm; 220 fm = currentcol; 221 /* fm points to present position in list since we know the columns are sorted */ 222 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Detected invalid coloring"); 223 } 224 } 225 c->nrows[i] = nrows; 226 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 227 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 228 /* now store the linked list of rows into c->rows[i] */ 229 nrows = 0; 230 fm = rowhit[N]; 231 do { 232 c->rows[i][nrows] = fm; 233 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 234 fm = rowhit[fm]; 235 } while (fm < N); 236 } /* ---------------------------------------------------------------------------------------*/ 237 ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 238 } 239 ierr = MatRestoreColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 240 241 ierr = PetscFree(rowhit);CHKERRQ(ierr); 242 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 243 244 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 245 /* 246 see the version for MPIAIJ 247 */ 248 ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr); 249 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 250 for (k=0; k<c->ncolors; k++) { 251 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 252 for (l=0; l<c->nrows[k]; l++) { 253 col = c->columnsforrow[k][l]; 254 255 c->vscaleforrow[k][l] = col; 256 } 257 } 258 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 259 PetscFunctionReturn(0); 260 } 261 262