170c3da92SBarry Smith 2c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h> 3639f9d9dSBarry Smith 479c1e64dSHong Zhang #undef __FUNCT__ 579c1e64dSHong Zhang #define __FUNCT__ "MatFDColoringCreate_SeqAIJ_den2sp" 679c1e64dSHong Zhang /* 779c1e64dSHong Zhang This routine optimizes MatFDColoringCreate_SeqAIJ() by using den2sp array 879c1e64dSHong Zhang */ 979c1e64dSHong Zhang PetscErrorCode MatFDColoringCreate_SeqAIJ_den2sp(Mat mat,ISColoring iscoloring,MatFDColoring c) 1079c1e64dSHong Zhang { 1179c1e64dSHong Zhang PetscErrorCode ierr; 1279c1e64dSHong Zhang PetscInt i,n,nrows,N,j,k,m,ncols,col; 1379c1e64dSHong Zhang const PetscInt *is,*rows,*ci,*cj; 1479c1e64dSHong Zhang PetscInt nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1,*spidx,*spidxhit,*den2sp,*den2sp_i; 1579c1e64dSHong Zhang IS *isa; 1679c1e64dSHong Zhang PetscBool flg1,flg2; 1779c1e64dSHong Zhang Mat_SeqAIJ *csp = (Mat_SeqAIJ*)mat->data; 1879c1e64dSHong Zhang 1979c1e64dSHong Zhang PetscFunctionBegin; 2079c1e64dSHong Zhang if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();"); 2179c1e64dSHong Zhang 2279c1e64dSHong Zhang ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 2379c1e64dSHong Zhang /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1 */ 2479c1e64dSHong Zhang ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr); 2579c1e64dSHong Zhang ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIBAIJ,&flg2);CHKERRQ(ierr); 2679c1e64dSHong Zhang if (flg1 || flg2) { 2779c1e64dSHong Zhang ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2879c1e64dSHong Zhang } 2979c1e64dSHong Zhang 3079c1e64dSHong Zhang N = mat->cmap->N/bs; 3179c1e64dSHong Zhang c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 3279c1e64dSHong Zhang c->N = mat->cmap->N/bs; 3379c1e64dSHong Zhang c->m = mat->rmap->N/bs; 3479c1e64dSHong Zhang c->rstart = 0; 3579c1e64dSHong Zhang 3679c1e64dSHong Zhang c->ncolors = nis; 3779c1e64dSHong Zhang ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 3879c1e64dSHong Zhang ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 3979c1e64dSHong Zhang ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 4079c1e64dSHong Zhang ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 4179c1e64dSHong Zhang ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 4279c1e64dSHong Zhang ierr = PetscMalloc((csp->nz+1)*sizeof(PetscInt),&den2sp);CHKERRQ(ierr); 4379c1e64dSHong Zhang den2sp_i = den2sp; 4479c1e64dSHong Zhang 45*6378f32dSHong Zhang ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 4679c1e64dSHong Zhang 4779c1e64dSHong Zhang ierr = PetscMalloc((c->m+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 4879c1e64dSHong Zhang ierr = PetscMemzero(rowhit,c->m*sizeof(PetscInt));CHKERRQ(ierr); 4979c1e64dSHong Zhang ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); // N=mat->cmap->N/bs or c->M ? 5079c1e64dSHong Zhang ierr = PetscMalloc((c->m+1)*sizeof(PetscInt),&spidxhit);CHKERRQ(ierr); 5179c1e64dSHong Zhang 5279c1e64dSHong Zhang for (i=0; i<nis; i++) { /* loop over colors */ 5379c1e64dSHong Zhang ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 5479c1e64dSHong Zhang ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 5579c1e64dSHong Zhang 5679c1e64dSHong Zhang c->ncolumns[i] = n; 5779c1e64dSHong Zhang if (n) { 5879c1e64dSHong Zhang ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 5979c1e64dSHong Zhang ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 6079c1e64dSHong Zhang } else { 6179c1e64dSHong Zhang c->columns[i] = 0; 6279c1e64dSHong Zhang } 6379c1e64dSHong Zhang 6479c1e64dSHong Zhang /* fast, crude version requires O(N*N) work */ 6579c1e64dSHong Zhang for (j=0; j<n; j++) { /* loop over columns */ 6679c1e64dSHong Zhang col = is[j]; 6779c1e64dSHong Zhang rows = cj + ci[col]; 6879c1e64dSHong Zhang m = ci[col+1] - ci[col]; 6979c1e64dSHong Zhang for (k=0; k<m; k++) { /* loop over columns marking them in rowhit */ 7079c1e64dSHong Zhang rowhit[*rows] = col + 1; 7179c1e64dSHong Zhang spidxhit[*rows] = spidx[ci[col] + k]; 7279c1e64dSHong Zhang rows++; 7379c1e64dSHong Zhang } 7479c1e64dSHong Zhang } 7579c1e64dSHong Zhang /* get num of rows by counting the number of hits */ 7679c1e64dSHong Zhang nrows = 0; 7779c1e64dSHong Zhang for (j=0; j<N; j++) { 7879c1e64dSHong Zhang if (rowhit[j]) nrows++; 7979c1e64dSHong Zhang } 8079c1e64dSHong Zhang c->nrows[i] = nrows; 8179c1e64dSHong Zhang 8279c1e64dSHong Zhang ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 8379c1e64dSHong Zhang ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 8479c1e64dSHong Zhang nrows = 0; 8579c1e64dSHong Zhang for (j=0; j<N; j++) { /* loop over rows */ 8679c1e64dSHong Zhang if (rowhit[j]) { 8779c1e64dSHong Zhang c->rows[i][nrows] = j; /* row index */ 8879c1e64dSHong Zhang c->columnsforrow[i][nrows] = rowhit[j] - 1; /* column index */ 8979c1e64dSHong Zhang den2sp_i[nrows++] = spidxhit[j]; 9079c1e64dSHong Zhang rowhit[j] = 0.0; /* zero rowhit for reuse */ 9179c1e64dSHong Zhang } 9279c1e64dSHong Zhang } 9379c1e64dSHong Zhang den2sp_i += nrows; 9479c1e64dSHong Zhang ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 9579c1e64dSHong Zhang } 96*6378f32dSHong Zhang ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 9779c1e64dSHong Zhang 9879c1e64dSHong Zhang ierr = PetscFree(rowhit);CHKERRQ(ierr); 9979c1e64dSHong Zhang ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 10079c1e64dSHong Zhang ierr = PetscFree(spidxhit);CHKERRQ(ierr); 10179c1e64dSHong Zhang 10279c1e64dSHong Zhang /* Optimize by adding the vscale, and scaleforrow[][] fields -- needed for seqaij??? */ 10379c1e64dSHong Zhang /* 10479c1e64dSHong Zhang see the version for MPIAIJ 10579c1e64dSHong Zhang */ 10679c1e64dSHong Zhang ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr); 10779c1e64dSHong Zhang ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 10879c1e64dSHong Zhang for (k=0; k<c->ncolors; k++) { 10979c1e64dSHong Zhang ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 11079c1e64dSHong Zhang for (l=0; l<c->nrows[k]; l++) { 11179c1e64dSHong Zhang col = c->columnsforrow[k][l]; 11279c1e64dSHong Zhang 11379c1e64dSHong Zhang c->vscaleforrow[k][l] = col; 11479c1e64dSHong Zhang } 11579c1e64dSHong Zhang } 11679c1e64dSHong Zhang ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 11779c1e64dSHong Zhang c->den2sp = den2sp; 11879c1e64dSHong Zhang mat->ops->fdcoloringapply = MatFDColoringApply_SeqAIJ; 11979c1e64dSHong Zhang PetscFunctionReturn(0); 12079c1e64dSHong Zhang } 12179c1e64dSHong Zhang 12279c1e64dSHong Zhang /* --------------------------------------------------------------- */ 12379c1e64dSHong Zhang 1244a2ae208SSatish Balay #undef __FUNCT__ 1254a2ae208SSatish Balay #define __FUNCT__ "MatFDColoringCreate_SeqAIJ" 1263acb8795SBarry Smith /* 1273acb8795SBarry Smith This routine is shared by AIJ and BAIJ matrices, since it operators only on the nonzero structure of the elements or blocks. 1283acb8795SBarry Smith This is why it has the ugly code with the MatGetBlockSize() 1293acb8795SBarry Smith */ 130dfbe8321SBarry Smith PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 131639f9d9dSBarry Smith { 1326849ba73SBarry Smith PetscErrorCode ierr; 1331a83f524SJed Brown PetscInt i,n,nrows,N,j,k,m,ncols,col; 1341a83f524SJed Brown const PetscInt *is,*rows,*ci,*cj; 1353acb8795SBarry Smith PetscInt nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1; 136b9617806SBarry Smith IS *isa; 137ace3abfcSBarry Smith PetscBool done,flg = PETSC_FALSE; 138ace3abfcSBarry Smith PetscBool flg1,flg2; 13979c1e64dSHong Zhang PetscBool new_impl=PETSC_FALSE; 140639f9d9dSBarry Smith 1413a40ed3dSBarry Smith PetscFunctionBegin; 14279c1e64dSHong Zhang ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr); 14379c1e64dSHong Zhang if (new_impl){ 14479c1e64dSHong Zhang ierr = MatFDColoringCreate_SeqAIJ_den2sp(mat,iscoloring,c);CHKERRQ(ierr); 14579c1e64dSHong Zhang PetscFunctionReturn(0); 14679c1e64dSHong Zhang } 147e7e72b3dSBarry Smith if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();"); 148522c5e43SBarry Smith 149b9617806SBarry Smith ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 1503acb8795SBarry Smith /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1 */ 151251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr); 152251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIBAIJ,&flg2);CHKERRQ(ierr); 1533acb8795SBarry Smith if (flg1 || flg2) { 1543acb8795SBarry Smith ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 1553acb8795SBarry Smith } 1563acb8795SBarry Smith 1573acb8795SBarry Smith N = mat->cmap->N/bs; 1583acb8795SBarry Smith c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 1593acb8795SBarry Smith c->N = mat->cmap->N/bs; 1603acb8795SBarry Smith c->m = mat->rmap->N/bs; 161005c665bSBarry Smith c->rstart = 0; 162005c665bSBarry Smith 163639f9d9dSBarry Smith c->ncolors = nis; 16438baddfdSBarry Smith ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 16538baddfdSBarry Smith ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 16638baddfdSBarry Smith ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 16738baddfdSBarry Smith ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 16838baddfdSBarry Smith ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 16943a90d84SBarry Smith 1703acb8795SBarry Smith ierr = MatGetColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 171ce94432eSBarry Smith if (!done) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name); 17270c3da92SBarry Smith 17370c3da92SBarry Smith /* 174639f9d9dSBarry Smith Temporary option to allow for debugging/testing 17570c3da92SBarry Smith */ 1760298fd71SBarry Smith ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr); 17770c3da92SBarry Smith 17838baddfdSBarry Smith ierr = PetscMalloc((N+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 17938baddfdSBarry Smith ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 18070c3da92SBarry Smith 181639f9d9dSBarry Smith for (i=0; i<nis; i++) { 182b9b97703SBarry Smith ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 183639f9d9dSBarry Smith ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 1842205254eSKarl Rupp 185639f9d9dSBarry Smith c->ncolumns[i] = n; 186639f9d9dSBarry Smith if (n) { 18738baddfdSBarry Smith ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 18838baddfdSBarry Smith ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 18970c3da92SBarry Smith } else { 190639f9d9dSBarry Smith c->columns[i] = 0; 19170c3da92SBarry Smith } 19270c3da92SBarry Smith 1933a7fca6bSBarry Smith if (!flg) { /* ------------------------------------------------------------------------------*/ 1943a7fca6bSBarry Smith /* fast, crude version requires O(N*N) work */ 19538baddfdSBarry Smith ierr = PetscMemzero(rowhit,N*sizeof(PetscInt));CHKERRQ(ierr); 196639f9d9dSBarry Smith /* loop over columns*/ 197639f9d9dSBarry Smith for (j=0; j<n; j++) { 198639f9d9dSBarry Smith col = is[j]; 199639f9d9dSBarry Smith rows = cj + ci[col]; 200639f9d9dSBarry Smith m = ci[col+1] - ci[col]; 201639f9d9dSBarry Smith /* loop over columns marking them in rowhit */ 202639f9d9dSBarry Smith for (k=0; k<m; k++) { 203639f9d9dSBarry Smith rowhit[*rows++] = col + 1; 20470c3da92SBarry Smith } 20570c3da92SBarry Smith } 206639f9d9dSBarry Smith /* count the number of hits */ 207639f9d9dSBarry Smith nrows = 0; 20870c3da92SBarry Smith for (j=0; j<N; j++) { 209639f9d9dSBarry Smith if (rowhit[j]) nrows++; 210639f9d9dSBarry Smith } 211639f9d9dSBarry Smith c->nrows[i] = nrows; 21238baddfdSBarry Smith ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 21338baddfdSBarry Smith ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 214639f9d9dSBarry Smith nrows = 0; 215639f9d9dSBarry Smith for (j=0; j<N; j++) { 216639f9d9dSBarry Smith if (rowhit[j]) { 217639f9d9dSBarry Smith c->rows[i][nrows] = j; 218639f9d9dSBarry Smith c->columnsforrow[i][nrows] = rowhit[j] - 1; 219639f9d9dSBarry Smith nrows++; 22070c3da92SBarry Smith } 22170c3da92SBarry Smith } 222639f9d9dSBarry Smith } else { /*-------------------------------------------------------------------------------*/ 2233a7fca6bSBarry Smith /* slow version, using rowhit as a linked list */ 22438baddfdSBarry Smith PetscInt currentcol,fm,mfm; 225639f9d9dSBarry Smith rowhit[N] = N; 226639f9d9dSBarry Smith nrows = 0; 227639f9d9dSBarry Smith /* loop over columns */ 228639f9d9dSBarry Smith for (j=0; j<n; j++) { 229639f9d9dSBarry Smith col = is[j]; 230639f9d9dSBarry Smith rows = cj + ci[col]; 231639f9d9dSBarry Smith m = ci[col+1] - ci[col]; 232639f9d9dSBarry Smith /* loop over columns marking them in rowhit */ 233639f9d9dSBarry Smith fm = N; /* fm points to first entry in linked list */ 234639f9d9dSBarry Smith for (k=0; k<m; k++) { 235639f9d9dSBarry Smith currentcol = *rows++; 236639f9d9dSBarry Smith /* is it already in the list? */ 237639f9d9dSBarry Smith do { 238639f9d9dSBarry Smith mfm = fm; 239639f9d9dSBarry Smith fm = rowhit[fm]; 240639f9d9dSBarry Smith } while (fm < currentcol); 241639f9d9dSBarry Smith /* not in list so add it */ 242639f9d9dSBarry Smith if (fm != currentcol) { 243639f9d9dSBarry Smith nrows++; 244639f9d9dSBarry Smith columnsforrow[currentcol] = col; 245639f9d9dSBarry Smith /* next three lines insert new entry into linked list */ 246639f9d9dSBarry Smith rowhit[mfm] = currentcol; 247639f9d9dSBarry Smith rowhit[currentcol] = fm; 248639f9d9dSBarry Smith fm = currentcol; 249639f9d9dSBarry Smith /* fm points to present position in list since we know the columns are sorted */ 25071cd77b2SBarry Smith } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Detected invalid coloring"); 251639f9d9dSBarry Smith } 252639f9d9dSBarry Smith } 253639f9d9dSBarry Smith c->nrows[i] = nrows; 25438baddfdSBarry Smith ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 25538baddfdSBarry Smith ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 256639f9d9dSBarry Smith /* now store the linked list of rows into c->rows[i] */ 257639f9d9dSBarry Smith nrows = 0; 258639f9d9dSBarry Smith fm = rowhit[N]; 259639f9d9dSBarry Smith do { 260639f9d9dSBarry Smith c->rows[i][nrows] = fm; 261639f9d9dSBarry Smith c->columnsforrow[i][nrows++] = columnsforrow[fm]; 262639f9d9dSBarry Smith fm = rowhit[fm]; 263639f9d9dSBarry Smith } while (fm < N); 264639f9d9dSBarry Smith } /* ---------------------------------------------------------------------------------------*/ 265639f9d9dSBarry Smith ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 266639f9d9dSBarry Smith } 2673acb8795SBarry Smith ierr = MatRestoreColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 268639f9d9dSBarry Smith 269606d414cSSatish Balay ierr = PetscFree(rowhit);CHKERRQ(ierr); 270606d414cSSatish Balay ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 271639f9d9dSBarry Smith 27230b34957SBarry Smith /* Optimize by adding the vscale, and scaleforrow[][] fields */ 27330b34957SBarry Smith /* 27430b34957SBarry Smith see the version for MPIAIJ 27530b34957SBarry Smith */ 276ce94432eSBarry Smith ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr); 27738baddfdSBarry Smith ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 27830b34957SBarry Smith for (k=0; k<c->ncolors; k++) { 27938baddfdSBarry Smith ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 28030b34957SBarry Smith for (l=0; l<c->nrows[k]; l++) { 28130b34957SBarry Smith col = c->columnsforrow[k][l]; 2822205254eSKarl Rupp 28330b34957SBarry Smith c->vscaleforrow[k][l] = col; 28430b34957SBarry Smith } 28530b34957SBarry Smith } 286b9617806SBarry Smith ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 2873a40ed3dSBarry Smith PetscFunctionReturn(0); 28870c3da92SBarry Smith } 28979c1e64dSHong Zhang 290