170c3da92SBarry Smith 2c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h> 3639f9d9dSBarry Smith 479c1e64dSHong Zhang #undef __FUNCT__ 579c1e64dSHong Zhang #define __FUNCT__ "MatFDColoringCreate_SeqAIJ_den2sp" 679c1e64dSHong Zhang /* 779c1e64dSHong Zhang This routine optimizes MatFDColoringCreate_SeqAIJ() by using den2sp array 879c1e64dSHong Zhang */ 979c1e64dSHong Zhang PetscErrorCode MatFDColoringCreate_SeqAIJ_den2sp(Mat mat,ISColoring iscoloring,MatFDColoring c) 1079c1e64dSHong Zhang { 1179c1e64dSHong Zhang PetscErrorCode ierr; 1279c1e64dSHong Zhang PetscInt i,n,nrows,N,j,k,m,ncols,col; 1379c1e64dSHong Zhang const PetscInt *is,*rows,*ci,*cj; 1479c1e64dSHong Zhang PetscInt nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1,*spidx,*spidxhit,*den2sp,*den2sp_i; 1579c1e64dSHong Zhang IS *isa; 1679c1e64dSHong Zhang PetscBool flg1,flg2; 1779c1e64dSHong Zhang Mat_SeqAIJ *csp = (Mat_SeqAIJ*)mat->data; 1879c1e64dSHong Zhang 1979c1e64dSHong Zhang PetscFunctionBegin; 2079c1e64dSHong Zhang ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 2179c1e64dSHong Zhang /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1 */ 2279c1e64dSHong Zhang ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr); 2379c1e64dSHong Zhang ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIBAIJ,&flg2);CHKERRQ(ierr); 2479c1e64dSHong Zhang if (flg1 || flg2) { 2579c1e64dSHong Zhang ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2679c1e64dSHong Zhang } 2779c1e64dSHong Zhang 2879c1e64dSHong Zhang N = mat->cmap->N/bs; 2979c1e64dSHong Zhang c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 3079c1e64dSHong Zhang c->N = mat->cmap->N/bs; 3179c1e64dSHong Zhang c->m = mat->rmap->N/bs; 3279c1e64dSHong Zhang c->rstart = 0; 3379c1e64dSHong Zhang 3479c1e64dSHong Zhang c->ncolors = nis; 3579c1e64dSHong Zhang ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 3679c1e64dSHong Zhang ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 3779c1e64dSHong Zhang ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 3879c1e64dSHong Zhang ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 3979c1e64dSHong Zhang ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 4079c1e64dSHong Zhang ierr = PetscMalloc((csp->nz+1)*sizeof(PetscInt),&den2sp);CHKERRQ(ierr); 4179c1e64dSHong Zhang den2sp_i = den2sp; 4279c1e64dSHong Zhang 436378f32dSHong Zhang ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 4479c1e64dSHong Zhang 4579c1e64dSHong Zhang ierr = PetscMalloc((c->m+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 4679c1e64dSHong Zhang ierr = PetscMemzero(rowhit,c->m*sizeof(PetscInt));CHKERRQ(ierr); 4779c1e64dSHong Zhang ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); // N=mat->cmap->N/bs or c->M ? 4879c1e64dSHong Zhang ierr = PetscMalloc((c->m+1)*sizeof(PetscInt),&spidxhit);CHKERRQ(ierr); 4979c1e64dSHong Zhang 5079c1e64dSHong Zhang for (i=0; i<nis; i++) { /* loop over colors */ 5179c1e64dSHong Zhang ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 5279c1e64dSHong Zhang ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 5379c1e64dSHong Zhang 5479c1e64dSHong Zhang c->ncolumns[i] = n; 5579c1e64dSHong Zhang if (n) { 5679c1e64dSHong Zhang ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 5779c1e64dSHong Zhang ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 5879c1e64dSHong Zhang } else { 5979c1e64dSHong Zhang c->columns[i] = 0; 6079c1e64dSHong Zhang } 6179c1e64dSHong Zhang 6279c1e64dSHong Zhang /* fast, crude version requires O(N*N) work */ 6379c1e64dSHong Zhang for (j=0; j<n; j++) { /* loop over columns */ 6479c1e64dSHong Zhang col = is[j]; 6579c1e64dSHong Zhang rows = cj + ci[col]; 6679c1e64dSHong Zhang m = ci[col+1] - ci[col]; 6779c1e64dSHong Zhang for (k=0; k<m; k++) { /* loop over columns marking them in rowhit */ 6879c1e64dSHong Zhang rowhit[*rows] = col + 1; 6979c1e64dSHong Zhang spidxhit[*rows] = spidx[ci[col] + k]; 7079c1e64dSHong Zhang rows++; 7179c1e64dSHong Zhang } 7279c1e64dSHong Zhang } 7379c1e64dSHong Zhang /* get num of rows by counting the number of hits */ 7479c1e64dSHong Zhang nrows = 0; 7579c1e64dSHong Zhang for (j=0; j<N; j++) { 7679c1e64dSHong Zhang if (rowhit[j]) nrows++; 7779c1e64dSHong Zhang } 7879c1e64dSHong Zhang c->nrows[i] = nrows; 7979c1e64dSHong Zhang 8079c1e64dSHong Zhang ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 8179c1e64dSHong Zhang ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 8279c1e64dSHong Zhang nrows = 0; 8379c1e64dSHong Zhang for (j=0; j<N; j++) { /* loop over rows */ 8479c1e64dSHong Zhang if (rowhit[j]) { 8579c1e64dSHong Zhang c->rows[i][nrows] = j; /* row index */ 8679c1e64dSHong Zhang c->columnsforrow[i][nrows] = rowhit[j] - 1; /* column index */ 8779c1e64dSHong Zhang den2sp_i[nrows++] = spidxhit[j]; 8879c1e64dSHong Zhang rowhit[j] = 0.0; /* zero rowhit for reuse */ 8979c1e64dSHong Zhang } 9079c1e64dSHong Zhang } 9179c1e64dSHong Zhang den2sp_i += nrows; 9279c1e64dSHong Zhang ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 9379c1e64dSHong Zhang } 946378f32dSHong Zhang ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 9579c1e64dSHong Zhang 9679c1e64dSHong Zhang ierr = PetscFree(rowhit);CHKERRQ(ierr); 9779c1e64dSHong Zhang ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 9879c1e64dSHong Zhang ierr = PetscFree(spidxhit);CHKERRQ(ierr); 9979c1e64dSHong Zhang 10079c1e64dSHong Zhang /* Optimize by adding the vscale, and scaleforrow[][] fields -- needed for seqaij??? */ 10179c1e64dSHong Zhang /* 10279c1e64dSHong Zhang see the version for MPIAIJ 10379c1e64dSHong Zhang */ 10479c1e64dSHong Zhang ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr); 10579c1e64dSHong Zhang ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 10679c1e64dSHong Zhang for (k=0; k<c->ncolors; k++) { 10779c1e64dSHong Zhang ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 10879c1e64dSHong Zhang for (l=0; l<c->nrows[k]; l++) { 10979c1e64dSHong Zhang col = c->columnsforrow[k][l]; 11079c1e64dSHong Zhang 11179c1e64dSHong Zhang c->vscaleforrow[k][l] = col; 11279c1e64dSHong Zhang } 11379c1e64dSHong Zhang } 11479c1e64dSHong Zhang ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 11579c1e64dSHong Zhang c->den2sp = den2sp; 116*4737c552SHong Zhang c->ctype = IS_COLORING_GHOSTED; 11779c1e64dSHong Zhang mat->ops->fdcoloringapply = MatFDColoringApply_SeqAIJ; 11879c1e64dSHong Zhang PetscFunctionReturn(0); 11979c1e64dSHong Zhang } 12079c1e64dSHong Zhang 12179c1e64dSHong Zhang /* --------------------------------------------------------------- */ 12279c1e64dSHong Zhang 1234a2ae208SSatish Balay #undef __FUNCT__ 1244a2ae208SSatish Balay #define __FUNCT__ "MatFDColoringCreate_SeqAIJ" 1253acb8795SBarry Smith /* 1263acb8795SBarry Smith This routine is shared by AIJ and BAIJ matrices, since it operators only on the nonzero structure of the elements or blocks. 1273acb8795SBarry Smith This is why it has the ugly code with the MatGetBlockSize() 1283acb8795SBarry Smith */ 129dfbe8321SBarry Smith PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 130639f9d9dSBarry Smith { 1316849ba73SBarry Smith PetscErrorCode ierr; 1321a83f524SJed Brown PetscInt i,n,nrows,N,j,k,m,ncols,col; 1331a83f524SJed Brown const PetscInt *is,*rows,*ci,*cj; 1343acb8795SBarry Smith PetscInt nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1; 135b9617806SBarry Smith IS *isa; 136ace3abfcSBarry Smith PetscBool done,flg = PETSC_FALSE; 137ace3abfcSBarry Smith PetscBool flg1,flg2; 13879c1e64dSHong Zhang PetscBool new_impl=PETSC_FALSE; 139639f9d9dSBarry Smith 1403a40ed3dSBarry Smith PetscFunctionBegin; 14179c1e64dSHong Zhang ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr); 14279c1e64dSHong Zhang if (new_impl){ 14379c1e64dSHong Zhang ierr = MatFDColoringCreate_SeqAIJ_den2sp(mat,iscoloring,c);CHKERRQ(ierr); 14479c1e64dSHong Zhang PetscFunctionReturn(0); 14579c1e64dSHong Zhang } 146e7e72b3dSBarry Smith if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();"); 147522c5e43SBarry Smith 148b9617806SBarry Smith ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 1493acb8795SBarry Smith /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1 */ 150251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr); 151251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIBAIJ,&flg2);CHKERRQ(ierr); 1523acb8795SBarry Smith if (flg1 || flg2) { 1533acb8795SBarry Smith ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 1543acb8795SBarry Smith } 1553acb8795SBarry Smith 1563acb8795SBarry Smith N = mat->cmap->N/bs; 1573acb8795SBarry Smith c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 1583acb8795SBarry Smith c->N = mat->cmap->N/bs; 1593acb8795SBarry Smith c->m = mat->rmap->N/bs; 160005c665bSBarry Smith c->rstart = 0; 161005c665bSBarry Smith 162639f9d9dSBarry Smith c->ncolors = nis; 16338baddfdSBarry Smith ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 16438baddfdSBarry Smith ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 16538baddfdSBarry Smith ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 16638baddfdSBarry Smith ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 16738baddfdSBarry Smith ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 16843a90d84SBarry Smith 1693acb8795SBarry Smith ierr = MatGetColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 170ce94432eSBarry Smith if (!done) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name); 17170c3da92SBarry Smith 17270c3da92SBarry Smith /* 173639f9d9dSBarry Smith Temporary option to allow for debugging/testing 17470c3da92SBarry Smith */ 1750298fd71SBarry Smith ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr); 17670c3da92SBarry Smith 17738baddfdSBarry Smith ierr = PetscMalloc((N+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 17838baddfdSBarry Smith ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 17970c3da92SBarry Smith 180639f9d9dSBarry Smith for (i=0; i<nis; i++) { 181b9b97703SBarry Smith ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 182639f9d9dSBarry Smith ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 1832205254eSKarl Rupp 184639f9d9dSBarry Smith c->ncolumns[i] = n; 185639f9d9dSBarry Smith if (n) { 18638baddfdSBarry Smith ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 18738baddfdSBarry Smith ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 18870c3da92SBarry Smith } else { 189639f9d9dSBarry Smith c->columns[i] = 0; 19070c3da92SBarry Smith } 19170c3da92SBarry Smith 1923a7fca6bSBarry Smith if (!flg) { /* ------------------------------------------------------------------------------*/ 1933a7fca6bSBarry Smith /* fast, crude version requires O(N*N) work */ 19438baddfdSBarry Smith ierr = PetscMemzero(rowhit,N*sizeof(PetscInt));CHKERRQ(ierr); 195639f9d9dSBarry Smith /* loop over columns*/ 196639f9d9dSBarry Smith for (j=0; j<n; j++) { 197639f9d9dSBarry Smith col = is[j]; 198639f9d9dSBarry Smith rows = cj + ci[col]; 199639f9d9dSBarry Smith m = ci[col+1] - ci[col]; 200639f9d9dSBarry Smith /* loop over columns marking them in rowhit */ 201639f9d9dSBarry Smith for (k=0; k<m; k++) { 202639f9d9dSBarry Smith rowhit[*rows++] = col + 1; 20370c3da92SBarry Smith } 20470c3da92SBarry Smith } 205639f9d9dSBarry Smith /* count the number of hits */ 206639f9d9dSBarry Smith nrows = 0; 20770c3da92SBarry Smith for (j=0; j<N; j++) { 208639f9d9dSBarry Smith if (rowhit[j]) nrows++; 209639f9d9dSBarry Smith } 210639f9d9dSBarry Smith c->nrows[i] = nrows; 21138baddfdSBarry Smith ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 21238baddfdSBarry Smith ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 213639f9d9dSBarry Smith nrows = 0; 214639f9d9dSBarry Smith for (j=0; j<N; j++) { 215639f9d9dSBarry Smith if (rowhit[j]) { 216639f9d9dSBarry Smith c->rows[i][nrows] = j; 217639f9d9dSBarry Smith c->columnsforrow[i][nrows] = rowhit[j] - 1; 218639f9d9dSBarry Smith nrows++; 21970c3da92SBarry Smith } 22070c3da92SBarry Smith } 221639f9d9dSBarry Smith } else { /*-------------------------------------------------------------------------------*/ 2223a7fca6bSBarry Smith /* slow version, using rowhit as a linked list */ 22338baddfdSBarry Smith PetscInt currentcol,fm,mfm; 224639f9d9dSBarry Smith rowhit[N] = N; 225639f9d9dSBarry Smith nrows = 0; 226639f9d9dSBarry Smith /* loop over columns */ 227639f9d9dSBarry Smith for (j=0; j<n; j++) { 228639f9d9dSBarry Smith col = is[j]; 229639f9d9dSBarry Smith rows = cj + ci[col]; 230639f9d9dSBarry Smith m = ci[col+1] - ci[col]; 231639f9d9dSBarry Smith /* loop over columns marking them in rowhit */ 232639f9d9dSBarry Smith fm = N; /* fm points to first entry in linked list */ 233639f9d9dSBarry Smith for (k=0; k<m; k++) { 234639f9d9dSBarry Smith currentcol = *rows++; 235639f9d9dSBarry Smith /* is it already in the list? */ 236639f9d9dSBarry Smith do { 237639f9d9dSBarry Smith mfm = fm; 238639f9d9dSBarry Smith fm = rowhit[fm]; 239639f9d9dSBarry Smith } while (fm < currentcol); 240639f9d9dSBarry Smith /* not in list so add it */ 241639f9d9dSBarry Smith if (fm != currentcol) { 242639f9d9dSBarry Smith nrows++; 243639f9d9dSBarry Smith columnsforrow[currentcol] = col; 244639f9d9dSBarry Smith /* next three lines insert new entry into linked list */ 245639f9d9dSBarry Smith rowhit[mfm] = currentcol; 246639f9d9dSBarry Smith rowhit[currentcol] = fm; 247639f9d9dSBarry Smith fm = currentcol; 248639f9d9dSBarry Smith /* fm points to present position in list since we know the columns are sorted */ 24971cd77b2SBarry Smith } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Detected invalid coloring"); 250639f9d9dSBarry Smith } 251639f9d9dSBarry Smith } 252639f9d9dSBarry Smith c->nrows[i] = nrows; 25338baddfdSBarry Smith ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 25438baddfdSBarry Smith ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 255639f9d9dSBarry Smith /* now store the linked list of rows into c->rows[i] */ 256639f9d9dSBarry Smith nrows = 0; 257639f9d9dSBarry Smith fm = rowhit[N]; 258639f9d9dSBarry Smith do { 259639f9d9dSBarry Smith c->rows[i][nrows] = fm; 260639f9d9dSBarry Smith c->columnsforrow[i][nrows++] = columnsforrow[fm]; 261639f9d9dSBarry Smith fm = rowhit[fm]; 262639f9d9dSBarry Smith } while (fm < N); 263639f9d9dSBarry Smith } /* ---------------------------------------------------------------------------------------*/ 264639f9d9dSBarry Smith ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 265639f9d9dSBarry Smith } 2663acb8795SBarry Smith ierr = MatRestoreColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 267639f9d9dSBarry Smith 268606d414cSSatish Balay ierr = PetscFree(rowhit);CHKERRQ(ierr); 269606d414cSSatish Balay ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 270639f9d9dSBarry Smith 27130b34957SBarry Smith /* Optimize by adding the vscale, and scaleforrow[][] fields */ 27230b34957SBarry Smith /* 27330b34957SBarry Smith see the version for MPIAIJ 27430b34957SBarry Smith */ 275ce94432eSBarry Smith ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr); 27638baddfdSBarry Smith ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 27730b34957SBarry Smith for (k=0; k<c->ncolors; k++) { 27838baddfdSBarry Smith ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 27930b34957SBarry Smith for (l=0; l<c->nrows[k]; l++) { 28030b34957SBarry Smith col = c->columnsforrow[k][l]; 2812205254eSKarl Rupp 28230b34957SBarry Smith c->vscaleforrow[k][l] = col; 28330b34957SBarry Smith } 28430b34957SBarry Smith } 285b9617806SBarry Smith ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 2863a40ed3dSBarry Smith PetscFunctionReturn(0); 28770c3da92SBarry Smith } 28879c1e64dSHong Zhang 289