xref: /petsc/src/mat/impls/aij/seq/fdaij.c (revision 85740eac76d20a0cac9a36a21d5752ff4c70535a)
170c3da92SBarry Smith 
2c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h>
3639f9d9dSBarry Smith 
479c1e64dSHong Zhang #undef __FUNCT__
579c1e64dSHong Zhang #define __FUNCT__ "MatFDColoringCreate_SeqAIJ_den2sp"
679c1e64dSHong Zhang /*
779c1e64dSHong Zhang     This routine optimizes MatFDColoringCreate_SeqAIJ() by using den2sp array
879c1e64dSHong Zhang */
979c1e64dSHong Zhang PetscErrorCode MatFDColoringCreate_SeqAIJ_den2sp(Mat mat,ISColoring iscoloring,MatFDColoring c)
1079c1e64dSHong Zhang {
1179c1e64dSHong Zhang   PetscErrorCode ierr;
1279c1e64dSHong Zhang   PetscInt       i,n,nrows,N,j,k,m,ncols,col;
1379c1e64dSHong Zhang   const PetscInt *is,*rows,*ci,*cj;
14476e0d0aSHong Zhang   PetscInt       nis=iscoloring->n,*rowhit,*columnsforrow,bs=1,*spidx,*spidxhit,*den2sp,*den2sp_i;
1579c1e64dSHong Zhang   IS             *isa;
16476e0d0aSHong Zhang   PetscBool      flg1;
1779c1e64dSHong Zhang   Mat_SeqAIJ     *csp = (Mat_SeqAIJ*)mat->data;
18*85740eacSHong Zhang   PetscInt       nz_den=0;
1979c1e64dSHong Zhang 
2079c1e64dSHong Zhang   PetscFunctionBegin;
2179c1e64dSHong Zhang   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
22476e0d0aSHong Zhang   /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1.
23476e0d0aSHong Zhang      SeqBAIJ calls this routine, thus check it */
2479c1e64dSHong Zhang   ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr);
25476e0d0aSHong Zhang   if (flg1) {
2679c1e64dSHong Zhang     ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2779c1e64dSHong Zhang   }
2879c1e64dSHong Zhang 
2979c1e64dSHong Zhang   N         = mat->cmap->N/bs;
3079c1e64dSHong Zhang   c->M      = mat->rmap->N/bs;   /* set total rows, columns and local rows */
3179c1e64dSHong Zhang   c->N      = mat->cmap->N/bs;
3279c1e64dSHong Zhang   c->m      = mat->rmap->N/bs;
3379c1e64dSHong Zhang   c->rstart = 0;
3479c1e64dSHong Zhang 
3579c1e64dSHong Zhang   c->ncolors = nis;
3679c1e64dSHong Zhang   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
3779c1e64dSHong Zhang   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
3879c1e64dSHong Zhang   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
3979c1e64dSHong Zhang   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
4079c1e64dSHong Zhang   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
4179c1e64dSHong Zhang   ierr       = PetscMalloc((csp->nz+1)*sizeof(PetscInt),&den2sp);CHKERRQ(ierr);
4279c1e64dSHong Zhang   den2sp_i   = den2sp;
43*85740eacSHong Zhang   ierr       = PetscMalloc2(csp->nz,PetscInt,&c->colorforrow,csp->nz,PetscInt,&c->colorforcolumn);CHKERRQ(ierr);
44*85740eacSHong Zhang   ierr       = PetscMalloc(3*csp->nz*sizeof(PetscInt*),&c->rowcolden2sp3);CHKERRQ(ierr);
4579c1e64dSHong Zhang 
466378f32dSHong Zhang   ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
4779c1e64dSHong Zhang 
48476e0d0aSHong Zhang   ierr = PetscMalloc3(c->m,PetscInt,&rowhit,N,PetscInt,&columnsforrow,c->m,PetscInt,&spidxhit);CHKERRQ(ierr);
4979c1e64dSHong Zhang   ierr = PetscMemzero(rowhit,c->m*sizeof(PetscInt));CHKERRQ(ierr);
5079c1e64dSHong Zhang 
5179c1e64dSHong Zhang   for (i=0; i<nis; i++) { /* loop over colors */
5279c1e64dSHong Zhang     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
5379c1e64dSHong Zhang     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
5479c1e64dSHong Zhang 
5579c1e64dSHong Zhang     c->ncolumns[i] = n;
5679c1e64dSHong Zhang     if (n) {
5779c1e64dSHong Zhang       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
5879c1e64dSHong Zhang       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
5979c1e64dSHong Zhang     } else {
6079c1e64dSHong Zhang       c->columns[i] = 0;
6179c1e64dSHong Zhang     }
6279c1e64dSHong Zhang 
6379c1e64dSHong Zhang     /* fast, crude version requires O(N*N) work */
64476e0d0aSHong Zhang     nrows = 0;
6579c1e64dSHong Zhang     for (j=0; j<n; j++) {  /* loop over columns */
6679c1e64dSHong Zhang       col  = is[j];
6779c1e64dSHong Zhang       rows = cj + ci[col];
6879c1e64dSHong Zhang       m    = ci[col+1] - ci[col];
6979c1e64dSHong Zhang       for (k=0; k<m; k++) {  /* loop over columns marking them in rowhit */
7079c1e64dSHong Zhang         rowhit[*rows]     = col + 1;
71476e0d0aSHong Zhang         spidxhit[*rows++] = spidx[ci[col] + k];
72476e0d0aSHong Zhang         nrows++;
7379c1e64dSHong Zhang       }
7479c1e64dSHong Zhang     }
75476e0d0aSHong Zhang     c->nrows[i] = nrows; /* total num of rows for this color */
7679c1e64dSHong Zhang 
7779c1e64dSHong Zhang     ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
7879c1e64dSHong Zhang     ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
7979c1e64dSHong Zhang     nrows = 0;
8079c1e64dSHong Zhang     for (j=0; j<N; j++) { /* loop over rows */
8179c1e64dSHong Zhang       if (rowhit[j]) {
8279c1e64dSHong Zhang         c->rows[i][nrows]          = j;             /* row index */
8379c1e64dSHong Zhang         c->columnsforrow[i][nrows] = rowhit[j] - 1; /* column index */
8479c1e64dSHong Zhang         den2sp_i[nrows++]          = spidxhit[j];
85*85740eacSHong Zhang 
86*85740eacSHong Zhang         c->colorforrow[nz_den]    = j;             /* row index */
87*85740eacSHong Zhang         c->colorforcolumn[nz_den] = rowhit[j] - 1; /* column index */
88*85740eacSHong Zhang 
89*85740eacSHong Zhang         c->rowcolden2sp3[3*nz_den]   = j;             /* row index */
90*85740eacSHong Zhang         c->rowcolden2sp3[3*nz_den+1] = rowhit[j] - 1; /* column index */
91*85740eacSHong Zhang         c->rowcolden2sp3[3*nz_den+2] = spidxhit[j];
92*85740eacSHong Zhang 
93*85740eacSHong Zhang         nz_den++;
9479c1e64dSHong Zhang         rowhit[j] = 0.0; /* zero rowhit for reuse */
9579c1e64dSHong Zhang       }
9679c1e64dSHong Zhang     }
9779c1e64dSHong Zhang     den2sp_i += nrows;
9879c1e64dSHong Zhang     ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr);
9979c1e64dSHong Zhang   }
100*85740eacSHong Zhang   printf("nz_den %d, csp->nz %d\n",nz_den,csp->nz);
101*85740eacSHong Zhang 
1026378f32dSHong Zhang   ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
103476e0d0aSHong Zhang   ierr = PetscFree3(rowhit,columnsforrow,spidxhit);CHKERRQ(ierr);
10479c1e64dSHong Zhang   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
105476e0d0aSHong Zhang 
10679c1e64dSHong Zhang   c->den2sp                 = den2sp;
1074737c552SHong Zhang   c->ctype                  = IS_COLORING_GHOSTED;
10879c1e64dSHong Zhang   mat->ops->fdcoloringapply = MatFDColoringApply_SeqAIJ;
10979c1e64dSHong Zhang   PetscFunctionReturn(0);
11079c1e64dSHong Zhang }
11179c1e64dSHong Zhang 
11279c1e64dSHong Zhang /* --------------------------------------------------------------- */
11379c1e64dSHong Zhang 
1144a2ae208SSatish Balay #undef __FUNCT__
1154a2ae208SSatish Balay #define __FUNCT__ "MatFDColoringCreate_SeqAIJ"
1163acb8795SBarry Smith /*
1173acb8795SBarry Smith     This routine is shared by AIJ and BAIJ matrices, since it operators only on the nonzero structure of the elements or blocks.
1183acb8795SBarry Smith     This is why it has the ugly code with the MatGetBlockSize()
1193acb8795SBarry Smith */
120dfbe8321SBarry Smith PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
121639f9d9dSBarry Smith {
1226849ba73SBarry Smith   PetscErrorCode ierr;
1231a83f524SJed Brown   PetscInt       i,n,nrows,N,j,k,m,ncols,col;
1241a83f524SJed Brown   const PetscInt *is,*rows,*ci,*cj;
1253acb8795SBarry Smith   PetscInt       nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1;
126b9617806SBarry Smith   IS             *isa;
127ace3abfcSBarry Smith   PetscBool      done,flg = PETSC_FALSE;
128476e0d0aSHong Zhang   PetscBool      flg1;
12979c1e64dSHong Zhang   PetscBool      new_impl=PETSC_FALSE;
130639f9d9dSBarry Smith 
1313a40ed3dSBarry Smith   PetscFunctionBegin;
13279c1e64dSHong Zhang   ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr);
13379c1e64dSHong Zhang   if (new_impl){
13479c1e64dSHong Zhang     ierr =  MatFDColoringCreate_SeqAIJ_den2sp(mat,iscoloring,c);CHKERRQ(ierr);
13579c1e64dSHong Zhang     PetscFunctionReturn(0);
13679c1e64dSHong Zhang   }
137e7e72b3dSBarry Smith   if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();");
138522c5e43SBarry Smith 
139b9617806SBarry Smith   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
140476e0d0aSHong Zhang   /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1.
141476e0d0aSHong Zhang      SeqBAIJ calls this routine, thus check it */
142251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr);
143476e0d0aSHong Zhang   if (flg1) {
1443acb8795SBarry Smith     ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
1453acb8795SBarry Smith   }
1463acb8795SBarry Smith 
1473acb8795SBarry Smith   N         = mat->cmap->N/bs;
1483acb8795SBarry Smith   c->M      = mat->rmap->N/bs;   /* set total rows, columns and local rows */
1493acb8795SBarry Smith   c->N      = mat->cmap->N/bs;
1503acb8795SBarry Smith   c->m      = mat->rmap->N/bs;
151005c665bSBarry Smith   c->rstart = 0;
152005c665bSBarry Smith 
153639f9d9dSBarry Smith   c->ncolors = nis;
15438baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
15538baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
15638baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
15738baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
15838baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
15943a90d84SBarry Smith 
1603acb8795SBarry Smith   ierr = MatGetColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr);
161ce94432eSBarry Smith   if (!done) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name);
16270c3da92SBarry Smith 
16370c3da92SBarry Smith   /*
164639f9d9dSBarry Smith      Temporary option to allow for debugging/testing
16570c3da92SBarry Smith   */
1660298fd71SBarry Smith   ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr);
16770c3da92SBarry Smith 
16838baddfdSBarry Smith   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
16938baddfdSBarry Smith   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
17070c3da92SBarry Smith 
171639f9d9dSBarry Smith   for (i=0; i<nis; i++) {
172b9b97703SBarry Smith     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
173639f9d9dSBarry Smith     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
1742205254eSKarl Rupp 
175639f9d9dSBarry Smith     c->ncolumns[i] = n;
176639f9d9dSBarry Smith     if (n) {
17738baddfdSBarry Smith       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
17838baddfdSBarry Smith       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
17970c3da92SBarry Smith     } else {
180639f9d9dSBarry Smith       c->columns[i] = 0;
18170c3da92SBarry Smith     }
18270c3da92SBarry Smith 
1833a7fca6bSBarry Smith     if (!flg) { /* ------------------------------------------------------------------------------*/
1843a7fca6bSBarry Smith       /* fast, crude version requires O(N*N) work */
18538baddfdSBarry Smith       ierr = PetscMemzero(rowhit,N*sizeof(PetscInt));CHKERRQ(ierr);
186639f9d9dSBarry Smith       /* loop over columns*/
187639f9d9dSBarry Smith       for (j=0; j<n; j++) {
188639f9d9dSBarry Smith         col  = is[j];
189639f9d9dSBarry Smith         rows = cj + ci[col];
190639f9d9dSBarry Smith         m    = ci[col+1] - ci[col];
191639f9d9dSBarry Smith         /* loop over columns marking them in rowhit */
192639f9d9dSBarry Smith         for (k=0; k<m; k++) {
193639f9d9dSBarry Smith           rowhit[*rows++] = col + 1;
19470c3da92SBarry Smith         }
19570c3da92SBarry Smith       }
196639f9d9dSBarry Smith       /* count the number of hits */
197639f9d9dSBarry Smith       nrows = 0;
19870c3da92SBarry Smith       for (j=0; j<N; j++) {
199639f9d9dSBarry Smith         if (rowhit[j]) nrows++;
200639f9d9dSBarry Smith       }
201639f9d9dSBarry Smith       c->nrows[i] = nrows;
20238baddfdSBarry Smith       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
20338baddfdSBarry Smith       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
204639f9d9dSBarry Smith       nrows       = 0;
205639f9d9dSBarry Smith       for (j=0; j<N; j++) {
206639f9d9dSBarry Smith         if (rowhit[j]) {
207639f9d9dSBarry Smith           c->rows[i][nrows]          = j;
208639f9d9dSBarry Smith           c->columnsforrow[i][nrows] = rowhit[j] - 1;
209639f9d9dSBarry Smith           nrows++;
21070c3da92SBarry Smith         }
21170c3da92SBarry Smith       }
212639f9d9dSBarry Smith     } else {  /*-------------------------------------------------------------------------------*/
2133a7fca6bSBarry Smith       /* slow version, using rowhit as a linked list */
21438baddfdSBarry Smith       PetscInt currentcol,fm,mfm;
215639f9d9dSBarry Smith       rowhit[N] = N;
216639f9d9dSBarry Smith       nrows     = 0;
217639f9d9dSBarry Smith       /* loop over columns */
218639f9d9dSBarry Smith       for (j=0; j<n; j++) {
219639f9d9dSBarry Smith         col  = is[j];
220639f9d9dSBarry Smith         rows = cj + ci[col];
221639f9d9dSBarry Smith         m    = ci[col+1] - ci[col];
222639f9d9dSBarry Smith         /* loop over columns marking them in rowhit */
223639f9d9dSBarry Smith         fm = N;    /* fm points to first entry in linked list */
224639f9d9dSBarry Smith         for (k=0; k<m; k++) {
225639f9d9dSBarry Smith           currentcol = *rows++;
226639f9d9dSBarry Smith           /* is it already in the list? */
227639f9d9dSBarry Smith           do {
228639f9d9dSBarry Smith             mfm = fm;
229639f9d9dSBarry Smith             fm  = rowhit[fm];
230639f9d9dSBarry Smith           } while (fm < currentcol);
231639f9d9dSBarry Smith           /* not in list so add it */
232639f9d9dSBarry Smith           if (fm != currentcol) {
233639f9d9dSBarry Smith             nrows++;
234639f9d9dSBarry Smith             columnsforrow[currentcol] = col;
235639f9d9dSBarry Smith             /* next three lines insert new entry into linked list */
236639f9d9dSBarry Smith             rowhit[mfm]        = currentcol;
237639f9d9dSBarry Smith             rowhit[currentcol] = fm;
238639f9d9dSBarry Smith             fm                 = currentcol;
239639f9d9dSBarry Smith             /* fm points to present position in list since we know the columns are sorted */
24071cd77b2SBarry Smith           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Detected invalid coloring");
241639f9d9dSBarry Smith         }
242639f9d9dSBarry Smith       }
243639f9d9dSBarry Smith       c->nrows[i] = nrows;
24438baddfdSBarry Smith       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
24538baddfdSBarry Smith       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
246639f9d9dSBarry Smith       /* now store the linked list of rows into c->rows[i] */
247639f9d9dSBarry Smith       nrows = 0;
248639f9d9dSBarry Smith       fm    = rowhit[N];
249639f9d9dSBarry Smith       do {
250639f9d9dSBarry Smith         c->rows[i][nrows]            = fm;
251639f9d9dSBarry Smith         c->columnsforrow[i][nrows++] = columnsforrow[fm];
252639f9d9dSBarry Smith         fm                           = rowhit[fm];
253639f9d9dSBarry Smith       } while (fm < N);
254639f9d9dSBarry Smith     } /* ---------------------------------------------------------------------------------------*/
255639f9d9dSBarry Smith     ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr);
256639f9d9dSBarry Smith   }
2573acb8795SBarry Smith   ierr = MatRestoreColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr);
258639f9d9dSBarry Smith 
259606d414cSSatish Balay   ierr = PetscFree(rowhit);CHKERRQ(ierr);
260606d414cSSatish Balay   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
261639f9d9dSBarry Smith 
26230b34957SBarry Smith   /* Optimize by adding the vscale, and scaleforrow[][] fields */
26330b34957SBarry Smith   /*
26430b34957SBarry Smith        see the version for MPIAIJ
26530b34957SBarry Smith   */
266ce94432eSBarry Smith   ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr);
26738baddfdSBarry Smith   ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
26830b34957SBarry Smith   for (k=0; k<c->ncolors; k++) {
26938baddfdSBarry Smith     ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
27030b34957SBarry Smith     for (l=0; l<c->nrows[k]; l++) {
27130b34957SBarry Smith       col = c->columnsforrow[k][l];
2722205254eSKarl Rupp 
27330b34957SBarry Smith       c->vscaleforrow[k][l] = col;
27430b34957SBarry Smith     }
27530b34957SBarry Smith   }
276b9617806SBarry Smith   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2773a40ed3dSBarry Smith   PetscFunctionReturn(0);
27870c3da92SBarry Smith }
27979c1e64dSHong Zhang 
280