xref: /petsc/src/mat/impls/aij/seq/fdaij.c (revision 52011a10b53376dfcff65e768dfa248fdc1daf05)
170c3da92SBarry Smith 
2c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h>
3639f9d9dSBarry Smith 
479c1e64dSHong Zhang #undef __FUNCT__
579c1e64dSHong Zhang #define __FUNCT__ "MatFDColoringCreate_SeqAIJ_den2sp"
679c1e64dSHong Zhang /*
779c1e64dSHong Zhang     This routine optimizes MatFDColoringCreate_SeqAIJ() by using den2sp array
879c1e64dSHong Zhang */
979c1e64dSHong Zhang PetscErrorCode MatFDColoringCreate_SeqAIJ_den2sp(Mat mat,ISColoring iscoloring,MatFDColoring c)
1079c1e64dSHong Zhang {
1179c1e64dSHong Zhang   PetscErrorCode ierr;
1279c1e64dSHong Zhang   PetscInt       i,n,nrows,N,j,k,m,ncols,col;
1379c1e64dSHong Zhang   const PetscInt *is,*rows,*ci,*cj;
14*52011a10SHong Zhang   PetscInt       nis=iscoloring->n,*rowhit,*columnsforrow,bs,*spidx,*spidxhit,nz;
1579c1e64dSHong Zhang   IS             *isa;
16476e0d0aSHong Zhang   PetscBool      flg1;
1779c1e64dSHong Zhang   Mat_SeqAIJ     *csp = (Mat_SeqAIJ*)mat->data;
1879c1e64dSHong Zhang 
1979c1e64dSHong Zhang   PetscFunctionBegin;
2079c1e64dSHong Zhang   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
21*52011a10SHong Zhang 
22476e0d0aSHong Zhang   /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1.
23476e0d0aSHong Zhang      SeqBAIJ calls this routine, thus check it */
2479c1e64dSHong Zhang   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
25*52011a10SHong Zhang   ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr);
26*52011a10SHong Zhang   if (!flg1) {
27*52011a10SHong Zhang     bs = 1; /* only bs=1 is supported for non SEQBAIJ matrix */
2879c1e64dSHong Zhang   }
2979c1e64dSHong Zhang   N         = mat->cmap->N/bs;
3079c1e64dSHong Zhang   c->M      = mat->rmap->N/bs;   /* set total rows, columns and local rows */
3179c1e64dSHong Zhang   c->N      = mat->cmap->N/bs;
3279c1e64dSHong Zhang   c->m      = mat->rmap->N/bs;
3379c1e64dSHong Zhang   c->rstart = 0;
3479c1e64dSHong Zhang 
3579c1e64dSHong Zhang   c->ncolors = nis;
3679c1e64dSHong Zhang   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
3779c1e64dSHong Zhang   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
3879c1e64dSHong Zhang   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
3985740eacSHong Zhang   ierr       = PetscMalloc(3*csp->nz*sizeof(PetscInt*),&c->rowcolden2sp3);CHKERRQ(ierr);
4079c1e64dSHong Zhang 
416378f32dSHong Zhang   ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
4279c1e64dSHong Zhang 
43476e0d0aSHong Zhang   ierr = PetscMalloc3(c->m,PetscInt,&rowhit,N,PetscInt,&columnsforrow,c->m,PetscInt,&spidxhit);CHKERRQ(ierr);
4479c1e64dSHong Zhang   ierr = PetscMemzero(rowhit,c->m*sizeof(PetscInt));CHKERRQ(ierr);
4579c1e64dSHong Zhang 
4617a7dee1SHong Zhang   nz = 0;
4779c1e64dSHong Zhang   for (i=0; i<nis; i++) { /* loop over colors */
4879c1e64dSHong Zhang     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
4979c1e64dSHong Zhang     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
5079c1e64dSHong Zhang 
5179c1e64dSHong Zhang     c->ncolumns[i] = n;
5279c1e64dSHong Zhang     if (n) {
5379c1e64dSHong Zhang       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
5479c1e64dSHong Zhang       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
5579c1e64dSHong Zhang     } else {
5679c1e64dSHong Zhang       c->columns[i] = 0;
5779c1e64dSHong Zhang     }
5879c1e64dSHong Zhang 
5979c1e64dSHong Zhang     /* fast, crude version requires O(N*N) work */
60476e0d0aSHong Zhang     nrows = 0;
6179c1e64dSHong Zhang     for (j=0; j<n; j++) {  /* loop over columns */
6279c1e64dSHong Zhang       col  = is[j];
6379c1e64dSHong Zhang       rows = cj + ci[col];
6479c1e64dSHong Zhang       m    = ci[col+1] - ci[col];
6579c1e64dSHong Zhang       for (k=0; k<m; k++) {  /* loop over columns marking them in rowhit */
6679c1e64dSHong Zhang         rowhit[*rows]     = col + 1;
67476e0d0aSHong Zhang         spidxhit[*rows++] = spidx[ci[col] + k];
68476e0d0aSHong Zhang         nrows++;
6979c1e64dSHong Zhang       }
7079c1e64dSHong Zhang     }
71476e0d0aSHong Zhang     c->nrows[i] = nrows; /* total num of rows for this color */
7279c1e64dSHong Zhang 
7379c1e64dSHong Zhang     nrows = 0;
7479c1e64dSHong Zhang     for (j=0; j<N; j++) { /* loop over rows */
7579c1e64dSHong Zhang       if (rowhit[j]) {
7617a7dee1SHong Zhang         c->rowcolden2sp3[nz++]   = j;             /* row index */
7717a7dee1SHong Zhang         c->rowcolden2sp3[nz++] = rowhit[j] - 1;   /* column index */
7817a7dee1SHong Zhang         c->rowcolden2sp3[nz++] = spidxhit[j];     /* den2sp index */
7979c1e64dSHong Zhang         rowhit[j] = 0.0;                          /* zero rowhit for reuse */
8079c1e64dSHong Zhang       }
8179c1e64dSHong Zhang     }
8279c1e64dSHong Zhang     ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr);
8379c1e64dSHong Zhang   }
8417a7dee1SHong Zhang   if (nz/3 != csp->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"nz %d != mat->nz %d\n",nz/3,csp->nz);
8585740eacSHong Zhang 
866378f32dSHong Zhang   ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
87476e0d0aSHong Zhang   ierr = PetscFree3(rowhit,columnsforrow,spidxhit);CHKERRQ(ierr);
8879c1e64dSHong Zhang   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
89476e0d0aSHong Zhang 
904737c552SHong Zhang   c->ctype                  = IS_COLORING_GHOSTED;
9179c1e64dSHong Zhang   mat->ops->fdcoloringapply = MatFDColoringApply_SeqAIJ;
92*52011a10SHong Zhang   ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr);
9379c1e64dSHong Zhang   PetscFunctionReturn(0);
9479c1e64dSHong Zhang }
9579c1e64dSHong Zhang 
9679c1e64dSHong Zhang /* --------------------------------------------------------------- */
9779c1e64dSHong Zhang 
984a2ae208SSatish Balay #undef __FUNCT__
994a2ae208SSatish Balay #define __FUNCT__ "MatFDColoringCreate_SeqAIJ"
1003acb8795SBarry Smith /*
1013acb8795SBarry Smith     This routine is shared by AIJ and BAIJ matrices, since it operators only on the nonzero structure of the elements or blocks.
1023acb8795SBarry Smith     This is why it has the ugly code with the MatGetBlockSize()
1033acb8795SBarry Smith */
104dfbe8321SBarry Smith PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
105639f9d9dSBarry Smith {
1066849ba73SBarry Smith   PetscErrorCode ierr;
1071a83f524SJed Brown   PetscInt       i,n,nrows,N,j,k,m,ncols,col;
1081a83f524SJed Brown   const PetscInt *is,*rows,*ci,*cj;
1093acb8795SBarry Smith   PetscInt       nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1;
110b9617806SBarry Smith   IS             *isa;
111ace3abfcSBarry Smith   PetscBool      done,flg = PETSC_FALSE;
112476e0d0aSHong Zhang   PetscBool      flg1;
11379c1e64dSHong Zhang   PetscBool      new_impl=PETSC_FALSE;
114639f9d9dSBarry Smith 
1153a40ed3dSBarry Smith   PetscFunctionBegin;
11679c1e64dSHong Zhang   ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr);
11779c1e64dSHong Zhang   if (new_impl){
11879c1e64dSHong Zhang     ierr =  MatFDColoringCreate_SeqAIJ_den2sp(mat,iscoloring,c);CHKERRQ(ierr);
11979c1e64dSHong Zhang     PetscFunctionReturn(0);
12079c1e64dSHong Zhang   }
121e7e72b3dSBarry Smith   if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();");
122522c5e43SBarry Smith 
123b9617806SBarry Smith   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
124476e0d0aSHong Zhang   /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1.
125476e0d0aSHong Zhang      SeqBAIJ calls this routine, thus check it */
126251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr);
127476e0d0aSHong Zhang   if (flg1) {
1283acb8795SBarry Smith     ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
1293acb8795SBarry Smith   }
1303acb8795SBarry Smith 
1313acb8795SBarry Smith   N         = mat->cmap->N/bs;
1323acb8795SBarry Smith   c->M      = mat->rmap->N/bs;   /* set total rows, columns and local rows */
1333acb8795SBarry Smith   c->N      = mat->cmap->N/bs;
1343acb8795SBarry Smith   c->m      = mat->rmap->N/bs;
135005c665bSBarry Smith   c->rstart = 0;
136005c665bSBarry Smith 
137639f9d9dSBarry Smith   c->ncolors = nis;
13838baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
13938baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
14038baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
14138baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
14238baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
14343a90d84SBarry Smith 
1443acb8795SBarry Smith   ierr = MatGetColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr);
145ce94432eSBarry Smith   if (!done) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name);
14670c3da92SBarry Smith 
14770c3da92SBarry Smith   /*
148639f9d9dSBarry Smith      Temporary option to allow for debugging/testing
14970c3da92SBarry Smith   */
1500298fd71SBarry Smith   ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr);
15170c3da92SBarry Smith 
15238baddfdSBarry Smith   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
15338baddfdSBarry Smith   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
15470c3da92SBarry Smith 
155639f9d9dSBarry Smith   for (i=0; i<nis; i++) {
156b9b97703SBarry Smith     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
157639f9d9dSBarry Smith     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
1582205254eSKarl Rupp 
159639f9d9dSBarry Smith     c->ncolumns[i] = n;
160639f9d9dSBarry Smith     if (n) {
16138baddfdSBarry Smith       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
16238baddfdSBarry Smith       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
16370c3da92SBarry Smith     } else {
164639f9d9dSBarry Smith       c->columns[i] = 0;
16570c3da92SBarry Smith     }
16670c3da92SBarry Smith 
1673a7fca6bSBarry Smith     if (!flg) { /* ------------------------------------------------------------------------------*/
1683a7fca6bSBarry Smith       /* fast, crude version requires O(N*N) work */
16938baddfdSBarry Smith       ierr = PetscMemzero(rowhit,N*sizeof(PetscInt));CHKERRQ(ierr);
170639f9d9dSBarry Smith       /* loop over columns*/
171639f9d9dSBarry Smith       for (j=0; j<n; j++) {
172639f9d9dSBarry Smith         col  = is[j];
173639f9d9dSBarry Smith         rows = cj + ci[col];
174639f9d9dSBarry Smith         m    = ci[col+1] - ci[col];
175639f9d9dSBarry Smith         /* loop over columns marking them in rowhit */
176639f9d9dSBarry Smith         for (k=0; k<m; k++) {
177639f9d9dSBarry Smith           rowhit[*rows++] = col + 1;
17870c3da92SBarry Smith         }
17970c3da92SBarry Smith       }
180639f9d9dSBarry Smith       /* count the number of hits */
181639f9d9dSBarry Smith       nrows = 0;
18270c3da92SBarry Smith       for (j=0; j<N; j++) {
183639f9d9dSBarry Smith         if (rowhit[j]) nrows++;
184639f9d9dSBarry Smith       }
185639f9d9dSBarry Smith       c->nrows[i] = nrows;
18638baddfdSBarry Smith       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
18738baddfdSBarry Smith       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
188639f9d9dSBarry Smith       nrows       = 0;
189639f9d9dSBarry Smith       for (j=0; j<N; j++) {
190639f9d9dSBarry Smith         if (rowhit[j]) {
191639f9d9dSBarry Smith           c->rows[i][nrows]          = j;
192639f9d9dSBarry Smith           c->columnsforrow[i][nrows] = rowhit[j] - 1;
193639f9d9dSBarry Smith           nrows++;
19470c3da92SBarry Smith         }
19570c3da92SBarry Smith       }
196639f9d9dSBarry Smith     } else {  /*-------------------------------------------------------------------------------*/
1973a7fca6bSBarry Smith       /* slow version, using rowhit as a linked list */
19838baddfdSBarry Smith       PetscInt currentcol,fm,mfm;
199639f9d9dSBarry Smith       rowhit[N] = N;
200639f9d9dSBarry Smith       nrows     = 0;
201639f9d9dSBarry Smith       /* loop over columns */
202639f9d9dSBarry Smith       for (j=0; j<n; j++) {
203639f9d9dSBarry Smith         col  = is[j];
204639f9d9dSBarry Smith         rows = cj + ci[col];
205639f9d9dSBarry Smith         m    = ci[col+1] - ci[col];
206639f9d9dSBarry Smith         /* loop over columns marking them in rowhit */
207639f9d9dSBarry Smith         fm = N;    /* fm points to first entry in linked list */
208639f9d9dSBarry Smith         for (k=0; k<m; k++) {
209639f9d9dSBarry Smith           currentcol = *rows++;
210639f9d9dSBarry Smith           /* is it already in the list? */
211639f9d9dSBarry Smith           do {
212639f9d9dSBarry Smith             mfm = fm;
213639f9d9dSBarry Smith             fm  = rowhit[fm];
214639f9d9dSBarry Smith           } while (fm < currentcol);
215639f9d9dSBarry Smith           /* not in list so add it */
216639f9d9dSBarry Smith           if (fm != currentcol) {
217639f9d9dSBarry Smith             nrows++;
218639f9d9dSBarry Smith             columnsforrow[currentcol] = col;
219639f9d9dSBarry Smith             /* next three lines insert new entry into linked list */
220639f9d9dSBarry Smith             rowhit[mfm]        = currentcol;
221639f9d9dSBarry Smith             rowhit[currentcol] = fm;
222639f9d9dSBarry Smith             fm                 = currentcol;
223639f9d9dSBarry Smith             /* fm points to present position in list since we know the columns are sorted */
22471cd77b2SBarry Smith           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Detected invalid coloring");
225639f9d9dSBarry Smith         }
226639f9d9dSBarry Smith       }
227639f9d9dSBarry Smith       c->nrows[i] = nrows;
22838baddfdSBarry Smith       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
22938baddfdSBarry Smith       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
230639f9d9dSBarry Smith       /* now store the linked list of rows into c->rows[i] */
231639f9d9dSBarry Smith       nrows = 0;
232639f9d9dSBarry Smith       fm    = rowhit[N];
233639f9d9dSBarry Smith       do {
234639f9d9dSBarry Smith         c->rows[i][nrows]            = fm;
235639f9d9dSBarry Smith         c->columnsforrow[i][nrows++] = columnsforrow[fm];
236639f9d9dSBarry Smith         fm                           = rowhit[fm];
237639f9d9dSBarry Smith       } while (fm < N);
238639f9d9dSBarry Smith     } /* ---------------------------------------------------------------------------------------*/
239639f9d9dSBarry Smith     ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr);
240639f9d9dSBarry Smith   }
2413acb8795SBarry Smith   ierr = MatRestoreColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr);
242639f9d9dSBarry Smith 
243606d414cSSatish Balay   ierr = PetscFree(rowhit);CHKERRQ(ierr);
244606d414cSSatish Balay   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
245639f9d9dSBarry Smith 
24630b34957SBarry Smith   /* Optimize by adding the vscale, and scaleforrow[][] fields */
24730b34957SBarry Smith   /*
24830b34957SBarry Smith        see the version for MPIAIJ
24930b34957SBarry Smith   */
250ce94432eSBarry Smith   ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr);
25138baddfdSBarry Smith   ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
25230b34957SBarry Smith   for (k=0; k<c->ncolors; k++) {
25338baddfdSBarry Smith     ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
25430b34957SBarry Smith     for (l=0; l<c->nrows[k]; l++) {
25530b34957SBarry Smith       col = c->columnsforrow[k][l];
2562205254eSKarl Rupp 
25730b34957SBarry Smith       c->vscaleforrow[k][l] = col;
25830b34957SBarry Smith     }
25930b34957SBarry Smith   }
260b9617806SBarry Smith   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2613a40ed3dSBarry Smith   PetscFunctionReturn(0);
26270c3da92SBarry Smith }
26379c1e64dSHong Zhang 
264