xref: /petsc/src/mat/impls/aij/seq/fdaij.c (revision 476e0d0aa5a21558d06346d6913ded26b2072ae9)
170c3da92SBarry Smith 
2c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h>
3639f9d9dSBarry Smith 
479c1e64dSHong Zhang #undef __FUNCT__
579c1e64dSHong Zhang #define __FUNCT__ "MatFDColoringCreate_SeqAIJ_den2sp"
679c1e64dSHong Zhang /*
779c1e64dSHong Zhang     This routine optimizes MatFDColoringCreate_SeqAIJ() by using den2sp array
879c1e64dSHong Zhang */
979c1e64dSHong Zhang PetscErrorCode MatFDColoringCreate_SeqAIJ_den2sp(Mat mat,ISColoring iscoloring,MatFDColoring c)
1079c1e64dSHong Zhang {
1179c1e64dSHong Zhang   PetscErrorCode ierr;
1279c1e64dSHong Zhang   PetscInt       i,n,nrows,N,j,k,m,ncols,col;
1379c1e64dSHong Zhang   const PetscInt *is,*rows,*ci,*cj;
14*476e0d0aSHong Zhang   PetscInt       nis=iscoloring->n,*rowhit,*columnsforrow,bs=1,*spidx,*spidxhit,*den2sp,*den2sp_i;
1579c1e64dSHong Zhang   IS             *isa;
16*476e0d0aSHong Zhang   PetscBool      flg1;
1779c1e64dSHong Zhang   Mat_SeqAIJ     *csp = (Mat_SeqAIJ*)mat->data;
1879c1e64dSHong Zhang 
1979c1e64dSHong Zhang   PetscFunctionBegin;
2079c1e64dSHong Zhang   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
21*476e0d0aSHong Zhang   /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1.
22*476e0d0aSHong Zhang      SeqBAIJ calls this routine, thus check it */
2379c1e64dSHong Zhang   ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr);
24*476e0d0aSHong Zhang   if (flg1) {
2579c1e64dSHong Zhang     ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2679c1e64dSHong Zhang   }
2779c1e64dSHong Zhang 
2879c1e64dSHong Zhang   N         = mat->cmap->N/bs;
2979c1e64dSHong Zhang   c->M      = mat->rmap->N/bs;   /* set total rows, columns and local rows */
3079c1e64dSHong Zhang   c->N      = mat->cmap->N/bs;
3179c1e64dSHong Zhang   c->m      = mat->rmap->N/bs;
3279c1e64dSHong Zhang   c->rstart = 0;
3379c1e64dSHong Zhang 
3479c1e64dSHong Zhang   c->ncolors = nis;
3579c1e64dSHong Zhang   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
3679c1e64dSHong Zhang   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
3779c1e64dSHong Zhang   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
3879c1e64dSHong Zhang   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
3979c1e64dSHong Zhang   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
4079c1e64dSHong Zhang   ierr       = PetscMalloc((csp->nz+1)*sizeof(PetscInt),&den2sp);CHKERRQ(ierr);
4179c1e64dSHong Zhang   den2sp_i   = den2sp;
4279c1e64dSHong Zhang 
436378f32dSHong Zhang   ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
4479c1e64dSHong Zhang 
45*476e0d0aSHong Zhang   ierr = PetscMalloc3(c->m,PetscInt,&rowhit,N,PetscInt,&columnsforrow,c->m,PetscInt,&spidxhit);CHKERRQ(ierr);
4679c1e64dSHong Zhang   ierr = PetscMemzero(rowhit,c->m*sizeof(PetscInt));CHKERRQ(ierr);
4779c1e64dSHong Zhang 
4879c1e64dSHong Zhang   for (i=0; i<nis; i++) { /* loop over colors */
4979c1e64dSHong Zhang     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
5079c1e64dSHong Zhang     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
5179c1e64dSHong Zhang 
5279c1e64dSHong Zhang     c->ncolumns[i] = n;
5379c1e64dSHong Zhang     if (n) {
5479c1e64dSHong Zhang       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
5579c1e64dSHong Zhang       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
5679c1e64dSHong Zhang     } else {
5779c1e64dSHong Zhang       c->columns[i] = 0;
5879c1e64dSHong Zhang     }
5979c1e64dSHong Zhang 
6079c1e64dSHong Zhang     /* fast, crude version requires O(N*N) work */
61*476e0d0aSHong Zhang     nrows = 0;
6279c1e64dSHong Zhang     for (j=0; j<n; j++) {  /* loop over columns */
6379c1e64dSHong Zhang       col  = is[j];
6479c1e64dSHong Zhang       rows = cj + ci[col];
6579c1e64dSHong Zhang       m    = ci[col+1] - ci[col];
6679c1e64dSHong Zhang       for (k=0; k<m; k++) {  /* loop over columns marking them in rowhit */
6779c1e64dSHong Zhang         rowhit[*rows]     = col + 1;
68*476e0d0aSHong Zhang         spidxhit[*rows++] = spidx[ci[col] + k];
69*476e0d0aSHong Zhang         nrows++;
7079c1e64dSHong Zhang       }
7179c1e64dSHong Zhang     }
72*476e0d0aSHong Zhang     c->nrows[i] = nrows; /* total num of rows for this color */
7379c1e64dSHong Zhang 
7479c1e64dSHong Zhang     ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
7579c1e64dSHong Zhang     ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
7679c1e64dSHong Zhang     nrows = 0;
7779c1e64dSHong Zhang     for (j=0; j<N; j++) { /* loop over rows */
7879c1e64dSHong Zhang       if (rowhit[j]) {
7979c1e64dSHong Zhang         c->rows[i][nrows]          = j;             /* row index */
8079c1e64dSHong Zhang         c->columnsforrow[i][nrows] = rowhit[j] - 1; /* column index */
8179c1e64dSHong Zhang         den2sp_i[nrows++]          = spidxhit[j];
8279c1e64dSHong Zhang         rowhit[j] = 0.0; /* zero rowhit for reuse */
8379c1e64dSHong Zhang       }
8479c1e64dSHong Zhang     }
8579c1e64dSHong Zhang     den2sp_i += nrows;
8679c1e64dSHong Zhang     ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr);
8779c1e64dSHong Zhang   }
886378f32dSHong Zhang   ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
89*476e0d0aSHong Zhang   ierr = PetscFree3(rowhit,columnsforrow,spidxhit);CHKERRQ(ierr);
9079c1e64dSHong Zhang   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
91*476e0d0aSHong Zhang 
9279c1e64dSHong Zhang   c->den2sp                 = den2sp;
934737c552SHong Zhang   c->ctype                  = IS_COLORING_GHOSTED;
9479c1e64dSHong Zhang   mat->ops->fdcoloringapply = MatFDColoringApply_SeqAIJ;
9579c1e64dSHong Zhang   PetscFunctionReturn(0);
9679c1e64dSHong Zhang }
9779c1e64dSHong Zhang 
9879c1e64dSHong Zhang /* --------------------------------------------------------------- */
9979c1e64dSHong Zhang 
1004a2ae208SSatish Balay #undef __FUNCT__
1014a2ae208SSatish Balay #define __FUNCT__ "MatFDColoringCreate_SeqAIJ"
1023acb8795SBarry Smith /*
1033acb8795SBarry Smith     This routine is shared by AIJ and BAIJ matrices, since it operators only on the nonzero structure of the elements or blocks.
1043acb8795SBarry Smith     This is why it has the ugly code with the MatGetBlockSize()
1053acb8795SBarry Smith */
106dfbe8321SBarry Smith PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
107639f9d9dSBarry Smith {
1086849ba73SBarry Smith   PetscErrorCode ierr;
1091a83f524SJed Brown   PetscInt       i,n,nrows,N,j,k,m,ncols,col;
1101a83f524SJed Brown   const PetscInt *is,*rows,*ci,*cj;
1113acb8795SBarry Smith   PetscInt       nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1;
112b9617806SBarry Smith   IS             *isa;
113ace3abfcSBarry Smith   PetscBool      done,flg = PETSC_FALSE;
114*476e0d0aSHong Zhang   PetscBool      flg1;
11579c1e64dSHong Zhang   PetscBool      new_impl=PETSC_FALSE;
116639f9d9dSBarry Smith 
1173a40ed3dSBarry Smith   PetscFunctionBegin;
11879c1e64dSHong Zhang   ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr);
11979c1e64dSHong Zhang   if (new_impl){
12079c1e64dSHong Zhang     ierr =  MatFDColoringCreate_SeqAIJ_den2sp(mat,iscoloring,c);CHKERRQ(ierr);
12179c1e64dSHong Zhang     PetscFunctionReturn(0);
12279c1e64dSHong Zhang   }
123e7e72b3dSBarry Smith   if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();");
124522c5e43SBarry Smith 
125b9617806SBarry Smith   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
126*476e0d0aSHong Zhang   /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1.
127*476e0d0aSHong Zhang      SeqBAIJ calls this routine, thus check it */
128251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr);
129*476e0d0aSHong Zhang   if (flg1) {
1303acb8795SBarry Smith     ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
1313acb8795SBarry Smith   }
1323acb8795SBarry Smith 
1333acb8795SBarry Smith   N         = mat->cmap->N/bs;
1343acb8795SBarry Smith   c->M      = mat->rmap->N/bs;   /* set total rows, columns and local rows */
1353acb8795SBarry Smith   c->N      = mat->cmap->N/bs;
1363acb8795SBarry Smith   c->m      = mat->rmap->N/bs;
137005c665bSBarry Smith   c->rstart = 0;
138005c665bSBarry Smith 
139639f9d9dSBarry Smith   c->ncolors = nis;
14038baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
14138baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
14238baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
14338baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
14438baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
14543a90d84SBarry Smith 
1463acb8795SBarry Smith   ierr = MatGetColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr);
147ce94432eSBarry Smith   if (!done) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name);
14870c3da92SBarry Smith 
14970c3da92SBarry Smith   /*
150639f9d9dSBarry Smith      Temporary option to allow for debugging/testing
15170c3da92SBarry Smith   */
1520298fd71SBarry Smith   ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr);
15370c3da92SBarry Smith 
15438baddfdSBarry Smith   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
15538baddfdSBarry Smith   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
15670c3da92SBarry Smith 
157639f9d9dSBarry Smith   for (i=0; i<nis; i++) {
158b9b97703SBarry Smith     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
159639f9d9dSBarry Smith     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
1602205254eSKarl Rupp 
161639f9d9dSBarry Smith     c->ncolumns[i] = n;
162639f9d9dSBarry Smith     if (n) {
16338baddfdSBarry Smith       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
16438baddfdSBarry Smith       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
16570c3da92SBarry Smith     } else {
166639f9d9dSBarry Smith       c->columns[i] = 0;
16770c3da92SBarry Smith     }
16870c3da92SBarry Smith 
1693a7fca6bSBarry Smith     if (!flg) { /* ------------------------------------------------------------------------------*/
1703a7fca6bSBarry Smith       /* fast, crude version requires O(N*N) work */
17138baddfdSBarry Smith       ierr = PetscMemzero(rowhit,N*sizeof(PetscInt));CHKERRQ(ierr);
172639f9d9dSBarry Smith       /* loop over columns*/
173639f9d9dSBarry Smith       for (j=0; j<n; j++) {
174639f9d9dSBarry Smith         col  = is[j];
175639f9d9dSBarry Smith         rows = cj + ci[col];
176639f9d9dSBarry Smith         m    = ci[col+1] - ci[col];
177639f9d9dSBarry Smith         /* loop over columns marking them in rowhit */
178639f9d9dSBarry Smith         for (k=0; k<m; k++) {
179639f9d9dSBarry Smith           rowhit[*rows++] = col + 1;
18070c3da92SBarry Smith         }
18170c3da92SBarry Smith       }
182639f9d9dSBarry Smith       /* count the number of hits */
183639f9d9dSBarry Smith       nrows = 0;
18470c3da92SBarry Smith       for (j=0; j<N; j++) {
185639f9d9dSBarry Smith         if (rowhit[j]) nrows++;
186639f9d9dSBarry Smith       }
187639f9d9dSBarry Smith       c->nrows[i] = nrows;
18838baddfdSBarry Smith       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
18938baddfdSBarry Smith       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
190639f9d9dSBarry Smith       nrows       = 0;
191639f9d9dSBarry Smith       for (j=0; j<N; j++) {
192639f9d9dSBarry Smith         if (rowhit[j]) {
193639f9d9dSBarry Smith           c->rows[i][nrows]          = j;
194639f9d9dSBarry Smith           c->columnsforrow[i][nrows] = rowhit[j] - 1;
195639f9d9dSBarry Smith           nrows++;
19670c3da92SBarry Smith         }
19770c3da92SBarry Smith       }
198639f9d9dSBarry Smith     } else {  /*-------------------------------------------------------------------------------*/
1993a7fca6bSBarry Smith       /* slow version, using rowhit as a linked list */
20038baddfdSBarry Smith       PetscInt currentcol,fm,mfm;
201639f9d9dSBarry Smith       rowhit[N] = N;
202639f9d9dSBarry Smith       nrows     = 0;
203639f9d9dSBarry Smith       /* loop over columns */
204639f9d9dSBarry Smith       for (j=0; j<n; j++) {
205639f9d9dSBarry Smith         col  = is[j];
206639f9d9dSBarry Smith         rows = cj + ci[col];
207639f9d9dSBarry Smith         m    = ci[col+1] - ci[col];
208639f9d9dSBarry Smith         /* loop over columns marking them in rowhit */
209639f9d9dSBarry Smith         fm = N;    /* fm points to first entry in linked list */
210639f9d9dSBarry Smith         for (k=0; k<m; k++) {
211639f9d9dSBarry Smith           currentcol = *rows++;
212639f9d9dSBarry Smith           /* is it already in the list? */
213639f9d9dSBarry Smith           do {
214639f9d9dSBarry Smith             mfm = fm;
215639f9d9dSBarry Smith             fm  = rowhit[fm];
216639f9d9dSBarry Smith           } while (fm < currentcol);
217639f9d9dSBarry Smith           /* not in list so add it */
218639f9d9dSBarry Smith           if (fm != currentcol) {
219639f9d9dSBarry Smith             nrows++;
220639f9d9dSBarry Smith             columnsforrow[currentcol] = col;
221639f9d9dSBarry Smith             /* next three lines insert new entry into linked list */
222639f9d9dSBarry Smith             rowhit[mfm]        = currentcol;
223639f9d9dSBarry Smith             rowhit[currentcol] = fm;
224639f9d9dSBarry Smith             fm                 = currentcol;
225639f9d9dSBarry Smith             /* fm points to present position in list since we know the columns are sorted */
22671cd77b2SBarry Smith           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Detected invalid coloring");
227639f9d9dSBarry Smith         }
228639f9d9dSBarry Smith       }
229639f9d9dSBarry Smith       c->nrows[i] = nrows;
23038baddfdSBarry Smith       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
23138baddfdSBarry Smith       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
232639f9d9dSBarry Smith       /* now store the linked list of rows into c->rows[i] */
233639f9d9dSBarry Smith       nrows = 0;
234639f9d9dSBarry Smith       fm    = rowhit[N];
235639f9d9dSBarry Smith       do {
236639f9d9dSBarry Smith         c->rows[i][nrows]            = fm;
237639f9d9dSBarry Smith         c->columnsforrow[i][nrows++] = columnsforrow[fm];
238639f9d9dSBarry Smith         fm                           = rowhit[fm];
239639f9d9dSBarry Smith       } while (fm < N);
240639f9d9dSBarry Smith     } /* ---------------------------------------------------------------------------------------*/
241639f9d9dSBarry Smith     ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr);
242639f9d9dSBarry Smith   }
2433acb8795SBarry Smith   ierr = MatRestoreColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr);
244639f9d9dSBarry Smith 
245606d414cSSatish Balay   ierr = PetscFree(rowhit);CHKERRQ(ierr);
246606d414cSSatish Balay   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
247639f9d9dSBarry Smith 
24830b34957SBarry Smith   /* Optimize by adding the vscale, and scaleforrow[][] fields */
24930b34957SBarry Smith   /*
25030b34957SBarry Smith        see the version for MPIAIJ
25130b34957SBarry Smith   */
252ce94432eSBarry Smith   ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr);
25338baddfdSBarry Smith   ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
25430b34957SBarry Smith   for (k=0; k<c->ncolors; k++) {
25538baddfdSBarry Smith     ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
25630b34957SBarry Smith     for (l=0; l<c->nrows[k]; l++) {
25730b34957SBarry Smith       col = c->columnsforrow[k][l];
2582205254eSKarl Rupp 
25930b34957SBarry Smith       c->vscaleforrow[k][l] = col;
26030b34957SBarry Smith     }
26130b34957SBarry Smith   }
262b9617806SBarry Smith   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2633a40ed3dSBarry Smith   PetscFunctionReturn(0);
26470c3da92SBarry Smith }
26579c1e64dSHong Zhang 
266