xref: /petsc/src/mat/impls/aij/seq/fdaij.c (revision 17a7dee193a40894b743a9285cb4ecacf5a39eae)
170c3da92SBarry Smith 
2c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h>
3639f9d9dSBarry Smith 
479c1e64dSHong Zhang #undef __FUNCT__
579c1e64dSHong Zhang #define __FUNCT__ "MatFDColoringCreate_SeqAIJ_den2sp"
679c1e64dSHong Zhang /*
779c1e64dSHong Zhang     This routine optimizes MatFDColoringCreate_SeqAIJ() by using den2sp array
879c1e64dSHong Zhang */
979c1e64dSHong Zhang PetscErrorCode MatFDColoringCreate_SeqAIJ_den2sp(Mat mat,ISColoring iscoloring,MatFDColoring c)
1079c1e64dSHong Zhang {
1179c1e64dSHong Zhang   PetscErrorCode ierr;
1279c1e64dSHong Zhang   PetscInt       i,n,nrows,N,j,k,m,ncols,col;
1379c1e64dSHong Zhang   const PetscInt *is,*rows,*ci,*cj;
14*17a7dee1SHong Zhang   PetscInt       nis=iscoloring->n,*rowhit,*columnsforrow,bs=1,*spidx,*spidxhit,nz;
1579c1e64dSHong Zhang   IS             *isa;
16476e0d0aSHong Zhang   PetscBool      flg1;
1779c1e64dSHong Zhang   Mat_SeqAIJ     *csp = (Mat_SeqAIJ*)mat->data;
1879c1e64dSHong Zhang 
1979c1e64dSHong Zhang   PetscFunctionBegin;
2079c1e64dSHong Zhang   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
21476e0d0aSHong Zhang   /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1.
22476e0d0aSHong Zhang      SeqBAIJ calls this routine, thus check it */
2379c1e64dSHong Zhang   ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr);
24476e0d0aSHong Zhang   if (flg1) {
2579c1e64dSHong Zhang     ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2679c1e64dSHong Zhang   }
2779c1e64dSHong Zhang 
2879c1e64dSHong Zhang   N         = mat->cmap->N/bs;
2979c1e64dSHong Zhang   c->M      = mat->rmap->N/bs;   /* set total rows, columns and local rows */
3079c1e64dSHong Zhang   c->N      = mat->cmap->N/bs;
3179c1e64dSHong Zhang   c->m      = mat->rmap->N/bs;
3279c1e64dSHong Zhang   c->rstart = 0;
3379c1e64dSHong Zhang 
3479c1e64dSHong Zhang   c->ncolors = nis;
3579c1e64dSHong Zhang   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
3679c1e64dSHong Zhang   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
3779c1e64dSHong Zhang   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
3885740eacSHong Zhang   ierr       = PetscMalloc(3*csp->nz*sizeof(PetscInt*),&c->rowcolden2sp3);CHKERRQ(ierr);
3979c1e64dSHong Zhang 
406378f32dSHong Zhang   ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
4179c1e64dSHong Zhang 
42476e0d0aSHong Zhang   ierr = PetscMalloc3(c->m,PetscInt,&rowhit,N,PetscInt,&columnsforrow,c->m,PetscInt,&spidxhit);CHKERRQ(ierr);
4379c1e64dSHong Zhang   ierr = PetscMemzero(rowhit,c->m*sizeof(PetscInt));CHKERRQ(ierr);
4479c1e64dSHong Zhang 
45*17a7dee1SHong Zhang   nz = 0;
4679c1e64dSHong Zhang   for (i=0; i<nis; i++) { /* loop over colors */
4779c1e64dSHong Zhang     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
4879c1e64dSHong Zhang     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
4979c1e64dSHong Zhang 
5079c1e64dSHong Zhang     c->ncolumns[i] = n;
5179c1e64dSHong Zhang     if (n) {
5279c1e64dSHong Zhang       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
5379c1e64dSHong Zhang       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
5479c1e64dSHong Zhang     } else {
5579c1e64dSHong Zhang       c->columns[i] = 0;
5679c1e64dSHong Zhang     }
5779c1e64dSHong Zhang 
5879c1e64dSHong Zhang     /* fast, crude version requires O(N*N) work */
59476e0d0aSHong Zhang     nrows = 0;
6079c1e64dSHong Zhang     for (j=0; j<n; j++) {  /* loop over columns */
6179c1e64dSHong Zhang       col  = is[j];
6279c1e64dSHong Zhang       rows = cj + ci[col];
6379c1e64dSHong Zhang       m    = ci[col+1] - ci[col];
6479c1e64dSHong Zhang       for (k=0; k<m; k++) {  /* loop over columns marking them in rowhit */
6579c1e64dSHong Zhang         rowhit[*rows]     = col + 1;
66476e0d0aSHong Zhang         spidxhit[*rows++] = spidx[ci[col] + k];
67476e0d0aSHong Zhang         nrows++;
6879c1e64dSHong Zhang       }
6979c1e64dSHong Zhang     }
70476e0d0aSHong Zhang     c->nrows[i] = nrows; /* total num of rows for this color */
7179c1e64dSHong Zhang 
7279c1e64dSHong Zhang     nrows = 0;
7379c1e64dSHong Zhang     for (j=0; j<N; j++) { /* loop over rows */
7479c1e64dSHong Zhang       if (rowhit[j]) {
75*17a7dee1SHong Zhang         c->rowcolden2sp3[nz++]   = j;             /* row index */
76*17a7dee1SHong Zhang         c->rowcolden2sp3[nz++] = rowhit[j] - 1;   /* column index */
77*17a7dee1SHong Zhang         c->rowcolden2sp3[nz++] = spidxhit[j];     /* den2sp index */
7879c1e64dSHong Zhang         rowhit[j] = 0.0;                          /* zero rowhit for reuse */
7979c1e64dSHong Zhang       }
8079c1e64dSHong Zhang     }
8179c1e64dSHong Zhang     ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr);
8279c1e64dSHong Zhang   }
83*17a7dee1SHong Zhang   if (nz/3 != csp->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"nz %d != mat->nz %d\n",nz/3,csp->nz);
8485740eacSHong Zhang 
856378f32dSHong Zhang   ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
86476e0d0aSHong Zhang   ierr = PetscFree3(rowhit,columnsforrow,spidxhit);CHKERRQ(ierr);
8779c1e64dSHong Zhang   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
88476e0d0aSHong Zhang 
894737c552SHong Zhang   c->ctype                  = IS_COLORING_GHOSTED;
9079c1e64dSHong Zhang   mat->ops->fdcoloringapply = MatFDColoringApply_SeqAIJ;
9179c1e64dSHong Zhang   PetscFunctionReturn(0);
9279c1e64dSHong Zhang }
9379c1e64dSHong Zhang 
9479c1e64dSHong Zhang /* --------------------------------------------------------------- */
9579c1e64dSHong Zhang 
964a2ae208SSatish Balay #undef __FUNCT__
974a2ae208SSatish Balay #define __FUNCT__ "MatFDColoringCreate_SeqAIJ"
983acb8795SBarry Smith /*
993acb8795SBarry Smith     This routine is shared by AIJ and BAIJ matrices, since it operators only on the nonzero structure of the elements or blocks.
1003acb8795SBarry Smith     This is why it has the ugly code with the MatGetBlockSize()
1013acb8795SBarry Smith */
102dfbe8321SBarry Smith PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
103639f9d9dSBarry Smith {
1046849ba73SBarry Smith   PetscErrorCode ierr;
1051a83f524SJed Brown   PetscInt       i,n,nrows,N,j,k,m,ncols,col;
1061a83f524SJed Brown   const PetscInt *is,*rows,*ci,*cj;
1073acb8795SBarry Smith   PetscInt       nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1;
108b9617806SBarry Smith   IS             *isa;
109ace3abfcSBarry Smith   PetscBool      done,flg = PETSC_FALSE;
110476e0d0aSHong Zhang   PetscBool      flg1;
11179c1e64dSHong Zhang   PetscBool      new_impl=PETSC_FALSE;
112639f9d9dSBarry Smith 
1133a40ed3dSBarry Smith   PetscFunctionBegin;
11479c1e64dSHong Zhang   ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr);
11579c1e64dSHong Zhang   if (new_impl){
11679c1e64dSHong Zhang     ierr =  MatFDColoringCreate_SeqAIJ_den2sp(mat,iscoloring,c);CHKERRQ(ierr);
11779c1e64dSHong Zhang     PetscFunctionReturn(0);
11879c1e64dSHong Zhang   }
119e7e72b3dSBarry Smith   if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();");
120522c5e43SBarry Smith 
121b9617806SBarry Smith   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
122476e0d0aSHong Zhang   /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1.
123476e0d0aSHong Zhang      SeqBAIJ calls this routine, thus check it */
124251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr);
125476e0d0aSHong Zhang   if (flg1) {
1263acb8795SBarry Smith     ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
1273acb8795SBarry Smith   }
1283acb8795SBarry Smith 
1293acb8795SBarry Smith   N         = mat->cmap->N/bs;
1303acb8795SBarry Smith   c->M      = mat->rmap->N/bs;   /* set total rows, columns and local rows */
1313acb8795SBarry Smith   c->N      = mat->cmap->N/bs;
1323acb8795SBarry Smith   c->m      = mat->rmap->N/bs;
133005c665bSBarry Smith   c->rstart = 0;
134005c665bSBarry Smith 
135639f9d9dSBarry Smith   c->ncolors = nis;
13638baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
13738baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
13838baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
13938baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
14038baddfdSBarry Smith   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
14143a90d84SBarry Smith 
1423acb8795SBarry Smith   ierr = MatGetColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr);
143ce94432eSBarry Smith   if (!done) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name);
14470c3da92SBarry Smith 
14570c3da92SBarry Smith   /*
146639f9d9dSBarry Smith      Temporary option to allow for debugging/testing
14770c3da92SBarry Smith   */
1480298fd71SBarry Smith   ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr);
14970c3da92SBarry Smith 
15038baddfdSBarry Smith   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
15138baddfdSBarry Smith   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
15270c3da92SBarry Smith 
153639f9d9dSBarry Smith   for (i=0; i<nis; i++) {
154b9b97703SBarry Smith     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
155639f9d9dSBarry Smith     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
1562205254eSKarl Rupp 
157639f9d9dSBarry Smith     c->ncolumns[i] = n;
158639f9d9dSBarry Smith     if (n) {
15938baddfdSBarry Smith       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
16038baddfdSBarry Smith       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
16170c3da92SBarry Smith     } else {
162639f9d9dSBarry Smith       c->columns[i] = 0;
16370c3da92SBarry Smith     }
16470c3da92SBarry Smith 
1653a7fca6bSBarry Smith     if (!flg) { /* ------------------------------------------------------------------------------*/
1663a7fca6bSBarry Smith       /* fast, crude version requires O(N*N) work */
16738baddfdSBarry Smith       ierr = PetscMemzero(rowhit,N*sizeof(PetscInt));CHKERRQ(ierr);
168639f9d9dSBarry Smith       /* loop over columns*/
169639f9d9dSBarry Smith       for (j=0; j<n; j++) {
170639f9d9dSBarry Smith         col  = is[j];
171639f9d9dSBarry Smith         rows = cj + ci[col];
172639f9d9dSBarry Smith         m    = ci[col+1] - ci[col];
173639f9d9dSBarry Smith         /* loop over columns marking them in rowhit */
174639f9d9dSBarry Smith         for (k=0; k<m; k++) {
175639f9d9dSBarry Smith           rowhit[*rows++] = col + 1;
17670c3da92SBarry Smith         }
17770c3da92SBarry Smith       }
178639f9d9dSBarry Smith       /* count the number of hits */
179639f9d9dSBarry Smith       nrows = 0;
18070c3da92SBarry Smith       for (j=0; j<N; j++) {
181639f9d9dSBarry Smith         if (rowhit[j]) nrows++;
182639f9d9dSBarry Smith       }
183639f9d9dSBarry Smith       c->nrows[i] = nrows;
18438baddfdSBarry Smith       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
18538baddfdSBarry Smith       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
186639f9d9dSBarry Smith       nrows       = 0;
187639f9d9dSBarry Smith       for (j=0; j<N; j++) {
188639f9d9dSBarry Smith         if (rowhit[j]) {
189639f9d9dSBarry Smith           c->rows[i][nrows]          = j;
190639f9d9dSBarry Smith           c->columnsforrow[i][nrows] = rowhit[j] - 1;
191639f9d9dSBarry Smith           nrows++;
19270c3da92SBarry Smith         }
19370c3da92SBarry Smith       }
194639f9d9dSBarry Smith     } else {  /*-------------------------------------------------------------------------------*/
1953a7fca6bSBarry Smith       /* slow version, using rowhit as a linked list */
19638baddfdSBarry Smith       PetscInt currentcol,fm,mfm;
197639f9d9dSBarry Smith       rowhit[N] = N;
198639f9d9dSBarry Smith       nrows     = 0;
199639f9d9dSBarry Smith       /* loop over columns */
200639f9d9dSBarry Smith       for (j=0; j<n; j++) {
201639f9d9dSBarry Smith         col  = is[j];
202639f9d9dSBarry Smith         rows = cj + ci[col];
203639f9d9dSBarry Smith         m    = ci[col+1] - ci[col];
204639f9d9dSBarry Smith         /* loop over columns marking them in rowhit */
205639f9d9dSBarry Smith         fm = N;    /* fm points to first entry in linked list */
206639f9d9dSBarry Smith         for (k=0; k<m; k++) {
207639f9d9dSBarry Smith           currentcol = *rows++;
208639f9d9dSBarry Smith           /* is it already in the list? */
209639f9d9dSBarry Smith           do {
210639f9d9dSBarry Smith             mfm = fm;
211639f9d9dSBarry Smith             fm  = rowhit[fm];
212639f9d9dSBarry Smith           } while (fm < currentcol);
213639f9d9dSBarry Smith           /* not in list so add it */
214639f9d9dSBarry Smith           if (fm != currentcol) {
215639f9d9dSBarry Smith             nrows++;
216639f9d9dSBarry Smith             columnsforrow[currentcol] = col;
217639f9d9dSBarry Smith             /* next three lines insert new entry into linked list */
218639f9d9dSBarry Smith             rowhit[mfm]        = currentcol;
219639f9d9dSBarry Smith             rowhit[currentcol] = fm;
220639f9d9dSBarry Smith             fm                 = currentcol;
221639f9d9dSBarry Smith             /* fm points to present position in list since we know the columns are sorted */
22271cd77b2SBarry Smith           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Detected invalid coloring");
223639f9d9dSBarry Smith         }
224639f9d9dSBarry Smith       }
225639f9d9dSBarry Smith       c->nrows[i] = nrows;
22638baddfdSBarry Smith       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
22738baddfdSBarry Smith       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
228639f9d9dSBarry Smith       /* now store the linked list of rows into c->rows[i] */
229639f9d9dSBarry Smith       nrows = 0;
230639f9d9dSBarry Smith       fm    = rowhit[N];
231639f9d9dSBarry Smith       do {
232639f9d9dSBarry Smith         c->rows[i][nrows]            = fm;
233639f9d9dSBarry Smith         c->columnsforrow[i][nrows++] = columnsforrow[fm];
234639f9d9dSBarry Smith         fm                           = rowhit[fm];
235639f9d9dSBarry Smith       } while (fm < N);
236639f9d9dSBarry Smith     } /* ---------------------------------------------------------------------------------------*/
237639f9d9dSBarry Smith     ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr);
238639f9d9dSBarry Smith   }
2393acb8795SBarry Smith   ierr = MatRestoreColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr);
240639f9d9dSBarry Smith 
241606d414cSSatish Balay   ierr = PetscFree(rowhit);CHKERRQ(ierr);
242606d414cSSatish Balay   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
243639f9d9dSBarry Smith 
24430b34957SBarry Smith   /* Optimize by adding the vscale, and scaleforrow[][] fields */
24530b34957SBarry Smith   /*
24630b34957SBarry Smith        see the version for MPIAIJ
24730b34957SBarry Smith   */
248ce94432eSBarry Smith   ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr);
24938baddfdSBarry Smith   ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
25030b34957SBarry Smith   for (k=0; k<c->ncolors; k++) {
25138baddfdSBarry Smith     ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
25230b34957SBarry Smith     for (l=0; l<c->nrows[k]; l++) {
25330b34957SBarry Smith       col = c->columnsforrow[k][l];
2542205254eSKarl Rupp 
25530b34957SBarry Smith       c->vscaleforrow[k][l] = col;
25630b34957SBarry Smith     }
25730b34957SBarry Smith   }
258b9617806SBarry Smith   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2593a40ed3dSBarry Smith   PetscFunctionReturn(0);
26070c3da92SBarry Smith }
26179c1e64dSHong Zhang 
262