xref: /petsc/src/mat/impls/aij/seq/fdaij.c (revision 85740eac76d20a0cac9a36a21d5752ff4c70535a)
1 
2 #include <../src/mat/impls/aij/seq/aij.h>
3 
4 #undef __FUNCT__
5 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ_den2sp"
6 /*
7     This routine optimizes MatFDColoringCreate_SeqAIJ() by using den2sp array
8 */
9 PetscErrorCode MatFDColoringCreate_SeqAIJ_den2sp(Mat mat,ISColoring iscoloring,MatFDColoring c)
10 {
11   PetscErrorCode ierr;
12   PetscInt       i,n,nrows,N,j,k,m,ncols,col;
13   const PetscInt *is,*rows,*ci,*cj;
14   PetscInt       nis=iscoloring->n,*rowhit,*columnsforrow,bs=1,*spidx,*spidxhit,*den2sp,*den2sp_i;
15   IS             *isa;
16   PetscBool      flg1;
17   Mat_SeqAIJ     *csp = (Mat_SeqAIJ*)mat->data;
18   PetscInt       nz_den=0;
19 
20   PetscFunctionBegin;
21   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
22   /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1.
23      SeqBAIJ calls this routine, thus check it */
24   ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr);
25   if (flg1) {
26     ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
27   }
28 
29   N         = mat->cmap->N/bs;
30   c->M      = mat->rmap->N/bs;   /* set total rows, columns and local rows */
31   c->N      = mat->cmap->N/bs;
32   c->m      = mat->rmap->N/bs;
33   c->rstart = 0;
34 
35   c->ncolors = nis;
36   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
37   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
38   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
39   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
40   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
41   ierr       = PetscMalloc((csp->nz+1)*sizeof(PetscInt),&den2sp);CHKERRQ(ierr);
42   den2sp_i   = den2sp;
43   ierr       = PetscMalloc2(csp->nz,PetscInt,&c->colorforrow,csp->nz,PetscInt,&c->colorforcolumn);CHKERRQ(ierr);
44   ierr       = PetscMalloc(3*csp->nz*sizeof(PetscInt*),&c->rowcolden2sp3);CHKERRQ(ierr);
45 
46   ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
47 
48   ierr = PetscMalloc3(c->m,PetscInt,&rowhit,N,PetscInt,&columnsforrow,c->m,PetscInt,&spidxhit);CHKERRQ(ierr);
49   ierr = PetscMemzero(rowhit,c->m*sizeof(PetscInt));CHKERRQ(ierr);
50 
51   for (i=0; i<nis; i++) { /* loop over colors */
52     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
53     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
54 
55     c->ncolumns[i] = n;
56     if (n) {
57       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
58       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
59     } else {
60       c->columns[i] = 0;
61     }
62 
63     /* fast, crude version requires O(N*N) work */
64     nrows = 0;
65     for (j=0; j<n; j++) {  /* loop over columns */
66       col  = is[j];
67       rows = cj + ci[col];
68       m    = ci[col+1] - ci[col];
69       for (k=0; k<m; k++) {  /* loop over columns marking them in rowhit */
70         rowhit[*rows]     = col + 1;
71         spidxhit[*rows++] = spidx[ci[col] + k];
72         nrows++;
73       }
74     }
75     c->nrows[i] = nrows; /* total num of rows for this color */
76 
77     ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
78     ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
79     nrows = 0;
80     for (j=0; j<N; j++) { /* loop over rows */
81       if (rowhit[j]) {
82         c->rows[i][nrows]          = j;             /* row index */
83         c->columnsforrow[i][nrows] = rowhit[j] - 1; /* column index */
84         den2sp_i[nrows++]          = spidxhit[j];
85 
86         c->colorforrow[nz_den]    = j;             /* row index */
87         c->colorforcolumn[nz_den] = rowhit[j] - 1; /* column index */
88 
89         c->rowcolden2sp3[3*nz_den]   = j;             /* row index */
90         c->rowcolden2sp3[3*nz_den+1] = rowhit[j] - 1; /* column index */
91         c->rowcolden2sp3[3*nz_den+2] = spidxhit[j];
92 
93         nz_den++;
94         rowhit[j] = 0.0; /* zero rowhit for reuse */
95       }
96     }
97     den2sp_i += nrows;
98     ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr);
99   }
100   printf("nz_den %d, csp->nz %d\n",nz_den,csp->nz);
101 
102   ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
103   ierr = PetscFree3(rowhit,columnsforrow,spidxhit);CHKERRQ(ierr);
104   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
105 
106   c->den2sp                 = den2sp;
107   c->ctype                  = IS_COLORING_GHOSTED;
108   mat->ops->fdcoloringapply = MatFDColoringApply_SeqAIJ;
109   PetscFunctionReturn(0);
110 }
111 
112 /* --------------------------------------------------------------- */
113 
114 #undef __FUNCT__
115 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ"
116 /*
117     This routine is shared by AIJ and BAIJ matrices, since it operators only on the nonzero structure of the elements or blocks.
118     This is why it has the ugly code with the MatGetBlockSize()
119 */
120 PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
121 {
122   PetscErrorCode ierr;
123   PetscInt       i,n,nrows,N,j,k,m,ncols,col;
124   const PetscInt *is,*rows,*ci,*cj;
125   PetscInt       nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1;
126   IS             *isa;
127   PetscBool      done,flg = PETSC_FALSE;
128   PetscBool      flg1;
129   PetscBool      new_impl=PETSC_FALSE;
130 
131   PetscFunctionBegin;
132   ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr);
133   if (new_impl){
134     ierr =  MatFDColoringCreate_SeqAIJ_den2sp(mat,iscoloring,c);CHKERRQ(ierr);
135     PetscFunctionReturn(0);
136   }
137   if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();");
138 
139   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
140   /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1.
141      SeqBAIJ calls this routine, thus check it */
142   ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr);
143   if (flg1) {
144     ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
145   }
146 
147   N         = mat->cmap->N/bs;
148   c->M      = mat->rmap->N/bs;   /* set total rows, columns and local rows */
149   c->N      = mat->cmap->N/bs;
150   c->m      = mat->rmap->N/bs;
151   c->rstart = 0;
152 
153   c->ncolors = nis;
154   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
155   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
156   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
157   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
158   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
159 
160   ierr = MatGetColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr);
161   if (!done) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name);
162 
163   /*
164      Temporary option to allow for debugging/testing
165   */
166   ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr);
167 
168   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
169   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
170 
171   for (i=0; i<nis; i++) {
172     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
173     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
174 
175     c->ncolumns[i] = n;
176     if (n) {
177       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
178       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
179     } else {
180       c->columns[i] = 0;
181     }
182 
183     if (!flg) { /* ------------------------------------------------------------------------------*/
184       /* fast, crude version requires O(N*N) work */
185       ierr = PetscMemzero(rowhit,N*sizeof(PetscInt));CHKERRQ(ierr);
186       /* loop over columns*/
187       for (j=0; j<n; j++) {
188         col  = is[j];
189         rows = cj + ci[col];
190         m    = ci[col+1] - ci[col];
191         /* loop over columns marking them in rowhit */
192         for (k=0; k<m; k++) {
193           rowhit[*rows++] = col + 1;
194         }
195       }
196       /* count the number of hits */
197       nrows = 0;
198       for (j=0; j<N; j++) {
199         if (rowhit[j]) nrows++;
200       }
201       c->nrows[i] = nrows;
202       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
203       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
204       nrows       = 0;
205       for (j=0; j<N; j++) {
206         if (rowhit[j]) {
207           c->rows[i][nrows]          = j;
208           c->columnsforrow[i][nrows] = rowhit[j] - 1;
209           nrows++;
210         }
211       }
212     } else {  /*-------------------------------------------------------------------------------*/
213       /* slow version, using rowhit as a linked list */
214       PetscInt currentcol,fm,mfm;
215       rowhit[N] = N;
216       nrows     = 0;
217       /* loop over columns */
218       for (j=0; j<n; j++) {
219         col  = is[j];
220         rows = cj + ci[col];
221         m    = ci[col+1] - ci[col];
222         /* loop over columns marking them in rowhit */
223         fm = N;    /* fm points to first entry in linked list */
224         for (k=0; k<m; k++) {
225           currentcol = *rows++;
226           /* is it already in the list? */
227           do {
228             mfm = fm;
229             fm  = rowhit[fm];
230           } while (fm < currentcol);
231           /* not in list so add it */
232           if (fm != currentcol) {
233             nrows++;
234             columnsforrow[currentcol] = col;
235             /* next three lines insert new entry into linked list */
236             rowhit[mfm]        = currentcol;
237             rowhit[currentcol] = fm;
238             fm                 = currentcol;
239             /* fm points to present position in list since we know the columns are sorted */
240           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Detected invalid coloring");
241         }
242       }
243       c->nrows[i] = nrows;
244       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
245       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
246       /* now store the linked list of rows into c->rows[i] */
247       nrows = 0;
248       fm    = rowhit[N];
249       do {
250         c->rows[i][nrows]            = fm;
251         c->columnsforrow[i][nrows++] = columnsforrow[fm];
252         fm                           = rowhit[fm];
253       } while (fm < N);
254     } /* ---------------------------------------------------------------------------------------*/
255     ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr);
256   }
257   ierr = MatRestoreColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr);
258 
259   ierr = PetscFree(rowhit);CHKERRQ(ierr);
260   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
261 
262   /* Optimize by adding the vscale, and scaleforrow[][] fields */
263   /*
264        see the version for MPIAIJ
265   */
266   ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr);
267   ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
268   for (k=0; k<c->ncolors; k++) {
269     ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
270     for (l=0; l<c->nrows[k]; l++) {
271       col = c->columnsforrow[k][l];
272 
273       c->vscaleforrow[k][l] = col;
274     }
275   }
276   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
277   PetscFunctionReturn(0);
278 }
279 
280