xref: /petsc/src/mat/impls/aij/seq/matptap.c (revision 671beff6e3ba45af6435d99e65a22b6833b66ff5)
1 /*
2   Defines projective product routines where A is a AIJ matrix
3           C = P^T * A * P
4 */
5 
6 #include "src/mat/impls/aij/seq/aij.h"   /*I "petscmat.h" I*/
7 #include "src/mat/utils/freespace.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 
10 #undef __FUNCT__
11 #define __FUNCT__ "MatPtAP"
12 /*@
13    MatPtAP - Creates the matrix projection C = P^T * A * P
14 
15    Collective on Mat
16 
17    Input Parameters:
18 +  A - the matrix
19 .  P - the projection matrix
20 .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
21 -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P))
22 
23    Output Parameters:
24 .  C - the product matrix
25 
26    Notes:
27    C will be created and must be destroyed by the user with MatDestroy().
28 
29    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
30    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
31 
32    Level: intermediate
33 
34 .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult()
35 @*/
36 PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) {
37   PetscErrorCode ierr;
38   PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *);
39   PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *);
40 
41   PetscFunctionBegin;
42   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
43   PetscValidType(A,1);
44   MatPreallocated(A);
45   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
46   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
47   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
48   PetscValidType(P,2);
49   MatPreallocated(P);
50   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
51   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
52   PetscValidPointer(C,3);
53 
54   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
55 
56   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
57 
58   /* For now, we do not dispatch based on the type of A and P */
59   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
60   fA = A->ops->ptap;
61   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name);
62   fP = P->ops->ptap;
63   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name);
64   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
65 
66   ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
67   ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr);
68   ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
69   PetscFunctionReturn(0);
70 }
71 
72 EXTERN PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,MatReuse,PetscReal,int,int,Mat*);
73 EXTERN PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,PetscReal,int,int,Mat*);
74 EXTERN PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,int,int,Mat);
75 
76 #undef __FUNCT__
77 #define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ"
78 PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
79 {
80   PetscErrorCode    ierr;
81 
82   PetscFunctionBegin;
83   if (scall == MAT_INITIAL_MATRIX){
84     ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */
85   } else if (scall == MAT_REUSE_MATRIX){
86     ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr);
87   } else {
88     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
89   }
90   PetscFunctionReturn(0);
91 }
92 
93 #undef __FUNCT__
94 #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ"
95 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
96 {
97   PetscErrorCode    ierr;
98   Mat               P_seq,A_loc,C_seq;
99   int               prstart,prend,m=P->m;
100   IS                isrowp,iscolp;
101 
102   PetscFunctionBegin;
103   /* get P_seq = submatrix of P by taking rows of P that equal to nonzero col of A */
104   ierr = MatGetBrowsOfAcols(A,P,MAT_INITIAL_MATRIX,&isrowp,&iscolp,&prstart,&P_seq);CHKERRQ(ierr);
105   ierr = ISDestroy(iscolp);CHKERRQ(ierr);
106 
107   /* get A_loc = submatrix of A by taking all local rows of A */
108   ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,PETSC_NULL,&isrowp,&A_loc);CHKERRQ(ierr);
109   ierr = ISDestroy(isrowp);CHKERRQ(ierr);
110 
111   /* compute C_seq = P_loc^T * A_loc * P_seq */
112   prend   = prstart + m;
113   ierr = MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(A_loc,P_seq,MAT_INITIAL_MATRIX,fill,prstart,prend,&C_seq);CHKERRQ(ierr);
114   ierr = MatDestroy(P_seq);CHKERRQ(ierr);
115   ierr = MatDestroy(A_loc);CHKERRQ(ierr);
116 
117   /* add C_seq into mpi C */
118   ierr = MatMerge_SeqsToMPI(A->comm,C_seq,P->n,P->n,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
119 
120   PetscFunctionReturn(0);
121 }
122 
123 #undef __FUNCT__
124 #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ"
125 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C)
126 {
127   PetscErrorCode       ierr;
128   Mat                  P_seq,A_loc,C_seq;
129   int                  prstart,prend,m=P->m;
130   IS                   isrowp,iscolp;
131   Mat_Merge_SeqsToMPI  *merge;
132   PetscObjectContainer container;
133 
134   PetscFunctionBegin;
135   ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
136   if (container) {
137     ierr  = PetscObjectContainerGetPointer(container,(void *)&merge);CHKERRQ(ierr);
138   }
139 
140   /* get P_seq = submatrix of P by taking rows of P that equal to nonzero col of A */
141   ierr = MatGetBrowsOfAcols(A,P,MAT_INITIAL_MATRIX,&isrowp,&iscolp,&prstart,&P_seq);CHKERRQ(ierr);
142   ierr = ISDestroy(iscolp);CHKERRQ(ierr);
143 
144   /* get A_loc = submatrix of A by taking all local rows of A */
145   ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,PETSC_NULL,&isrowp,&A_loc);CHKERRQ(ierr);
146   ierr = ISDestroy(isrowp);CHKERRQ(ierr);
147 
148   /* compute C_seq = P_loc^T * A_loc * P_seq */
149   prend = prstart + m;
150   C_seq = merge->C_seq;
151   ierr = MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(A_loc,P_seq,MAT_REUSE_MATRIX,1.0,prstart,prend,&C_seq);CHKERRQ(ierr);
152   ierr = MatDestroy(P_seq);CHKERRQ(ierr);
153   ierr = MatDestroy(A_loc);CHKERRQ(ierr);
154 
155   /* add C_seq into mpi C */
156   ierr = MatMerge_SeqsToMPI(A->comm,C_seq,P->n,P->n,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
157 
158   PetscFunctionReturn(0);
159 }
160 
161 #undef __FUNCT__
162 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ"
163 PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
164 {
165   PetscErrorCode ierr;
166   PetscFunctionBegin;
167   if (scall == MAT_INITIAL_MATRIX){
168     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
169     ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr);
170     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
171   }
172   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
173   ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr);
174   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
175   PetscFunctionReturn(0);
176 }
177 
178 #undef __FUNCT__
179 #define __FUNCT__ "MatPtAPSymbolic"
180 /*
181    MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P
182 
183    Collective on Mat
184 
185    Input Parameters:
186 +  A - the matrix
187 -  P - the projection matrix
188 
189    Output Parameters:
190 .  C - the (i,j) structure of the product matrix
191 
192    Notes:
193    C will be created and must be destroyed by the user with MatDestroy().
194 
195    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
196    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.  The product is computed using
197    this (i,j) structure by calling MatPtAPNumeric().
198 
199    Level: intermediate
200 
201 .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic()
202 */
203 PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C) {
204   PetscErrorCode ierr;
205   PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*);
206   PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*);
207 
208   PetscFunctionBegin;
209 
210   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
211   PetscValidType(A,1);
212   MatPreallocated(A);
213   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
214   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
215 
216   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
217   PetscValidType(P,2);
218   MatPreallocated(P);
219   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
220   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
221 
222   PetscValidPointer(C,3);
223 
224   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
225   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
226 
227   /* For now, we do not dispatch based on the type of A and P */
228   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
229   fA = A->ops->ptapsymbolic;
230   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name);
231   fP = P->ops->ptapsymbolic;
232   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name);
233   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
234 
235   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
236   ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr);
237   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
238 
239   PetscFunctionReturn(0);
240 }
241 
242 typedef struct {
243   Mat    symAP;
244 } Mat_PtAPstruct;
245 
246 EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat);
247 
248 #undef __FUNCT__
249 #define __FUNCT__ "MatDestroy_SeqAIJ_PtAP"
250 PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A)
251 {
252   PetscErrorCode    ierr;
253   Mat_PtAPstruct    *ptap=(Mat_PtAPstruct*)A->spptr;
254 
255   PetscFunctionBegin;
256   ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr);
257   ierr = PetscFree(ptap);CHKERRQ(ierr);
258   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
259   PetscFunctionReturn(0);
260 }
261 
262 #undef __FUNCT__
263 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ"
264 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C) {
265   PetscErrorCode ierr;
266   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
267   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
268   int            *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
269   int            *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
270   int            an=A->N,am=A->M,pn=P->N,pm=P->M;
271   int            i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
272   MatScalar      *ca;
273 
274   PetscFunctionBegin;
275   /* Get ij structure of P^T */
276   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
277   ptJ=ptj;
278 
279   /* Allocate ci array, arrays for fill computation and */
280   /* free space for accumulating nonzero column info */
281   ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr);
282   ci[0] = 0;
283 
284   ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr);
285   ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr);
286   ptasparserow = ptadenserow  + an;
287   denserow     = ptasparserow + an;
288   sparserow    = denserow     + pn;
289 
290   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
291   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
292   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
293   current_space = free_space;
294 
295   /* Determine symbolic info for each row of C: */
296   for (i=0;i<pn;i++) {
297     ptnzi  = pti[i+1] - pti[i];
298     ptanzi = 0;
299     /* Determine symbolic row of PtA: */
300     for (j=0;j<ptnzi;j++) {
301       arow = *ptJ++;
302       anzj = ai[arow+1] - ai[arow];
303       ajj  = aj + ai[arow];
304       for (k=0;k<anzj;k++) {
305         if (!ptadenserow[ajj[k]]) {
306           ptadenserow[ajj[k]]    = -1;
307           ptasparserow[ptanzi++] = ajj[k];
308         }
309       }
310     }
311       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
312     ptaj = ptasparserow;
313     cnzi   = 0;
314     for (j=0;j<ptanzi;j++) {
315       prow = *ptaj++;
316       pnzj = pi[prow+1] - pi[prow];
317       pjj  = pj + pi[prow];
318       for (k=0;k<pnzj;k++) {
319         if (!denserow[pjj[k]]) {
320             denserow[pjj[k]]  = -1;
321             sparserow[cnzi++] = pjj[k];
322         }
323       }
324     }
325 
326     /* sort sparserow */
327     ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
328 
329     /* If free space is not available, make more free space */
330     /* Double the amount of total space in the list */
331     if (current_space->local_remaining<cnzi) {
332       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
333     }
334 
335     /* Copy data into free space, and zero out denserows */
336     ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr);
337     current_space->array           += cnzi;
338     current_space->local_used      += cnzi;
339     current_space->local_remaining -= cnzi;
340 
341     for (j=0;j<ptanzi;j++) {
342       ptadenserow[ptasparserow[j]] = 0;
343     }
344     for (j=0;j<cnzi;j++) {
345       denserow[sparserow[j]] = 0;
346     }
347       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
348       /*        For now, we will recompute what is needed. */
349     ci[i+1] = ci[i] + cnzi;
350   }
351   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
352   /* Allocate space for cj, initialize cj, and */
353   /* destroy list of free space and other temporary array(s) */
354   ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr);
355   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
356   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
357 
358   /* Allocate space for ca */
359   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
360   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
361 
362   /* put together the new matrix */
363   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
364 
365   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
366   /* Since these are PETSc arrays, change flags to free them as necessary. */
367   c = (Mat_SeqAIJ *)((*C)->data);
368   c->freedata = PETSC_TRUE;
369   c->nonew    = 0;
370 
371   /* Clean up. */
372   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
373 
374   PetscFunctionReturn(0);
375 }
376 
377 #include "src/mat/impls/maij/maij.h"
378 EXTERN_C_BEGIN
379 #undef __FUNCT__
380 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
381 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C) {
382   /* This routine requires testing -- I don't think it works. */
383   PetscErrorCode ierr;
384   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
385   Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
386   Mat            P=pp->AIJ;
387   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
388   int            *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
389   int            *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
390   int            an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof;
391   int            i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
392   MatScalar      *ca;
393 
394   PetscFunctionBegin;
395   /* Start timer */
396   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
397 
398   /* Get ij structure of P^T */
399   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
400 
401   /* Allocate ci array, arrays for fill computation and */
402   /* free space for accumulating nonzero column info */
403   ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr);
404   ci[0] = 0;
405 
406   ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr);
407   ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr);
408   ptasparserow = ptadenserow  + an;
409   denserow     = ptasparserow + an;
410   sparserow    = denserow     + pn;
411 
412   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
413   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
414   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
415   current_space = free_space;
416 
417   /* Determine symbolic info for each row of C: */
418   for (i=0;i<pn/ppdof;i++) {
419     ptnzi  = pti[i+1] - pti[i];
420     ptanzi = 0;
421     ptJ    = ptj + pti[i];
422     for (dof=0;dof<ppdof;dof++) {
423     /* Determine symbolic row of PtA: */
424       for (j=0;j<ptnzi;j++) {
425         arow = ptJ[j] + dof;
426         anzj = ai[arow+1] - ai[arow];
427         ajj  = aj + ai[arow];
428         for (k=0;k<anzj;k++) {
429           if (!ptadenserow[ajj[k]]) {
430             ptadenserow[ajj[k]]    = -1;
431             ptasparserow[ptanzi++] = ajj[k];
432           }
433         }
434       }
435       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
436       ptaj = ptasparserow;
437       cnzi   = 0;
438       for (j=0;j<ptanzi;j++) {
439         pdof = *ptaj%dof;
440         prow = (*ptaj++)/dof;
441         pnzj = pi[prow+1] - pi[prow];
442         pjj  = pj + pi[prow];
443         for (k=0;k<pnzj;k++) {
444           if (!denserow[pjj[k]+pdof]) {
445             denserow[pjj[k]+pdof] = -1;
446             sparserow[cnzi++]     = pjj[k]+pdof;
447           }
448         }
449       }
450 
451       /* sort sparserow */
452       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
453 
454       /* If free space is not available, make more free space */
455       /* Double the amount of total space in the list */
456       if (current_space->local_remaining<cnzi) {
457         ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
458       }
459 
460       /* Copy data into free space, and zero out denserows */
461       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr);
462       current_space->array           += cnzi;
463       current_space->local_used      += cnzi;
464       current_space->local_remaining -= cnzi;
465 
466       for (j=0;j<ptanzi;j++) {
467         ptadenserow[ptasparserow[j]] = 0;
468       }
469       for (j=0;j<cnzi;j++) {
470         denserow[sparserow[j]] = 0;
471       }
472       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
473       /*        For now, we will recompute what is needed. */
474       ci[i+1+dof] = ci[i+dof] + cnzi;
475     }
476   }
477   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
478   /* Allocate space for cj, initialize cj, and */
479   /* destroy list of free space and other temporary array(s) */
480   ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr);
481   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
482   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
483 
484   /* Allocate space for ca */
485   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
486   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
487 
488   /* put together the new matrix */
489   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
490 
491   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
492   /* Since these are PETSc arrays, change flags to free them as necessary. */
493   c = (Mat_SeqAIJ *)((*C)->data);
494   c->freedata = PETSC_TRUE;
495   c->nonew    = 0;
496 
497   /* Clean up. */
498   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
499 
500   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
501   PetscFunctionReturn(0);
502 }
503 EXTERN_C_END
504 
505 #undef __FUNCT__
506 #define __FUNCT__ "MatPtAPNumeric"
507 /*
508    MatPtAPNumeric - Computes the matrix projection C = P^T * A * P
509 
510    Collective on Mat
511 
512    Input Parameters:
513 +  A - the matrix
514 -  P - the projection matrix
515 
516    Output Parameters:
517 .  C - the product matrix
518 
519    Notes:
520    C must have been created by calling MatPtAPSymbolic and must be destroyed by
521    the user using MatDeatroy().
522 
523    This routine is currently only implemented for pairs of AIJ matrices and classes
524    which inherit from AIJ.  C will be of type MATAIJ.
525 
526    Level: intermediate
527 
528 .seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric()
529 */
530 PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C) {
531   PetscErrorCode ierr;
532   PetscErrorCode (*fA)(Mat,Mat,Mat);
533   PetscErrorCode (*fP)(Mat,Mat,Mat);
534 
535   PetscFunctionBegin;
536 
537   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
538   PetscValidType(A,1);
539   MatPreallocated(A);
540   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
541   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
542 
543   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
544   PetscValidType(P,2);
545   MatPreallocated(P);
546   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
547   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
548 
549   PetscValidHeaderSpecific(C,MAT_COOKIE,3);
550   PetscValidType(C,3);
551   MatPreallocated(C);
552   if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
553   if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
554 
555   if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M);
556   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
557   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
558   if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N);
559 
560   /* For now, we do not dispatch based on the type of A and P */
561   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
562   fA = A->ops->ptapnumeric;
563   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name);
564   fP = P->ops->ptapnumeric;
565   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name);
566   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
567 
568   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
569   ierr = (*fA)(A,P,C);CHKERRQ(ierr);
570   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
571 
572   PetscFunctionReturn(0);
573 }
574 
575 #undef __FUNCT__
576 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ"
577 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C)
578 {
579   PetscErrorCode ierr;
580   int            flops=0;
581   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
582   Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
583   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
584   int            *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
585   int            *ci=c->i,*cj=c->j,*cjj;
586   int            am=A->M,cn=C->N,cm=C->M;
587   int            i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
588   MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
589 
590   PetscFunctionBegin;
591   /* Allocate temporary array for storage of one row of A*P */
592   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(int)),&apa);CHKERRQ(ierr);
593   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr);
594 
595   apj      = (int *)(apa + cn);
596   apjdense = apj + cn;
597 
598   /* Clear old values in C */
599   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
600 
601   for (i=0;i<am;i++) {
602     /* Form sparse row of A*P */
603     anzi  = ai[i+1] - ai[i];
604     apnzj = 0;
605     for (j=0;j<anzi;j++) {
606       prow = *aj++;
607       pnzj = pi[prow+1] - pi[prow];
608       pjj  = pj + pi[prow];
609       paj  = pa + pi[prow];
610       for (k=0;k<pnzj;k++) {
611         if (!apjdense[pjj[k]]) {
612           apjdense[pjj[k]] = -1;
613           apj[apnzj++]     = pjj[k];
614         }
615         apa[pjj[k]] += (*aa)*paj[k];
616       }
617       flops += 2*pnzj;
618       aa++;
619     }
620 
621     /* Sort the j index array for quick sparse axpy. */
622     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
623 
624     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
625     pnzi = pi[i+1] - pi[i];
626     for (j=0;j<pnzi;j++) {
627       nextap = 0;
628       crow   = *pJ++;
629       cjj    = cj + ci[crow];
630       caj    = ca + ci[crow];
631       /* Perform sparse axpy operation.  Note cjj includes apj. */
632       for (k=0;nextap<apnzj;k++) {
633         if (cjj[k]==apj[nextap]) {
634           caj[k] += (*pA)*apa[apj[nextap++]];
635         }
636       }
637       flops += 2*apnzj;
638       pA++;
639     }
640 
641     /* Zero the current row info for A*P */
642     for (j=0;j<apnzj;j++) {
643       apa[apj[j]]      = 0.;
644       apjdense[apj[j]] = 0;
645     }
646   }
647 
648   /* Assemble the final matrix and clean up */
649   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
650   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
651   ierr = PetscFree(apa);CHKERRQ(ierr);
652   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
653 
654   PetscFunctionReturn(0);
655 }
656 
657 /* Compute C = P[rstart:rend,:]^T * A * P of seqaij matrices - used by MatPtAP_MPIAIJ_MPIAIJ() */
658 
659 #undef __FUNCT__
660 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt"
661 /*@
662    MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt - Creates C = P[rstart:rend,:]^T * A * P of seqaij matrices,
663                 used by MatPtAP_MPIAIJ_MPIAIJ()
664 
665    Collective on Mat
666 
667    Input Parameters:
668 +  A - the matrix in seqaij format
669 .  P - the projection matrix in seqaij format
670 .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
671 .  fill - expected fill (not being used when scall==MAT_REUSE_MATRIX)
672 .  prstart, prend - the starting and ending-1-th row of the matrix P to be used for transpose
673 
674    Output Parameters:
675 .  C - the product matrix in seqaij format
676 
677    Level: developer
678 
679 .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult()
680 @*/
681 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,int prstart,int prend,Mat C)
682 {
683   PetscErrorCode ierr;
684   int            flops=0;
685   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
686   Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
687   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
688   int            *ai=a->i,*aj=a->j,*apj,*apjdense;
689   int            *pi=p->i,*pj=p->j,*pJ=p->j+pi[prstart],*pjj;
690   int            *ci=c->i,*cj=c->j,*cjj;
691   int            am=A->M,cn=C->N,cm=C->M;
692   int            i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
693   MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
694 
695   PetscFunctionBegin;
696   pA=p->a+pi[prstart];
697   /* Allocate temporary array for storage of one row of A*P */
698   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(int)),&apa);CHKERRQ(ierr);
699   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr);
700 
701   apj      = (int *)(apa + cn);
702   apjdense = apj + cn;
703 
704   /* Clear old values in C */
705   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
706 
707   for (i=0;i<am;i++) {
708     /* Form sparse row of A*P */
709     anzi  = ai[i+1] - ai[i];
710     apnzj = 0;
711     for (j=0;j<anzi;j++) {
712       prow = *aj++;
713       pnzj = pi[prow+1] - pi[prow];
714       pjj  = pj + pi[prow];
715       paj  = pa + pi[prow];
716       for (k=0;k<pnzj;k++) {
717         if (!apjdense[pjj[k]]) {
718           apjdense[pjj[k]] = -1;
719           apj[apnzj++]     = pjj[k];
720         }
721         apa[pjj[k]] += (*aa)*paj[k];
722       }
723       flops += 2*pnzj;
724       aa++;
725     }
726 
727     /* Sort the j index array for quick sparse axpy. */
728     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
729 
730     /* Compute P[[prstart:prend,:]^T*A*P using outer product (P^T)[:,j+prstart]*(A*P)[j,:]. */
731     pnzi = pi[i+1+prstart] - pi[i+prstart];
732     for (j=0;j<pnzi;j++) {
733       nextap = 0;
734       crow   = *pJ++;
735       cjj    = cj + ci[crow];
736       caj    = ca + ci[crow];
737       /* Perform sparse axpy operation.  Note cjj includes apj. */
738       for (k=0;nextap<apnzj;k++) {
739         if (cjj[k]==apj[nextap]) {
740           caj[k] += (*pA)*apa[apj[nextap++]];
741         }
742       }
743       flops += 2*apnzj;
744       pA++;
745     }
746 
747     /* Zero the current row info for A*P */
748     for (j=0;j<apnzj;j++) {
749       apa[apj[j]]      = 0.;
750       apjdense[apj[j]] = 0;
751     }
752   }
753 
754   /* Assemble the final matrix and clean up */
755   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
756   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
757   ierr = PetscFree(apa);CHKERRQ(ierr);
758   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
759 
760   PetscFunctionReturn(0);
761 }
762 
763 #undef __FUNCT__
764 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt"
765 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,PetscReal fill,int prstart,int prend,Mat *C) {
766   PetscErrorCode ierr;
767   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
768   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
769   int            *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
770   int            *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
771   int            an=A->N,am=A->M,pn=P->N,pm=P->M;
772   int            i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
773   MatScalar      *ca;
774   Mat            *psub,P_sub;
775   IS             isrow,iscol;
776   int            m = prend - prstart;
777 
778   PetscFunctionBegin;
779   /* Get ij structure of P[rstart:rend,:]^T */
780   ierr = ISCreateStride(PETSC_COMM_SELF,m,prstart,1,&isrow);CHKERRQ(ierr);
781   ierr = ISCreateStride(PETSC_COMM_SELF,P->n,0,1,&iscol);CHKERRQ(ierr);
782   ierr = MatGetSubMatrices(P,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&psub);CHKERRQ(ierr);
783   ierr = ISDestroy(isrow);CHKERRQ(ierr);
784   ierr = ISDestroy(iscol);CHKERRQ(ierr);
785   P_sub = psub[0];
786   ierr = MatGetSymbolicTranspose_SeqAIJ(P_sub,&pti,&ptj);CHKERRQ(ierr);
787   ierr = MatDestroyMatrices(1,&psub);CHKERRQ(ierr);
788   ptJ=ptj;
789 
790   /* Allocate ci array, arrays for fill computation and */
791   /* free space for accumulating nonzero column info */
792   ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr);
793   ci[0] = 0;
794 
795   ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr);
796   ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr);
797   ptasparserow = ptadenserow  + an;
798   denserow     = ptasparserow + an;
799   sparserow    = denserow     + pn;
800 
801   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
802   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
803   ierr          = GetMoreSpace((int)(fill*ai[am]/pm)*pn,&free_space);
804   current_space = free_space;
805 
806   /* Determine symbolic info for each row of C: */
807   for (i=0;i<pn;i++) {
808     ptnzi  = pti[i+1] - pti[i];
809     ptanzi = 0;
810     /* Determine symbolic row of PtA_reduced: */
811     for (j=0;j<ptnzi;j++) {
812       arow = *ptJ++;
813       anzj = ai[arow+1] - ai[arow];
814       ajj  = aj + ai[arow];
815       for (k=0;k<anzj;k++) {
816         if (!ptadenserow[ajj[k]]) {
817           ptadenserow[ajj[k]]    = -1;
818           ptasparserow[ptanzi++] = ajj[k];
819         }
820       }
821     }
822       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
823     ptaj = ptasparserow;
824     cnzi   = 0;
825     for (j=0;j<ptanzi;j++) {
826       prow = *ptaj++;
827       pnzj = pi[prow+1] - pi[prow];
828       pjj  = pj + pi[prow];
829       for (k=0;k<pnzj;k++) {
830         if (!denserow[pjj[k]]) {
831             denserow[pjj[k]]  = -1;
832             sparserow[cnzi++] = pjj[k];
833         }
834       }
835     }
836 
837     /* sort sparserow */
838     ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
839 
840     /* If free space is not available, make more free space */
841     /* Double the amount of total space in the list */
842     if (current_space->local_remaining<cnzi) {
843       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
844     }
845 
846     /* Copy data into free space, and zero out denserows */
847     ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr);
848     current_space->array           += cnzi;
849     current_space->local_used      += cnzi;
850     current_space->local_remaining -= cnzi;
851 
852     for (j=0;j<ptanzi;j++) {
853       ptadenserow[ptasparserow[j]] = 0;
854     }
855     for (j=0;j<cnzi;j++) {
856       denserow[sparserow[j]] = 0;
857     }
858       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
859       /*        For now, we will recompute what is needed. */
860     ci[i+1] = ci[i] + cnzi;
861   }
862   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
863   /* Allocate space for cj, initialize cj, and */
864   /* destroy list of free space and other temporary array(s) */
865   ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr);
866   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
867   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
868 
869   /* Allocate space for ca */
870   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
871   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
872 
873   /* put together the new matrix */
874   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
875 
876   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
877   /* Since these are PETSc arrays, change flags to free them as necessary. */
878   c = (Mat_SeqAIJ *)((*C)->data);
879   c->freedata = PETSC_TRUE;
880   c->nonew    = 0;
881 
882   /* Clean up. */
883   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
884 
885   PetscFunctionReturn(0);
886 }
887 
888 #undef __FUNCT__
889 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ_ReducedPt"
890 PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,MatReuse scall,PetscReal fill,int prstart,int prend,Mat *C)
891 {
892   PetscErrorCode ierr;
893   PetscFunctionBegin;
894   if (A->m != prend-prstart) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->m,prend-prstart);
895   if (prend-prstart > P->m) SETERRQ2(PETSC_ERR_ARG_SIZ," prend-prstart %d cannot be larger than P->m %d",prend-prstart,P->m);
896   if (scall == MAT_INITIAL_MATRIX){
897     ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(A,P,fill,prstart,prend,C);CHKERRQ(ierr);
898   }
899 
900   ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(A,P,prstart,prend,*C);CHKERRQ(ierr);
901 
902   PetscFunctionReturn(0);
903 }
904 
905