xref: /petsc/src/mat/impls/aij/seq/matptap.c (revision e9b43d0f224bbe4aed6dc1cb9ef9a101ac74b063)
1 /*
2   Defines projective product routines where A is a AIJ matrix
3           C = P^T * A * P
4 */
5 
6 #include "src/mat/impls/aij/seq/aij.h"   /*I "petscmat.h" I*/
7 #include "src/mat/utils/freespace.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 
10 #undef __FUNCT__
11 #define __FUNCT__ "MatPtAP"
12 /*@
13    MatPtAP - Creates the matrix projection C = P^T * A * P
14 
15    Collective on Mat
16 
17    Input Parameters:
18 +  A - the matrix
19 .  P - the projection matrix
20 .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
21 -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P))
22 
23    Output Parameters:
24 .  C - the product matrix
25 
26    Notes:
27    C will be created and must be destroyed by the user with MatDestroy().
28 
29    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
30    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
31 
32    Level: intermediate
33 
34 .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult()
35 @*/
36 PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) {
37   PetscErrorCode ierr;
38   PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *);
39   PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *);
40 
41   PetscFunctionBegin;
42   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
43   PetscValidType(A,1);
44   MatPreallocated(A);
45   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
46   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
47   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
48   PetscValidType(P,2);
49   MatPreallocated(P);
50   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
51   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
52   PetscValidPointer(C,3);
53 
54   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
55 
56   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
57 
58   /* For now, we do not dispatch based on the type of A and P */
59   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
60   fA = A->ops->ptap;
61   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name);
62   fP = P->ops->ptap;
63   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name);
64   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
65 
66   ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
67   ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr);
68   ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
69   PetscFunctionReturn(0);
70 }
71 
72 EXTERN PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,MatReuse,PetscReal,int,int,Mat*);
73 EXTERN PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,PetscReal,int,int,Mat*);
74 EXTERN PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,int,int,Mat);
75 
76 #undef __FUNCT__
77 #define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ"
78 PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
79 {
80   PetscErrorCode    ierr;
81 
82   PetscFunctionBegin;
83   if (scall == MAT_INITIAL_MATRIX){
84     ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */
85   } else if (scall == MAT_REUSE_MATRIX){
86     ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr);
87   } else {
88     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
89   }
90   PetscFunctionReturn(0);
91 }
92 
93 #undef __FUNCT__
94 #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ"
95 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
96 {
97   PetscErrorCode    ierr;
98   Mat               P_seq,A_loc,C_seq;
99   int               prstart,prend,m=P->m;
100   IS                isrowp,iscolp;
101 
102   PetscFunctionBegin;
103   /* get P_seq = submatrix of P by taking rows of P that equal to nonzero col of A */
104   ierr = MatGetBrowsOfAcols(A,P,MAT_INITIAL_MATRIX,&isrowp,&iscolp,&prstart,&P_seq);CHKERRQ(ierr);
105   ierr = ISDestroy(iscolp);CHKERRQ(ierr);
106 
107   /* get A_loc = submatrix of A by taking all local rows of A */
108   ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,PETSC_NULL,&isrowp,&A_loc);CHKERRQ(ierr);
109   ierr = ISDestroy(isrowp);CHKERRQ(ierr);
110 
111   /* compute C_seq = P_loc^T * A_loc * P_seq */
112   prend   = prstart + m;
113   ierr = MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(A_loc,P_seq,MAT_INITIAL_MATRIX,fill,prstart,prend,&C_seq);CHKERRQ(ierr);
114   ierr = MatDestroy(P_seq);CHKERRQ(ierr);
115   ierr = MatDestroy(A_loc);CHKERRQ(ierr);
116 
117   /* add C_seq into mpi C */
118   ierr = MatMerge_SeqsToMPI(A->comm,C_seq,P->n,P->n,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
119   /* ierr = MatDestroy(C_seq);CHKERRQ(ierr); */
120 
121   PetscFunctionReturn(0);
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ"
126 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C)
127 {
128   PetscErrorCode    ierr;
129   Mat               P_seq,A_loc,C_seq;
130   int               prstart,prend,m=P->m;
131   IS                isrowp,iscolp;
132   Mat_Merge_SeqsToMPI *merge=(Mat_Merge_SeqsToMPI*)C->spptr;
133 
134   PetscFunctionBegin;
135   /* get P_seq = submatrix of P by taking rows of P that equal to nonzero col of A */
136   ierr = MatGetBrowsOfAcols(A,P,MAT_INITIAL_MATRIX,&isrowp,&iscolp,&prstart,&P_seq);CHKERRQ(ierr);
137   ierr = ISDestroy(iscolp);CHKERRQ(ierr);
138 
139   /* get A_loc = submatrix of A by taking all local rows of A */
140   ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,PETSC_NULL,&isrowp,&A_loc);CHKERRQ(ierr);
141   ierr = ISDestroy(isrowp);CHKERRQ(ierr);
142 
143   /* compute C_seq = P_loc^T * A_loc * P_seq */
144   prend = prstart + m;
145   C_seq = merge->C_seq;
146   ierr = MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(A_loc,P_seq,MAT_REUSE_MATRIX,1.0,prstart,prend,&C_seq);CHKERRQ(ierr);
147   ierr = MatDestroy(P_seq);CHKERRQ(ierr);
148   ierr = MatDestroy(A_loc);CHKERRQ(ierr);
149 
150   /* add C_seq into mpi C */
151   ierr = MatMerge_SeqsToMPI(A->comm,C_seq,P->n,P->n,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
152 
153   PetscFunctionReturn(0);
154 }
155 
156 #undef __FUNCT__
157 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ"
158 PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
159 {
160   PetscErrorCode ierr;
161   PetscFunctionBegin;
162   if (scall == MAT_INITIAL_MATRIX){
163     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
164     ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr);
165     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
166   }
167   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
168   ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr);
169   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
170   PetscFunctionReturn(0);
171 }
172 
173 #undef __FUNCT__
174 #define __FUNCT__ "MatPtAPSymbolic"
175 /*
176    MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P
177 
178    Collective on Mat
179 
180    Input Parameters:
181 +  A - the matrix
182 -  P - the projection matrix
183 
184    Output Parameters:
185 .  C - the (i,j) structure of the product matrix
186 
187    Notes:
188    C will be created and must be destroyed by the user with MatDestroy().
189 
190    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
191    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.  The product is computed using
192    this (i,j) structure by calling MatPtAPNumeric().
193 
194    Level: intermediate
195 
196 .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic()
197 */
198 PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C) {
199   PetscErrorCode ierr;
200   PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*);
201   PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*);
202 
203   PetscFunctionBegin;
204 
205   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
206   PetscValidType(A,1);
207   MatPreallocated(A);
208   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
209   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
210 
211   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
212   PetscValidType(P,2);
213   MatPreallocated(P);
214   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
215   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
216 
217   PetscValidPointer(C,3);
218 
219   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
220   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
221 
222   /* For now, we do not dispatch based on the type of A and P */
223   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
224   fA = A->ops->ptapsymbolic;
225   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name);
226   fP = P->ops->ptapsymbolic;
227   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name);
228   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
229 
230   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
231   ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr);
232   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
233 
234   PetscFunctionReturn(0);
235 }
236 
237 typedef struct {
238   Mat    symAP;
239 } Mat_PtAPstruct;
240 
241 EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat);
242 
243 #undef __FUNCT__
244 #define __FUNCT__ "MatDestroy_SeqAIJ_PtAP"
245 PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A)
246 {
247   PetscErrorCode    ierr;
248   Mat_PtAPstruct    *ptap=(Mat_PtAPstruct*)A->spptr;
249 
250   PetscFunctionBegin;
251   ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr);
252   ierr = PetscFree(ptap);CHKERRQ(ierr);
253   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
254   PetscFunctionReturn(0);
255 }
256 
257 #undef __FUNCT__
258 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ"
259 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C) {
260   PetscErrorCode ierr;
261   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
262   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
263   int            *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
264   int            *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
265   int            an=A->N,am=A->M,pn=P->N,pm=P->M;
266   int            i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
267   MatScalar      *ca;
268 
269   PetscFunctionBegin;
270   /* Get ij structure of P^T */
271   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
272   ptJ=ptj;
273 
274   /* Allocate ci array, arrays for fill computation and */
275   /* free space for accumulating nonzero column info */
276   ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr);
277   ci[0] = 0;
278 
279   ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr);
280   ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr);
281   ptasparserow = ptadenserow  + an;
282   denserow     = ptasparserow + an;
283   sparserow    = denserow     + pn;
284 
285   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
286   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
287   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
288   current_space = free_space;
289 
290   /* Determine symbolic info for each row of C: */
291   for (i=0;i<pn;i++) {
292     ptnzi  = pti[i+1] - pti[i];
293     ptanzi = 0;
294     /* Determine symbolic row of PtA: */
295     for (j=0;j<ptnzi;j++) {
296       arow = *ptJ++;
297       anzj = ai[arow+1] - ai[arow];
298       ajj  = aj + ai[arow];
299       for (k=0;k<anzj;k++) {
300         if (!ptadenserow[ajj[k]]) {
301           ptadenserow[ajj[k]]    = -1;
302           ptasparserow[ptanzi++] = ajj[k];
303         }
304       }
305     }
306       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
307     ptaj = ptasparserow;
308     cnzi   = 0;
309     for (j=0;j<ptanzi;j++) {
310       prow = *ptaj++;
311       pnzj = pi[prow+1] - pi[prow];
312       pjj  = pj + pi[prow];
313       for (k=0;k<pnzj;k++) {
314         if (!denserow[pjj[k]]) {
315             denserow[pjj[k]]  = -1;
316             sparserow[cnzi++] = pjj[k];
317         }
318       }
319     }
320 
321     /* sort sparserow */
322     ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
323 
324     /* If free space is not available, make more free space */
325     /* Double the amount of total space in the list */
326     if (current_space->local_remaining<cnzi) {
327       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
328     }
329 
330     /* Copy data into free space, and zero out denserows */
331     ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr);
332     current_space->array           += cnzi;
333     current_space->local_used      += cnzi;
334     current_space->local_remaining -= cnzi;
335 
336     for (j=0;j<ptanzi;j++) {
337       ptadenserow[ptasparserow[j]] = 0;
338     }
339     for (j=0;j<cnzi;j++) {
340       denserow[sparserow[j]] = 0;
341     }
342       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
343       /*        For now, we will recompute what is needed. */
344     ci[i+1] = ci[i] + cnzi;
345   }
346   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
347   /* Allocate space for cj, initialize cj, and */
348   /* destroy list of free space and other temporary array(s) */
349   ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr);
350   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
351   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
352 
353   /* Allocate space for ca */
354   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
355   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
356 
357   /* put together the new matrix */
358   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
359 
360   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
361   /* Since these are PETSc arrays, change flags to free them as necessary. */
362   c = (Mat_SeqAIJ *)((*C)->data);
363   c->freedata = PETSC_TRUE;
364   c->nonew    = 0;
365 
366   /* Clean up. */
367   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
368 
369   PetscFunctionReturn(0);
370 }
371 
372 #include "src/mat/impls/maij/maij.h"
373 EXTERN_C_BEGIN
374 #undef __FUNCT__
375 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
376 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C) {
377   /* This routine requires testing -- I don't think it works. */
378   PetscErrorCode ierr;
379   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
380   Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
381   Mat            P=pp->AIJ;
382   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
383   int            *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
384   int            *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
385   int            an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof;
386   int            i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
387   MatScalar      *ca;
388 
389   PetscFunctionBegin;
390   /* Start timer */
391   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
392 
393   /* Get ij structure of P^T */
394   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
395 
396   /* Allocate ci array, arrays for fill computation and */
397   /* free space for accumulating nonzero column info */
398   ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr);
399   ci[0] = 0;
400 
401   ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr);
402   ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr);
403   ptasparserow = ptadenserow  + an;
404   denserow     = ptasparserow + an;
405   sparserow    = denserow     + pn;
406 
407   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
408   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
409   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
410   current_space = free_space;
411 
412   /* Determine symbolic info for each row of C: */
413   for (i=0;i<pn/ppdof;i++) {
414     ptnzi  = pti[i+1] - pti[i];
415     ptanzi = 0;
416     ptJ    = ptj + pti[i];
417     for (dof=0;dof<ppdof;dof++) {
418     /* Determine symbolic row of PtA: */
419       for (j=0;j<ptnzi;j++) {
420         arow = ptJ[j] + dof;
421         anzj = ai[arow+1] - ai[arow];
422         ajj  = aj + ai[arow];
423         for (k=0;k<anzj;k++) {
424           if (!ptadenserow[ajj[k]]) {
425             ptadenserow[ajj[k]]    = -1;
426             ptasparserow[ptanzi++] = ajj[k];
427           }
428         }
429       }
430       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
431       ptaj = ptasparserow;
432       cnzi   = 0;
433       for (j=0;j<ptanzi;j++) {
434         pdof = *ptaj%dof;
435         prow = (*ptaj++)/dof;
436         pnzj = pi[prow+1] - pi[prow];
437         pjj  = pj + pi[prow];
438         for (k=0;k<pnzj;k++) {
439           if (!denserow[pjj[k]+pdof]) {
440             denserow[pjj[k]+pdof] = -1;
441             sparserow[cnzi++]     = pjj[k]+pdof;
442           }
443         }
444       }
445 
446       /* sort sparserow */
447       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
448 
449       /* If free space is not available, make more free space */
450       /* Double the amount of total space in the list */
451       if (current_space->local_remaining<cnzi) {
452         ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
453       }
454 
455       /* Copy data into free space, and zero out denserows */
456       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr);
457       current_space->array           += cnzi;
458       current_space->local_used      += cnzi;
459       current_space->local_remaining -= cnzi;
460 
461       for (j=0;j<ptanzi;j++) {
462         ptadenserow[ptasparserow[j]] = 0;
463       }
464       for (j=0;j<cnzi;j++) {
465         denserow[sparserow[j]] = 0;
466       }
467       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
468       /*        For now, we will recompute what is needed. */
469       ci[i+1+dof] = ci[i+dof] + cnzi;
470     }
471   }
472   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
473   /* Allocate space for cj, initialize cj, and */
474   /* destroy list of free space and other temporary array(s) */
475   ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr);
476   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
477   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
478 
479   /* Allocate space for ca */
480   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
481   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
482 
483   /* put together the new matrix */
484   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
485 
486   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
487   /* Since these are PETSc arrays, change flags to free them as necessary. */
488   c = (Mat_SeqAIJ *)((*C)->data);
489   c->freedata = PETSC_TRUE;
490   c->nonew    = 0;
491 
492   /* Clean up. */
493   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
494 
495   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
496   PetscFunctionReturn(0);
497 }
498 EXTERN_C_END
499 
500 #undef __FUNCT__
501 #define __FUNCT__ "MatPtAPNumeric"
502 /*
503    MatPtAPNumeric - Computes the matrix projection C = P^T * A * P
504 
505    Collective on Mat
506 
507    Input Parameters:
508 +  A - the matrix
509 -  P - the projection matrix
510 
511    Output Parameters:
512 .  C - the product matrix
513 
514    Notes:
515    C must have been created by calling MatPtAPSymbolic and must be destroyed by
516    the user using MatDeatroy().
517 
518    This routine is currently only implemented for pairs of AIJ matrices and classes
519    which inherit from AIJ.  C will be of type MATAIJ.
520 
521    Level: intermediate
522 
523 .seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric()
524 */
525 PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C) {
526   PetscErrorCode ierr;
527   PetscErrorCode (*fA)(Mat,Mat,Mat);
528   PetscErrorCode (*fP)(Mat,Mat,Mat);
529 
530   PetscFunctionBegin;
531 
532   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
533   PetscValidType(A,1);
534   MatPreallocated(A);
535   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
536   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
537 
538   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
539   PetscValidType(P,2);
540   MatPreallocated(P);
541   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
542   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
543 
544   PetscValidHeaderSpecific(C,MAT_COOKIE,3);
545   PetscValidType(C,3);
546   MatPreallocated(C);
547   if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
548   if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
549 
550   if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M);
551   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
552   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
553   if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N);
554 
555   /* For now, we do not dispatch based on the type of A and P */
556   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
557   fA = A->ops->ptapnumeric;
558   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name);
559   fP = P->ops->ptapnumeric;
560   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name);
561   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
562 
563   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
564   ierr = (*fA)(A,P,C);CHKERRQ(ierr);
565   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
566 
567   PetscFunctionReturn(0);
568 }
569 
570 #undef __FUNCT__
571 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ"
572 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C)
573 {
574   PetscErrorCode ierr;
575   int            flops=0;
576   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
577   Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
578   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
579   int            *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
580   int            *ci=c->i,*cj=c->j,*cjj;
581   int            am=A->M,cn=C->N,cm=C->M;
582   int            i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
583   MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
584 
585   PetscFunctionBegin;
586   /* Allocate temporary array for storage of one row of A*P */
587   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(int)),&apa);CHKERRQ(ierr);
588   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr);
589 
590   apj      = (int *)(apa + cn);
591   apjdense = apj + cn;
592 
593   /* Clear old values in C */
594   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
595 
596   for (i=0;i<am;i++) {
597     /* Form sparse row of A*P */
598     anzi  = ai[i+1] - ai[i];
599     apnzj = 0;
600     for (j=0;j<anzi;j++) {
601       prow = *aj++;
602       pnzj = pi[prow+1] - pi[prow];
603       pjj  = pj + pi[prow];
604       paj  = pa + pi[prow];
605       for (k=0;k<pnzj;k++) {
606         if (!apjdense[pjj[k]]) {
607           apjdense[pjj[k]] = -1;
608           apj[apnzj++]     = pjj[k];
609         }
610         apa[pjj[k]] += (*aa)*paj[k];
611       }
612       flops += 2*pnzj;
613       aa++;
614     }
615 
616     /* Sort the j index array for quick sparse axpy. */
617     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
618 
619     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
620     pnzi = pi[i+1] - pi[i];
621     for (j=0;j<pnzi;j++) {
622       nextap = 0;
623       crow   = *pJ++;
624       cjj    = cj + ci[crow];
625       caj    = ca + ci[crow];
626       /* Perform sparse axpy operation.  Note cjj includes apj. */
627       for (k=0;nextap<apnzj;k++) {
628         if (cjj[k]==apj[nextap]) {
629           caj[k] += (*pA)*apa[apj[nextap++]];
630         }
631       }
632       flops += 2*apnzj;
633       pA++;
634     }
635 
636     /* Zero the current row info for A*P */
637     for (j=0;j<apnzj;j++) {
638       apa[apj[j]]      = 0.;
639       apjdense[apj[j]] = 0;
640     }
641   }
642 
643   /* Assemble the final matrix and clean up */
644   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
645   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
646   ierr = PetscFree(apa);CHKERRQ(ierr);
647   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
648 
649   PetscFunctionReturn(0);
650 }
651 
652 /* Compute C = P[rstart:rend,:]^T * A * P of seqaij matrices - used by MatPtAP_MPIAIJ_MPIAIJ() */
653 
654 #undef __FUNCT__
655 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt"
656 /*@
657    MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt - Creates C = P[rstart:rend,:]^T * A * P of seqaij matrices,
658                 used by MatPtAP_MPIAIJ_MPIAIJ()
659 
660    Collective on Mat
661 
662    Input Parameters:
663 +  A - the matrix in seqaij format
664 .  P - the projection matrix in seqaij format
665 .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
666 .  fill - expected fill (not being used when scall==MAT_REUSE_MATRIX)
667 .  prstart, prend - the starting and ending-1-th row of the matrix P to be used for transpose
668 
669    Output Parameters:
670 .  C - the product matrix in seqaij format
671 
672    Level: developer
673 
674 .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult()
675 @*/
676 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,int prstart,int prend,Mat C)
677 {
678   PetscErrorCode ierr;
679   int            flops=0;
680   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
681   Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
682   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
683   int            *ai=a->i,*aj=a->j,*apj,*apjdense;
684   int            *pi=p->i,*pj=p->j,*pJ=p->j+pi[prstart],*pjj;
685   int            *ci=c->i,*cj=c->j,*cjj;
686   int            am=A->M,cn=C->N,cm=C->M;
687   int            i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
688   MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
689 
690   PetscFunctionBegin;
691   pA=p->a+pi[prstart];
692   /* Allocate temporary array for storage of one row of A*P */
693   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(int)),&apa);CHKERRQ(ierr);
694   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr);
695 
696   apj      = (int *)(apa + cn);
697   apjdense = apj + cn;
698 
699   /* Clear old values in C */
700   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
701 
702   for (i=0;i<am;i++) {
703     /* Form sparse row of A*P */
704     anzi  = ai[i+1] - ai[i];
705     apnzj = 0;
706     for (j=0;j<anzi;j++) {
707       prow = *aj++;
708       pnzj = pi[prow+1] - pi[prow];
709       pjj  = pj + pi[prow];
710       paj  = pa + pi[prow];
711       for (k=0;k<pnzj;k++) {
712         if (!apjdense[pjj[k]]) {
713           apjdense[pjj[k]] = -1;
714           apj[apnzj++]     = pjj[k];
715         }
716         apa[pjj[k]] += (*aa)*paj[k];
717       }
718       flops += 2*pnzj;
719       aa++;
720     }
721 
722     /* Sort the j index array for quick sparse axpy. */
723     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
724 
725     /* Compute P[[prstart:prend,:]^T*A*P using outer product (P^T)[:,j+prstart]*(A*P)[j,:]. */
726     pnzi = pi[i+1+prstart] - pi[i+prstart];
727     for (j=0;j<pnzi;j++) {
728       nextap = 0;
729       crow   = *pJ++;
730       cjj    = cj + ci[crow];
731       caj    = ca + ci[crow];
732       /* Perform sparse axpy operation.  Note cjj includes apj. */
733       for (k=0;nextap<apnzj;k++) {
734         if (cjj[k]==apj[nextap]) {
735           caj[k] += (*pA)*apa[apj[nextap++]];
736         }
737       }
738       flops += 2*apnzj;
739       pA++;
740     }
741 
742     /* Zero the current row info for A*P */
743     for (j=0;j<apnzj;j++) {
744       apa[apj[j]]      = 0.;
745       apjdense[apj[j]] = 0;
746     }
747   }
748 
749   /* Assemble the final matrix and clean up */
750   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
751   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
752   ierr = PetscFree(apa);CHKERRQ(ierr);
753   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
754 
755   PetscFunctionReturn(0);
756 }
757 
758 #undef __FUNCT__
759 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt"
760 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,PetscReal fill,int prstart,int prend,Mat *C) {
761   PetscErrorCode ierr;
762   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
763   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
764   int            *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
765   int            *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
766   int            an=A->N,am=A->M,pn=P->N,pm=P->M;
767   int            i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
768   MatScalar      *ca;
769   Mat            *psub,P_sub;
770   IS             isrow,iscol;
771   int            m = prend - prstart;
772 
773   PetscFunctionBegin;
774   /* Get ij structure of P[rstart:rend,:]^T */
775   ierr = ISCreateStride(PETSC_COMM_SELF,m,prstart,1,&isrow);CHKERRQ(ierr);
776   ierr = ISCreateStride(PETSC_COMM_SELF,P->n,0,1,&iscol);CHKERRQ(ierr);
777   ierr = MatGetSubMatrices(P,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&psub);CHKERRQ(ierr);
778   ierr = ISDestroy(isrow);CHKERRQ(ierr);
779   ierr = ISDestroy(iscol);CHKERRQ(ierr);
780   P_sub = psub[0];
781   ierr = MatGetSymbolicTranspose_SeqAIJ(P_sub,&pti,&ptj);CHKERRQ(ierr);
782   ierr = MatDestroyMatrices(1,&psub);CHKERRQ(ierr);
783   ptJ=ptj;
784 
785   /* Allocate ci array, arrays for fill computation and */
786   /* free space for accumulating nonzero column info */
787   ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr);
788   ci[0] = 0;
789 
790   ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr);
791   ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr);
792   ptasparserow = ptadenserow  + an;
793   denserow     = ptasparserow + an;
794   sparserow    = denserow     + pn;
795 
796   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
797   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
798   ierr          = GetMoreSpace((int)(fill*ai[am]/pm)*pn,&free_space);
799   current_space = free_space;
800 
801   /* Determine symbolic info for each row of C: */
802   for (i=0;i<pn;i++) {
803     ptnzi  = pti[i+1] - pti[i];
804     ptanzi = 0;
805     /* Determine symbolic row of PtA_reduced: */
806     for (j=0;j<ptnzi;j++) {
807       arow = *ptJ++;
808       anzj = ai[arow+1] - ai[arow];
809       ajj  = aj + ai[arow];
810       for (k=0;k<anzj;k++) {
811         if (!ptadenserow[ajj[k]]) {
812           ptadenserow[ajj[k]]    = -1;
813           ptasparserow[ptanzi++] = ajj[k];
814         }
815       }
816     }
817       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
818     ptaj = ptasparserow;
819     cnzi   = 0;
820     for (j=0;j<ptanzi;j++) {
821       prow = *ptaj++;
822       pnzj = pi[prow+1] - pi[prow];
823       pjj  = pj + pi[prow];
824       for (k=0;k<pnzj;k++) {
825         if (!denserow[pjj[k]]) {
826             denserow[pjj[k]]  = -1;
827             sparserow[cnzi++] = pjj[k];
828         }
829       }
830     }
831 
832     /* sort sparserow */
833     ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
834 
835     /* If free space is not available, make more free space */
836     /* Double the amount of total space in the list */
837     if (current_space->local_remaining<cnzi) {
838       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
839     }
840 
841     /* Copy data into free space, and zero out denserows */
842     ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr);
843     current_space->array           += cnzi;
844     current_space->local_used      += cnzi;
845     current_space->local_remaining -= cnzi;
846 
847     for (j=0;j<ptanzi;j++) {
848       ptadenserow[ptasparserow[j]] = 0;
849     }
850     for (j=0;j<cnzi;j++) {
851       denserow[sparserow[j]] = 0;
852     }
853       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
854       /*        For now, we will recompute what is needed. */
855     ci[i+1] = ci[i] + cnzi;
856   }
857   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
858   /* Allocate space for cj, initialize cj, and */
859   /* destroy list of free space and other temporary array(s) */
860   ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr);
861   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
862   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
863 
864   /* Allocate space for ca */
865   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
866   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
867 
868   /* put together the new matrix */
869   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
870 
871   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
872   /* Since these are PETSc arrays, change flags to free them as necessary. */
873   c = (Mat_SeqAIJ *)((*C)->data);
874   c->freedata = PETSC_TRUE;
875   c->nonew    = 0;
876 
877   /* Clean up. */
878   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
879 
880   PetscFunctionReturn(0);
881 }
882 
883 #undef __FUNCT__
884 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ_ReducedPt"
885 PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,MatReuse scall,PetscReal fill,int prstart,int prend,Mat *C)
886 {
887   PetscErrorCode ierr;
888   PetscFunctionBegin;
889   if (A->m != prend-prstart) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->m,prend-prstart);
890   if (prend-prstart > P->m) SETERRQ2(PETSC_ERR_ARG_SIZ," prend-prstart %d cannot be larger than P->m %d",prend-prstart,P->m);
891   if (scall == MAT_INITIAL_MATRIX){
892     ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(A,P,fill,prstart,prend,C);CHKERRQ(ierr);
893   }
894 
895   ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(A,P,prstart,prend,*C);CHKERRQ(ierr);
896 
897   PetscFunctionReturn(0);
898 }
899 
900