xref: /petsc/src/mat/impls/aij/seq/matptap.c (revision 22e3da613d90ee96afe3e759fd76128ad75bdda0)
1 /*
2   Defines projective product routines where A is a AIJ matrix
3           C = P^T * A * P
4 */
5 
6 #include "src/mat/impls/aij/seq/aij.h"   /*I "petscmat.h" I*/
7 #include "src/mat/utils/freespace.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 #include "petscbt.h"
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatPtAP"
13 /*@
14    MatPtAP - Creates the matrix projection C = P^T * A * P
15 
16    Collective on Mat
17 
18    Input Parameters:
19 +  A - the matrix
20 .  P - the projection matrix
21 .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
22 -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P))
23 
24    Output Parameters:
25 .  C - the product matrix
26 
27    Notes:
28    C will be created and must be destroyed by the user with MatDestroy().
29 
30    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
31    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
32 
33    Level: intermediate
34 
35 .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult()
36 @*/
37 PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
38 {
39   PetscErrorCode ierr;
40   PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *);
41   PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *);
42 
43   PetscFunctionBegin;
44   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
45   PetscValidType(A,1);
46   MatPreallocated(A);
47   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
48   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
49   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
50   PetscValidType(P,2);
51   MatPreallocated(P);
52   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
53   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
54   PetscValidPointer(C,3);
55 
56   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
57 
58   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
59 
60   /* For now, we do not dispatch based on the type of A and P */
61   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
62   fA = A->ops->ptap;
63   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name);
64   fP = P->ops->ptap;
65   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name);
66   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
67 
68   ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
69   ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr);
70   ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 
74 EXTERN PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,MatReuse,PetscReal,PetscInt,PetscInt,Mat*);
75 EXTERN PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,PetscReal,PetscInt,PetscInt,Mat*);
76 EXTERN PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,PetscInt,PetscInt,Mat);
77 
78 EXTERN PetscErrorCode MatPtAP_MPIAIJ_SeqAIJ_ReducedPt(Mat,Mat,MatReuse,PetscReal,PetscInt,PetscInt,Mat*);
79 EXTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_SeqAIJ_ReducedPt(Mat,Mat,PetscReal,PetscInt,PetscInt,Mat*);
80 EXTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_SeqAIJ_ReducedPt(Mat,Mat,PetscInt,PetscInt,Mat);
81 
82 #undef __FUNCT__
83 #define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ"
84 PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
85 {
86   PetscErrorCode    ierr;
87 
88   PetscFunctionBegin;
89   if (scall == MAT_INITIAL_MATRIX){
90     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
91     ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */
92     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
93   } else if (scall == MAT_REUSE_MATRIX){
94     ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
95     ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr);
96     ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
97   } else {
98     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
99   }
100   PetscFunctionReturn(0);
101 }
102 
103 #undef __FUNCT__
104 #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ"
105 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
106 {
107   PetscErrorCode    ierr;
108   Mat               P_seq,C_seq; /* A_loc */
109   PetscInt          prstart,prend,m=P->m;
110   /* IS                isrowp,iscolp; */
111 
112   PetscFunctionBegin;
113   /* get P_seq = submatrix of P by taking rows of P that equal to nonzero col of A */
114   ierr = MatGetBrowsOfAcols(A,P,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&prstart,&P_seq);CHKERRQ(ierr);
115   /*
116   ierr = MatGetBrowsOfAcols(A,P,MAT_INITIAL_MATRIX,&isrowp,&iscolp,&prstart,&P_seq);CHKERRQ(ierr);
117   ierr = ISDestroy(iscolp);CHKERRQ(ierr); */
118 
119   /* get A_loc = submatrix of A by taking all local rows of A */
120   /*
121   ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,PETSC_NULL,&isrowp,&A_loc);CHKERRQ(ierr);
122   */
123   /* ierr = ISDestroy(isrowp);CHKERRQ(ierr); */
124 
125   /* compute C_seq = P_loc^T * A_loc * P_seq */
126   prend   = prstart + m;
127   ierr = MatPtAP_MPIAIJ_SeqAIJ_ReducedPt(A,P_seq,MAT_INITIAL_MATRIX,fill,prstart,prend,&C_seq);CHKERRQ(ierr);
128   /*
129   ierr = MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(A_loc,P_seq,MAT_INITIAL_MATRIX,fill,prstart,prend,&C_seq);CHKERRQ(ierr);
130   */
131   ierr = MatDestroy(P_seq);CHKERRQ(ierr);
132   /* ierr = MatDestroy(A_loc);CHKERRQ(ierr); */
133 
134   /* add C_seq into mpi C */
135   ierr = MatMerge_SeqsToMPI(A->comm,C_seq,P->n,P->n,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
136 
137   PetscFunctionReturn(0);
138 }
139 
140 #undef __FUNCT__
141 #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ"
142 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C)
143 {
144   PetscErrorCode       ierr;
145   Mat                  P_seq,A_loc,C_seq;
146   PetscInt             prstart,prend,m=P->m;
147   IS                   isrowp,iscolp;
148   Mat_Merge_SeqsToMPI  *merge;
149   PetscObjectContainer container;
150 
151   PetscFunctionBegin;
152   ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
153   if (container) {
154     ierr  = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
155   } else {
156     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix C does not posses and object container");
157   }
158 
159   /* get P_seq = submatrix of P by taking rows of P that equal to nonzero col of A */
160   ierr = MatGetBrowsOfAcols(A,P,MAT_INITIAL_MATRIX,&isrowp,&iscolp,&prstart,&P_seq);CHKERRQ(ierr);
161   ierr = ISDestroy(iscolp);CHKERRQ(ierr);
162 
163   /* get A_loc = submatrix of A by taking all local rows of A */
164   ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,PETSC_NULL,&isrowp,&A_loc);CHKERRQ(ierr);
165   ierr = ISDestroy(isrowp);CHKERRQ(ierr);
166 
167   /* compute C_seq = P_loc^T * A_loc * P_seq */
168   prend = prstart + m;
169   C_seq = merge->C_seq;
170   ierr = MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(A_loc,P_seq,MAT_REUSE_MATRIX,1.0,prstart,prend,&C_seq);CHKERRQ(ierr);
171   ierr = MatDestroy(P_seq);CHKERRQ(ierr);
172   ierr = MatDestroy(A_loc);CHKERRQ(ierr);
173 
174   /* add C_seq into mpi C */
175   ierr = MatMerge_SeqsToMPI(A->comm,C_seq,P->n,P->n,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
176 
177   PetscFunctionReturn(0);
178 }
179 
180 #undef __FUNCT__
181 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ"
182 PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
183 {
184   PetscErrorCode ierr;
185 
186   PetscFunctionBegin;
187   if (scall == MAT_INITIAL_MATRIX){
188     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
189     ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr);
190     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
191   }
192   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
193   ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr);
194   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
195   PetscFunctionReturn(0);
196 }
197 
198 #undef __FUNCT__
199 #define __FUNCT__ "MatPtAPSymbolic"
200 /*
201    MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P
202 
203    Collective on Mat
204 
205    Input Parameters:
206 +  A - the matrix
207 -  P - the projection matrix
208 
209    Output Parameters:
210 .  C - the (i,j) structure of the product matrix
211 
212    Notes:
213    C will be created and must be destroyed by the user with MatDestroy().
214 
215    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
216    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.  The product is computed using
217    this (i,j) structure by calling MatPtAPNumeric().
218 
219    Level: intermediate
220 
221 .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic()
222 */
223 PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C)
224 {
225   PetscErrorCode ierr;
226   PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*);
227   PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*);
228 
229   PetscFunctionBegin;
230 
231   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
232   PetscValidType(A,1);
233   MatPreallocated(A);
234   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
235   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
236 
237   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
238   PetscValidType(P,2);
239   MatPreallocated(P);
240   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
241   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
242 
243   PetscValidPointer(C,3);
244 
245   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
246   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
247 
248   /* For now, we do not dispatch based on the type of A and P */
249   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
250   fA = A->ops->ptapsymbolic;
251   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name);
252   fP = P->ops->ptapsymbolic;
253   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name);
254   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
255 
256   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
257   ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr);
258   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
259 
260   PetscFunctionReturn(0);
261 }
262 
263 typedef struct {
264   Mat    symAP;
265 } Mat_PtAPstruct;
266 
267 EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat);
268 
269 #undef __FUNCT__
270 #define __FUNCT__ "MatDestroy_SeqAIJ_PtAP"
271 PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A)
272 {
273   PetscErrorCode    ierr;
274   Mat_PtAPstruct    *ptap=(Mat_PtAPstruct*)A->spptr;
275 
276   PetscFunctionBegin;
277   ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr);
278   ierr = PetscFree(ptap);CHKERRQ(ierr);
279   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
280   PetscFunctionReturn(0);
281 }
282 
283 #undef __FUNCT__
284 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ"
285 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
286 {
287   PetscErrorCode ierr;
288   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
289   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*p = (Mat_SeqAIJ*)P->data,*c;
290   PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
291   PetscInt       *ci,*cj,*ptadenserow,*ptasparserow,*ptaj;
292   PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M;
293   PetscInt       i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk;
294   MatScalar      *ca;
295   PetscBT        lnkbt;
296 
297   PetscFunctionBegin;
298   /* Get ij structure of P^T */
299   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
300   ptJ=ptj;
301 
302   /* Allocate ci array, arrays for fill computation and */
303   /* free space for accumulating nonzero column info */
304   ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
305   ci[0] = 0;
306 
307   ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
308   ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
309   ptasparserow = ptadenserow  + an;
310 
311   /* create and initialize a linked list */
312   nlnk = pn+1;
313   ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
314 
315   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
316   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
317   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
318   current_space = free_space;
319 
320   /* Determine symbolic info for each row of C: */
321   for (i=0;i<pn;i++) {
322     ptnzi  = pti[i+1] - pti[i];
323     ptanzi = 0;
324     /* Determine symbolic row of PtA: */
325     for (j=0;j<ptnzi;j++) {
326       arow = *ptJ++;
327       anzj = ai[arow+1] - ai[arow];
328       ajj  = aj + ai[arow];
329       for (k=0;k<anzj;k++) {
330         if (!ptadenserow[ajj[k]]) {
331           ptadenserow[ajj[k]]    = -1;
332           ptasparserow[ptanzi++] = ajj[k];
333         }
334       }
335     }
336       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
337     ptaj = ptasparserow;
338     cnzi   = 0;
339     for (j=0;j<ptanzi;j++) {
340       prow = *ptaj++;
341       pnzj = pi[prow+1] - pi[prow];
342       pjj  = pj + pi[prow];
343       /* add non-zero cols of P into the sorted linked list lnk */
344       ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
345       cnzi += nlnk;
346     }
347 
348     /* If free space is not available, make more free space */
349     /* Double the amount of total space in the list */
350     if (current_space->local_remaining<cnzi) {
351       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
352     }
353 
354     /* Copy data into free space, and zero out denserows */
355     ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
356     current_space->array           += cnzi;
357     current_space->local_used      += cnzi;
358     current_space->local_remaining -= cnzi;
359 
360     for (j=0;j<ptanzi;j++) {
361       ptadenserow[ptasparserow[j]] = 0;
362     }
363     /* Aside: Perhaps we should save the pta info for the numerical factorization. */
364     /*        For now, we will recompute what is needed. */
365     ci[i+1] = ci[i] + cnzi;
366   }
367   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
368   /* Allocate space for cj, initialize cj, and */
369   /* destroy list of free space and other temporary array(s) */
370   ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
371   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
372   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
373   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
374 
375   /* Allocate space for ca */
376   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
377   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
378 
379   /* put together the new matrix */
380   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
381 
382   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
383   /* Since these are PETSc arrays, change flags to free them as necessary. */
384   c = (Mat_SeqAIJ *)((*C)->data);
385   c->freedata = PETSC_TRUE;
386   c->nonew    = 0;
387 
388   /* Clean up. */
389   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
390 
391   PetscFunctionReturn(0);
392 }
393 
394 #include "src/mat/impls/maij/maij.h"
395 EXTERN_C_BEGIN
396 #undef __FUNCT__
397 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
398 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C)
399 {
400   /* This routine requires testing -- I don't think it works. */
401   PetscErrorCode ierr;
402   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
403   Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
404   Mat            P=pp->AIJ;
405   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
406   PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
407   PetscInt       *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
408   PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof;
409   PetscInt       i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
410   MatScalar      *ca;
411 
412   PetscFunctionBegin;
413   /* Start timer */
414   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
415 
416   /* Get ij structure of P^T */
417   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
418 
419   /* Allocate ci array, arrays for fill computation and */
420   /* free space for accumulating nonzero column info */
421   ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
422   ci[0] = 0;
423 
424   ierr = PetscMalloc((2*pn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
425   ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
426   ptasparserow = ptadenserow  + an;
427   denserow     = ptasparserow + an;
428   sparserow    = denserow     + pn;
429 
430   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
431   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
432   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
433   current_space = free_space;
434 
435   /* Determine symbolic info for each row of C: */
436   for (i=0;i<pn/ppdof;i++) {
437     ptnzi  = pti[i+1] - pti[i];
438     ptanzi = 0;
439     ptJ    = ptj + pti[i];
440     for (dof=0;dof<ppdof;dof++) {
441     /* Determine symbolic row of PtA: */
442       for (j=0;j<ptnzi;j++) {
443         arow = ptJ[j] + dof;
444         anzj = ai[arow+1] - ai[arow];
445         ajj  = aj + ai[arow];
446         for (k=0;k<anzj;k++) {
447           if (!ptadenserow[ajj[k]]) {
448             ptadenserow[ajj[k]]    = -1;
449             ptasparserow[ptanzi++] = ajj[k];
450           }
451         }
452       }
453       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
454       ptaj = ptasparserow;
455       cnzi   = 0;
456       for (j=0;j<ptanzi;j++) {
457         pdof = *ptaj%dof;
458         prow = (*ptaj++)/dof;
459         pnzj = pi[prow+1] - pi[prow];
460         pjj  = pj + pi[prow];
461         for (k=0;k<pnzj;k++) {
462           if (!denserow[pjj[k]+pdof]) {
463             denserow[pjj[k]+pdof] = -1;
464             sparserow[cnzi++]     = pjj[k]+pdof;
465           }
466         }
467       }
468 
469       /* sort sparserow */
470       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
471 
472       /* If free space is not available, make more free space */
473       /* Double the amount of total space in the list */
474       if (current_space->local_remaining<cnzi) {
475         ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
476       }
477 
478       /* Copy data into free space, and zero out denserows */
479       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
480       current_space->array           += cnzi;
481       current_space->local_used      += cnzi;
482       current_space->local_remaining -= cnzi;
483 
484       for (j=0;j<ptanzi;j++) {
485         ptadenserow[ptasparserow[j]] = 0;
486       }
487       for (j=0;j<cnzi;j++) {
488         denserow[sparserow[j]] = 0;
489       }
490       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
491       /*        For now, we will recompute what is needed. */
492       ci[i+1+dof] = ci[i+dof] + cnzi;
493     }
494   }
495   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
496   /* Allocate space for cj, initialize cj, and */
497   /* destroy list of free space and other temporary array(s) */
498   ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
499   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
500   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
501 
502   /* Allocate space for ca */
503   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
504   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
505 
506   /* put together the new matrix */
507   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
508 
509   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
510   /* Since these are PETSc arrays, change flags to free them as necessary. */
511   c = (Mat_SeqAIJ *)((*C)->data);
512   c->freedata = PETSC_TRUE;
513   c->nonew    = 0;
514 
515   /* Clean up. */
516   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
517 
518   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
519   PetscFunctionReturn(0);
520 }
521 EXTERN_C_END
522 
523 #undef __FUNCT__
524 #define __FUNCT__ "MatPtAPNumeric"
525 /*
526    MatPtAPNumeric - Computes the matrix projection C = P^T * A * P
527 
528    Collective on Mat
529 
530    Input Parameters:
531 +  A - the matrix
532 -  P - the projection matrix
533 
534    Output Parameters:
535 .  C - the product matrix
536 
537    Notes:
538    C must have been created by calling MatPtAPSymbolic and must be destroyed by
539    the user using MatDeatroy().
540 
541    This routine is currently only implemented for pairs of AIJ matrices and classes
542    which inherit from AIJ.  C will be of type MATAIJ.
543 
544    Level: intermediate
545 
546 .seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric()
547 */
548 PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C)
549 {
550   PetscErrorCode ierr;
551   PetscErrorCode (*fA)(Mat,Mat,Mat);
552   PetscErrorCode (*fP)(Mat,Mat,Mat);
553 
554   PetscFunctionBegin;
555 
556   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
557   PetscValidType(A,1);
558   MatPreallocated(A);
559   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
560   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
561 
562   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
563   PetscValidType(P,2);
564   MatPreallocated(P);
565   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
566   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
567 
568   PetscValidHeaderSpecific(C,MAT_COOKIE,3);
569   PetscValidType(C,3);
570   MatPreallocated(C);
571   if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
572   if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
573 
574   if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M);
575   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
576   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
577   if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N);
578 
579   /* For now, we do not dispatch based on the type of A and P */
580   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
581   fA = A->ops->ptapnumeric;
582   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name);
583   fP = P->ops->ptapnumeric;
584   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name);
585   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
586 
587   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
588   ierr = (*fA)(A,P,C);CHKERRQ(ierr);
589   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
590 
591   PetscFunctionReturn(0);
592 }
593 
594 #undef __FUNCT__
595 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ"
596 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C)
597 {
598   PetscErrorCode ierr;
599   PetscInt       flops=0;
600   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
601   Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
602   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
603   PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
604   PetscInt       *ci=c->i,*cj=c->j,*cjj;
605   PetscInt       am=A->M,cn=C->N,cm=C->M;
606   PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
607   MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
608 
609   PetscFunctionBegin;
610   /* Allocate temporary array for storage of one row of A*P */
611   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
612   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
613 
614   apj      = (PetscInt *)(apa + cn);
615   apjdense = apj + cn;
616 
617   /* Clear old values in C */
618   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
619 
620   for (i=0;i<am;i++) {
621     /* Form sparse row of A*P */
622     anzi  = ai[i+1] - ai[i];
623     apnzj = 0;
624     for (j=0;j<anzi;j++) {
625       prow = *aj++;
626       pnzj = pi[prow+1] - pi[prow];
627       pjj  = pj + pi[prow];
628       paj  = pa + pi[prow];
629       for (k=0;k<pnzj;k++) {
630         if (!apjdense[pjj[k]]) {
631           apjdense[pjj[k]] = -1;
632           apj[apnzj++]     = pjj[k];
633         }
634         apa[pjj[k]] += (*aa)*paj[k];
635       }
636       flops += 2*pnzj;
637       aa++;
638     }
639 
640     /* Sort the j index array for quick sparse axpy. */
641     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
642 
643     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
644     pnzi = pi[i+1] - pi[i];
645     for (j=0;j<pnzi;j++) {
646       nextap = 0;
647       crow   = *pJ++;
648       cjj    = cj + ci[crow];
649       caj    = ca + ci[crow];
650       /* Perform sparse axpy operation.  Note cjj includes apj. */
651       for (k=0;nextap<apnzj;k++) {
652         if (cjj[k]==apj[nextap]) {
653           caj[k] += (*pA)*apa[apj[nextap++]];
654         }
655       }
656       flops += 2*apnzj;
657       pA++;
658     }
659 
660     /* Zero the current row info for A*P */
661     for (j=0;j<apnzj;j++) {
662       apa[apj[j]]      = 0.;
663       apjdense[apj[j]] = 0;
664     }
665   }
666 
667   /* Assemble the final matrix and clean up */
668   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
669   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
670   ierr = PetscFree(apa);CHKERRQ(ierr);
671   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
672 
673   PetscFunctionReturn(0);
674 }
675 
676 /* Compute C = P[rstart:rend,:]^T * A * P of seqaij matrices - used by MatPtAP_MPIAIJ_MPIAIJ() */
677 
678 #undef __FUNCT__
679 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt"
680 /*@C
681    MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt - Creates C = P[rstart:rend,:]^T * A * P of seqaij matrices,
682                 used by MatPtAP_MPIAIJ_MPIAIJ()
683 
684    Collective on Mat
685 
686    Input Parameters:
687 +  A - the matrix in seqaij format
688 .  P - the projection matrix in seqaij format
689 .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
690 .  fill - expected fill (not being used when scall==MAT_REUSE_MATRIX)
691 .  prstart, prend - the starting and ending-1-th row of the matrix P to be used for transpose
692 
693    Output Parameters:
694 .  C - the product matrix in seqaij format
695 
696    Level: developer
697 
698 .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult()
699 @*/
700 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,PetscInt prstart,PetscInt prend,Mat C)
701 {
702   PetscErrorCode ierr;
703   PetscInt       flops=0;
704   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
705   Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
706   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
707   PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense;
708   PetscInt       *pi=p->i,*pj=p->j,*pJ=p->j+pi[prstart],*pjj;
709   PetscInt       *ci=c->i,*cj=c->j,*cjj;
710   PetscInt       am=A->M,cn=C->N,cm=C->M;
711   PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
712   MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
713 
714   PetscFunctionBegin;
715   pA=p->a+pi[prstart];
716   /* Allocate temporary array for storage of one row of A*P */
717   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
718   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
719 
720   apj      = (PetscInt *)(apa + cn);
721   apjdense = apj + cn;
722 
723   /* Clear old values in C */
724   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
725 
726   for (i=0;i<am;i++) {
727     /* Form sparse row of A*P */
728     anzi  = ai[i+1] - ai[i];
729     apnzj = 0;
730     for (j=0;j<anzi;j++) {
731       prow = *aj++;
732       pnzj = pi[prow+1] - pi[prow];
733       pjj  = pj + pi[prow];
734       paj  = pa + pi[prow];
735       for (k=0;k<pnzj;k++) {
736         if (!apjdense[pjj[k]]) {
737           apjdense[pjj[k]] = -1;
738           apj[apnzj++]     = pjj[k];
739         }
740         apa[pjj[k]] += (*aa)*paj[k];
741       }
742       flops += 2*pnzj;
743       aa++;
744     }
745 
746     /* Sort the j index array for quick sparse axpy. */
747     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
748 
749     /* Compute P[[prstart:prend,:]^T*A*P using outer product (P^T)[:,j+prstart]*(A*P)[j,:]. */
750     pnzi = pi[i+1+prstart] - pi[i+prstart];
751     for (j=0;j<pnzi;j++) {
752       nextap = 0;
753       crow   = *pJ++;
754       cjj    = cj + ci[crow];
755       caj    = ca + ci[crow];
756       /* Perform sparse axpy operation.  Note cjj includes apj. */
757       for (k=0;nextap<apnzj;k++) {
758         if (cjj[k]==apj[nextap]) {
759           caj[k] += (*pA)*apa[apj[nextap++]];
760         }
761       }
762       flops += 2*apnzj;
763       pA++;
764     }
765 
766     /* Zero the current row info for A*P */
767     for (j=0;j<apnzj;j++) {
768       apa[apj[j]]      = 0.;
769       apjdense[apj[j]] = 0;
770     }
771   }
772 
773   /* Assemble the final matrix and clean up */
774   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
775   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
776   ierr = PetscFree(apa);CHKERRQ(ierr);
777   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
778 
779   int rank;
780   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
781   if (rank==1){
782     ierr = MatView(C,PETSC_VIEWER_STDOUT_SELF);
783   }
784 
785   PetscFunctionReturn(0);
786 }
787 
788 #undef __FUNCT__
789 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt"
790 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,PetscReal fill,PetscInt prstart,PetscInt prend,Mat *C)
791 {
792   PetscErrorCode ierr;
793   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
794   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
795   PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
796   PetscInt       *ci,*cj,*ptadenserow,*ptasparserow,*ptaj;
797   PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M;
798   PetscInt       i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
799   PetscInt       m = prend - prstart,nlnk,*lnk;
800   MatScalar      *ca;
801   Mat            *psub,P_sub;
802   IS             isrow,iscol;
803   PetscBT        lnkbt;
804 
805   PetscFunctionBegin;
806   /* Get ij structure of P[rstart:rend,:]^T */
807   ierr = ISCreateStride(PETSC_COMM_SELF,m,prstart,1,&isrow);CHKERRQ(ierr);
808   ierr = ISCreateStride(PETSC_COMM_SELF,P->n,0,1,&iscol);CHKERRQ(ierr);
809   ierr = MatGetSubMatrices(P,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&psub);CHKERRQ(ierr);
810   ierr = ISDestroy(isrow);CHKERRQ(ierr);
811   ierr = ISDestroy(iscol);CHKERRQ(ierr);
812   P_sub = psub[0];
813   ierr = MatGetSymbolicTranspose_SeqAIJ(P_sub,&pti,&ptj);CHKERRQ(ierr);
814   ierr = MatDestroyMatrices(1,&psub);CHKERRQ(ierr);
815   ptJ=ptj;
816 
817   /* Allocate ci array, arrays for fill computation and */
818   /* free space for accumulating nonzero column info */
819   ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
820   ci[0] = 0;
821 
822   ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
823   ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
824   ptasparserow = ptadenserow  + an;
825 
826   /* create and initialize a linked list */
827   nlnk = pn+1;
828   ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
829 
830   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
831   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
832   ierr          = GetMoreSpace((PetscInt)(fill*ai[am]/pm)*pn,&free_space);
833   current_space = free_space;
834 
835   int rank;
836   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
837   /* printf(" [%d] Symbolic_SeqAIJ_SeqAIJ_ReducedPt, an: %d, am: %d, nnz: %d\n",rank,an,am,ai[am]);*/
838 
839   /* Determine symbolic info for each row of C: */
840   for (i=0;i<pn;i++) {
841     ptnzi  = pti[i+1] - pti[i];
842     ptanzi = 0;
843     /* Determine symbolic row of PtA_reduced: */
844     for (j=0;j<ptnzi;j++) {
845       arow = *ptJ++;
846       anzj = ai[arow+1] - ai[arow];
847       ajj  = aj + ai[arow];
848       for (k=0;k<anzj;k++) {
849         if (!ptadenserow[ajj[k]]) {
850           ptadenserow[ajj[k]]    = -1;
851           ptasparserow[ptanzi++] = ajj[k];
852         }
853       }
854     }
855     /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
856     ptaj = ptasparserow;
857     cnzi   = 0;
858     for (j=0;j<ptanzi;j++) {
859       prow = *ptaj++;
860       pnzj = pi[prow+1] - pi[prow];
861       pjj  = pj + pi[prow];
862       /* add non-zero cols of P into the sorted linked list lnk */
863       ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
864       cnzi += nlnk;
865     }
866 
867     /* If free space is not available, make more free space */
868     /* Double the amount of total space in the list */
869     if (current_space->local_remaining<cnzi) {
870       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
871     }
872 
873     /* Copy data into free space, and zero out denserows */
874     ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
875     current_space->array           += cnzi;
876     current_space->local_used      += cnzi;
877     current_space->local_remaining -= cnzi;
878 
879     for (j=0;j<ptanzi;j++) {
880       ptadenserow[ptasparserow[j]] = 0;
881     }
882 
883     /* Aside: Perhaps we should save the pta info for the numerical factorization. */
884     /*        For now, we will recompute what is needed. */
885     ci[i+1] = ci[i] + cnzi;
886   }
887   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
888   /* Allocate space for cj, initialize cj, and */
889   /* destroy list of free space and other temporary array(s) */
890   ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
891   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
892   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
893   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
894 
895   /* Allocate space for ca */
896   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
897   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
898 
899   /* put together the new matrix */
900   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
901 
902   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
903   /* Since these are PETSc arrays, change flags to free them as necessary. */
904   c = (Mat_SeqAIJ *)((*C)->data);
905   c->freedata = PETSC_TRUE;
906   c->nonew    = 0;
907 
908   /* Clean up. */
909   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
910 
911   if (rank==1){
912     ierr = MatView((*C), PETSC_VIEWER_STDOUT_SELF);
913   }
914   PetscFunctionReturn(0);
915 }
916 
917 #undef __FUNCT__
918 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ_ReducedPt"
919 PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,MatReuse scall,PetscReal fill,PetscInt prstart,PetscInt prend,Mat *C)
920 {
921   PetscErrorCode ierr;
922 
923   PetscFunctionBegin;
924   if (A->m != prend-prstart) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->m,prend-prstart);
925   if (prend-prstart > P->m) SETERRQ2(PETSC_ERR_ARG_SIZ," prend-prstart %d cannot be larger than P->m %d",prend-prstart,P->m);
926   if (scall == MAT_INITIAL_MATRIX){
927     ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(A,P,fill,prstart,prend,C);CHKERRQ(ierr);
928   }
929 
930   ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(A,P,prstart,prend,*C);CHKERRQ(ierr);
931 
932   PetscFunctionReturn(0);
933 }
934 
935 /* ----------- new ------------------ */
936 #undef __FUNCT__
937 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_SeqAIJ_ReducedPt"
938 PetscErrorCode MatPtAPNumeric_MPIAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,PetscInt prstart,PetscInt prend,Mat C)
939 {
940   PetscErrorCode ierr;
941   PetscInt       flops=0;
942   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
943   Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
944   Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
945   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
946   PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col;
947   PetscInt       *pi=p->i,*pj=p->j,*pJ=p->j+pi[prstart],*pjj;
948   PetscInt       *ci=c->i,*cj=c->j,*cjj;
949   PetscInt       am=A->m,cn=C->N,cm=C->M;
950   PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
951   MatScalar      *ada=ad->a,*aoa=ao->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
952 
953   PetscFunctionBegin;
954   pA=p->a+pi[prstart];
955   /* Allocate temporary array for storage of one row of A*P */
956   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
957   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
958 
959   apj      = (PetscInt *)(apa + cn);
960   apjdense = apj + cn;
961 
962   /* Clear old values in C */
963   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
964 
965   for (i=0;i<am;i++) {
966     /* Form sparse row of A*P */
967     /* diagonal portion of A */
968     anzi  = adi[i+1] - adi[i];
969     apnzj = 0;
970     for (j=0;j<anzi;j++) {
971       prow = *adj + prstart;
972       adj++;
973       pnzj = pi[prow+1] - pi[prow];
974       pjj  = pj + pi[prow];
975       paj  = pa + pi[prow];
976       for (k=0;k<pnzj;k++) {
977         if (!apjdense[pjj[k]]) {
978           apjdense[pjj[k]] = -1;
979           apj[apnzj++]     = pjj[k];
980         }
981         apa[pjj[k]] += (*ada)*paj[k];
982       }
983       flops += 2*pnzj;
984       ada++;
985     }
986     /* off-diagonal portion of A */
987     anzi  = aoi[i+1] - aoi[i];
988     for (j=0;j<anzi;j++) {
989       col = a->garray[*aoj];
990       if (col < cstart){
991         prow = *aoj;
992       } else if (col >= cend){
993         prow = *aoj + prend-prstart;
994       } else {
995         SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
996       }
997       aoj++;
998       pnzj = pi[prow+1] - pi[prow];
999       pjj  = pj + pi[prow];
1000       paj  = pa + pi[prow];
1001       for (k=0;k<pnzj;k++) {
1002         if (!apjdense[pjj[k]]) {
1003           apjdense[pjj[k]] = -1;
1004           apj[apnzj++]     = pjj[k];
1005         }
1006         apa[pjj[k]] += (*aoa)*paj[k];
1007       }
1008       flops += 2*pnzj;
1009       aoa++;
1010     }
1011 
1012     /* Sort the j index array for quick sparse axpy. */
1013     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
1014 
1015     /* Compute P[[prstart:prend,:]^T*A*P using outer product (P^T)[:,j+prstart]*(A*P)[j,:]. */
1016     pnzi = pi[i+1+prstart] - pi[i+prstart];
1017     for (j=0;j<pnzi;j++) {
1018       nextap = 0;
1019       crow   = *pJ++;
1020       cjj    = cj + ci[crow];
1021       caj    = ca + ci[crow];
1022       /* Perform sparse axpy operation.  Note cjj includes apj. */
1023       for (k=0;nextap<apnzj;k++) {
1024         if (cjj[k]==apj[nextap]) {
1025           caj[k] += (*pA)*apa[apj[nextap++]];
1026         }
1027       }
1028       flops += 2*apnzj;
1029       pA++;
1030     }
1031 
1032     /* Zero the current row info for A*P */
1033     for (j=0;j<apnzj;j++) {
1034       apa[apj[j]]      = 0.;
1035       apjdense[apj[j]] = 0;
1036     }
1037   }
1038 
1039   /* Assemble the final matrix and clean up */
1040   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1041   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1042   ierr = PetscFree(apa);CHKERRQ(ierr);
1043   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
1044 
1045   PetscFunctionReturn(0);
1046 }
1047 
1048 #undef __FUNCT__
1049 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_SeqAIJ_ReducedPt"
1050 PetscErrorCode MatPtAPSymbolic_MPIAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,PetscReal fill,PetscInt prstart,PetscInt prend,Mat *C)
1051 {
1052   PetscErrorCode ierr;
1053   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1054   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
1055   Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
1056   Mat_SeqAIJ     *p=(Mat_SeqAIJ*)P->data,*c;
1057   PetscInt       *pti,*ptj,*ptJ,*ajj,*pi=p->i,*pj=p->j,*pjj;
1058   PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col;
1059   PetscInt       *ci,*cj,*ptadenserow,*ptasparserow,*ptaj;
1060   PetscInt       an=A->N,am=A->m,pn=P->N,pm=P->M;
1061   PetscInt       i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
1062   PetscInt       m=prend-prstart,nlnk,*lnk;
1063   MatScalar      *ca;
1064   Mat            *psub,P_sub;
1065   IS             isrow,iscol;
1066   PetscBT        lnkbt;
1067 
1068   PetscFunctionBegin;
1069 
1070   /* Get ij structure of P[rstart:rend,:]^T */
1071   ierr = ISCreateStride(PETSC_COMM_SELF,m,prstart,1,&isrow);CHKERRQ(ierr);
1072   ierr = ISCreateStride(PETSC_COMM_SELF,P->n,0,1,&iscol);CHKERRQ(ierr);
1073   ierr = MatGetSubMatrices(P,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&psub);CHKERRQ(ierr);
1074   ierr = ISDestroy(isrow);CHKERRQ(ierr);
1075   ierr = ISDestroy(iscol);CHKERRQ(ierr);
1076   P_sub = psub[0];
1077   ierr = MatGetSymbolicTranspose_SeqAIJ(P_sub,&pti,&ptj);CHKERRQ(ierr);
1078   ierr = MatDestroyMatrices(1,&psub);CHKERRQ(ierr);
1079   ptJ=ptj;
1080 
1081   /* Allocate ci array, arrays for fill computation and */
1082   /* free space for accumulating nonzero column info */
1083   ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
1084   ci[0] = 0;
1085 
1086   ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
1087   ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
1088   ptasparserow = ptadenserow  + an;
1089 
1090   /* create and initialize a linked list */
1091   nlnk = pn+1;
1092   ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1093 
1094   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
1095   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
1096   nnz           = adi[am] + aoi[am];
1097   ierr          = GetMoreSpace((PetscInt)(fill*nnz/pm)*pn,&free_space);
1098   current_space = free_space;
1099 
1100   /* Determine symbolic info for each row of C: */
1101   for (i=0;i<pn;i++) {
1102     ptnzi  = pti[i+1] - pti[i];
1103     ptanzi = 0;
1104     /* Determine symbolic row of PtA_reduced: */
1105     for (j=0;j<ptnzi;j++) {
1106       arow = *ptJ++;
1107       /* diagonal portion of A */
1108       anzj = adi[arow+1] - adi[arow];
1109       ajj  = adj + adi[arow];
1110       for (k=0;k<anzj;k++) {
1111         col = ajj[k]+prstart;
1112         if (!ptadenserow[col]) {
1113           ptadenserow[col]    = -1;
1114           ptasparserow[ptanzi++] = col;
1115         }
1116       }
1117       /* off-diagonal portion of A */
1118       anzj = aoi[arow+1] - aoi[arow];
1119       ajj  = aoj + aoi[arow];
1120       for (k=0;k<anzj;k++) {
1121         col = a->garray[ajj[k]];  /* global col */
1122         if (col < cstart){
1123           col = ajj[k];
1124         } else if (col >= cend){
1125           col = ajj[k] + m;
1126         } else {
1127           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
1128         }
1129         if (!ptadenserow[col]) {
1130           ptadenserow[col]    = -1;
1131           ptasparserow[ptanzi++] = col;
1132         }
1133       }
1134     }
1135     /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
1136     ptaj = ptasparserow;
1137     cnzi   = 0;
1138     for (j=0;j<ptanzi;j++) {
1139       prow = *ptaj++;
1140       pnzj = pi[prow+1] - pi[prow];
1141       pjj  = pj + pi[prow];
1142       /* if (rank==1) printf(" prow: %d, pnzj: %d\n",prow,pnzj); */
1143       /* add non-zero cols of P into the sorted linked list lnk */
1144       ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1145       cnzi += nlnk;
1146     }
1147 
1148     /* If free space is not available, make more free space */
1149     /* Double the amount of total space in the list */
1150     if (current_space->local_remaining<cnzi) {
1151       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
1152     }
1153 
1154     /* Copy data into free space, and zero out denserows */
1155     ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1156     current_space->array           += cnzi;
1157     current_space->local_used      += cnzi;
1158     current_space->local_remaining -= cnzi;
1159 
1160     for (j=0;j<ptanzi;j++) {
1161       ptadenserow[ptasparserow[j]] = 0;
1162     }
1163 
1164     /* Aside: Perhaps we should save the pta info for the numerical factorization. */
1165     /*        For now, we will recompute what is needed. */
1166     ci[i+1] = ci[i] + cnzi;
1167   }
1168   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
1169   /* Allocate space for cj, initialize cj, and */
1170   /* destroy list of free space and other temporary array(s) */
1171   ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
1172   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
1173   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
1174   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1175 
1176   /* Allocate space for ca */
1177   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
1178   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
1179 
1180   /* put together the new matrix */
1181   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
1182 
1183   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
1184   /* Since these are PETSc arrays, change flags to free them as necessary. */
1185   c = (Mat_SeqAIJ *)((*C)->data);
1186   c->freedata = PETSC_TRUE;
1187   c->nonew    = 0;
1188 
1189   /* Clean up. */
1190   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
1191 
1192   PetscFunctionReturn(0);
1193 }
1194 
1195 PetscEvent PtAP_ReducedPt = 0;
1196 #undef __FUNCT__
1197 #define __FUNCT__ "MatPtAP_MPIAIJ_SeqAIJ_ReducedPt"
1198 PetscErrorCode MatPtAP_MPIAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,MatReuse scall,PetscReal fill,PetscInt prstart,PetscInt prend,Mat *C)
1199 {
1200   PetscErrorCode ierr;
1201 
1202   PetscFunctionBegin;
1203   if (A->m != prend-prstart) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->m,prend-prstart);
1204   if (prend-prstart > P->m) SETERRQ2(PETSC_ERR_ARG_SIZ," prend-prstart %d cannot be larger than P->m %d",prend-prstart,P->m);
1205   ierr = PetscLogEventRegister(&PtAP_ReducedPt,"MatPtAP_ReducedPt",MAT_COOKIE);
1206   PetscLogEventBegin(PtAP_ReducedPt,A,P,0,0);CHKERRQ(ierr);
1207 
1208   if (scall == MAT_INITIAL_MATRIX){
1209     ierr = MatPtAPSymbolic_MPIAIJ_SeqAIJ_ReducedPt(A,P,fill,prstart,prend,C);CHKERRQ(ierr);
1210   }
1211   ierr = MatPtAPNumeric_MPIAIJ_SeqAIJ_ReducedPt(A,P,prstart,prend,*C);CHKERRQ(ierr);
1212   ierr = PetscLogEventEnd(PtAP_ReducedPt,A,P,0,0);CHKERRQ(ierr);
1213 
1214   PetscFunctionReturn(0);
1215 }
1216 
1217