xref: /petsc/src/mat/impls/aij/seq/matptap.c (revision e462e02cad27a364f9591204d05339cc17342307)
1 /*
2   Defines projective product routines where A is a AIJ matrix
3           C = P^T * A * P
4 */
5 
6 #include "src/mat/impls/aij/seq/aij.h"   /*I "petscmat.h" I*/
7 #include "src/mat/utils/freespace.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 #include "petscbt.h"
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatPtAP"
13 /*@
14    MatPtAP - Creates the matrix projection C = P^T * A * P
15 
16    Collective on Mat
17 
18    Input Parameters:
19 +  A - the matrix
20 .  P - the projection matrix
21 .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
22 -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P))
23 
24    Output Parameters:
25 .  C - the product matrix
26 
27    Notes:
28    C will be created and must be destroyed by the user with MatDestroy().
29 
30    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
31    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
32 
33    Level: intermediate
34 
35 .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult()
36 @*/
37 PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
38 {
39   PetscErrorCode ierr;
40   PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *);
41   PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *);
42 
43   PetscFunctionBegin;
44   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
45   PetscValidType(A,1);
46   MatPreallocated(A);
47   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
48   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
49   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
50   PetscValidType(P,2);
51   MatPreallocated(P);
52   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
53   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
54   PetscValidPointer(C,3);
55 
56   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
57 
58   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
59 
60   /* For now, we do not dispatch based on the type of A and P */
61   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
62   fA = A->ops->ptap;
63   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name);
64   fP = P->ops->ptap;
65   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name);
66   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
67 
68   ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
69   ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr);
70   ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 
74 EXTERN PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,MatReuse,PetscReal,PetscInt,PetscInt,Mat*);
75 EXTERN PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,PetscReal,PetscInt,PetscInt,Mat*);
76 EXTERN PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,PetscInt,PetscInt,Mat);
77 
78 EXTERN PetscErrorCode MatPtAP_MPIAIJ_SeqAIJ_ReducedPt(Mat,Mat,MatReuse,PetscReal,PetscInt,PetscInt,Mat*);
79 EXTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_SeqAIJ_ReducedPt(Mat,Mat,PetscReal,PetscInt,PetscInt,Mat*);
80 EXTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_SeqAIJ_ReducedPt(Mat,Mat,PetscInt,PetscInt,Mat);
81 
82 #undef __FUNCT__
83 #define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ"
84 PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
85 {
86   PetscErrorCode    ierr;
87 
88   PetscFunctionBegin;
89   if (scall == MAT_INITIAL_MATRIX){
90     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
91     ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */
92     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
93   } else if (scall == MAT_REUSE_MATRIX){
94     ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
95     ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr);
96     ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
97   } else {
98     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
99   }
100   PetscFunctionReturn(0);
101 }
102 
103 #undef __FUNCT__
104 #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ"
105 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
106 {
107   PetscErrorCode    ierr;
108   Mat               P_seq,C_seq; /* A_loc */
109   PetscInt          prstart,prend,m=P->m;
110   /* IS                isrowp,iscolp; */
111 
112   PetscFunctionBegin;
113   /* get P_seq = submatrix of P by taking rows of P that equal to nonzero col of A */
114   ierr = MatGetBrowsOfAcols(A,P,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&prstart,&P_seq);CHKERRQ(ierr);
115   /*
116   ierr = MatGetBrowsOfAcols(A,P,MAT_INITIAL_MATRIX,&isrowp,&iscolp,&prstart,&P_seq);CHKERRQ(ierr);
117   ierr = ISDestroy(iscolp);CHKERRQ(ierr); */
118 
119   /* get A_loc = submatrix of A by taking all local rows of A */
120   /*
121   ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,PETSC_NULL,&isrowp,&A_loc);CHKERRQ(ierr);
122   */
123   /* ierr = ISDestroy(isrowp);CHKERRQ(ierr); */
124 
125   /* compute C_seq = P_loc^T * A_loc * P_seq */
126   prend   = prstart + m;
127   ierr = MatPtAP_MPIAIJ_SeqAIJ_ReducedPt(A,P_seq,MAT_INITIAL_MATRIX,fill,prstart,prend,&C_seq);CHKERRQ(ierr);
128   /*
129   ierr = MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(A_loc,P_seq,MAT_INITIAL_MATRIX,fill,prstart,prend,&C_seq);CHKERRQ(ierr);
130   */
131   ierr = MatDestroy(P_seq);CHKERRQ(ierr);
132   /* ierr = MatDestroy(A_loc);CHKERRQ(ierr); */
133 
134   /* add C_seq into mpi C */
135   ierr = MatMerge_SeqsToMPI(A->comm,C_seq,P->n,P->n,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
136 
137   PetscFunctionReturn(0);
138 }
139 
140 #undef __FUNCT__
141 #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ"
142 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C)
143 {
144   PetscErrorCode       ierr;
145   Mat                  P_seq,A_loc,C_seq;
146   PetscInt             prstart,prend,m=P->m;
147   IS                   isrowp,iscolp;
148   Mat_Merge_SeqsToMPI  *merge;
149   PetscObjectContainer container;
150 
151   PetscFunctionBegin;
152   ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
153   if (container) {
154     ierr  = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
155   } else {
156     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix C does not posses and object container");
157   }
158 
159   /* get P_seq = submatrix of P by taking rows of P that equal to nonzero col of A */
160   ierr = MatGetBrowsOfAcols(A,P,MAT_INITIAL_MATRIX,&isrowp,&iscolp,&prstart,&P_seq);CHKERRQ(ierr);
161   ierr = ISDestroy(iscolp);CHKERRQ(ierr);
162 
163   /* get A_loc = submatrix of A by taking all local rows of A */
164   ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,PETSC_NULL,&isrowp,&A_loc);CHKERRQ(ierr);
165   ierr = ISDestroy(isrowp);CHKERRQ(ierr);
166 
167   /* compute C_seq = P_loc^T * A_loc * P_seq */
168   prend = prstart + m;
169   C_seq = merge->C_seq;
170   ierr = MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(A_loc,P_seq,MAT_REUSE_MATRIX,1.0,prstart,prend,&C_seq);CHKERRQ(ierr);
171   ierr = MatDestroy(P_seq);CHKERRQ(ierr);
172   ierr = MatDestroy(A_loc);CHKERRQ(ierr);
173 
174   /* add C_seq into mpi C */
175   ierr = MatMerge_SeqsToMPI(A->comm,C_seq,P->n,P->n,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
176 
177   PetscFunctionReturn(0);
178 }
179 
180 #undef __FUNCT__
181 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ"
182 PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
183 {
184   PetscErrorCode ierr;
185 
186   PetscFunctionBegin;
187   if (scall == MAT_INITIAL_MATRIX){
188     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
189     ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr);
190     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
191   }
192   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
193   ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr);
194   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
195   PetscFunctionReturn(0);
196 }
197 
198 #undef __FUNCT__
199 #define __FUNCT__ "MatPtAPSymbolic"
200 /*
201    MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P
202 
203    Collective on Mat
204 
205    Input Parameters:
206 +  A - the matrix
207 -  P - the projection matrix
208 
209    Output Parameters:
210 .  C - the (i,j) structure of the product matrix
211 
212    Notes:
213    C will be created and must be destroyed by the user with MatDestroy().
214 
215    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
216    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.  The product is computed using
217    this (i,j) structure by calling MatPtAPNumeric().
218 
219    Level: intermediate
220 
221 .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic()
222 */
223 PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C)
224 {
225   PetscErrorCode ierr;
226   PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*);
227   PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*);
228 
229   PetscFunctionBegin;
230 
231   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
232   PetscValidType(A,1);
233   MatPreallocated(A);
234   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
235   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
236 
237   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
238   PetscValidType(P,2);
239   MatPreallocated(P);
240   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
241   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
242 
243   PetscValidPointer(C,3);
244 
245   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
246   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
247 
248   /* For now, we do not dispatch based on the type of A and P */
249   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
250   fA = A->ops->ptapsymbolic;
251   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name);
252   fP = P->ops->ptapsymbolic;
253   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name);
254   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
255 
256   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
257   ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr);
258   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
259 
260   PetscFunctionReturn(0);
261 }
262 
263 typedef struct {
264   Mat    symAP;
265 } Mat_PtAPstruct;
266 
267 EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat);
268 
269 #undef __FUNCT__
270 #define __FUNCT__ "MatDestroy_SeqAIJ_PtAP"
271 PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A)
272 {
273   PetscErrorCode    ierr;
274   Mat_PtAPstruct    *ptap=(Mat_PtAPstruct*)A->spptr;
275 
276   PetscFunctionBegin;
277   ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr);
278   ierr = PetscFree(ptap);CHKERRQ(ierr);
279   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
280   PetscFunctionReturn(0);
281 }
282 
283 #undef __FUNCT__
284 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ"
285 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
286 {
287   PetscErrorCode ierr;
288   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
289   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*p = (Mat_SeqAIJ*)P->data,*c;
290   PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
291   PetscInt       *ci,*cj,*ptadenserow,*ptasparserow,*ptaj;
292   PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M;
293   PetscInt       i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk;
294   MatScalar      *ca;
295   PetscBT        lnkbt;
296 
297   PetscFunctionBegin;
298   /* Get ij structure of P^T */
299   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
300   ptJ=ptj;
301 
302   /* Allocate ci array, arrays for fill computation and */
303   /* free space for accumulating nonzero column info */
304   ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
305   ci[0] = 0;
306 
307   ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
308   ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
309   ptasparserow = ptadenserow  + an;
310 
311   /* create and initialize a linked list */
312   nlnk = pn+1;
313   ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
314 
315   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
316   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
317   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
318   current_space = free_space;
319 
320   /* Determine symbolic info for each row of C: */
321   for (i=0;i<pn;i++) {
322     ptnzi  = pti[i+1] - pti[i];
323     ptanzi = 0;
324     /* Determine symbolic row of PtA: */
325     for (j=0;j<ptnzi;j++) {
326       arow = *ptJ++;
327       anzj = ai[arow+1] - ai[arow];
328       ajj  = aj + ai[arow];
329       for (k=0;k<anzj;k++) {
330         if (!ptadenserow[ajj[k]]) {
331           ptadenserow[ajj[k]]    = -1;
332           ptasparserow[ptanzi++] = ajj[k];
333         }
334       }
335     }
336       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
337     ptaj = ptasparserow;
338     cnzi   = 0;
339     for (j=0;j<ptanzi;j++) {
340       prow = *ptaj++;
341       pnzj = pi[prow+1] - pi[prow];
342       pjj  = pj + pi[prow];
343       /* add non-zero cols of P into the sorted linked list lnk */
344       ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
345       cnzi += nlnk;
346     }
347 
348     /* If free space is not available, make more free space */
349     /* Double the amount of total space in the list */
350     if (current_space->local_remaining<cnzi) {
351       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
352     }
353 
354     /* Copy data into free space, and zero out denserows */
355     ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
356     current_space->array           += cnzi;
357     current_space->local_used      += cnzi;
358     current_space->local_remaining -= cnzi;
359 
360     for (j=0;j<ptanzi;j++) {
361       ptadenserow[ptasparserow[j]] = 0;
362     }
363     /* Aside: Perhaps we should save the pta info for the numerical factorization. */
364     /*        For now, we will recompute what is needed. */
365     ci[i+1] = ci[i] + cnzi;
366   }
367   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
368   /* Allocate space for cj, initialize cj, and */
369   /* destroy list of free space and other temporary array(s) */
370   ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
371   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
372   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
373   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
374 
375   /* Allocate space for ca */
376   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
377   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
378 
379   /* put together the new matrix */
380   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
381 
382   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
383   /* Since these are PETSc arrays, change flags to free them as necessary. */
384   c = (Mat_SeqAIJ *)((*C)->data);
385   c->freedata = PETSC_TRUE;
386   c->nonew    = 0;
387 
388   /* Clean up. */
389   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
390 
391   PetscFunctionReturn(0);
392 }
393 
394 #include "src/mat/impls/maij/maij.h"
395 EXTERN_C_BEGIN
396 #undef __FUNCT__
397 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
398 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C)
399 {
400   /* This routine requires testing -- I don't think it works. */
401   PetscErrorCode ierr;
402   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
403   Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
404   Mat            P=pp->AIJ;
405   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
406   PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
407   PetscInt       *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
408   PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof;
409   PetscInt       i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
410   MatScalar      *ca;
411 
412   PetscFunctionBegin;
413   /* Start timer */
414   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
415 
416   /* Get ij structure of P^T */
417   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
418 
419   /* Allocate ci array, arrays for fill computation and */
420   /* free space for accumulating nonzero column info */
421   ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
422   ci[0] = 0;
423 
424   ierr = PetscMalloc((2*pn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
425   ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
426   ptasparserow = ptadenserow  + an;
427   denserow     = ptasparserow + an;
428   sparserow    = denserow     + pn;
429 
430   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
431   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
432   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
433   current_space = free_space;
434 
435   /* Determine symbolic info for each row of C: */
436   for (i=0;i<pn/ppdof;i++) {
437     ptnzi  = pti[i+1] - pti[i];
438     ptanzi = 0;
439     ptJ    = ptj + pti[i];
440     for (dof=0;dof<ppdof;dof++) {
441     /* Determine symbolic row of PtA: */
442       for (j=0;j<ptnzi;j++) {
443         arow = ptJ[j] + dof;
444         anzj = ai[arow+1] - ai[arow];
445         ajj  = aj + ai[arow];
446         for (k=0;k<anzj;k++) {
447           if (!ptadenserow[ajj[k]]) {
448             ptadenserow[ajj[k]]    = -1;
449             ptasparserow[ptanzi++] = ajj[k];
450           }
451         }
452       }
453       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
454       ptaj = ptasparserow;
455       cnzi   = 0;
456       for (j=0;j<ptanzi;j++) {
457         pdof = *ptaj%dof;
458         prow = (*ptaj++)/dof;
459         pnzj = pi[prow+1] - pi[prow];
460         pjj  = pj + pi[prow];
461         for (k=0;k<pnzj;k++) {
462           if (!denserow[pjj[k]+pdof]) {
463             denserow[pjj[k]+pdof] = -1;
464             sparserow[cnzi++]     = pjj[k]+pdof;
465           }
466         }
467       }
468 
469       /* sort sparserow */
470       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
471 
472       /* If free space is not available, make more free space */
473       /* Double the amount of total space in the list */
474       if (current_space->local_remaining<cnzi) {
475         ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
476       }
477 
478       /* Copy data into free space, and zero out denserows */
479       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
480       current_space->array           += cnzi;
481       current_space->local_used      += cnzi;
482       current_space->local_remaining -= cnzi;
483 
484       for (j=0;j<ptanzi;j++) {
485         ptadenserow[ptasparserow[j]] = 0;
486       }
487       for (j=0;j<cnzi;j++) {
488         denserow[sparserow[j]] = 0;
489       }
490       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
491       /*        For now, we will recompute what is needed. */
492       ci[i+1+dof] = ci[i+dof] + cnzi;
493     }
494   }
495   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
496   /* Allocate space for cj, initialize cj, and */
497   /* destroy list of free space and other temporary array(s) */
498   ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
499   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
500   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
501 
502   /* Allocate space for ca */
503   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
504   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
505 
506   /* put together the new matrix */
507   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
508 
509   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
510   /* Since these are PETSc arrays, change flags to free them as necessary. */
511   c = (Mat_SeqAIJ *)((*C)->data);
512   c->freedata = PETSC_TRUE;
513   c->nonew    = 0;
514 
515   /* Clean up. */
516   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
517 
518   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
519   PetscFunctionReturn(0);
520 }
521 EXTERN_C_END
522 
523 #undef __FUNCT__
524 #define __FUNCT__ "MatPtAPNumeric"
525 /*
526    MatPtAPNumeric - Computes the matrix projection C = P^T * A * P
527 
528    Collective on Mat
529 
530    Input Parameters:
531 +  A - the matrix
532 -  P - the projection matrix
533 
534    Output Parameters:
535 .  C - the product matrix
536 
537    Notes:
538    C must have been created by calling MatPtAPSymbolic and must be destroyed by
539    the user using MatDeatroy().
540 
541    This routine is currently only implemented for pairs of AIJ matrices and classes
542    which inherit from AIJ.  C will be of type MATAIJ.
543 
544    Level: intermediate
545 
546 .seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric()
547 */
548 PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C)
549 {
550   PetscErrorCode ierr;
551   PetscErrorCode (*fA)(Mat,Mat,Mat);
552   PetscErrorCode (*fP)(Mat,Mat,Mat);
553 
554   PetscFunctionBegin;
555 
556   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
557   PetscValidType(A,1);
558   MatPreallocated(A);
559   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
560   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
561 
562   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
563   PetscValidType(P,2);
564   MatPreallocated(P);
565   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
566   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
567 
568   PetscValidHeaderSpecific(C,MAT_COOKIE,3);
569   PetscValidType(C,3);
570   MatPreallocated(C);
571   if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
572   if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
573 
574   if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M);
575   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
576   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
577   if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N);
578 
579   /* For now, we do not dispatch based on the type of A and P */
580   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
581   fA = A->ops->ptapnumeric;
582   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name);
583   fP = P->ops->ptapnumeric;
584   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name);
585   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
586 
587   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
588   ierr = (*fA)(A,P,C);CHKERRQ(ierr);
589   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
590 
591   PetscFunctionReturn(0);
592 }
593 
594 #undef __FUNCT__
595 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ"
596 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C)
597 {
598   PetscErrorCode ierr;
599   PetscInt       flops=0;
600   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
601   Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
602   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
603   PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
604   PetscInt       *ci=c->i,*cj=c->j,*cjj;
605   PetscInt       am=A->M,cn=C->N,cm=C->M;
606   PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
607   MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
608 
609   PetscFunctionBegin;
610   /* Allocate temporary array for storage of one row of A*P */
611   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
612   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
613 
614   apj      = (PetscInt *)(apa + cn);
615   apjdense = apj + cn;
616 
617   /* Clear old values in C */
618   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
619 
620   for (i=0;i<am;i++) {
621     /* Form sparse row of A*P */
622     anzi  = ai[i+1] - ai[i];
623     apnzj = 0;
624     for (j=0;j<anzi;j++) {
625       prow = *aj++;
626       pnzj = pi[prow+1] - pi[prow];
627       pjj  = pj + pi[prow];
628       paj  = pa + pi[prow];
629       for (k=0;k<pnzj;k++) {
630         if (!apjdense[pjj[k]]) {
631           apjdense[pjj[k]] = -1;
632           apj[apnzj++]     = pjj[k];
633         }
634         apa[pjj[k]] += (*aa)*paj[k];
635       }
636       flops += 2*pnzj;
637       aa++;
638     }
639 
640     /* Sort the j index array for quick sparse axpy. */
641     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
642 
643     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
644     pnzi = pi[i+1] - pi[i];
645     for (j=0;j<pnzi;j++) {
646       nextap = 0;
647       crow   = *pJ++;
648       cjj    = cj + ci[crow];
649       caj    = ca + ci[crow];
650       /* Perform sparse axpy operation.  Note cjj includes apj. */
651       for (k=0;nextap<apnzj;k++) {
652         if (cjj[k]==apj[nextap]) {
653           caj[k] += (*pA)*apa[apj[nextap++]];
654         }
655       }
656       flops += 2*apnzj;
657       pA++;
658     }
659 
660     /* Zero the current row info for A*P */
661     for (j=0;j<apnzj;j++) {
662       apa[apj[j]]      = 0.;
663       apjdense[apj[j]] = 0;
664     }
665   }
666 
667   /* Assemble the final matrix and clean up */
668   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
669   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
670   ierr = PetscFree(apa);CHKERRQ(ierr);
671   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
672 
673   PetscFunctionReturn(0);
674 }
675 
676 /* Compute C = P[rstart:rend,:]^T * A * P of seqaij matrices - used by MatPtAP_MPIAIJ_MPIAIJ() */
677 
678 #undef __FUNCT__
679 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt"
680 /*@C
681    MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt - Creates C = P[rstart:rend,:]^T * A * P of seqaij matrices,
682                 used by MatPtAP_MPIAIJ_MPIAIJ()
683 
684    Collective on Mat
685 
686    Input Parameters:
687 +  A - the matrix in seqaij format
688 .  P - the projection matrix in seqaij format
689 .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
690 .  fill - expected fill (not being used when scall==MAT_REUSE_MATRIX)
691 .  prstart, prend - the starting and ending-1-th row of the matrix P to be used for transpose
692 
693    Output Parameters:
694 .  C - the product matrix in seqaij format
695 
696    Level: developer
697 
698 .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult()
699 @*/
700 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,PetscInt prstart,PetscInt prend,Mat C)
701 {
702   PetscErrorCode ierr;
703   PetscInt       flops=0;
704   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
705   Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
706   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
707   PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense;
708   PetscInt       *pi=p->i,*pj=p->j,*pJ=p->j+pi[prstart],*pjj;
709   PetscInt       *ci=c->i,*cj=c->j,*cjj;
710   PetscInt       am=A->M,cn=C->N,cm=C->M;
711   PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
712   MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
713 
714   PetscFunctionBegin;
715   pA=p->a+pi[prstart];
716   /* Allocate temporary array for storage of one row of A*P */
717   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
718   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
719 
720   apj      = (PetscInt *)(apa + cn);
721   apjdense = apj + cn;
722 
723   /* Clear old values in C */
724   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
725 
726   for (i=0;i<am;i++) {
727     /* Form sparse row of A*P */
728     anzi  = ai[i+1] - ai[i];
729     apnzj = 0;
730     for (j=0;j<anzi;j++) {
731       prow = *aj++;
732       pnzj = pi[prow+1] - pi[prow];
733       pjj  = pj + pi[prow];
734       paj  = pa + pi[prow];
735       for (k=0;k<pnzj;k++) {
736         if (!apjdense[pjj[k]]) {
737           apjdense[pjj[k]] = -1;
738           apj[apnzj++]     = pjj[k];
739         }
740         apa[pjj[k]] += (*aa)*paj[k];
741       }
742       flops += 2*pnzj;
743       aa++;
744     }
745 
746     /* Sort the j index array for quick sparse axpy. */
747     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
748 
749     /* Compute P[[prstart:prend,:]^T*A*P using outer product (P^T)[:,j+prstart]*(A*P)[j,:]. */
750     pnzi = pi[i+1+prstart] - pi[i+prstart];
751     for (j=0;j<pnzi;j++) {
752       nextap = 0;
753       crow   = *pJ++;
754       cjj    = cj + ci[crow];
755       caj    = ca + ci[crow];
756       /* Perform sparse axpy operation.  Note cjj includes apj. */
757       for (k=0;nextap<apnzj;k++) {
758         if (cjj[k]==apj[nextap]) {
759           caj[k] += (*pA)*apa[apj[nextap++]];
760         }
761       }
762       flops += 2*apnzj;
763       pA++;
764     }
765 
766     /* Zero the current row info for A*P */
767     for (j=0;j<apnzj;j++) {
768       apa[apj[j]]      = 0.;
769       apjdense[apj[j]] = 0;
770     }
771   }
772 
773   /* Assemble the final matrix and clean up */
774   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
775   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
776   ierr = PetscFree(apa);CHKERRQ(ierr);
777   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
778 
779   int rank;
780   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
781   if (rank==1){
782     ierr = MatView(C,PETSC_VIEWER_STDOUT_SELF);
783   }
784 
785   PetscFunctionReturn(0);
786 }
787 
788 #undef __FUNCT__
789 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt"
790 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,PetscReal fill,PetscInt prstart,PetscInt prend,Mat *C)
791 {
792   PetscErrorCode ierr;
793   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
794   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
795   PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
796   PetscInt       *ci,*cj,*ptadenserow,*ptasparserow,*ptaj;
797   PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M;
798   PetscInt       i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
799   PetscInt       m = prend - prstart,nlnk,*lnk;
800   MatScalar      *ca;
801   Mat            *psub,P_sub;
802   IS             isrow,iscol;
803   PetscBT        lnkbt;
804 
805   PetscFunctionBegin;
806   /* Get ij structure of P[rstart:rend,:]^T */
807   ierr = ISCreateStride(PETSC_COMM_SELF,m,prstart,1,&isrow);CHKERRQ(ierr);
808   ierr = ISCreateStride(PETSC_COMM_SELF,P->n,0,1,&iscol);CHKERRQ(ierr);
809   ierr = MatGetSubMatrices(P,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&psub);CHKERRQ(ierr);
810   ierr = ISDestroy(isrow);CHKERRQ(ierr);
811   ierr = ISDestroy(iscol);CHKERRQ(ierr);
812   P_sub = psub[0];
813   ierr = MatGetSymbolicTranspose_SeqAIJ(P_sub,&pti,&ptj);CHKERRQ(ierr);
814   ierr = MatDestroyMatrices(1,&psub);CHKERRQ(ierr);
815   ptJ=ptj;
816 
817   /* Allocate ci array, arrays for fill computation and */
818   /* free space for accumulating nonzero column info */
819   ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
820   ci[0] = 0;
821 
822   ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
823   ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
824   ptasparserow = ptadenserow  + an;
825 
826   /* create and initialize a linked list */
827   nlnk = pn+1;
828   ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
829 
830   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
831   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
832   ierr          = GetMoreSpace((PetscInt)(fill*ai[am]/pm)*pn,&free_space);
833   current_space = free_space;
834 
835   int rank;
836   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
837   /* printf(" [%d] Symbolic_SeqAIJ_SeqAIJ_ReducedPt, an: %d, am: %d, nnz: %d\n",rank,an,am,ai[am]);*/
838 
839   /* Determine symbolic info for each row of C: */
840   for (i=0;i<pn;i++) {
841     ptnzi  = pti[i+1] - pti[i];
842     ptanzi = 0;
843     /* Determine symbolic row of PtA_reduced: */
844     for (j=0;j<ptnzi;j++) {
845       arow = *ptJ++;
846       anzj = ai[arow+1] - ai[arow];
847       ajj  = aj + ai[arow];
848       for (k=0;k<anzj;k++) {
849         if (!ptadenserow[ajj[k]]) {
850           ptadenserow[ajj[k]]    = -1;
851           ptasparserow[ptanzi++] = ajj[k];
852         }
853       }
854     }
855     /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
856     ptaj = ptasparserow;
857     cnzi   = 0;
858     for (j=0;j<ptanzi;j++) {
859       prow = *ptaj++;
860       pnzj = pi[prow+1] - pi[prow];
861       pjj  = pj + pi[prow];
862       /* add non-zero cols of P into the sorted linked list lnk */
863       ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
864       cnzi += nlnk;
865     }
866 
867     /* If free space is not available, make more free space */
868     /* Double the amount of total space in the list */
869     if (current_space->local_remaining<cnzi) {
870       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
871     }
872 
873     /* Copy data into free space, and zero out denserows */
874     ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
875     current_space->array           += cnzi;
876     current_space->local_used      += cnzi;
877     current_space->local_remaining -= cnzi;
878 
879     for (j=0;j<ptanzi;j++) {
880       ptadenserow[ptasparserow[j]] = 0;
881     }
882 
883     /* Aside: Perhaps we should save the pta info for the numerical factorization. */
884     /*        For now, we will recompute what is needed. */
885     ci[i+1] = ci[i] + cnzi;
886   }
887   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
888   /* Allocate space for cj, initialize cj, and */
889   /* destroy list of free space and other temporary array(s) */
890   ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
891   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
892   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
893   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
894 
895   /* Allocate space for ca */
896   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
897   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
898 
899   /* put together the new matrix */
900   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
901 
902   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
903   /* Since these are PETSc arrays, change flags to free them as necessary. */
904   c = (Mat_SeqAIJ *)((*C)->data);
905   c->freedata = PETSC_TRUE;
906   c->nonew    = 0;
907 
908   /* Clean up. */
909   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
910 
911   if (rank==1){
912     ierr = MatView((*C), PETSC_VIEWER_STDOUT_SELF);
913   }
914   PetscFunctionReturn(0);
915 }
916 
917 #undef __FUNCT__
918 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ_ReducedPt"
919 PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,MatReuse scall,PetscReal fill,PetscInt prstart,PetscInt prend,Mat *C)
920 {
921   PetscErrorCode ierr;
922 
923   PetscFunctionBegin;
924   if (A->m != prend-prstart) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->m,prend-prstart);
925   if (prend-prstart > P->m) SETERRQ2(PETSC_ERR_ARG_SIZ," prend-prstart %d cannot be larger than P->m %d",prend-prstart,P->m);
926   if (scall == MAT_INITIAL_MATRIX){
927     ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(A,P,fill,prstart,prend,C);CHKERRQ(ierr);
928   }
929 
930   ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(A,P,prstart,prend,*C);CHKERRQ(ierr);
931 
932   PetscFunctionReturn(0);
933 }
934 
935 /* ----------- new ------------------ */
936 #undef __FUNCT__
937 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_SeqAIJ_ReducedPt"
938 PetscErrorCode MatPtAPNumeric_MPIAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,PetscInt prstart,PetscInt prend,Mat C)
939 {
940   PetscErrorCode ierr;
941   PetscInt       flops=0;
942   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
943   Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
944   Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
945   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
946   PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col;
947   PetscInt       *pi=p->i,*pj=p->j,*pJ=p->j+pi[prstart],*pjj;
948   PetscInt       *ci=c->i,*cj=c->j,*cjj;
949   PetscInt       am=A->m,cn=C->N,cm=C->M;
950   PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
951   MatScalar      *ada=ad->a,*aoa=ao->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
952 
953   PetscFunctionBegin;
954   pA=p->a+pi[prstart];
955   /* Allocate temporary array for storage of one row of A*P */
956   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
957   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
958 
959   apj      = (PetscInt *)(apa + cn);
960   apjdense = apj + cn;
961 
962   /* Clear old values in C */
963   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
964 
965   for (i=0;i<am;i++) {
966     /* Form sparse row of A*P */
967     /* diagonal portion of A */
968     anzi  = adi[i+1] - adi[i];
969     apnzj = 0;
970     for (j=0;j<anzi;j++) {
971       prow = *adj + prstart;
972       adj++;
973       pnzj = pi[prow+1] - pi[prow];
974       pjj  = pj + pi[prow];
975       paj  = pa + pi[prow];
976       for (k=0;k<pnzj;k++) {
977         if (!apjdense[pjj[k]]) {
978           apjdense[pjj[k]] = -1;
979           apj[apnzj++]     = pjj[k];
980         }
981         apa[pjj[k]] += (*ada)*paj[k];
982       }
983       flops += 2*pnzj;
984       ada++;
985     }
986     /* off-diagonal portion of A */
987     anzi  = aoi[i+1] - aoi[i];
988     for (j=0;j<anzi;j++) {
989       col = a->garray[*aoj];
990       if (col < cstart){
991         prow = *aoj;
992       } else if (col >= cend){
993         prow = *aoj + prend-prstart;
994       } else {
995         SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
996       }
997       aoj++;
998       pnzj = pi[prow+1] - pi[prow];
999       pjj  = pj + pi[prow];
1000       paj  = pa + pi[prow];
1001       for (k=0;k<pnzj;k++) {
1002         if (!apjdense[pjj[k]]) {
1003           apjdense[pjj[k]] = -1;
1004           apj[apnzj++]     = pjj[k];
1005         }
1006         apa[pjj[k]] += (*aoa)*paj[k];
1007       }
1008       flops += 2*pnzj;
1009       aoa++;
1010     }
1011 
1012     /* Sort the j index array for quick sparse axpy. */
1013     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
1014 
1015     /* Compute P[[prstart:prend,:]^T*A*P using outer product (P^T)[:,j+prstart]*(A*P)[j,:]. */
1016     pnzi = pi[i+1+prstart] - pi[i+prstart];
1017     for (j=0;j<pnzi;j++) {
1018       nextap = 0;
1019       crow   = *pJ++;
1020       cjj    = cj + ci[crow];
1021       caj    = ca + ci[crow];
1022       /* Perform sparse axpy operation.  Note cjj includes apj. */
1023       for (k=0;nextap<apnzj;k++) {
1024         if (cjj[k]==apj[nextap]) {
1025           caj[k] += (*pA)*apa[apj[nextap++]];
1026         }
1027       }
1028       flops += 2*apnzj;
1029       pA++;
1030     }
1031 
1032     /* Zero the current row info for A*P */
1033     for (j=0;j<apnzj;j++) {
1034       apa[apj[j]]      = 0.;
1035       apjdense[apj[j]] = 0;
1036     }
1037   }
1038 
1039   /* Assemble the final matrix and clean up */
1040   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1041   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1042   ierr = PetscFree(apa);CHKERRQ(ierr);
1043   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
1044 
1045   PetscFunctionReturn(0);
1046 }
1047 static PetscEvent logkey_getsymtransreduced = 0;
1048 #undef __FUNCT__
1049 #define __FUNCT__ "MatGetSymbolicTransposeReduced_SeqIJ"
1050 PetscErrorCode MatGetSymbolicTransposeReduced_SeqAIJ(Mat A,PetscInt rstart,PetscInt rend,PetscInt *Ati[],PetscInt *Atj[])
1051 {
1052   PetscErrorCode ierr;
1053   PetscInt       i,j,anzj;
1054   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)A->data;
1055   PetscInt       an=A->N,am=A->M;
1056   PetscInt       *ati,*atj,*atfill,*ai=a->i,*aj=a->j;
1057 
1058   PetscFunctionBegin;
1059 
1060   ierr = PetscLogInfo(A,"Getting Symbolic Transpose.\n");CHKERRQ(ierr);
1061 
1062   /* Set up timers */
1063   if (!logkey_getsymtransreduced) {
1064     ierr = PetscLogEventRegister(&logkey_getsymtransreduced,"MatGetSymbolicTransposeReduced",MAT_COOKIE);CHKERRQ(ierr);
1065   }
1066   ierr = PetscLogEventBegin(logkey_getsymtransreduced,A,0,0,0);CHKERRQ(ierr);
1067 
1068   /* Allocate space for symbolic transpose info and work array */
1069   ierr = PetscMalloc((an+1)*sizeof(PetscInt),&ati);CHKERRQ(ierr);
1070   anzj = ai[rend] - ai[rstart];
1071   ierr = PetscMalloc((anzj+1)*sizeof(PetscInt),&atj);CHKERRQ(ierr);
1072   ierr = PetscMalloc((an+1)*sizeof(PetscInt),&atfill);CHKERRQ(ierr);
1073   ierr = PetscMemzero(ati,(an+1)*sizeof(PetscInt));CHKERRQ(ierr);
1074 
1075   /* Walk through aj and count ## of non-zeros in each row of A^T. */
1076   /* Note: offset by 1 for fast conversion into csr format. */
1077   for (i=ai[rstart]; i<ai[rend]; i++) {
1078     ati[aj[i]+1] += 1;
1079   }
1080   /* Form ati for csr format of A^T. */
1081   for (i=0;i<an;i++) {
1082     ati[i+1] += ati[i];
1083   }
1084 
1085   /* Copy ati into atfill so we have locations of the next free space in atj */
1086   ierr = PetscMemcpy(atfill,ati,an*sizeof(PetscInt));CHKERRQ(ierr);
1087 
1088   /* Walk through A row-wise and mark nonzero entries of A^T. */
1089   aj = aj + ai[rstart];
1090   for (i=rstart; i<rend; i++) {
1091     anzj = ai[i+1] - ai[i];
1092     for (j=0;j<anzj;j++) {
1093       atj[atfill[*aj]] = i-rstart;
1094       atfill[*aj++]   += 1;
1095     }
1096   }
1097 
1098   /* Clean up temporary space and complete requests. */
1099   ierr = PetscFree(atfill);CHKERRQ(ierr);
1100   *Ati = ati;
1101   *Atj = atj;
1102 
1103   ierr = PetscLogEventEnd(logkey_getsymtransreduced,A,0,0,0);CHKERRQ(ierr);
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 #undef __FUNCT__
1108 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_SeqAIJ_ReducedPt"
1109 PetscErrorCode MatPtAPSymbolic_MPIAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,PetscReal fill,PetscInt prstart,PetscInt prend,Mat *C)
1110 {
1111   PetscErrorCode ierr;
1112   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1113   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
1114   Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
1115   Mat_SeqAIJ     *p=(Mat_SeqAIJ*)P->data,*c;
1116   PetscInt       *pti,*ptj,*ptJ,*ajj,*pi=p->i,*pj=p->j,*pjj;
1117   PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col;
1118   PetscInt       *ci,*cj,*ptadenserow,*ptasparserow,*ptaj;
1119   PetscInt       an=A->N,am=A->m,pn=P->N,pm=P->M;
1120   PetscInt       i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
1121   PetscInt       m=prend-prstart,nlnk,*lnk;
1122   MatScalar      *ca;
1123   PetscBT        lnkbt;
1124 
1125   PetscFunctionBegin;
1126 
1127   /* Get ij structure of P[rstart:rend,:]^T */
1128   ierr = MatGetSymbolicTransposeReduced_SeqAIJ(P,prstart,prend,&pti,&ptj);CHKERRQ(ierr);
1129   ptJ=ptj;
1130 
1131   /* Allocate ci array, arrays for fill computation and */
1132   /* free space for accumulating nonzero column info */
1133   ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
1134   ci[0] = 0;
1135 
1136   ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
1137   ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
1138   ptasparserow = ptadenserow  + an;
1139 
1140   /* create and initialize a linked list */
1141   nlnk = pn+1;
1142   ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1143 
1144   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
1145   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
1146   nnz           = adi[am] + aoi[am];
1147   ierr          = GetMoreSpace((PetscInt)(fill*nnz/pm)*pn,&free_space);
1148   current_space = free_space;
1149 
1150   /* Determine symbolic info for each row of C: */
1151   for (i=0;i<pn;i++) {
1152     ptnzi  = pti[i+1] - pti[i];
1153     ptanzi = 0;
1154     /* Determine symbolic row of PtA_reduced: */
1155     for (j=0;j<ptnzi;j++) {
1156       arow = *ptJ++;
1157       /* diagonal portion of A */
1158       anzj = adi[arow+1] - adi[arow];
1159       ajj  = adj + adi[arow];
1160       for (k=0;k<anzj;k++) {
1161         col = ajj[k]+prstart;
1162         if (!ptadenserow[col]) {
1163           ptadenserow[col]    = -1;
1164           ptasparserow[ptanzi++] = col;
1165         }
1166       }
1167       /* off-diagonal portion of A */
1168       anzj = aoi[arow+1] - aoi[arow];
1169       ajj  = aoj + aoi[arow];
1170       for (k=0;k<anzj;k++) {
1171         col = a->garray[ajj[k]];  /* global col */
1172         if (col < cstart){
1173           col = ajj[k];
1174         } else if (col >= cend){
1175           col = ajj[k] + m;
1176         } else {
1177           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
1178         }
1179         if (!ptadenserow[col]) {
1180           ptadenserow[col]    = -1;
1181           ptasparserow[ptanzi++] = col;
1182         }
1183       }
1184     }
1185     /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
1186     ptaj = ptasparserow;
1187     cnzi   = 0;
1188     for (j=0;j<ptanzi;j++) {
1189       prow = *ptaj++;
1190       pnzj = pi[prow+1] - pi[prow];
1191       pjj  = pj + pi[prow];
1192       /* if (rank==1) printf(" prow: %d, pnzj: %d\n",prow,pnzj); */
1193       /* add non-zero cols of P into the sorted linked list lnk */
1194       ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1195       cnzi += nlnk;
1196     }
1197 
1198     /* If free space is not available, make more free space */
1199     /* Double the amount of total space in the list */
1200     if (current_space->local_remaining<cnzi) {
1201       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
1202     }
1203 
1204     /* Copy data into free space, and zero out denserows */
1205     ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1206     current_space->array           += cnzi;
1207     current_space->local_used      += cnzi;
1208     current_space->local_remaining -= cnzi;
1209 
1210     for (j=0;j<ptanzi;j++) {
1211       ptadenserow[ptasparserow[j]] = 0;
1212     }
1213 
1214     /* Aside: Perhaps we should save the pta info for the numerical factorization. */
1215     /*        For now, we will recompute what is needed. */
1216     ci[i+1] = ci[i] + cnzi;
1217   }
1218   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
1219   /* Allocate space for cj, initialize cj, and */
1220   /* destroy list of free space and other temporary array(s) */
1221   ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
1222   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
1223   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
1224   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1225 
1226   /* Allocate space for ca */
1227   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
1228   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
1229 
1230   /* put together the new matrix */
1231   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
1232 
1233   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
1234   /* Since these are PETSc arrays, change flags to free them as necessary. */
1235   c = (Mat_SeqAIJ *)((*C)->data);
1236   c->freedata = PETSC_TRUE;
1237   c->nonew    = 0;
1238 
1239   /* Clean up. */
1240   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
1241 
1242   PetscFunctionReturn(0);
1243 }
1244 
1245 static PetscEvent logkey_PtAPReducedPt = 0;
1246 #undef __FUNCT__
1247 #define __FUNCT__ "MatPtAP_MPIAIJ_SeqAIJ_ReducedPt"
1248 PetscErrorCode MatPtAP_MPIAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,MatReuse scall,PetscReal fill,PetscInt prstart,PetscInt prend,Mat *C)
1249 {
1250   PetscErrorCode ierr;
1251 
1252   PetscFunctionBegin;
1253   if (A->m != prend-prstart) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->m,prend-prstart);
1254   if (prend-prstart > P->m) SETERRQ2(PETSC_ERR_ARG_SIZ," prend-prstart %d cannot be larger than P->m %d",prend-prstart,P->m);
1255   if (!logkey_PtAPReducedPt) {
1256     ierr = PetscLogEventRegister(&logkey_PtAPReducedPt,"MatPtAP_ReducedPt",MAT_COOKIE);
1257   }
1258   PetscLogEventBegin(logkey_PtAPReducedPt,A,P,0,0);CHKERRQ(ierr);
1259 
1260   if (scall == MAT_INITIAL_MATRIX){
1261     ierr = MatPtAPSymbolic_MPIAIJ_SeqAIJ_ReducedPt(A,P,fill,prstart,prend,C);CHKERRQ(ierr);
1262   }
1263   ierr = MatPtAPNumeric_MPIAIJ_SeqAIJ_ReducedPt(A,P,prstart,prend,*C);CHKERRQ(ierr);
1264   ierr = PetscLogEventEnd(logkey_PtAPReducedPt,A,P,0,0);CHKERRQ(ierr);
1265 
1266   PetscFunctionReturn(0);
1267 }
1268 
1269