xref: /petsc/src/mat/impls/aij/seq/matptap.c (revision 737d121a2b57b182d532aa04ba3c4e4d7f0dd66e)
1 /*
2   Defines projective product routines where A is a AIJ matrix
3           C = P^T * A * P
4 */
5 
6 #include "src/mat/impls/aij/seq/aij.h"   /*I "petscmat.h" I*/
7 #include "src/mat/utils/freespace.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 
10 #undef __FUNCT__
11 #define __FUNCT__ "MatPtAP"
12 /*@
13    MatPtAP - Creates the matrix projection C = P^T * A * P
14 
15    Collective on Mat
16 
17    Input Parameters:
18 +  A - the matrix
19 .  P - the projection matrix
20 .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
21 -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P))
22 
23    Output Parameters:
24 .  C - the product matrix
25 
26    Notes:
27    C will be created and must be destroyed by the user with MatDestroy().
28 
29    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
30    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
31 
32    Level: intermediate
33 
34 .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult()
35 @*/
36 PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) {
37   PetscErrorCode ierr;
38   PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *);
39   PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *);
40 
41   PetscFunctionBegin;
42   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
43   PetscValidType(A,1);
44   MatPreallocated(A);
45   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
46   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
47   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
48   PetscValidType(P,2);
49   MatPreallocated(P);
50   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
51   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
52   PetscValidPointer(C,3);
53 
54   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
55 
56   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
57 
58   /* For now, we do not dispatch based on the type of A and P */
59   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
60   fA = A->ops->ptap;
61   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name);
62   fP = P->ops->ptap;
63   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name);
64   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
65 
66   ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
67   ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr);
68   ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
69   PetscFunctionReturn(0);
70 }
71 
72 EXTERN PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,MatReuse,PetscReal,int,int,Mat*);
73 EXTERN PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,PetscReal,int,int,Mat*);
74 EXTERN PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,int,int,Mat);
75 #undef __FUNCT__
76 #define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ"
77 PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
78 {
79   PetscErrorCode    ierr;
80   Mat               AP,P_seq,A_loc,C_seq;
81   Mat_MatMatMultMPI *mult;
82   int               prstart,prend,m=P->m;
83   int               rank,prid=10;
84 
85   PetscFunctionBegin;
86   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
87 
88   /* compute symbolic and numeric AP = A*P */
89   ierr = MatMatMult_MPIAIJ_MPIAIJ(A,P,scall,fill,&AP);CHKERRQ(ierr);
90   mult = (Mat_MatMatMultMPI*)AP->spptr;
91   P_seq = mult->bseq[0]; /* = submatrix of P by taking rows of P that equal to nonzero col of A */
92 
93   if (rank == prid){
94     ierr = PetscPrintf(PETSC_COMM_SELF," [%d] A_loc: %d, %d\n",rank,A_loc->m,A_loc->n);CHKERRQ(ierr);
95     ierr = MatView(A_loc,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
96     ierr = PetscPrintf(PETSC_COMM_SELF," [%d] P_seq: %d, %d\n",rank,P_seq->m,P_seq->n);CHKERRQ(ierr);
97     ierr = MatView(P_seq,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
98     ierr = PetscPrintf(PETSC_COMM_SELF," [%d]  AP_seq: %d, %d\n",rank,AP->m,AP->n);
99     ierr = MatView(AP,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
100   }
101 
102   prstart = mult->brstart;
103   prend   = prstart + m;
104 
105   A_loc = mult->aseq[0]; /* = submatrix of A by taking all local rows of A */
106   ierr = MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(A_loc,P_seq,scall,fill,prstart,prend,&C_seq);CHKERRQ(ierr);
107 
108   /* add C_seq into C */
109   ierr = MatMerge_SeqsToMPI(A->comm,C_seq,P->n,P->n,scall,C);CHKERRQ(ierr);
110 
111   /* clean up */
112   ierr = MatDestroy(AP);CHKERRQ(ierr);
113   ierr = MatDestroy(C_seq);CHKERRQ(ierr);
114 
115   PetscFunctionReturn(0);
116 }
117 
118 #undef __FUNCT__
119 #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ"
120 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
121 {
122 
123   PetscFunctionBegin;
124   PetscFunctionReturn(0);
125 }
126 
127 #undef __FUNCT__
128 #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ"
129 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat B,Mat C)
130 {
131 
132   PetscFunctionBegin;
133   PetscFunctionReturn(0);
134 }
135 
#undef __FUNCT__
#define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ"
/*
   MatPtAP_SeqAIJ_SeqAIJ - SeqAIJ implementation of MatPtAP(): C = P^T * A * P.
   For MAT_INITIAL_MATRIX the symbolic phase first creates C with the correct
   nonzero structure; for MAT_REUSE_MATRIX the existing structure of C is
   reused and only the numeric phase runs.
*/
PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    /* symbolic phase: determine sparsity pattern of C and allocate it */
    ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
    ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  }
  /* numeric phase: compute the values of C into the existing structure */
  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
152 
153 #undef __FUNCT__
154 #define __FUNCT__ "MatPtAPSymbolic"
155 /*
156    MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P
157 
158    Collective on Mat
159 
160    Input Parameters:
161 +  A - the matrix
162 -  P - the projection matrix
163 
164    Output Parameters:
165 .  C - the (i,j) structure of the product matrix
166 
167    Notes:
168    C will be created and must be destroyed by the user with MatDestroy().
169 
170    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
171    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.  The product is computed using
172    this (i,j) structure by calling MatPtAPNumeric().
173 
174    Level: intermediate
175 
176 .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic()
177 */
178 PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C) {
179   PetscErrorCode ierr;
180   PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*);
181   PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*);
182 
183   PetscFunctionBegin;
184 
185   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
186   PetscValidType(A,1);
187   MatPreallocated(A);
188   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
189   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
190 
191   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
192   PetscValidType(P,2);
193   MatPreallocated(P);
194   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
195   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
196 
197   PetscValidPointer(C,3);
198 
199   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
200   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
201 
202   /* For now, we do not dispatch based on the type of A and P */
203   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
204   fA = A->ops->ptapsymbolic;
205   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name);
206   fP = P->ops->ptapsymbolic;
207   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name);
208   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
209 
210   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
211   ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr);
212   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
213 
214   PetscFunctionReturn(0);
215 }
216 
/* Private context hung off a product matrix created by PtAP.
   NOTE(review): symAP is presumably the saved symbolic A*P product kept for
   reuse — confirm against the routines that set spptr; it is freed in
   MatDestroy_SeqAIJ_PtAP(). */
typedef struct {
  Mat    symAP;   /* freed by MatDestroy_SeqAIJ_PtAP() */
} Mat_PtAPstruct;
220 
221 EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat);
222 
223 #undef __FUNCT__
224 #define __FUNCT__ "MatDestroy_SeqAIJ_PtAP"
225 PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A)
226 {
227   PetscErrorCode    ierr;
228   Mat_PtAPstruct    *ptap=(Mat_PtAPstruct*)A->spptr;
229 
230   PetscFunctionBegin;
231   ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr);
232   ierr = PetscFree(ptap);CHKERRQ(ierr);
233   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
234   PetscFunctionReturn(0);
235 }
236 
237 #undef __FUNCT__
238 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ"
239 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C) {
240   PetscErrorCode ierr;
241   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
242   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
243   int            *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
244   int            *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
245   int            an=A->N,am=A->M,pn=P->N,pm=P->M;
246   int            i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
247   MatScalar      *ca;
248 
249   PetscFunctionBegin;
250   /* Get ij structure of P^T */
251   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
252   ptJ=ptj;
253 
254   /* Allocate ci array, arrays for fill computation and */
255   /* free space for accumulating nonzero column info */
256   ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr);
257   ci[0] = 0;
258 
259   ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr);
260   ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr);
261   ptasparserow = ptadenserow  + an;
262   denserow     = ptasparserow + an;
263   sparserow    = denserow     + pn;
264 
265   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
266   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
267   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
268   current_space = free_space;
269 
270   /* Determine symbolic info for each row of C: */
271   for (i=0;i<pn;i++) {
272     ptnzi  = pti[i+1] - pti[i];
273     ptanzi = 0;
274     /* Determine symbolic row of PtA: */
275     for (j=0;j<ptnzi;j++) {
276       arow = *ptJ++;
277       anzj = ai[arow+1] - ai[arow];
278       ajj  = aj + ai[arow];
279       for (k=0;k<anzj;k++) {
280         if (!ptadenserow[ajj[k]]) {
281           ptadenserow[ajj[k]]    = -1;
282           ptasparserow[ptanzi++] = ajj[k];
283         }
284       }
285     }
286       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
287     ptaj = ptasparserow;
288     cnzi   = 0;
289     for (j=0;j<ptanzi;j++) {
290       prow = *ptaj++;
291       pnzj = pi[prow+1] - pi[prow];
292       pjj  = pj + pi[prow];
293       for (k=0;k<pnzj;k++) {
294         if (!denserow[pjj[k]]) {
295             denserow[pjj[k]]  = -1;
296             sparserow[cnzi++] = pjj[k];
297         }
298       }
299     }
300 
301     /* sort sparserow */
302     ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
303 
304     /* If free space is not available, make more free space */
305     /* Double the amount of total space in the list */
306     if (current_space->local_remaining<cnzi) {
307       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
308     }
309 
310     /* Copy data into free space, and zero out denserows */
311     ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr);
312     current_space->array           += cnzi;
313     current_space->local_used      += cnzi;
314     current_space->local_remaining -= cnzi;
315 
316     for (j=0;j<ptanzi;j++) {
317       ptadenserow[ptasparserow[j]] = 0;
318     }
319     for (j=0;j<cnzi;j++) {
320       denserow[sparserow[j]] = 0;
321     }
322       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
323       /*        For now, we will recompute what is needed. */
324     ci[i+1] = ci[i] + cnzi;
325   }
326   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
327   /* Allocate space for cj, initialize cj, and */
328   /* destroy list of free space and other temporary array(s) */
329   ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr);
330   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
331   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
332 
333   /* Allocate space for ca */
334   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
335   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
336 
337   /* put together the new matrix */
338   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
339 
340   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
341   /* Since these are PETSc arrays, change flags to free them as necessary. */
342   c = (Mat_SeqAIJ *)((*C)->data);
343   c->freedata = PETSC_TRUE;
344   c->nonew    = 0;
345 
346   /* Clean up. */
347   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
348 
349   PetscFunctionReturn(0);
350 }
351 
#include "src/mat/impls/maij/maij.h"
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
/*
   MatPtAPSymbolic_SeqAIJ_SeqMAIJ - Computes the nonzero structure of
   C = P^T * A * P where P is a SeqMAIJ matrix (block interpolation built
   from the underlying AIJ matrix pp->AIJ with 'dof' components per node).

   NOTE(review): the original author flagged this routine as untested, and
   several defects are visible below (marked inline): the loop variable 'dof'
   is used as a divisor/modulus starting at 0 (undefined behavior); the 'ci'
   row indexing does not account for ppdof rows per coarse node; and one
   GetMoreSpace() return code is not CHKERRQ-checked.  Do not enable this
   path without fixing and testing it.
*/
PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C) {
  /* This routine requires testing -- I don't think it works. */
  PetscErrorCode ierr;
  FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
  Mat            P=pp->AIJ;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
  int            *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
  int            *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
  int            an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof;
  int            i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
  MatScalar      *ca;

  PetscFunctionBegin;
  /* Start timer */
  ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);

  /* Get ij structure of P^T (transpose of the underlying AIJ, not of PP) */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);

  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr);
  ci[0] = 0;

  /* one allocation split four ways: dense/sparse work rows for PtA and for C */
  ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr);
  ptasparserow = ptadenserow  + an;
  denserow     = ptasparserow + an;
  sparserow    = denserow     + pn;

  /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
  /* This should be reasonable if sparsity of PtAP is similar to that of A. */
  /* NOTE(review): return code not CHKERRQ-checked here */
  ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
  current_space = free_space;

  /* Determine symbolic info for each row of C: */
  for (i=0;i<pn/ppdof;i++) {
    ptnzi  = pti[i+1] - pti[i];
    ptanzi = 0;
    ptJ    = ptj + pti[i];
    for (dof=0;dof<ppdof;dof++) {
    /* Determine symbolic row of PtA: */
      for (j=0;j<ptnzi;j++) {
        /* NOTE(review): 'ptJ[j] + dof' looks suspect — for a MAIJ expansion the
           fine row is presumably ptJ[j]*ppdof + dof; verify before use */
        arow = ptJ[j] + dof;
        anzj = ai[arow+1] - ai[arow];
        ajj  = aj + ai[arow];
        for (k=0;k<anzj;k++) {
          if (!ptadenserow[ajj[k]]) {
            ptadenserow[ajj[k]]    = -1;
            ptasparserow[ptanzi++] = ajj[k];
          }
        }
      }
      /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
      ptaj = ptasparserow;
      cnzi   = 0;
      for (j=0;j<ptanzi;j++) {
        /* NOTE(review): '%dof' and '/dof' divide by the LOOP VARIABLE, which is 0
           on the first iteration — undefined behavior.  Almost certainly 'ppdof'
           was intended; confirm and fix before enabling this routine */
        pdof = *ptaj%dof;
        prow = (*ptaj++)/dof;
        pnzj = pi[prow+1] - pi[prow];
        pjj  = pj + pi[prow];
        for (k=0;k<pnzj;k++) {
          if (!denserow[pjj[k]+pdof]) {
            denserow[pjj[k]+pdof] = -1;
            sparserow[cnzi++]     = pjj[k]+pdof;
          }
        }
      }

      /* sort sparserow */
      ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);

      /* If free space is not available, make more free space */
      /* Double the amount of total space in the list */
      if (current_space->local_remaining<cnzi) {
        ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
      }

      /* Copy data into free space, and zero out denserows */
      ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr);
      current_space->array           += cnzi;
      current_space->local_used      += cnzi;
      current_space->local_remaining -= cnzi;

      for (j=0;j<ptanzi;j++) {
        ptadenserow[ptasparserow[j]] = 0;
      }
      for (j=0;j<cnzi;j++) {
        denserow[sparserow[j]] = 0;
      }
      /* Aside: Perhaps we should save the pta info for the numerical factorization. */
      /*        For now, we will recompute what is needed. */
      /* NOTE(review): row index 'i+dof' does not step by ppdof per coarse node;
         the fine row is presumably i*ppdof+dof — verify */
      ci[i+1+dof] = ci[i+dof] + cnzi;
    }
  }
  /* nnz is now stored in ci[ptm], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree(ptadenserow);CHKERRQ(ierr);

  /* Allocate space for ca */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);

  /* put together the new matrix */
  ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);

  /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change flags to free them as necessary. */
  c = (Mat_SeqAIJ *)((*C)->data);
  c->freedata = PETSC_TRUE;
  c->nonew    = 0;

  /* Clean up. */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);

  ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
479 
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric"
/*
   MatPtAPNumeric - Computes the matrix projection C = P^T * A * P

   Collective on Mat

   Input Parameters:
+  A - the matrix
-  P - the projection matrix

   Output Parameters:
.  C - the product matrix

   Notes:
   C must have been created by calling MatPtAPSymbolic and must be destroyed by
   the user using MatDestroy().

   This routine is currently only implemented for pairs of AIJ matrices and classes
   which inherit from AIJ.  C will be of type MATAIJ.

   Level: intermediate

.seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric()
*/
PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C) {
  PetscErrorCode ierr;
  PetscErrorCode (*fA)(Mat,Mat,Mat);
  PetscErrorCode (*fP)(Mat,Mat,Mat);

  PetscFunctionBegin;

  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidType(A,1);
  MatPreallocated(A);
  if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidHeaderSpecific(P,MAT_COOKIE,2);
  PetscValidType(P,2);
  MatPreallocated(P);
  if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidHeaderSpecific(C,MAT_COOKIE,3);
  PetscValidType(C,3);
  MatPreallocated(C);
  if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  /* dimension compatibility: C is square of size P->N, A is square of size P->M */
  if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M);
  if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
  if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
  if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N);

  /* For now, we do not dispatch based on the type of A and P */
  /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
  fA = A->ops->ptapnumeric;
  if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name);
  fP = P->ops->ptapnumeric;
  if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name);
  if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);

  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = (*fA)(A,P,C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
549 
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ"
/*
   MatPtAPNumeric_SeqAIJ_SeqAIJ - Fills in the values of C = P^T * A * P,
   whose nonzero structure was previously computed by
   MatPtAPSymbolic_SeqAIJ_SeqAIJ().  For each row i of A it forms the sparse
   row (A*P)[i,:], then scatters it into C via the outer product
   (P^T)[:,i]*(A*P)[i,:] using a sparse axpy into the preallocated rows of C.
*/
PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C)
{
  PetscErrorCode ierr;
  int            flops=0;
  Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
  Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
  Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
  /* aj and pJ are walked incrementally across the whole matrix; pj/pi are
     also indexed randomly per row of P */
  int            *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
  int            *ci=c->i,*cj=c->j,*cjj;
  int            am=A->M,cn=C->N,cm=C->M;
  int            i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;

  PetscFunctionBegin;
  /* Allocate temporary array for storage of one row of A*P:
     apa (cn scalars) holds the dense values, apj the column indices present,
     apjdense the occupancy flags -- all in a single allocation */
  ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(int)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr);

  apj      = (int *)(apa + cn);
  apjdense = apj + cn;

  /* Clear old values in C */
  ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0;i<am;i++) {
    /* Form sparse row of A*P */
    anzi  = ai[i+1] - ai[i];
    apnzj = 0;
    for (j=0;j<anzi;j++) {
      prow = *aj++;                     /* column of A = row of P */
      pnzj = pi[prow+1] - pi[prow];
      pjj  = pj + pi[prow];
      paj  = pa + pi[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {        /* first hit on this column */
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aa)*paj[k];    /* accumulate A(i,prow)*P(prow,k) */
      }
      flops += 2*pnzj;
      aa++;
    }

    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
    pnzi = pi[i+1] - pi[i];
    for (j=0;j<pnzi;j++) {
      nextap = 0;
      crow   = *pJ++;                   /* column of P = row of C */
      cjj    = cj + ci[crow];
      caj    = ca + ci[crow];
      /* Perform sparse axpy operation.  Note cjj includes apj,
         so the merge below terminates once all apj entries are matched. */
      for (k=0;nextap<apnzj;k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA++;
    }

    /* Zero the current row info for A*P (cheaper than a full memzero) */
    for (j=0;j<apnzj;j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
631 
/* Compute C = P[rstart:rend,:]^T * A * P of seqaij matrices - used by MatPtAP_MPIAIJ_MPIAIJ() */

#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt"
/*
   MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt - Like MatPtAPNumeric_SeqAIJ_SeqAIJ()
   except that only rows [prstart,prend) of P contribute on the transpose side,
   i.e. C = P[prstart:prend,:]^T * A * P, where A->M == prend-prstart.
*/
PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,int prstart,int prend,Mat C)
{
  PetscErrorCode ierr;
  int            flops=0;
  Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
  Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
  Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
  int            *ai=a->i,*aj=a->j,*apj,*apjdense;
  /* pJ starts at row prstart of P (the transpose-side walk) */
  int            *pi=p->i,*pj=p->j,*pJ=p->j+pi[prstart],*pjj;
  int            *ci=c->i,*cj=c->j,*cjj;
  int            am=A->M,cn=C->N,cm=C->M;
  int            i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  /* NOTE(review): the initializer pA=p->a here is immediately overwritten
     below; harmless but redundant */
  MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;

  PetscFunctionBegin;
  pA=p->a+pi[prstart];   /* values walk starts at row prstart, matching pJ */
  /* Allocate temporary array for storage of one row of A*P:
     apa holds dense values, apj the columns present, apjdense occupancy flags */
  ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(int)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr);

  apj      = (int *)(apa + cn);
  apjdense = apj + cn;

  /* Clear old values in C */
  ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0;i<am;i++) {
    /* Form sparse row of A*P */
    anzi  = ai[i+1] - ai[i];
    apnzj = 0;
    for (j=0;j<anzi;j++) {
      prow = *aj++;                     /* column of A = row of P */
      pnzj = pi[prow+1] - pi[prow];
      pjj  = pj + pi[prow];
      paj  = pa + pi[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {        /* first hit on this column */
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aa)*paj[k];
      }
      flops += 2*pnzj;
      aa++;
    }

    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P[[prstart:prend,:]^T*A*P using outer product (P^T)[:,j+prstart]*(A*P)[j,:]. */
    pnzi = pi[i+1+prstart] - pi[i+prstart];
    for (j=0;j<pnzi;j++) {
      nextap = 0;
      crow   = *pJ++;                   /* column of P = row of C */
      cjj    = cj + ci[crow];
      caj    = ca + ci[crow];
      /* Perform sparse axpy operation.  Note cjj includes apj. */
      for (k=0;nextap<apnzj;k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA++;
    }

    /* Zero the current row info for A*P */
    for (j=0;j<apnzj;j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
717 
718 #undef __FUNCT__
719 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt"
720 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,PetscReal fill,int prstart,int prend,Mat *C) {
721   PetscErrorCode ierr;
722   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
723   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
724   int            *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
725   int            *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
726   int            an=A->N,am=A->M,pn=P->N,pm=P->M;
727   int            i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
728   MatScalar      *ca;
729   Mat *psub,P_sub;
730   IS  isrow,iscol;
731   int m = prend - prstart;
732 
733   PetscFunctionBegin;
734   /* Get ij structure of P[rstart:rend,:]^T */
735   ierr = ISCreateStride(PETSC_COMM_SELF,m,prstart,1,&isrow);CHKERRQ(ierr);
736   ierr = ISCreateStride(PETSC_COMM_SELF,P->n,0,1,&iscol);CHKERRQ(ierr);
737   ierr = MatGetSubMatrices(P,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&psub);CHKERRQ(ierr);
738   ierr = ISDestroy(isrow);CHKERRQ(ierr);
739   ierr = ISDestroy(iscol);CHKERRQ(ierr);
740   P_sub = psub[0];
741   ierr = MatGetSymbolicTranspose_SeqAIJ(P_sub,&pti,&ptj);CHKERRQ(ierr);
742   ierr = MatDestroyMatrices(1,&psub);CHKERRQ(ierr);
743   ptJ=ptj;
744 
745   /* Allocate ci array, arrays for fill computation and */
746   /* free space for accumulating nonzero column info */
747   ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr);
748   ci[0] = 0;
749 
750   ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr);
751   ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr);
752   ptasparserow = ptadenserow  + an;
753   denserow     = ptasparserow + an;
754   sparserow    = denserow     + pn;
755 
756   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
757   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
758   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
759   current_space = free_space;
760 
761   /* Determine symbolic info for each row of C: */
762   for (i=0;i<pn;i++) {
763     ptnzi  = pti[i+1] - pti[i];
764     ptanzi = 0;
765     /* Determine symbolic row of PtA_reduced: */
766     for (j=0;j<ptnzi;j++) {
767       arow = *ptJ++;
768       anzj = ai[arow+1] - ai[arow];
769       ajj  = aj + ai[arow];
770       for (k=0;k<anzj;k++) {
771         if (!ptadenserow[ajj[k]]) {
772           ptadenserow[ajj[k]]    = -1;
773           ptasparserow[ptanzi++] = ajj[k];
774         }
775       }
776     }
777       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
778     ptaj = ptasparserow;
779     cnzi   = 0;
780     for (j=0;j<ptanzi;j++) {
781       prow = *ptaj++;
782       pnzj = pi[prow+1] - pi[prow];
783       pjj  = pj + pi[prow];
784       for (k=0;k<pnzj;k++) {
785         if (!denserow[pjj[k]]) {
786             denserow[pjj[k]]  = -1;
787             sparserow[cnzi++] = pjj[k];
788         }
789       }
790     }
791 
792     /* sort sparserow */
793     ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
794 
795     /* If free space is not available, make more free space */
796     /* Double the amount of total space in the list */
797     if (current_space->local_remaining<cnzi) {
798       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
799     }
800 
801     /* Copy data into free space, and zero out denserows */
802     ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr);
803     current_space->array           += cnzi;
804     current_space->local_used      += cnzi;
805     current_space->local_remaining -= cnzi;
806 
807     for (j=0;j<ptanzi;j++) {
808       ptadenserow[ptasparserow[j]] = 0;
809     }
810     for (j=0;j<cnzi;j++) {
811       denserow[sparserow[j]] = 0;
812     }
813       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
814       /*        For now, we will recompute what is needed. */
815     ci[i+1] = ci[i] + cnzi;
816   }
817   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
818   /* Allocate space for cj, initialize cj, and */
819   /* destroy list of free space and other temporary array(s) */
820   ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr);
821   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
822   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
823 
824   /* Allocate space for ca */
825   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
826   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
827 
828   /* put together the new matrix */
829   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
830 
831   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
832   /* Since these are PETSc arrays, change flags to free them as necessary. */
833   c = (Mat_SeqAIJ *)((*C)->data);
834   c->freedata = PETSC_TRUE;
835   c->nonew    = 0;
836 
837   /* Clean up. */
838   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
839 
840   PetscFunctionReturn(0);
841 }
842 
#undef __FUNCT__
#define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ_ReducedPt"
/*
   MatPtAP_SeqAIJ_SeqAIJ_ReducedPt - Computes C = P[prstart:prend,:]^T * A * P
   for SeqAIJ matrices; used by MatPtAP_MPIAIJ_MPIAIJ().  Requires
   A->m == prend-prstart and prend-prstart <= P->m.  For MAT_INITIAL_MATRIX
   the symbolic phase creates C first; otherwise only the numeric phase runs.
*/
PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,MatReuse scall,PetscReal fill,int prstart,int prend,Mat *C)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  if (A->m != prend-prstart) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->m,prend-prstart);
  if (prend-prstart > P->m) SETERRQ2(PETSC_ERR_ARG_SIZ," prend-prstart %d cannot be larger than P->m %d",prend-prstart,P->m);
  if (scall == MAT_INITIAL_MATRIX){
    /* symbolic phase: determine sparsity pattern of C and allocate it */
    ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(A,P,fill,prstart,prend,C);CHKERRQ(ierr);
  }

  /* numeric phase: compute the values of C into the existing structure */
  ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(A,P,prstart,prend,*C);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
859 
860