xref: /petsc/src/mat/impls/aij/seq/matptap.c (revision 25616d81f464dead70372e35c2cc7e6db2a8f99c)
1 /*
2   Defines projective product routines where A is a AIJ matrix
3           C = P^T * A * P
4 */
5 
6 #include "src/mat/impls/aij/seq/aij.h"   /*I "petscmat.h" I*/
7 #include "src/mat/utils/freespace.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 
10 #undef __FUNCT__
11 #define __FUNCT__ "MatPtAP"
12 /*@
13    MatPtAP - Creates the matrix projection C = P^T * A * P
14 
15    Collective on Mat
16 
17    Input Parameters:
18 +  A - the matrix
19 .  P - the projection matrix
20 .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
21 -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P))
22 
23    Output Parameters:
24 .  C - the product matrix
25 
26    Notes:
27    C will be created and must be destroyed by the user with MatDestroy().
28 
29    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
30    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
31 
32    Level: intermediate
33 
34 .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult()
35 @*/
36 PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) {
37   PetscErrorCode ierr;
38   PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *);
39   PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *);
40 
41   PetscFunctionBegin;
42   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
43   PetscValidType(A,1);
44   MatPreallocated(A);
45   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
46   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
47   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
48   PetscValidType(P,2);
49   MatPreallocated(P);
50   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
51   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
52   PetscValidPointer(C,3);
53 
54   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
55 
56   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
57 
58   /* For now, we do not dispatch based on the type of A and P */
59   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
60   fA = A->ops->ptap;
61   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name);
62   fP = P->ops->ptap;
63   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name);
64   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
65 
66   ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
67   ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr);
68   ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
69   PetscFunctionReturn(0);
70 }
71 
72 EXTERN PetscErrorCode MatGetLocalMat(Mat,MatReuse,IS*,IS*,Mat*);
73 EXTERN PetscErrorCode MatGetBrowsOfAcols(Mat,Mat,MatReuse,IS*,IS*,PetscInt*,Mat*);
74 EXTERN PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,MatReuse,PetscReal,int,int,Mat*);
75 EXTERN PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,PetscReal,int,int,Mat*);
76 EXTERN PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,int,int,Mat);
#undef __FUNCT__
#define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ"
/*
   Parallel C = P^T*A*P: gather the rows of P needed by this process's part
   of A, form a purely local product, then merge the per-process sequential
   results into the parallel matrix C.
*/
PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode    ierr;
  Mat               P_seq,A_loc,C_seq;   /* sequential pieces used for the local product */
  int               prstart,prend,m=P->m;
  IS                isrowp,iscolp;

  PetscFunctionBegin;
  /* get P_seq = submatrix of P by taking rows of P that equal to nonzero col of A */
  ierr = MatGetBrowsOfAcols(A,P,scall,&isrowp,&iscolp,&prstart,&P_seq);CHKERRQ(ierr);
  ierr = ISDestroy(iscolp);CHKERRQ(ierr);

  /* get A_loc = submatrix of A by taking all local rows of A */
  ierr = MatGetLocalMat(A,scall,PETSC_NULL,&isrowp,&A_loc);CHKERRQ(ierr);
  ierr = ISDestroy(isrowp);CHKERRQ(ierr);

  /* compute C_seq = P_loc^T * A_loc * P_seq; only rows [prstart,prend) of
     P_seq (the locally owned rows of P) enter the transpose */
  prend   = prstart + m;
  ierr = MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(A_loc,P_seq,scall,fill,prstart,prend,&C_seq);CHKERRQ(ierr);

  /* add C_seq into mpi C */
  ierr = MatMerge_SeqsToMPI(A->comm,C_seq,P->n,P->n,scall,C);CHKERRQ(ierr);

  /* clean up */
  /* NOTE(review): C_seq is destroyed right after MatMerge_SeqsToMPI(); this
     assumes the merge copies rather than takes ownership of C_seq -- confirm. */
  ierr = MatDestroy(C_seq);CHKERRQ(ierr);
  ierr = MatDestroy(P_seq);CHKERRQ(ierr);
  ierr = MatDestroy(A_loc);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
108 
109 #undef __FUNCT__
110 #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ"
111 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
112 {
113 
114   PetscFunctionBegin;
115   PetscFunctionReturn(0);
116 }
117 
118 #undef __FUNCT__
119 #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ"
120 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat B,Mat C)
121 {
122 
123   PetscFunctionBegin;
124   PetscFunctionReturn(0);
125 }
126 
127 #undef __FUNCT__
128 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ"
129 PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
130 {
131   PetscErrorCode ierr;
132   PetscFunctionBegin;
133   if (scall == MAT_INITIAL_MATRIX){
134     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
135     ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr);
136     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
137   }
138   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
139   ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr);
140   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
141   PetscFunctionReturn(0);
142 }
143 
144 #undef __FUNCT__
145 #define __FUNCT__ "MatPtAPSymbolic"
146 /*
147    MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P
148 
149    Collective on Mat
150 
151    Input Parameters:
152 +  A - the matrix
153 -  P - the projection matrix
154 
155    Output Parameters:
156 .  C - the (i,j) structure of the product matrix
157 
158    Notes:
159    C will be created and must be destroyed by the user with MatDestroy().
160 
161    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
162    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.  The product is computed using
163    this (i,j) structure by calling MatPtAPNumeric().
164 
165    Level: intermediate
166 
167 .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic()
168 */
169 PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C) {
170   PetscErrorCode ierr;
171   PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*);
172   PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*);
173 
174   PetscFunctionBegin;
175 
176   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
177   PetscValidType(A,1);
178   MatPreallocated(A);
179   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
180   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
181 
182   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
183   PetscValidType(P,2);
184   MatPreallocated(P);
185   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
186   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
187 
188   PetscValidPointer(C,3);
189 
190   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
191   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
192 
193   /* For now, we do not dispatch based on the type of A and P */
194   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
195   fA = A->ops->ptapsymbolic;
196   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name);
197   fP = P->ops->ptapsymbolic;
198   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name);
199   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
200 
201   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
202   ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr);
203   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
204 
205   PetscFunctionReturn(0);
206 }
207 
/* Context hung off a PtAP product matrix via A->spptr; freed by
   MatDestroy_SeqAIJ_PtAP(). */
typedef struct {
  Mat    symAP;   /* cached A*P matrix -- presumably reused between numeric phases; confirm with callers */
} Mat_PtAPstruct;
211 
212 EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat);
213 
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_SeqAIJ_PtAP"
/*
   Destructor for a SeqAIJ matrix that carries a Mat_PtAPstruct in spptr:
   frees the cached symAP matrix and the context, then delegates the rest
   of the cleanup to the standard SeqAIJ destructor.
*/
PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A)
{
  PetscErrorCode    ierr;
  Mat_PtAPstruct    *ptap=(Mat_PtAPstruct*)A->spptr;  /* assumes spptr was set to a Mat_PtAPstruct -- no NULL check */

  PetscFunctionBegin;
  ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr);
  ierr = PetscFree(ptap);CHKERRQ(ierr);
  /* base-class destroy releases the AIJ storage itself */
  ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
227 
228 #undef __FUNCT__
229 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ"
230 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C) {
231   PetscErrorCode ierr;
232   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
233   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
234   int            *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
235   int            *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
236   int            an=A->N,am=A->M,pn=P->N,pm=P->M;
237   int            i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
238   MatScalar      *ca;
239 
240   PetscFunctionBegin;
241   /* Get ij structure of P^T */
242   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
243   ptJ=ptj;
244 
245   /* Allocate ci array, arrays for fill computation and */
246   /* free space for accumulating nonzero column info */
247   ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr);
248   ci[0] = 0;
249 
250   ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr);
251   ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr);
252   ptasparserow = ptadenserow  + an;
253   denserow     = ptasparserow + an;
254   sparserow    = denserow     + pn;
255 
256   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
257   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
258   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
259   current_space = free_space;
260 
261   /* Determine symbolic info for each row of C: */
262   for (i=0;i<pn;i++) {
263     ptnzi  = pti[i+1] - pti[i];
264     ptanzi = 0;
265     /* Determine symbolic row of PtA: */
266     for (j=0;j<ptnzi;j++) {
267       arow = *ptJ++;
268       anzj = ai[arow+1] - ai[arow];
269       ajj  = aj + ai[arow];
270       for (k=0;k<anzj;k++) {
271         if (!ptadenserow[ajj[k]]) {
272           ptadenserow[ajj[k]]    = -1;
273           ptasparserow[ptanzi++] = ajj[k];
274         }
275       }
276     }
277       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
278     ptaj = ptasparserow;
279     cnzi   = 0;
280     for (j=0;j<ptanzi;j++) {
281       prow = *ptaj++;
282       pnzj = pi[prow+1] - pi[prow];
283       pjj  = pj + pi[prow];
284       for (k=0;k<pnzj;k++) {
285         if (!denserow[pjj[k]]) {
286             denserow[pjj[k]]  = -1;
287             sparserow[cnzi++] = pjj[k];
288         }
289       }
290     }
291 
292     /* sort sparserow */
293     ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
294 
295     /* If free space is not available, make more free space */
296     /* Double the amount of total space in the list */
297     if (current_space->local_remaining<cnzi) {
298       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
299     }
300 
301     /* Copy data into free space, and zero out denserows */
302     ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr);
303     current_space->array           += cnzi;
304     current_space->local_used      += cnzi;
305     current_space->local_remaining -= cnzi;
306 
307     for (j=0;j<ptanzi;j++) {
308       ptadenserow[ptasparserow[j]] = 0;
309     }
310     for (j=0;j<cnzi;j++) {
311       denserow[sparserow[j]] = 0;
312     }
313       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
314       /*        For now, we will recompute what is needed. */
315     ci[i+1] = ci[i] + cnzi;
316   }
317   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
318   /* Allocate space for cj, initialize cj, and */
319   /* destroy list of free space and other temporary array(s) */
320   ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr);
321   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
322   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
323 
324   /* Allocate space for ca */
325   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
326   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
327 
328   /* put together the new matrix */
329   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
330 
331   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
332   /* Since these are PETSc arrays, change flags to free them as necessary. */
333   c = (Mat_SeqAIJ *)((*C)->data);
334   c->freedata = PETSC_TRUE;
335   c->nonew    = 0;
336 
337   /* Clean up. */
338   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
339 
340   PetscFunctionReturn(0);
341 }
342 
343 #include "src/mat/impls/maij/maij.h"
344 EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
/*
   Symbolic C = P^T*A*P where P is a SeqMAIJ matrix (ppdof interleaved copies
   of the underlying SeqAIJ matrix pp->AIJ).
   NOTE(review): unlike the other symbolic routines in this file, this one
   takes no 'fill' argument -- confirm the intended interface before wiring
   it into the dispatch tables.
*/
PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C) {
  /* This routine requires testing -- I don't think it works. */
  PetscErrorCode ierr;
  FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
  Mat            P=pp->AIJ;   /* underlying 1-dof AIJ matrix */
  Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
  int            *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
  int            *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
  int            an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof;
  int            i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
  MatScalar      *ca;

  PetscFunctionBegin;
  /* Start timer */
  ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);

  /* Get ij structure of P^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);

  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr);
  ci[0] = 0;

  /* single allocation carved into two dense/sparse work-row pairs */
  ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr);
  ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr);
  ptasparserow = ptadenserow  + an;
  denserow     = ptasparserow + an;
  sparserow    = denserow     + pn;

  /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
  /* This should be reasonable if sparsity of PtAP is similar to that of A. */
  /* NOTE(review): return code of GetMoreSpace() is not CHKERRQ-checked here,
     unlike every other fallible call in this routine. */
  ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
  current_space = free_space;

  /* Determine symbolic info for each row of C: */
  for (i=0;i<pn/ppdof;i++) {
    ptnzi  = pti[i+1] - pti[i];
    ptanzi = 0;
    ptJ    = ptj + pti[i];
    for (dof=0;dof<ppdof;dof++) {
    /* Determine symbolic row of PtA: */
      for (j=0;j<ptnzi;j++) {
        arow = ptJ[j] + dof;
        anzj = ai[arow+1] - ai[arow];
        ajj  = aj + ai[arow];
        for (k=0;k<anzj;k++) {
          if (!ptadenserow[ajj[k]]) {
            ptadenserow[ajj[k]]    = -1;
            ptasparserow[ptanzi++] = ajj[k];
          }
        }
      }
      /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
      ptaj = ptasparserow;
      cnzi   = 0;
      for (j=0;j<ptanzi;j++) {
        /* NOTE(review): '%dof' and '/dof' use the loop variable, which is 0 on
           the first pass -- modulo/division by zero is undefined behavior.
           Presumably 'ppdof' was intended; confirm before enabling this code. */
        pdof = *ptaj%dof;
        prow = (*ptaj++)/dof;
        pnzj = pi[prow+1] - pi[prow];
        pjj  = pj + pi[prow];
        for (k=0;k<pnzj;k++) {
          if (!denserow[pjj[k]+pdof]) {
            denserow[pjj[k]+pdof] = -1;
            sparserow[cnzi++]     = pjj[k]+pdof;
          }
        }
      }

      /* sort sparserow */
      ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);

      /* If free space is not available, make more free space */
      /* Double the amount of total space in the list */
      if (current_space->local_remaining<cnzi) {
        ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
      }

      /* Copy data into free space, and zero out denserows */
      ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr);
      current_space->array           += cnzi;
      current_space->local_used      += cnzi;
      current_space->local_remaining -= cnzi;

      for (j=0;j<ptanzi;j++) {
        ptadenserow[ptasparserow[j]] = 0;
      }
      for (j=0;j<cnzi;j++) {
        denserow[sparserow[j]] = 0;
      }
      /* Aside: Perhaps we should save the pta info for the numerical factorization. */
      /*        For now, we will recompute what is needed. */
      /* NOTE(review): with rows advancing as (i,dof), the index here looks
         like it should involve i*ppdof+dof; 'ci[i+1+dof]' leaves gaps in ci
         for ppdof>1 -- verify against the loop structure above. */
      ci[i+1+dof] = ci[i+dof] + cnzi;
    }
  }
  /* nnz is now stored in ci[ptm], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree(ptadenserow);CHKERRQ(ierr);

  /* Allocate space for ca */
  ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);

  /* put together the new matrix */
  ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);

  /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change flags to free them as necessary. */
  c = (Mat_SeqAIJ *)((*C)->data);
  c->freedata = PETSC_TRUE;
  c->nonew    = 0;

  /* Clean up. */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);

  ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
469 EXTERN_C_END
470 
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric"
/*
   MatPtAPNumeric - Computes the matrix projection C = P^T * A * P

   Collective on Mat

   Input Parameters:
+  A - the matrix
-  P - the projection matrix

   Output Parameters:
.  C - the product matrix

   Notes:
   C must have been created by calling MatPtAPSymbolic and must be destroyed by
   the user using MatDestroy().

   This routine is currently only implemented for pairs of AIJ matrices and classes
   which inherit from AIJ.  C will be of type MATAIJ.

   Level: intermediate

.seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric()
*/
PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C) {
  PetscErrorCode ierr;
  PetscErrorCode (*fA)(Mat,Mat,Mat);
  PetscErrorCode (*fP)(Mat,Mat,Mat);

  PetscFunctionBegin;

  /* A must be a valid, assembled, unfactored matrix */
  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidType(A,1);
  MatPreallocated(A);
  if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  /* P must be a valid, assembled, unfactored matrix */
  PetscValidHeaderSpecific(P,MAT_COOKIE,2);
  PetscValidType(P,2);
  MatPreallocated(P);
  if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  /* C must be a valid, assembled, unfactored matrix (from MatPtAPSymbolic) */
  PetscValidHeaderSpecific(C,MAT_COOKIE,3);
  PetscValidType(C,3);
  MatPreallocated(C);
  if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  /* dimension checks: C is cols(P) x cols(P), A is square with cols(A)=rows(P) */
  if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M);
  if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
  if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
  if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N);

  /* For now, we do not dispatch based on the type of A and P */
  /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
  fA = A->ops->ptapnumeric;
  if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name);
  fP = P->ops->ptapnumeric;
  if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name);
  if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);

  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = (*fA)(A,P,C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
540 
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ"
/*
   Numeric C = P^T*A*P for SeqAIJ operands: for each row i of A, accumulate
   the sparse row (A*P)[i,:] in a dense scratch vector, then scatter it into
   the rows of C selected by column i of P (an outer-product update).
   C must carry the structure produced by MatPtAPSymbolic_SeqAIJ_SeqAIJ().
*/
PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C)
{
  PetscErrorCode ierr;
  int            flops=0;
  Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
  Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
  Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
  int            *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
  int            *ci=c->i,*cj=c->j,*cjj;
  int            am=A->M,cn=C->N,cm=C->M;
  int            i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;

  PetscFunctionBegin;
  /* Allocate temporary array for storage of one row of A*P */
  /* (one MatScalar dense row plus two int arrays: column list and dense flags) */
  ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(int)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr);

  apj      = (int *)(apa + cn);
  apjdense = apj + cn;

  /* Clear old values in C */
  ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  /* aa and pJ/pA walk a->a and p->j/p->a sequentially across all rows */
  for (i=0;i<am;i++) {
    /* Form sparse row of A*P */
    anzi  = ai[i+1] - ai[i];
    apnzj = 0;
    for (j=0;j<anzi;j++) {
      prow = *aj++;
      pnzj = pi[prow+1] - pi[prow];
      pjj  = pj + pi[prow];
      paj  = pa + pi[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aa)*paj[k];
      }
      flops += 2*pnzj;
      aa++;
    }

    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
    pnzi = pi[i+1] - pi[i];
    for (j=0;j<pnzi;j++) {
      nextap = 0;
      crow   = *pJ++;
      cjj    = cj + ci[crow];
      caj    = ca + ci[crow];
      /* Perform sparse axpy operation.  Note cjj includes apj. */
      /* (relies on the symbolic phase having made row crow of C a superset of apj) */
      for (k=0;nextap<apnzj;k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA++;
    }

    /* Zero the current row info for A*P */
    for (j=0;j<apnzj;j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
622 
/* Compute C = P[rstart:rend,:]^T * A * P of seqaij matrices - used by MatPtAP_MPIAIJ_MPIAIJ() */

#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt"
/*
   Same kernel as MatPtAPNumeric_SeqAIJ_SeqAIJ(), except that only rows
   [prstart,prend) of P enter the transpose: pJ/pA start at pi[prstart], and
   the outer-product scatter for row i of A uses row i+prstart of P.
   C must carry the structure from MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt().
*/
PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,int prstart,int prend,Mat C)
{
  PetscErrorCode ierr;
  int            flops=0;
  Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
  Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
  Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
  int            *ai=a->i,*aj=a->j,*apj,*apjdense;
  int            *pi=p->i,*pj=p->j,*pJ=p->j+pi[prstart],*pjj;
  int            *ci=c->i,*cj=c->j,*cjj;
  int            am=A->M,cn=C->N,cm=C->M;
  int            i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;

  PetscFunctionBegin;
  /* start pA at the first value of the reduced row block (the initializer
     above set it to p->a; this assignment supersedes it) */
  pA=p->a+pi[prstart];
  /* Allocate temporary array for storage of one row of A*P */
  ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(int)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr);

  apj      = (int *)(apa + cn);
  apjdense = apj + cn;

  /* Clear old values in C */
  ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0;i<am;i++) {
    /* Form sparse row of A*P */
    anzi  = ai[i+1] - ai[i];
    apnzj = 0;
    for (j=0;j<anzi;j++) {
      prow = *aj++;
      pnzj = pi[prow+1] - pi[prow];
      pjj  = pj + pi[prow];
      paj  = pa + pi[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aa)*paj[k];
      }
      flops += 2*pnzj;
      aa++;
    }

    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P[[prstart:prend,:]^T*A*P using outer product (P^T)[:,j+prstart]*(A*P)[j,:]. */
    pnzi = pi[i+1+prstart] - pi[i+prstart];
    for (j=0;j<pnzi;j++) {
      nextap = 0;
      crow   = *pJ++;
      cjj    = cj + ci[crow];
      caj    = ca + ci[crow];
      /* Perform sparse axpy operation.  Note cjj includes apj. */
      for (k=0;nextap<apnzj;k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA++;
    }

    /* Zero the current row info for A*P */
    for (j=0;j<apnzj;j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
708 
709 #undef __FUNCT__
710 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt"
711 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,PetscReal fill,int prstart,int prend,Mat *C) {
712   PetscErrorCode ierr;
713   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
714   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
715   int            *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
716   int            *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
717   int            an=A->N,am=A->M,pn=P->N,pm=P->M;
718   int            i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
719   MatScalar      *ca;
720   Mat *psub,P_sub;
721   IS  isrow,iscol;
722   int m = prend - prstart;
723 
724   PetscFunctionBegin;
725   /* Get ij structure of P[rstart:rend,:]^T */
726   ierr = ISCreateStride(PETSC_COMM_SELF,m,prstart,1,&isrow);CHKERRQ(ierr);
727   ierr = ISCreateStride(PETSC_COMM_SELF,P->n,0,1,&iscol);CHKERRQ(ierr);
728   ierr = MatGetSubMatrices(P,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&psub);CHKERRQ(ierr);
729   ierr = ISDestroy(isrow);CHKERRQ(ierr);
730   ierr = ISDestroy(iscol);CHKERRQ(ierr);
731   P_sub = psub[0];
732   ierr = MatGetSymbolicTranspose_SeqAIJ(P_sub,&pti,&ptj);CHKERRQ(ierr);
733   ierr = MatDestroyMatrices(1,&psub);CHKERRQ(ierr);
734   ptJ=ptj;
735 
736   /* Allocate ci array, arrays for fill computation and */
737   /* free space for accumulating nonzero column info */
738   ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr);
739   ci[0] = 0;
740 
741   ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr);
742   ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr);
743   ptasparserow = ptadenserow  + an;
744   denserow     = ptasparserow + an;
745   sparserow    = denserow     + pn;
746 
747   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
748   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
749   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
750   current_space = free_space;
751 
752   /* Determine symbolic info for each row of C: */
753   for (i=0;i<pn;i++) {
754     ptnzi  = pti[i+1] - pti[i];
755     ptanzi = 0;
756     /* Determine symbolic row of PtA_reduced: */
757     for (j=0;j<ptnzi;j++) {
758       arow = *ptJ++;
759       anzj = ai[arow+1] - ai[arow];
760       ajj  = aj + ai[arow];
761       for (k=0;k<anzj;k++) {
762         if (!ptadenserow[ajj[k]]) {
763           ptadenserow[ajj[k]]    = -1;
764           ptasparserow[ptanzi++] = ajj[k];
765         }
766       }
767     }
768       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
769     ptaj = ptasparserow;
770     cnzi   = 0;
771     for (j=0;j<ptanzi;j++) {
772       prow = *ptaj++;
773       pnzj = pi[prow+1] - pi[prow];
774       pjj  = pj + pi[prow];
775       for (k=0;k<pnzj;k++) {
776         if (!denserow[pjj[k]]) {
777             denserow[pjj[k]]  = -1;
778             sparserow[cnzi++] = pjj[k];
779         }
780       }
781     }
782 
783     /* sort sparserow */
784     ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
785 
786     /* If free space is not available, make more free space */
787     /* Double the amount of total space in the list */
788     if (current_space->local_remaining<cnzi) {
789       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
790     }
791 
792     /* Copy data into free space, and zero out denserows */
793     ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr);
794     current_space->array           += cnzi;
795     current_space->local_used      += cnzi;
796     current_space->local_remaining -= cnzi;
797 
798     for (j=0;j<ptanzi;j++) {
799       ptadenserow[ptasparserow[j]] = 0;
800     }
801     for (j=0;j<cnzi;j++) {
802       denserow[sparserow[j]] = 0;
803     }
804       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
805       /*        For now, we will recompute what is needed. */
806     ci[i+1] = ci[i] + cnzi;
807   }
808   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
809   /* Allocate space for cj, initialize cj, and */
810   /* destroy list of free space and other temporary array(s) */
811   ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr);
812   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
813   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
814 
815   /* Allocate space for ca */
816   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
817   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
818 
819   /* put together the new matrix */
820   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
821 
822   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
823   /* Since these are PETSc arrays, change flags to free them as necessary. */
824   c = (Mat_SeqAIJ *)((*C)->data);
825   c->freedata = PETSC_TRUE;
826   c->nonew    = 0;
827 
828   /* Clean up. */
829   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
830 
831   PetscFunctionReturn(0);
832 }
833 
834 #undef __FUNCT__
835 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ_ReducedPt"
836 PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,MatReuse scall,PetscReal fill,int prstart,int prend,Mat *C)
837 {
838   PetscErrorCode ierr;
839   PetscFunctionBegin;
840   if (A->m != prend-prstart) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->m,prend-prstart);
841   if (prend-prstart > P->m) SETERRQ2(PETSC_ERR_ARG_SIZ," prend-prstart %d cannot be larger than P->m %d",prend-prstart,P->m);
842   if (scall == MAT_INITIAL_MATRIX){
843     ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(A,P,fill,prstart,prend,C);CHKERRQ(ierr);
844   }
845 
846   ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(A,P,prstart,prend,*C);CHKERRQ(ierr);
847 
848   PetscFunctionReturn(0);
849 }
850 
851