xref: /petsc/src/mat/impls/aij/seq/matptap.c (revision 4d3841fd6aab5bbfbc7d95cf7b2a91f823f98fbd)
1eb9c0419SKris Buschelman /*
2eb9c0419SKris Buschelman   Defines projective product routines where A is a SeqAIJ matrix
3eb9c0419SKris Buschelman           C = P^T * A * P
4eb9c0419SKris Buschelman */
5eb9c0419SKris Buschelman 
6eb9c0419SKris Buschelman #include "src/mat/impls/aij/seq/aij.h"
7eb9c0419SKris Buschelman #include "src/mat/utils/freespace.h"
8eb9c0419SKris Buschelman 
97e44a18eSKris Buschelman EXTERN int MatSeqAIJPtAP(Mat,Mat,Mat*);
107e44a18eSKris Buschelman EXTERN int MatSeqAIJPtAPSymbolic(Mat,Mat,Mat*);
117e44a18eSKris Buschelman EXTERN int MatSeqAIJPtAPNumeric(Mat,Mat,Mat);
127e44a18eSKris Buschelman EXTERN int RegisterMatMatMultRoutines_Private(Mat);
13eb9c0419SKris Buschelman 
14eb9c0419SKris Buschelman static int MATSeqAIJ_PtAP         = 0;
15eb9c0419SKris Buschelman static int MATSeqAIJ_PtAPSymbolic = 0;
16eb9c0419SKris Buschelman static int MATSeqAIJ_PtAPNumeric  = 0;
17eb9c0419SKris Buschelman 
18eb9c0419SKris Buschelman #undef __FUNCT__
19eb9c0419SKris Buschelman #define __FUNCT__ "MatSeqAIJPtAP"
20*4d3841fdSKris Buschelman /*@
21*4d3841fdSKris Buschelman    MatSeqAIJPtAP - Creates the matrix projection C = P^T * A * P
22*4d3841fdSKris Buschelman 
23*4d3841fdSKris Buschelman    Collective on Mat
24*4d3841fdSKris Buschelman 
25*4d3841fdSKris Buschelman    Input Parameters:
26*4d3841fdSKris Buschelman +  A - the matrix
27*4d3841fdSKris Buschelman -  P - the projection matrix
28*4d3841fdSKris Buschelman 
29*4d3841fdSKris Buschelman    Output Parameters:
30*4d3841fdSKris Buschelman .  C - the product matrix
31*4d3841fdSKris Buschelman 
32*4d3841fdSKris Buschelman    Notes:
33*4d3841fdSKris Buschelman    C will be created and must be destroyed by the user with MatDestroy().
34*4d3841fdSKris Buschelman 
35*4d3841fdSKris Buschelman    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
36*4d3841fdSKris Buschelman    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
37*4d3841fdSKris Buschelman 
38*4d3841fdSKris Buschelman    Level: intermediate
39*4d3841fdSKris Buschelman 
40*4d3841fdSKris Buschelman .seealso: MatSeqAIJPtAPSymbolic(),MatSeqAIJPtAPNumeric(),MatMatMult()
41*4d3841fdSKris Buschelman @*/
42eb9c0419SKris Buschelman int MatSeqAIJPtAP(Mat A,Mat P,Mat *C) {
43eb9c0419SKris Buschelman   int ierr;
44eb9c0419SKris Buschelman   char funct[80];
45*4d3841fdSKris Buschelman   int (*f)(Mat,Mat,Mat);
46eb9c0419SKris Buschelman 
47eb9c0419SKris Buschelman   PetscFunctionBegin;
48eb9c0419SKris Buschelman   ierr = PetscLogEventBegin(MATSeqAIJ_PtAP,A,P,0,0);CHKERRQ(ierr);
49eb9c0419SKris Buschelman 
50eb9c0419SKris Buschelman   ierr = MatSeqAIJPtAPSymbolic(A,P,C);CHKERRQ(ierr);
51eb9c0419SKris Buschelman 
52eb9c0419SKris Buschelman   /* Avoid additional error checking included in */
53eb9c0419SKris Buschelman /*   ierr = MatSeqAIJApplyPtAPNumeric(A,P,*C);CHKERRQ(ierr); */
54eb9c0419SKris Buschelman 
55*4d3841fdSKris Buschelman   /* Currently only _seqaij_seqaij is implemented, so just query for it in A and P. */
56*4d3841fdSKris Buschelman   /* When other implementations exist, attack the multiple dispatch problem. */
57*4d3841fdSKris Buschelman   ierr = PetscStrcpy(funct,"MatApplyPtAPNumeric_seqaij_seqaij");CHKERRQ(ierr);
58*4d3841fdSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)P,funct,(PetscVoidFunction)&f);CHKERRQ(ierr);
59*4d3841fdSKris Buschelman   if (!f) SETERRQ1(1,"MatSeqAIJPtAPNumeric is not supported for P of type %s",P->type_name);
60*4d3841fdSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)A,funct,(PetscVoidFunction)&f);CHKERRQ(ierr);
61*4d3841fdSKris Buschelman   if (!f) SETERRQ1(1,"MatSeqAIJPtAPNumeric is not supported for A of type %s",A->type_name);
62*4d3841fdSKris Buschelman 
63*4d3841fdSKris Buschelman   ierr = (*f)(A,P,*C);CHKERRQ(ierr);
64eb9c0419SKris Buschelman 
65eb9c0419SKris Buschelman   ierr = PetscLogEventEnd(MATSeqAIJ_PtAP,A,P,0,0);CHKERRQ(ierr);
66eb9c0419SKris Buschelman   PetscFunctionReturn(0);
67eb9c0419SKris Buschelman }
68eb9c0419SKris Buschelman 
69eb9c0419SKris Buschelman #undef __FUNCT__
70eb9c0419SKris Buschelman #define __FUNCT__ "MatSeqAIJPtAPSymbolic"
71*4d3841fdSKris Buschelman /*@
72*4d3841fdSKris Buschelman    MatSeqAIJPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P
73*4d3841fdSKris Buschelman 
74*4d3841fdSKris Buschelman    Collective on Mat
75*4d3841fdSKris Buschelman 
76*4d3841fdSKris Buschelman    Input Parameters:
77*4d3841fdSKris Buschelman +  A - the matrix
78*4d3841fdSKris Buschelman -  P - the projection matrix
79*4d3841fdSKris Buschelman 
80*4d3841fdSKris Buschelman    Output Parameters:
81*4d3841fdSKris Buschelman .  C - the (i,j) structure of the product matrix
82*4d3841fdSKris Buschelman 
83*4d3841fdSKris Buschelman    Notes:
84*4d3841fdSKris Buschelman    C will be created and must be destroyed by the user with MatDestroy().
85*4d3841fdSKris Buschelman 
86*4d3841fdSKris Buschelman    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
87*4d3841fdSKris Buschelman    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.  The product is computed using
88*4d3841fdSKris Buschelman    this (i,j) structure by calling MatSeqAIJPtAPNumeric().
89*4d3841fdSKris Buschelman 
90*4d3841fdSKris Buschelman    Level: intermediate
91*4d3841fdSKris Buschelman 
92*4d3841fdSKris Buschelman .seealso: MatSeqAIJPtAP(),MatSeqAIJPtAPNumeric(),MatMatMultSymbolic()
93*4d3841fdSKris Buschelman @*/
94eb9c0419SKris Buschelman int MatSeqAIJPtAPSymbolic(Mat A,Mat P,Mat *C) {
95eb9c0419SKris Buschelman   int ierr;
96eb9c0419SKris Buschelman   char funct[80];
97*4d3841fdSKris Buschelman   int (*f)(Mat,Mat,Mat);
98eb9c0419SKris Buschelman 
99eb9c0419SKris Buschelman   PetscFunctionBegin;
100eb9c0419SKris Buschelman 
1014482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
102c9780b6fSBarry Smith   PetscValidType(A,1);
103eb9c0419SKris Buschelman   MatPreallocated(A);
104eb9c0419SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
105eb9c0419SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
106eb9c0419SKris Buschelman 
1074482741eSBarry Smith   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
108c9780b6fSBarry Smith   PetscValidType(P,2);
109eb9c0419SKris Buschelman   MatPreallocated(P);
110eb9c0419SKris Buschelman   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
111eb9c0419SKris Buschelman   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
112eb9c0419SKris Buschelman 
1134482741eSBarry Smith   PetscValidPointer(C,3);
1144482741eSBarry Smith 
115eb9c0419SKris Buschelman   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
116eb9c0419SKris Buschelman   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
117eb9c0419SKris Buschelman 
118*4d3841fdSKris Buschelman   /* Currently only _seqaij_seqaij is implemented, so just query for it. */
119*4d3841fdSKris Buschelman   /* When other implementations exist, attack the multiple dispatch problem. */
120*4d3841fdSKris Buschelman   ierr = PetscStrcpy(funct,"MatApplyPtAPSymbolic_seqaij_seqaij");CHKERRQ(ierr);
121*4d3841fdSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)P,funct,(PetscVoidFunction)&f);CHKERRQ(ierr);
122*4d3841fdSKris Buschelman   if (!f) SETERRQ1(1,"MatSeqAIJPtAPSymbolic is not supported for P of type %s",P->type_name);
123*4d3841fdSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)A,funct,(PetscVoidFunction)&f);CHKERRQ(ierr);
124*4d3841fdSKris Buschelman   if (!f) SETERRQ1(1,"MatSeqAIJPtAPSymbolic is not supported for A of type %s",A->type_name);
125*4d3841fdSKris Buschelman 
126*4d3841fdSKris Buschelman   ierr = (*f)(A,P,*C);CHKERRQ(ierr);
127eb9c0419SKris Buschelman 
128eb9c0419SKris Buschelman   PetscFunctionReturn(0);
129eb9c0419SKris Buschelman }
130eb9c0419SKris Buschelman 
131eb9c0419SKris Buschelman EXTERN_C_BEGIN
132eb9c0419SKris Buschelman #undef __FUNCT__
133eb9c0419SKris Buschelman #define __FUNCT__ "MatApplyPtAPSymbolic_SeqAIJ_SeqAIJ"
134eb9c0419SKris Buschelman int MatApplyPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C) {
135eb9c0419SKris Buschelman   int            ierr;
136eb9c0419SKris Buschelman   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
137eb9c0419SKris Buschelman   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
138eb9c0419SKris Buschelman   int            *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
139eb9c0419SKris Buschelman   int            *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
140eb9c0419SKris Buschelman   int            an=A->N,am=A->M,pn=P->N,pm=P->M;
141eb9c0419SKris Buschelman   int            i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
142eb9c0419SKris Buschelman   MatScalar      *ca;
143eb9c0419SKris Buschelman 
144eb9c0419SKris Buschelman   PetscFunctionBegin;
145eb9c0419SKris Buschelman 
146eb9c0419SKris Buschelman   /* Start timer */
147eb9c0419SKris Buschelman   ierr = PetscLogEventBegin(MATSeqAIJ_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
148eb9c0419SKris Buschelman 
149eb9c0419SKris Buschelman   /* Get ij structure of P^T */
150eb9c0419SKris Buschelman   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
151eb9c0419SKris Buschelman   ptJ=ptj;
152eb9c0419SKris Buschelman 
153eb9c0419SKris Buschelman   /* Allocate ci array, arrays for fill computation and */
154eb9c0419SKris Buschelman   /* free space for accumulating nonzero column info */
1553985e5eaSKris Buschelman   ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr);
156eb9c0419SKris Buschelman   ci[0] = 0;
157eb9c0419SKris Buschelman 
158eb9c0419SKris Buschelman   ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr);
159eb9c0419SKris Buschelman   ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr);
160eb9c0419SKris Buschelman   ptasparserow = ptadenserow  + an;
161eb9c0419SKris Buschelman   denserow     = ptasparserow + an;
162eb9c0419SKris Buschelman   sparserow    = denserow     + pn;
163eb9c0419SKris Buschelman 
164eb9c0419SKris Buschelman   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
165eb9c0419SKris Buschelman   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
166eb9c0419SKris Buschelman   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
167eb9c0419SKris Buschelman   current_space = free_space;
168eb9c0419SKris Buschelman 
169eb9c0419SKris Buschelman   /* Determine symbolic info for each row of C: */
170eb9c0419SKris Buschelman   for (i=0;i<pn;i++) {
171eb9c0419SKris Buschelman     ptnzi  = pti[i+1] - pti[i];
172eb9c0419SKris Buschelman     ptanzi = 0;
173eb9c0419SKris Buschelman     /* Determine symbolic row of PtA: */
174eb9c0419SKris Buschelman     for (j=0;j<ptnzi;j++) {
175eb9c0419SKris Buschelman       arow = *ptJ++;
176eb9c0419SKris Buschelman       anzj = ai[arow+1] - ai[arow];
177eb9c0419SKris Buschelman       ajj  = aj + ai[arow];
178eb9c0419SKris Buschelman       for (k=0;k<anzj;k++) {
179eb9c0419SKris Buschelman         if (!ptadenserow[ajj[k]]) {
180eb9c0419SKris Buschelman           ptadenserow[ajj[k]]    = -1;
181eb9c0419SKris Buschelman           ptasparserow[ptanzi++] = ajj[k];
182eb9c0419SKris Buschelman         }
183eb9c0419SKris Buschelman       }
184eb9c0419SKris Buschelman     }
185eb9c0419SKris Buschelman       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
186eb9c0419SKris Buschelman     ptaj = ptasparserow;
187eb9c0419SKris Buschelman     cnzi   = 0;
188eb9c0419SKris Buschelman     for (j=0;j<ptanzi;j++) {
189eb9c0419SKris Buschelman       prow = *ptaj++;
190eb9c0419SKris Buschelman       pnzj = pi[prow+1] - pi[prow];
191eb9c0419SKris Buschelman       pjj  = pj + pi[prow];
192eb9c0419SKris Buschelman       for (k=0;k<pnzj;k++) {
193eb9c0419SKris Buschelman         if (!denserow[pjj[k]]) {
194eb9c0419SKris Buschelman             denserow[pjj[k]]  = -1;
195eb9c0419SKris Buschelman             sparserow[cnzi++] = pjj[k];
196eb9c0419SKris Buschelman         }
197eb9c0419SKris Buschelman       }
198eb9c0419SKris Buschelman     }
199eb9c0419SKris Buschelman 
200eb9c0419SKris Buschelman     /* sort sparserow */
201eb9c0419SKris Buschelman     ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
202eb9c0419SKris Buschelman 
203eb9c0419SKris Buschelman     /* If free space is not available, make more free space */
204eb9c0419SKris Buschelman     /* Double the amount of total space in the list */
205eb9c0419SKris Buschelman     if (current_space->local_remaining<cnzi) {
206eb9c0419SKris Buschelman       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
207eb9c0419SKris Buschelman     }
208eb9c0419SKris Buschelman 
209eb9c0419SKris Buschelman     /* Copy data into free space, and zero out denserows */
210eb9c0419SKris Buschelman     ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr);
211eb9c0419SKris Buschelman     current_space->array           += cnzi;
212eb9c0419SKris Buschelman     current_space->local_used      += cnzi;
213eb9c0419SKris Buschelman     current_space->local_remaining -= cnzi;
214eb9c0419SKris Buschelman 
215eb9c0419SKris Buschelman     for (j=0;j<ptanzi;j++) {
216eb9c0419SKris Buschelman       ptadenserow[ptasparserow[j]] = 0;
217eb9c0419SKris Buschelman     }
218eb9c0419SKris Buschelman     for (j=0;j<cnzi;j++) {
219eb9c0419SKris Buschelman       denserow[sparserow[j]] = 0;
220eb9c0419SKris Buschelman     }
221eb9c0419SKris Buschelman       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
222eb9c0419SKris Buschelman       /*        For now, we will recompute what is needed. */
223eb9c0419SKris Buschelman     ci[i+1] = ci[i] + cnzi;
224eb9c0419SKris Buschelman   }
225eb9c0419SKris Buschelman   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
226eb9c0419SKris Buschelman   /* Allocate space for cj, initialize cj, and */
227eb9c0419SKris Buschelman   /* destroy list of free space and other temporary array(s) */
228eb9c0419SKris Buschelman   ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr);
229eb9c0419SKris Buschelman   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
230eb9c0419SKris Buschelman   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
231eb9c0419SKris Buschelman 
232eb9c0419SKris Buschelman   /* Allocate space for ca */
233eb9c0419SKris Buschelman   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
234eb9c0419SKris Buschelman   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
235eb9c0419SKris Buschelman 
236eb9c0419SKris Buschelman   /* put together the new matrix */
237eb9c0419SKris Buschelman   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
238eb9c0419SKris Buschelman 
239eb9c0419SKris Buschelman   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
240eb9c0419SKris Buschelman   /* Since these are PETSc arrays, change flags to free them as necessary. */
241eb9c0419SKris Buschelman   c = (Mat_SeqAIJ *)((*C)->data);
242eb9c0419SKris Buschelman   c->freedata = PETSC_TRUE;
243eb9c0419SKris Buschelman   c->nonew    = 0;
244eb9c0419SKris Buschelman 
245eb9c0419SKris Buschelman   /* Clean up. */
246eb9c0419SKris Buschelman   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
247eb9c0419SKris Buschelman 
248eb9c0419SKris Buschelman   ierr = PetscLogEventEnd(MATSeqAIJ_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
249eb9c0419SKris Buschelman   PetscFunctionReturn(0);
250eb9c0419SKris Buschelman }
251eb9c0419SKris Buschelman EXTERN_C_END
252eb9c0419SKris Buschelman 
2533985e5eaSKris Buschelman #include "src/mat/impls/maij/maij.h"
2543985e5eaSKris Buschelman EXTERN_C_BEGIN
2553985e5eaSKris Buschelman #undef __FUNCT__
2563985e5eaSKris Buschelman #define __FUNCT__ "MatApplyPtAPSymbolic_SeqAIJ_SeqMAIJ"
2573985e5eaSKris Buschelman int MatApplyPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C) {
2585c66b693SKris Buschelman   /* This routine requires testing -- I don't think it works. */
2593985e5eaSKris Buschelman   int            ierr;
2603985e5eaSKris Buschelman   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
2613985e5eaSKris Buschelman   Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
2623985e5eaSKris Buschelman   Mat            P=pp->AIJ;
2633985e5eaSKris Buschelman   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2643985e5eaSKris Buschelman   int            *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
2653985e5eaSKris Buschelman   int            *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
2663985e5eaSKris Buschelman   int            an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof;
267fe05a634SKris Buschelman   int            i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
2683985e5eaSKris Buschelman   MatScalar      *ca;
2693985e5eaSKris Buschelman 
2703985e5eaSKris Buschelman   PetscFunctionBegin;
2713985e5eaSKris Buschelman   /* Start timer */
2723985e5eaSKris Buschelman   ierr = PetscLogEventBegin(MATSeqAIJ_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
2733985e5eaSKris Buschelman 
2743985e5eaSKris Buschelman   /* Get ij structure of P^T */
2753985e5eaSKris Buschelman   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
2763985e5eaSKris Buschelman 
2773985e5eaSKris Buschelman   /* Allocate ci array, arrays for fill computation and */
2783985e5eaSKris Buschelman   /* free space for accumulating nonzero column info */
2793985e5eaSKris Buschelman   ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr);
2803985e5eaSKris Buschelman   ci[0] = 0;
2813985e5eaSKris Buschelman 
2823985e5eaSKris Buschelman   ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr);
2833985e5eaSKris Buschelman   ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr);
2843985e5eaSKris Buschelman   ptasparserow = ptadenserow  + an;
2853985e5eaSKris Buschelman   denserow     = ptasparserow + an;
2863985e5eaSKris Buschelman   sparserow    = denserow     + pn;
2873985e5eaSKris Buschelman 
2883985e5eaSKris Buschelman   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
2893985e5eaSKris Buschelman   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
2903985e5eaSKris Buschelman   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
2913985e5eaSKris Buschelman   current_space = free_space;
2923985e5eaSKris Buschelman 
2933985e5eaSKris Buschelman   /* Determine symbolic info for each row of C: */
2943985e5eaSKris Buschelman   for (i=0;i<pn/ppdof;i++) {
2953985e5eaSKris Buschelman     ptnzi  = pti[i+1] - pti[i];
2963985e5eaSKris Buschelman     ptanzi = 0;
2973985e5eaSKris Buschelman     ptJ    = ptj + pti[i];
2983985e5eaSKris Buschelman     for (dof=0;dof<ppdof;dof++) {
2993985e5eaSKris Buschelman     /* Determine symbolic row of PtA: */
3003985e5eaSKris Buschelman       for (j=0;j<ptnzi;j++) {
3013985e5eaSKris Buschelman         arow = ptJ[j] + dof;
3023985e5eaSKris Buschelman         anzj = ai[arow+1] - ai[arow];
3033985e5eaSKris Buschelman         ajj  = aj + ai[arow];
3043985e5eaSKris Buschelman         for (k=0;k<anzj;k++) {
3053985e5eaSKris Buschelman           if (!ptadenserow[ajj[k]]) {
3063985e5eaSKris Buschelman             ptadenserow[ajj[k]]    = -1;
3073985e5eaSKris Buschelman             ptasparserow[ptanzi++] = ajj[k];
3083985e5eaSKris Buschelman           }
3093985e5eaSKris Buschelman         }
3103985e5eaSKris Buschelman       }
3113985e5eaSKris Buschelman       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
3123985e5eaSKris Buschelman       ptaj = ptasparserow;
3133985e5eaSKris Buschelman       cnzi   = 0;
3143985e5eaSKris Buschelman       for (j=0;j<ptanzi;j++) {
315fe05a634SKris Buschelman         pdof = *ptaj%dof;
3163985e5eaSKris Buschelman         prow = (*ptaj++)/dof;
3173985e5eaSKris Buschelman         pnzj = pi[prow+1] - pi[prow];
3183985e5eaSKris Buschelman         pjj  = pj + pi[prow];
3193985e5eaSKris Buschelman         for (k=0;k<pnzj;k++) {
320fe05a634SKris Buschelman           if (!denserow[pjj[k]+pdof]) {
321fe05a634SKris Buschelman             denserow[pjj[k]+pdof] = -1;
322fe05a634SKris Buschelman             sparserow[cnzi++]     = pjj[k]+pdof;
3233985e5eaSKris Buschelman           }
3243985e5eaSKris Buschelman         }
3253985e5eaSKris Buschelman       }
3263985e5eaSKris Buschelman 
3273985e5eaSKris Buschelman       /* sort sparserow */
3283985e5eaSKris Buschelman       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
3293985e5eaSKris Buschelman 
3303985e5eaSKris Buschelman       /* If free space is not available, make more free space */
3313985e5eaSKris Buschelman       /* Double the amount of total space in the list */
3323985e5eaSKris Buschelman       if (current_space->local_remaining<cnzi) {
3333985e5eaSKris Buschelman         ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3343985e5eaSKris Buschelman       }
3353985e5eaSKris Buschelman 
3363985e5eaSKris Buschelman       /* Copy data into free space, and zero out denserows */
3373985e5eaSKris Buschelman       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr);
3383985e5eaSKris Buschelman       current_space->array           += cnzi;
3393985e5eaSKris Buschelman       current_space->local_used      += cnzi;
3403985e5eaSKris Buschelman       current_space->local_remaining -= cnzi;
3413985e5eaSKris Buschelman 
3423985e5eaSKris Buschelman       for (j=0;j<ptanzi;j++) {
3433985e5eaSKris Buschelman         ptadenserow[ptasparserow[j]] = 0;
3443985e5eaSKris Buschelman       }
3453985e5eaSKris Buschelman       for (j=0;j<cnzi;j++) {
3463985e5eaSKris Buschelman         denserow[sparserow[j]] = 0;
3473985e5eaSKris Buschelman       }
3483985e5eaSKris Buschelman       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
3493985e5eaSKris Buschelman       /*        For now, we will recompute what is needed. */
3503985e5eaSKris Buschelman       ci[i+1+dof] = ci[i+dof] + cnzi;
3513985e5eaSKris Buschelman     }
3523985e5eaSKris Buschelman   }
3533985e5eaSKris Buschelman   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
3543985e5eaSKris Buschelman   /* Allocate space for cj, initialize cj, and */
3553985e5eaSKris Buschelman   /* destroy list of free space and other temporary array(s) */
3563985e5eaSKris Buschelman   ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr);
3573985e5eaSKris Buschelman   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
3583985e5eaSKris Buschelman   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
3593985e5eaSKris Buschelman 
3603985e5eaSKris Buschelman   /* Allocate space for ca */
3613985e5eaSKris Buschelman   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
3623985e5eaSKris Buschelman   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
3633985e5eaSKris Buschelman 
3643985e5eaSKris Buschelman   /* put together the new matrix */
3653985e5eaSKris Buschelman   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
3663985e5eaSKris Buschelman 
3673985e5eaSKris Buschelman   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3683985e5eaSKris Buschelman   /* Since these are PETSc arrays, change flags to free them as necessary. */
3693985e5eaSKris Buschelman   c = (Mat_SeqAIJ *)((*C)->data);
3703985e5eaSKris Buschelman   c->freedata = PETSC_TRUE;
3713985e5eaSKris Buschelman   c->nonew    = 0;
3723985e5eaSKris Buschelman 
3733985e5eaSKris Buschelman   /* Clean up. */
3743985e5eaSKris Buschelman   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
3753985e5eaSKris Buschelman 
3763985e5eaSKris Buschelman   ierr = PetscLogEventEnd(MATSeqAIJ_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
3773985e5eaSKris Buschelman   PetscFunctionReturn(0);
3783985e5eaSKris Buschelman }
3793985e5eaSKris Buschelman EXTERN_C_END
3803985e5eaSKris Buschelman 
381eb9c0419SKris Buschelman #undef __FUNCT__
382eb9c0419SKris Buschelman #define __FUNCT__ "MatSeqAIJPtAPNumeric"
383*4d3841fdSKris Buschelman /*@
384*4d3841fdSKris Buschelman    MatSeqAIJPtAPNumeric - Computes the matrix projection C = P^T * A * P
385*4d3841fdSKris Buschelman 
386*4d3841fdSKris Buschelman    Collective on Mat
387*4d3841fdSKris Buschelman 
388*4d3841fdSKris Buschelman    Input Parameters:
389*4d3841fdSKris Buschelman +  A - the matrix
390*4d3841fdSKris Buschelman -  P - the projection matrix
391*4d3841fdSKris Buschelman 
392*4d3841fdSKris Buschelman    Output Parameters:
393*4d3841fdSKris Buschelman .  C - the product matrix
394*4d3841fdSKris Buschelman 
395*4d3841fdSKris Buschelman    Notes:
396*4d3841fdSKris Buschelman    C must have been created by calling MatSeqAIJPtAPSymbolic and must be destroyed by
397*4d3841fdSKris Buschelman    the user using MatDeatroy().
398*4d3841fdSKris Buschelman 
399*4d3841fdSKris Buschelman    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
400*4d3841fdSKris Buschelman    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
401*4d3841fdSKris Buschelman 
402*4d3841fdSKris Buschelman    Level: intermediate
403*4d3841fdSKris Buschelman 
404*4d3841fdSKris Buschelman .seealso: MatSeqAIJPtAP(),MatSeqAIJPtAPSymbolic(),MatMatMultNumeric()
405*4d3841fdSKris Buschelman @*/
406eb9c0419SKris Buschelman int MatSeqAIJPtAPNumeric(Mat A,Mat P,Mat C) {
407eb9c0419SKris Buschelman   int ierr;
408eb9c0419SKris Buschelman   char funct[80];
409*4d3841fdSKris Buschelman   int (*f)(Mat,Mat,Mat);
410eb9c0419SKris Buschelman 
411eb9c0419SKris Buschelman   PetscFunctionBegin;
412eb9c0419SKris Buschelman 
4134482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
414c9780b6fSBarry Smith   PetscValidType(A,1);
415eb9c0419SKris Buschelman   MatPreallocated(A);
416eb9c0419SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
417eb9c0419SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
418eb9c0419SKris Buschelman 
4194482741eSBarry Smith   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
420c9780b6fSBarry Smith   PetscValidType(P,2);
421eb9c0419SKris Buschelman   MatPreallocated(P);
422eb9c0419SKris Buschelman   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
423eb9c0419SKris Buschelman   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
424eb9c0419SKris Buschelman 
4254482741eSBarry Smith   PetscValidHeaderSpecific(C,MAT_COOKIE,3);
426c9780b6fSBarry Smith   PetscValidType(C,3);
427eb9c0419SKris Buschelman   MatPreallocated(C);
428eb9c0419SKris Buschelman   if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
429eb9c0419SKris Buschelman   if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
430eb9c0419SKris Buschelman 
431eb9c0419SKris Buschelman   if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M);
432eb9c0419SKris Buschelman   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
433eb9c0419SKris Buschelman   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
434eb9c0419SKris Buschelman   if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N);
435eb9c0419SKris Buschelman 
436*4d3841fdSKris Buschelman   /* Currently only _seqaij_seqaij is implemented, so just query for it. */
437*4d3841fdSKris Buschelman   /* When other implementations exist, attack the multiple dispatch problem. */
438*4d3841fdSKris Buschelman   ierr = PetscStrcpy(funct,"MatApplyPtAPNumeric_seqaij_seqaij");CHKERRQ(ierr);
439*4d3841fdSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)P,funct,(PetscVoidFunction)&f);CHKERRQ(ierr);
440*4d3841fdSKris Buschelman   if (!f) SETERRQ1(1,"MatSeqAIJPtAPNumeric is not supported for P of type %s",P->type_name);
441*4d3841fdSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)A,funct,(PetscVoidFunction)&f);CHKERRQ(ierr);
442*4d3841fdSKris Buschelman   if (!f) SETERRQ1(1,"MatSeqAIJPtAPNumeric is not supported for A of type %s",A->type_name);
443*4d3841fdSKris Buschelman 
444*4d3841fdSKris Buschelman   ierr = (*f)(A,P,C);CHKERRQ(ierr);
445eb9c0419SKris Buschelman 
446eb9c0419SKris Buschelman   PetscFunctionReturn(0);
447eb9c0419SKris Buschelman }
448eb9c0419SKris Buschelman 
449eb9c0419SKris Buschelman EXTERN_C_BEGIN
450eb9c0419SKris Buschelman #undef __FUNCT__
451eb9c0419SKris Buschelman #define __FUNCT__ "MatApplyPtAPNumeric_SeqAIJ_SeqAIJ"
452eb9c0419SKris Buschelman int MatApplyPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) {
453eb9c0419SKris Buschelman   int        ierr,flops=0;
454eb9c0419SKris Buschelman   Mat_SeqAIJ *a  = (Mat_SeqAIJ *) A->data;
455eb9c0419SKris Buschelman   Mat_SeqAIJ *p  = (Mat_SeqAIJ *) P->data;
456eb9c0419SKris Buschelman   Mat_SeqAIJ *c  = (Mat_SeqAIJ *) C->data;
457eb9c0419SKris Buschelman   int        *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
458eb9c0419SKris Buschelman   int        *ci=c->i,*cj=c->j,*cjj;
459eb9c0419SKris Buschelman   int        am=A->M,cn=C->N,cm=C->M;
460eb9c0419SKris Buschelman   int        i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
461eb9c0419SKris Buschelman   MatScalar  *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
462eb9c0419SKris Buschelman 
463eb9c0419SKris Buschelman   PetscFunctionBegin;
464eb9c0419SKris Buschelman   ierr = PetscLogEventBegin(MATSeqAIJ_PtAPNumeric,A,P,C,0);CHKERRQ(ierr);
465eb9c0419SKris Buschelman 
466eb9c0419SKris Buschelman   /* Allocate temporary array for storage of one row of A*P */
467eb9c0419SKris Buschelman   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(int)),&apa);CHKERRQ(ierr);
468eb9c0419SKris Buschelman   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr);
469eb9c0419SKris Buschelman 
470eb9c0419SKris Buschelman   apj      = (int *)(apa + cn);
471eb9c0419SKris Buschelman   apjdense = apj + cn;
472eb9c0419SKris Buschelman 
473eb9c0419SKris Buschelman   /* Clear old values in C */
474eb9c0419SKris Buschelman   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
475eb9c0419SKris Buschelman 
476eb9c0419SKris Buschelman   for (i=0;i<am;i++) {
477eb9c0419SKris Buschelman     /* Form sparse row of A*P */
478eb9c0419SKris Buschelman     anzi  = ai[i+1] - ai[i];
479eb9c0419SKris Buschelman     apnzj = 0;
480eb9c0419SKris Buschelman     for (j=0;j<anzi;j++) {
481eb9c0419SKris Buschelman       prow = *aj++;
482eb9c0419SKris Buschelman       pnzj = pi[prow+1] - pi[prow];
483eb9c0419SKris Buschelman       pjj  = pj + pi[prow];
484eb9c0419SKris Buschelman       paj  = pa + pi[prow];
485eb9c0419SKris Buschelman       for (k=0;k<pnzj;k++) {
486eb9c0419SKris Buschelman         if (!apjdense[pjj[k]]) {
487eb9c0419SKris Buschelman           apjdense[pjj[k]] = -1;
488eb9c0419SKris Buschelman           apj[apnzj++]     = pjj[k];
489eb9c0419SKris Buschelman         }
490eb9c0419SKris Buschelman         apa[pjj[k]] += (*aa)*paj[k];
491eb9c0419SKris Buschelman       }
492eb9c0419SKris Buschelman       flops += 2*pnzj;
493eb9c0419SKris Buschelman       aa++;
494eb9c0419SKris Buschelman     }
495eb9c0419SKris Buschelman 
496eb9c0419SKris Buschelman     /* Sort the j index array for quick sparse axpy. */
497eb9c0419SKris Buschelman     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
498eb9c0419SKris Buschelman 
499eb9c0419SKris Buschelman     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
500eb9c0419SKris Buschelman     pnzi = pi[i+1] - pi[i];
501eb9c0419SKris Buschelman     for (j=0;j<pnzi;j++) {
502eb9c0419SKris Buschelman       nextap = 0;
503eb9c0419SKris Buschelman       crow   = *pJ++;
504eb9c0419SKris Buschelman       cjj    = cj + ci[crow];
505eb9c0419SKris Buschelman       caj    = ca + ci[crow];
506eb9c0419SKris Buschelman       /* Perform sparse axpy operation.  Note cjj includes apj. */
507eb9c0419SKris Buschelman       for (k=0;nextap<apnzj;k++) {
508eb9c0419SKris Buschelman         if (cjj[k]==apj[nextap]) {
509eb9c0419SKris Buschelman           caj[k] += (*pA)*apa[apj[nextap++]];
510eb9c0419SKris Buschelman         }
511eb9c0419SKris Buschelman       }
512eb9c0419SKris Buschelman       flops += 2*apnzj;
513eb9c0419SKris Buschelman       pA++;
514eb9c0419SKris Buschelman     }
515eb9c0419SKris Buschelman 
516eb9c0419SKris Buschelman     /* Zero the current row info for A*P */
517eb9c0419SKris Buschelman     for (j=0;j<apnzj;j++) {
518eb9c0419SKris Buschelman       apa[apj[j]]      = 0.;
519eb9c0419SKris Buschelman       apjdense[apj[j]] = 0;
520eb9c0419SKris Buschelman     }
521eb9c0419SKris Buschelman   }
522eb9c0419SKris Buschelman 
523eb9c0419SKris Buschelman   /* Assemble the final matrix and clean up */
524eb9c0419SKris Buschelman   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
525eb9c0419SKris Buschelman   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
526eb9c0419SKris Buschelman   ierr = PetscFree(apa);CHKERRQ(ierr);
527eb9c0419SKris Buschelman   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
528eb9c0419SKris Buschelman   ierr = PetscLogEventEnd(MATSeqAIJ_PtAPNumeric,A,P,C,0);CHKERRQ(ierr);
529eb9c0419SKris Buschelman 
530eb9c0419SKris Buschelman   PetscFunctionReturn(0);
531eb9c0419SKris Buschelman }
532eb9c0419SKris Buschelman EXTERN_C_END
533eb9c0419SKris Buschelman 
534eb9c0419SKris Buschelman #undef __FUNCT__
535eb9c0419SKris Buschelman #define __FUNCT__ "RegisterApplyPtAPRoutines_Private"
536eb9c0419SKris Buschelman int RegisterApplyPtAPRoutines_Private(Mat A) {
537eb9c0419SKris Buschelman   int ierr;
538eb9c0419SKris Buschelman 
539eb9c0419SKris Buschelman   PetscFunctionBegin;
540eb9c0419SKris Buschelman 
541eb9c0419SKris Buschelman   if (!MATSeqAIJ_PtAP) {
542eb9c0419SKris Buschelman     ierr = PetscLogEventRegister(&MATSeqAIJ_PtAP,"MatSeqAIJApplyPtAP",MAT_COOKIE);CHKERRQ(ierr);
543eb9c0419SKris Buschelman   }
544eb9c0419SKris Buschelman 
545eb9c0419SKris Buschelman   if (!MATSeqAIJ_PtAPSymbolic) {
546eb9c0419SKris Buschelman     ierr = PetscLogEventRegister(&MATSeqAIJ_PtAPSymbolic,"MatSeqAIJApplyPtAPSymbolic",MAT_COOKIE);CHKERRQ(ierr);
547eb9c0419SKris Buschelman   }
548eb9c0419SKris Buschelman   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatApplyPtAPSymbolic_seqaij_seqaij",
549eb9c0419SKris Buschelman                                            "MatApplyPtAPSymbolic_SeqAIJ_SeqAIJ",
550eb9c0419SKris Buschelman                                            MatApplyPtAPSymbolic_SeqAIJ_SeqAIJ);CHKERRQ(ierr);
551eb9c0419SKris Buschelman 
552eb9c0419SKris Buschelman   if (!MATSeqAIJ_PtAPNumeric) {
553eb9c0419SKris Buschelman     ierr = PetscLogEventRegister(&MATSeqAIJ_PtAPNumeric,"MatSeqAIJApplyPtAPNumeric",MAT_COOKIE);CHKERRQ(ierr);
554eb9c0419SKris Buschelman   }
555eb9c0419SKris Buschelman   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatApplyPtAPNumeric_seqaij_seqaij",
556eb9c0419SKris Buschelman                                            "MatApplyPtAPNumeric_SeqAIJ_SeqAIJ",
557eb9c0419SKris Buschelman                                            MatApplyPtAPNumeric_SeqAIJ_SeqAIJ);CHKERRQ(ierr);
5585c66b693SKris Buschelman   ierr = RegisterMatMatMultRoutines_Private(A);CHKERRQ(ierr);
559eb9c0419SKris Buschelman   PetscFunctionReturn(0);
560eb9c0419SKris Buschelman }
561