xref: /petsc/src/mat/impls/aij/seq/matptap.c (revision 231952e2dfbf08bf677468c0163cd5f0e326d986)
1eb9c0419SKris Buschelman /*
2eb9c0419SKris Buschelman   Defines projective product routines where A is a SeqAIJ matrix
3eb9c0419SKris Buschelman           C = P^T * A * P
4eb9c0419SKris Buschelman */
5eb9c0419SKris Buschelman 
6*231952e2SKris Buschelman #include "src/mat/impls/aij/seq/aij.h"   /*I "petscmat.h" I*/
7eb9c0419SKris Buschelman #include "src/mat/utils/freespace.h"
8eb9c0419SKris Buschelman 
97e44a18eSKris Buschelman EXTERN int MatSeqAIJPtAP(Mat,Mat,Mat*);
107e44a18eSKris Buschelman EXTERN int MatSeqAIJPtAPSymbolic(Mat,Mat,Mat*);
117e44a18eSKris Buschelman EXTERN int MatSeqAIJPtAPNumeric(Mat,Mat,Mat);
127e44a18eSKris Buschelman EXTERN int RegisterMatMatMultRoutines_Private(Mat);
13eb9c0419SKris Buschelman 
14eb9c0419SKris Buschelman static int MATSeqAIJ_PtAP         = 0;
15eb9c0419SKris Buschelman static int MATSeqAIJ_PtAPSymbolic = 0;
16eb9c0419SKris Buschelman static int MATSeqAIJ_PtAPNumeric  = 0;
17eb9c0419SKris Buschelman 
18eb9c0419SKris Buschelman #undef __FUNCT__
19eb9c0419SKris Buschelman #define __FUNCT__ "MatSeqAIJPtAP"
204d3841fdSKris Buschelman /*@
214d3841fdSKris Buschelman    MatSeqAIJPtAP - Creates the matrix projection C = P^T * A * P
224d3841fdSKris Buschelman 
234d3841fdSKris Buschelman    Collective on Mat
244d3841fdSKris Buschelman 
254d3841fdSKris Buschelman    Input Parameters:
264d3841fdSKris Buschelman +  A - the matrix
274d3841fdSKris Buschelman -  P - the projection matrix
284d3841fdSKris Buschelman 
294d3841fdSKris Buschelman    Output Parameters:
304d3841fdSKris Buschelman .  C - the product matrix
314d3841fdSKris Buschelman 
324d3841fdSKris Buschelman    Notes:
334d3841fdSKris Buschelman    C will be created and must be destroyed by the user with MatDestroy().
344d3841fdSKris Buschelman 
354d3841fdSKris Buschelman    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
364d3841fdSKris Buschelman    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
374d3841fdSKris Buschelman 
384d3841fdSKris Buschelman    Level: intermediate
394d3841fdSKris Buschelman 
404d3841fdSKris Buschelman .seealso: MatSeqAIJPtAPSymbolic(),MatSeqAIJPtAPNumeric(),MatMatMult()
414d3841fdSKris Buschelman @*/
42eb9c0419SKris Buschelman int MatSeqAIJPtAP(Mat A,Mat P,Mat *C) {
43eb9c0419SKris Buschelman   int ierr;
44eb9c0419SKris Buschelman   char funct[80];
454d3841fdSKris Buschelman   int (*f)(Mat,Mat,Mat);
46eb9c0419SKris Buschelman 
47eb9c0419SKris Buschelman   PetscFunctionBegin;
48eb9c0419SKris Buschelman   ierr = PetscLogEventBegin(MATSeqAIJ_PtAP,A,P,0,0);CHKERRQ(ierr);
49eb9c0419SKris Buschelman 
50eb9c0419SKris Buschelman   ierr = MatSeqAIJPtAPSymbolic(A,P,C);CHKERRQ(ierr);
51eb9c0419SKris Buschelman 
52eb9c0419SKris Buschelman   /* Avoid additional error checking included in */
53eb9c0419SKris Buschelman /*   ierr = MatSeqAIJApplyPtAPNumeric(A,P,*C);CHKERRQ(ierr); */
54eb9c0419SKris Buschelman 
554d3841fdSKris Buschelman   /* Currently only _seqaij_seqaij is implemented, so just query for it in A and P. */
564d3841fdSKris Buschelman   /* When other implementations exist, attack the multiple dispatch problem. */
574d3841fdSKris Buschelman   ierr = PetscStrcpy(funct,"MatApplyPtAPNumeric_seqaij_seqaij");CHKERRQ(ierr);
584d3841fdSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)P,funct,(PetscVoidFunction)&f);CHKERRQ(ierr);
594d3841fdSKris Buschelman   if (!f) SETERRQ1(1,"MatSeqAIJPtAPNumeric is not supported for P of type %s",P->type_name);
604d3841fdSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)A,funct,(PetscVoidFunction)&f);CHKERRQ(ierr);
614d3841fdSKris Buschelman   if (!f) SETERRQ1(1,"MatSeqAIJPtAPNumeric is not supported for A of type %s",A->type_name);
624d3841fdSKris Buschelman 
634d3841fdSKris Buschelman   ierr = (*f)(A,P,*C);CHKERRQ(ierr);
64eb9c0419SKris Buschelman 
65eb9c0419SKris Buschelman   ierr = PetscLogEventEnd(MATSeqAIJ_PtAP,A,P,0,0);CHKERRQ(ierr);
66eb9c0419SKris Buschelman   PetscFunctionReturn(0);
67eb9c0419SKris Buschelman }
68eb9c0419SKris Buschelman 
69eb9c0419SKris Buschelman #undef __FUNCT__
70eb9c0419SKris Buschelman #define __FUNCT__ "MatSeqAIJPtAPSymbolic"
714d3841fdSKris Buschelman /*@
724d3841fdSKris Buschelman    MatSeqAIJPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P
734d3841fdSKris Buschelman 
744d3841fdSKris Buschelman    Collective on Mat
754d3841fdSKris Buschelman 
764d3841fdSKris Buschelman    Input Parameters:
774d3841fdSKris Buschelman +  A - the matrix
784d3841fdSKris Buschelman -  P - the projection matrix
794d3841fdSKris Buschelman 
804d3841fdSKris Buschelman    Output Parameters:
814d3841fdSKris Buschelman .  C - the (i,j) structure of the product matrix
824d3841fdSKris Buschelman 
834d3841fdSKris Buschelman    Notes:
844d3841fdSKris Buschelman    C will be created and must be destroyed by the user with MatDestroy().
854d3841fdSKris Buschelman 
864d3841fdSKris Buschelman    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
874d3841fdSKris Buschelman    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.  The product is computed using
884d3841fdSKris Buschelman    this (i,j) structure by calling MatSeqAIJPtAPNumeric().
894d3841fdSKris Buschelman 
904d3841fdSKris Buschelman    Level: intermediate
914d3841fdSKris Buschelman 
924d3841fdSKris Buschelman .seealso: MatSeqAIJPtAP(),MatSeqAIJPtAPNumeric(),MatMatMultSymbolic()
934d3841fdSKris Buschelman @*/
94eb9c0419SKris Buschelman int MatSeqAIJPtAPSymbolic(Mat A,Mat P,Mat *C) {
95eb9c0419SKris Buschelman   int ierr;
96eb9c0419SKris Buschelman   char funct[80];
974d3841fdSKris Buschelman   int (*f)(Mat,Mat,Mat);
98eb9c0419SKris Buschelman 
99eb9c0419SKris Buschelman   PetscFunctionBegin;
100eb9c0419SKris Buschelman 
1014482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
102c9780b6fSBarry Smith   PetscValidType(A,1);
103eb9c0419SKris Buschelman   MatPreallocated(A);
104eb9c0419SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
105eb9c0419SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
106eb9c0419SKris Buschelman 
1074482741eSBarry Smith   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
108c9780b6fSBarry Smith   PetscValidType(P,2);
109eb9c0419SKris Buschelman   MatPreallocated(P);
110eb9c0419SKris Buschelman   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
111eb9c0419SKris Buschelman   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
112eb9c0419SKris Buschelman 
1134482741eSBarry Smith   PetscValidPointer(C,3);
1144482741eSBarry Smith 
115eb9c0419SKris Buschelman   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
116eb9c0419SKris Buschelman   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
117eb9c0419SKris Buschelman 
1184d3841fdSKris Buschelman   /* Currently only _seqaij_seqaij is implemented, so just query for it. */
1194d3841fdSKris Buschelman   /* When other implementations exist, attack the multiple dispatch problem. */
1204d3841fdSKris Buschelman   ierr = PetscStrcpy(funct,"MatApplyPtAPSymbolic_seqaij_seqaij");CHKERRQ(ierr);
1214d3841fdSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)P,funct,(PetscVoidFunction)&f);CHKERRQ(ierr);
1224d3841fdSKris Buschelman   if (!f) SETERRQ1(1,"MatSeqAIJPtAPSymbolic is not supported for P of type %s",P->type_name);
1234d3841fdSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)A,funct,(PetscVoidFunction)&f);CHKERRQ(ierr);
1244d3841fdSKris Buschelman   if (!f) SETERRQ1(1,"MatSeqAIJPtAPSymbolic is not supported for A of type %s",A->type_name);
1254d3841fdSKris Buschelman 
1264d3841fdSKris Buschelman   ierr = (*f)(A,P,*C);CHKERRQ(ierr);
127eb9c0419SKris Buschelman 
128eb9c0419SKris Buschelman   PetscFunctionReturn(0);
129eb9c0419SKris Buschelman }
130eb9c0419SKris Buschelman 
131eb9c0419SKris Buschelman EXTERN_C_BEGIN
132eb9c0419SKris Buschelman #undef __FUNCT__
133eb9c0419SKris Buschelman #define __FUNCT__ "MatApplyPtAPSymbolic_SeqAIJ_SeqAIJ"
134eb9c0419SKris Buschelman int MatApplyPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C) {
135eb9c0419SKris Buschelman   int            ierr;
136eb9c0419SKris Buschelman   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
137eb9c0419SKris Buschelman   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
138eb9c0419SKris Buschelman   int            *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
139eb9c0419SKris Buschelman   int            *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
140eb9c0419SKris Buschelman   int            an=A->N,am=A->M,pn=P->N,pm=P->M;
141eb9c0419SKris Buschelman   int            i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
142eb9c0419SKris Buschelman   MatScalar      *ca;
143eb9c0419SKris Buschelman 
144eb9c0419SKris Buschelman   PetscFunctionBegin;
145eb9c0419SKris Buschelman 
146eb9c0419SKris Buschelman   /* Start timer */
147eb9c0419SKris Buschelman   ierr = PetscLogEventBegin(MATSeqAIJ_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
148eb9c0419SKris Buschelman 
149eb9c0419SKris Buschelman   /* Get ij structure of P^T */
150eb9c0419SKris Buschelman   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
151eb9c0419SKris Buschelman   ptJ=ptj;
152eb9c0419SKris Buschelman 
153eb9c0419SKris Buschelman   /* Allocate ci array, arrays for fill computation and */
154eb9c0419SKris Buschelman   /* free space for accumulating nonzero column info */
1553985e5eaSKris Buschelman   ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr);
156eb9c0419SKris Buschelman   ci[0] = 0;
157eb9c0419SKris Buschelman 
158eb9c0419SKris Buschelman   ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr);
159eb9c0419SKris Buschelman   ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr);
160eb9c0419SKris Buschelman   ptasparserow = ptadenserow  + an;
161eb9c0419SKris Buschelman   denserow     = ptasparserow + an;
162eb9c0419SKris Buschelman   sparserow    = denserow     + pn;
163eb9c0419SKris Buschelman 
164eb9c0419SKris Buschelman   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
165eb9c0419SKris Buschelman   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
166eb9c0419SKris Buschelman   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
167eb9c0419SKris Buschelman   current_space = free_space;
168eb9c0419SKris Buschelman 
169eb9c0419SKris Buschelman   /* Determine symbolic info for each row of C: */
170eb9c0419SKris Buschelman   for (i=0;i<pn;i++) {
171eb9c0419SKris Buschelman     ptnzi  = pti[i+1] - pti[i];
172eb9c0419SKris Buschelman     ptanzi = 0;
173eb9c0419SKris Buschelman     /* Determine symbolic row of PtA: */
174eb9c0419SKris Buschelman     for (j=0;j<ptnzi;j++) {
175eb9c0419SKris Buschelman       arow = *ptJ++;
176eb9c0419SKris Buschelman       anzj = ai[arow+1] - ai[arow];
177eb9c0419SKris Buschelman       ajj  = aj + ai[arow];
178eb9c0419SKris Buschelman       for (k=0;k<anzj;k++) {
179eb9c0419SKris Buschelman         if (!ptadenserow[ajj[k]]) {
180eb9c0419SKris Buschelman           ptadenserow[ajj[k]]    = -1;
181eb9c0419SKris Buschelman           ptasparserow[ptanzi++] = ajj[k];
182eb9c0419SKris Buschelman         }
183eb9c0419SKris Buschelman       }
184eb9c0419SKris Buschelman     }
185eb9c0419SKris Buschelman       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
186eb9c0419SKris Buschelman     ptaj = ptasparserow;
187eb9c0419SKris Buschelman     cnzi   = 0;
188eb9c0419SKris Buschelman     for (j=0;j<ptanzi;j++) {
189eb9c0419SKris Buschelman       prow = *ptaj++;
190eb9c0419SKris Buschelman       pnzj = pi[prow+1] - pi[prow];
191eb9c0419SKris Buschelman       pjj  = pj + pi[prow];
192eb9c0419SKris Buschelman       for (k=0;k<pnzj;k++) {
193eb9c0419SKris Buschelman         if (!denserow[pjj[k]]) {
194eb9c0419SKris Buschelman             denserow[pjj[k]]  = -1;
195eb9c0419SKris Buschelman             sparserow[cnzi++] = pjj[k];
196eb9c0419SKris Buschelman         }
197eb9c0419SKris Buschelman       }
198eb9c0419SKris Buschelman     }
199eb9c0419SKris Buschelman 
200eb9c0419SKris Buschelman     /* sort sparserow */
201eb9c0419SKris Buschelman     ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
202eb9c0419SKris Buschelman 
203eb9c0419SKris Buschelman     /* If free space is not available, make more free space */
204eb9c0419SKris Buschelman     /* Double the amount of total space in the list */
205eb9c0419SKris Buschelman     if (current_space->local_remaining<cnzi) {
206eb9c0419SKris Buschelman       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
207eb9c0419SKris Buschelman     }
208eb9c0419SKris Buschelman 
209eb9c0419SKris Buschelman     /* Copy data into free space, and zero out denserows */
210eb9c0419SKris Buschelman     ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr);
211eb9c0419SKris Buschelman     current_space->array           += cnzi;
212eb9c0419SKris Buschelman     current_space->local_used      += cnzi;
213eb9c0419SKris Buschelman     current_space->local_remaining -= cnzi;
214eb9c0419SKris Buschelman 
215eb9c0419SKris Buschelman     for (j=0;j<ptanzi;j++) {
216eb9c0419SKris Buschelman       ptadenserow[ptasparserow[j]] = 0;
217eb9c0419SKris Buschelman     }
218eb9c0419SKris Buschelman     for (j=0;j<cnzi;j++) {
219eb9c0419SKris Buschelman       denserow[sparserow[j]] = 0;
220eb9c0419SKris Buschelman     }
221eb9c0419SKris Buschelman       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
222eb9c0419SKris Buschelman       /*        For now, we will recompute what is needed. */
223eb9c0419SKris Buschelman     ci[i+1] = ci[i] + cnzi;
224eb9c0419SKris Buschelman   }
225eb9c0419SKris Buschelman   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
226eb9c0419SKris Buschelman   /* Allocate space for cj, initialize cj, and */
227eb9c0419SKris Buschelman   /* destroy list of free space and other temporary array(s) */
228eb9c0419SKris Buschelman   ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr);
229eb9c0419SKris Buschelman   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
230eb9c0419SKris Buschelman   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
231eb9c0419SKris Buschelman 
232eb9c0419SKris Buschelman   /* Allocate space for ca */
233eb9c0419SKris Buschelman   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
234eb9c0419SKris Buschelman   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
235eb9c0419SKris Buschelman 
236eb9c0419SKris Buschelman   /* put together the new matrix */
237eb9c0419SKris Buschelman   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
238eb9c0419SKris Buschelman 
239eb9c0419SKris Buschelman   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
240eb9c0419SKris Buschelman   /* Since these are PETSc arrays, change flags to free them as necessary. */
241eb9c0419SKris Buschelman   c = (Mat_SeqAIJ *)((*C)->data);
242eb9c0419SKris Buschelman   c->freedata = PETSC_TRUE;
243eb9c0419SKris Buschelman   c->nonew    = 0;
244eb9c0419SKris Buschelman 
245eb9c0419SKris Buschelman   /* Clean up. */
246eb9c0419SKris Buschelman   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
247eb9c0419SKris Buschelman 
248eb9c0419SKris Buschelman   ierr = PetscLogEventEnd(MATSeqAIJ_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
249eb9c0419SKris Buschelman   PetscFunctionReturn(0);
250eb9c0419SKris Buschelman }
251eb9c0419SKris Buschelman EXTERN_C_END
252eb9c0419SKris Buschelman 
2533985e5eaSKris Buschelman #include "src/mat/impls/maij/maij.h"
2543985e5eaSKris Buschelman EXTERN_C_BEGIN
2553985e5eaSKris Buschelman #undef __FUNCT__
2563985e5eaSKris Buschelman #define __FUNCT__ "MatApplyPtAPSymbolic_SeqAIJ_SeqMAIJ"
2573985e5eaSKris Buschelman int MatApplyPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C) {
2585c66b693SKris Buschelman   /* This routine requires testing -- I don't think it works. */
2593985e5eaSKris Buschelman   int            ierr;
2603985e5eaSKris Buschelman   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
2613985e5eaSKris Buschelman   Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
2623985e5eaSKris Buschelman   Mat            P=pp->AIJ;
2633985e5eaSKris Buschelman   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2643985e5eaSKris Buschelman   int            *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
2653985e5eaSKris Buschelman   int            *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
2663985e5eaSKris Buschelman   int            an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof;
267fe05a634SKris Buschelman   int            i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
2683985e5eaSKris Buschelman   MatScalar      *ca;
2693985e5eaSKris Buschelman 
2703985e5eaSKris Buschelman   PetscFunctionBegin;
2713985e5eaSKris Buschelman   /* Start timer */
2723985e5eaSKris Buschelman   ierr = PetscLogEventBegin(MATSeqAIJ_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
2733985e5eaSKris Buschelman 
2743985e5eaSKris Buschelman   /* Get ij structure of P^T */
2753985e5eaSKris Buschelman   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
2763985e5eaSKris Buschelman 
2773985e5eaSKris Buschelman   /* Allocate ci array, arrays for fill computation and */
2783985e5eaSKris Buschelman   /* free space for accumulating nonzero column info */
2793985e5eaSKris Buschelman   ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr);
2803985e5eaSKris Buschelman   ci[0] = 0;
2813985e5eaSKris Buschelman 
2823985e5eaSKris Buschelman   ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr);
2833985e5eaSKris Buschelman   ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr);
2843985e5eaSKris Buschelman   ptasparserow = ptadenserow  + an;
2853985e5eaSKris Buschelman   denserow     = ptasparserow + an;
2863985e5eaSKris Buschelman   sparserow    = denserow     + pn;
2873985e5eaSKris Buschelman 
2883985e5eaSKris Buschelman   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
2893985e5eaSKris Buschelman   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
2903985e5eaSKris Buschelman   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
2913985e5eaSKris Buschelman   current_space = free_space;
2923985e5eaSKris Buschelman 
2933985e5eaSKris Buschelman   /* Determine symbolic info for each row of C: */
2943985e5eaSKris Buschelman   for (i=0;i<pn/ppdof;i++) {
2953985e5eaSKris Buschelman     ptnzi  = pti[i+1] - pti[i];
2963985e5eaSKris Buschelman     ptanzi = 0;
2973985e5eaSKris Buschelman     ptJ    = ptj + pti[i];
2983985e5eaSKris Buschelman     for (dof=0;dof<ppdof;dof++) {
2993985e5eaSKris Buschelman     /* Determine symbolic row of PtA: */
3003985e5eaSKris Buschelman       for (j=0;j<ptnzi;j++) {
3013985e5eaSKris Buschelman         arow = ptJ[j] + dof;
3023985e5eaSKris Buschelman         anzj = ai[arow+1] - ai[arow];
3033985e5eaSKris Buschelman         ajj  = aj + ai[arow];
3043985e5eaSKris Buschelman         for (k=0;k<anzj;k++) {
3053985e5eaSKris Buschelman           if (!ptadenserow[ajj[k]]) {
3063985e5eaSKris Buschelman             ptadenserow[ajj[k]]    = -1;
3073985e5eaSKris Buschelman             ptasparserow[ptanzi++] = ajj[k];
3083985e5eaSKris Buschelman           }
3093985e5eaSKris Buschelman         }
3103985e5eaSKris Buschelman       }
3113985e5eaSKris Buschelman       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
3123985e5eaSKris Buschelman       ptaj = ptasparserow;
3133985e5eaSKris Buschelman       cnzi   = 0;
3143985e5eaSKris Buschelman       for (j=0;j<ptanzi;j++) {
315fe05a634SKris Buschelman         pdof = *ptaj%dof;
3163985e5eaSKris Buschelman         prow = (*ptaj++)/dof;
3173985e5eaSKris Buschelman         pnzj = pi[prow+1] - pi[prow];
3183985e5eaSKris Buschelman         pjj  = pj + pi[prow];
3193985e5eaSKris Buschelman         for (k=0;k<pnzj;k++) {
320fe05a634SKris Buschelman           if (!denserow[pjj[k]+pdof]) {
321fe05a634SKris Buschelman             denserow[pjj[k]+pdof] = -1;
322fe05a634SKris Buschelman             sparserow[cnzi++]     = pjj[k]+pdof;
3233985e5eaSKris Buschelman           }
3243985e5eaSKris Buschelman         }
3253985e5eaSKris Buschelman       }
3263985e5eaSKris Buschelman 
3273985e5eaSKris Buschelman       /* sort sparserow */
3283985e5eaSKris Buschelman       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
3293985e5eaSKris Buschelman 
3303985e5eaSKris Buschelman       /* If free space is not available, make more free space */
3313985e5eaSKris Buschelman       /* Double the amount of total space in the list */
3323985e5eaSKris Buschelman       if (current_space->local_remaining<cnzi) {
3333985e5eaSKris Buschelman         ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3343985e5eaSKris Buschelman       }
3353985e5eaSKris Buschelman 
3363985e5eaSKris Buschelman       /* Copy data into free space, and zero out denserows */
3373985e5eaSKris Buschelman       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr);
3383985e5eaSKris Buschelman       current_space->array           += cnzi;
3393985e5eaSKris Buschelman       current_space->local_used      += cnzi;
3403985e5eaSKris Buschelman       current_space->local_remaining -= cnzi;
3413985e5eaSKris Buschelman 
3423985e5eaSKris Buschelman       for (j=0;j<ptanzi;j++) {
3433985e5eaSKris Buschelman         ptadenserow[ptasparserow[j]] = 0;
3443985e5eaSKris Buschelman       }
3453985e5eaSKris Buschelman       for (j=0;j<cnzi;j++) {
3463985e5eaSKris Buschelman         denserow[sparserow[j]] = 0;
3473985e5eaSKris Buschelman       }
3483985e5eaSKris Buschelman       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
3493985e5eaSKris Buschelman       /*        For now, we will recompute what is needed. */
3503985e5eaSKris Buschelman       ci[i+1+dof] = ci[i+dof] + cnzi;
3513985e5eaSKris Buschelman     }
3523985e5eaSKris Buschelman   }
3533985e5eaSKris Buschelman   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
3543985e5eaSKris Buschelman   /* Allocate space for cj, initialize cj, and */
3553985e5eaSKris Buschelman   /* destroy list of free space and other temporary array(s) */
3563985e5eaSKris Buschelman   ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr);
3573985e5eaSKris Buschelman   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
3583985e5eaSKris Buschelman   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
3593985e5eaSKris Buschelman 
3603985e5eaSKris Buschelman   /* Allocate space for ca */
3613985e5eaSKris Buschelman   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
3623985e5eaSKris Buschelman   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
3633985e5eaSKris Buschelman 
3643985e5eaSKris Buschelman   /* put together the new matrix */
3653985e5eaSKris Buschelman   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
3663985e5eaSKris Buschelman 
3673985e5eaSKris Buschelman   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3683985e5eaSKris Buschelman   /* Since these are PETSc arrays, change flags to free them as necessary. */
3693985e5eaSKris Buschelman   c = (Mat_SeqAIJ *)((*C)->data);
3703985e5eaSKris Buschelman   c->freedata = PETSC_TRUE;
3713985e5eaSKris Buschelman   c->nonew    = 0;
3723985e5eaSKris Buschelman 
3733985e5eaSKris Buschelman   /* Clean up. */
3743985e5eaSKris Buschelman   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
3753985e5eaSKris Buschelman 
3763985e5eaSKris Buschelman   ierr = PetscLogEventEnd(MATSeqAIJ_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
3773985e5eaSKris Buschelman   PetscFunctionReturn(0);
3783985e5eaSKris Buschelman }
3793985e5eaSKris Buschelman EXTERN_C_END
3803985e5eaSKris Buschelman 
381eb9c0419SKris Buschelman #undef __FUNCT__
382eb9c0419SKris Buschelman #define __FUNCT__ "MatSeqAIJPtAPNumeric"
3834d3841fdSKris Buschelman /*@
3844d3841fdSKris Buschelman    MatSeqAIJPtAPNumeric - Computes the matrix projection C = P^T * A * P
3854d3841fdSKris Buschelman 
3864d3841fdSKris Buschelman    Collective on Mat
3874d3841fdSKris Buschelman 
3884d3841fdSKris Buschelman    Input Parameters:
3894d3841fdSKris Buschelman +  A - the matrix
3904d3841fdSKris Buschelman -  P - the projection matrix
3914d3841fdSKris Buschelman 
3924d3841fdSKris Buschelman    Output Parameters:
3934d3841fdSKris Buschelman .  C - the product matrix
3944d3841fdSKris Buschelman 
3954d3841fdSKris Buschelman    Notes:
3964d3841fdSKris Buschelman    C must have been created by calling MatSeqAIJPtAPSymbolic and must be destroyed by
3974d3841fdSKris Buschelman    the user using MatDeatroy().
3984d3841fdSKris Buschelman 
3994d3841fdSKris Buschelman    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
4004d3841fdSKris Buschelman    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
4014d3841fdSKris Buschelman 
4024d3841fdSKris Buschelman    Level: intermediate
4034d3841fdSKris Buschelman 
4044d3841fdSKris Buschelman .seealso: MatSeqAIJPtAP(),MatSeqAIJPtAPSymbolic(),MatMatMultNumeric()
4054d3841fdSKris Buschelman @*/
406eb9c0419SKris Buschelman int MatSeqAIJPtAPNumeric(Mat A,Mat P,Mat C) {
407eb9c0419SKris Buschelman   int ierr;
408eb9c0419SKris Buschelman   char funct[80];
4094d3841fdSKris Buschelman   int (*f)(Mat,Mat,Mat);
410eb9c0419SKris Buschelman 
411eb9c0419SKris Buschelman   PetscFunctionBegin;
412eb9c0419SKris Buschelman 
4134482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
414c9780b6fSBarry Smith   PetscValidType(A,1);
415eb9c0419SKris Buschelman   MatPreallocated(A);
416eb9c0419SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
417eb9c0419SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
418eb9c0419SKris Buschelman 
4194482741eSBarry Smith   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
420c9780b6fSBarry Smith   PetscValidType(P,2);
421eb9c0419SKris Buschelman   MatPreallocated(P);
422eb9c0419SKris Buschelman   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
423eb9c0419SKris Buschelman   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
424eb9c0419SKris Buschelman 
4254482741eSBarry Smith   PetscValidHeaderSpecific(C,MAT_COOKIE,3);
426c9780b6fSBarry Smith   PetscValidType(C,3);
427eb9c0419SKris Buschelman   MatPreallocated(C);
428eb9c0419SKris Buschelman   if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
429eb9c0419SKris Buschelman   if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
430eb9c0419SKris Buschelman 
431eb9c0419SKris Buschelman   if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M);
432eb9c0419SKris Buschelman   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
433eb9c0419SKris Buschelman   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
434eb9c0419SKris Buschelman   if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N);
435eb9c0419SKris Buschelman 
4364d3841fdSKris Buschelman   /* Currently only _seqaij_seqaij is implemented, so just query for it. */
4374d3841fdSKris Buschelman   /* When other implementations exist, attack the multiple dispatch problem. */
4384d3841fdSKris Buschelman   ierr = PetscStrcpy(funct,"MatApplyPtAPNumeric_seqaij_seqaij");CHKERRQ(ierr);
4394d3841fdSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)P,funct,(PetscVoidFunction)&f);CHKERRQ(ierr);
4404d3841fdSKris Buschelman   if (!f) SETERRQ1(1,"MatSeqAIJPtAPNumeric is not supported for P of type %s",P->type_name);
4414d3841fdSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)A,funct,(PetscVoidFunction)&f);CHKERRQ(ierr);
4424d3841fdSKris Buschelman   if (!f) SETERRQ1(1,"MatSeqAIJPtAPNumeric is not supported for A of type %s",A->type_name);
4434d3841fdSKris Buschelman 
4444d3841fdSKris Buschelman   ierr = (*f)(A,P,C);CHKERRQ(ierr);
445eb9c0419SKris Buschelman 
446eb9c0419SKris Buschelman   PetscFunctionReturn(0);
447eb9c0419SKris Buschelman }
448eb9c0419SKris Buschelman 
449eb9c0419SKris Buschelman EXTERN_C_BEGIN
450eb9c0419SKris Buschelman #undef __FUNCT__
451eb9c0419SKris Buschelman #define __FUNCT__ "MatApplyPtAPNumeric_SeqAIJ_SeqAIJ"
452eb9c0419SKris Buschelman int MatApplyPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) {
453eb9c0419SKris Buschelman   int        ierr,flops=0;
454eb9c0419SKris Buschelman   Mat_SeqAIJ *a  = (Mat_SeqAIJ *) A->data;
455eb9c0419SKris Buschelman   Mat_SeqAIJ *p  = (Mat_SeqAIJ *) P->data;
456eb9c0419SKris Buschelman   Mat_SeqAIJ *c  = (Mat_SeqAIJ *) C->data;
457eb9c0419SKris Buschelman   int        *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
458eb9c0419SKris Buschelman   int        *ci=c->i,*cj=c->j,*cjj;
459eb9c0419SKris Buschelman   int        am=A->M,cn=C->N,cm=C->M;
460eb9c0419SKris Buschelman   int        i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
461eb9c0419SKris Buschelman   MatScalar  *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
462eb9c0419SKris Buschelman 
463eb9c0419SKris Buschelman   PetscFunctionBegin;
464eb9c0419SKris Buschelman   ierr = PetscLogEventBegin(MATSeqAIJ_PtAPNumeric,A,P,C,0);CHKERRQ(ierr);
465eb9c0419SKris Buschelman 
466eb9c0419SKris Buschelman   /* Allocate temporary array for storage of one row of A*P */
467eb9c0419SKris Buschelman   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(int)),&apa);CHKERRQ(ierr);
468eb9c0419SKris Buschelman   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr);
469eb9c0419SKris Buschelman 
470eb9c0419SKris Buschelman   apj      = (int *)(apa + cn);
471eb9c0419SKris Buschelman   apjdense = apj + cn;
472eb9c0419SKris Buschelman 
473eb9c0419SKris Buschelman   /* Clear old values in C */
474eb9c0419SKris Buschelman   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
475eb9c0419SKris Buschelman 
476eb9c0419SKris Buschelman   for (i=0;i<am;i++) {
477eb9c0419SKris Buschelman     /* Form sparse row of A*P */
478eb9c0419SKris Buschelman     anzi  = ai[i+1] - ai[i];
479eb9c0419SKris Buschelman     apnzj = 0;
480eb9c0419SKris Buschelman     for (j=0;j<anzi;j++) {
481eb9c0419SKris Buschelman       prow = *aj++;
482eb9c0419SKris Buschelman       pnzj = pi[prow+1] - pi[prow];
483eb9c0419SKris Buschelman       pjj  = pj + pi[prow];
484eb9c0419SKris Buschelman       paj  = pa + pi[prow];
485eb9c0419SKris Buschelman       for (k=0;k<pnzj;k++) {
486eb9c0419SKris Buschelman         if (!apjdense[pjj[k]]) {
487eb9c0419SKris Buschelman           apjdense[pjj[k]] = -1;
488eb9c0419SKris Buschelman           apj[apnzj++]     = pjj[k];
489eb9c0419SKris Buschelman         }
490eb9c0419SKris Buschelman         apa[pjj[k]] += (*aa)*paj[k];
491eb9c0419SKris Buschelman       }
492eb9c0419SKris Buschelman       flops += 2*pnzj;
493eb9c0419SKris Buschelman       aa++;
494eb9c0419SKris Buschelman     }
495eb9c0419SKris Buschelman 
496eb9c0419SKris Buschelman     /* Sort the j index array for quick sparse axpy. */
497eb9c0419SKris Buschelman     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
498eb9c0419SKris Buschelman 
499eb9c0419SKris Buschelman     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
500eb9c0419SKris Buschelman     pnzi = pi[i+1] - pi[i];
501eb9c0419SKris Buschelman     for (j=0;j<pnzi;j++) {
502eb9c0419SKris Buschelman       nextap = 0;
503eb9c0419SKris Buschelman       crow   = *pJ++;
504eb9c0419SKris Buschelman       cjj    = cj + ci[crow];
505eb9c0419SKris Buschelman       caj    = ca + ci[crow];
506eb9c0419SKris Buschelman       /* Perform sparse axpy operation.  Note cjj includes apj. */
507eb9c0419SKris Buschelman       for (k=0;nextap<apnzj;k++) {
508eb9c0419SKris Buschelman         if (cjj[k]==apj[nextap]) {
509eb9c0419SKris Buschelman           caj[k] += (*pA)*apa[apj[nextap++]];
510eb9c0419SKris Buschelman         }
511eb9c0419SKris Buschelman       }
512eb9c0419SKris Buschelman       flops += 2*apnzj;
513eb9c0419SKris Buschelman       pA++;
514eb9c0419SKris Buschelman     }
515eb9c0419SKris Buschelman 
516eb9c0419SKris Buschelman     /* Zero the current row info for A*P */
517eb9c0419SKris Buschelman     for (j=0;j<apnzj;j++) {
518eb9c0419SKris Buschelman       apa[apj[j]]      = 0.;
519eb9c0419SKris Buschelman       apjdense[apj[j]] = 0;
520eb9c0419SKris Buschelman     }
521eb9c0419SKris Buschelman   }
522eb9c0419SKris Buschelman 
523eb9c0419SKris Buschelman   /* Assemble the final matrix and clean up */
524eb9c0419SKris Buschelman   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
525eb9c0419SKris Buschelman   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
526eb9c0419SKris Buschelman   ierr = PetscFree(apa);CHKERRQ(ierr);
527eb9c0419SKris Buschelman   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
528eb9c0419SKris Buschelman   ierr = PetscLogEventEnd(MATSeqAIJ_PtAPNumeric,A,P,C,0);CHKERRQ(ierr);
529eb9c0419SKris Buschelman 
530eb9c0419SKris Buschelman   PetscFunctionReturn(0);
531eb9c0419SKris Buschelman }
532eb9c0419SKris Buschelman EXTERN_C_END
533eb9c0419SKris Buschelman 
534eb9c0419SKris Buschelman #undef __FUNCT__
535eb9c0419SKris Buschelman #define __FUNCT__ "RegisterApplyPtAPRoutines_Private"
536eb9c0419SKris Buschelman int RegisterApplyPtAPRoutines_Private(Mat A) {
537eb9c0419SKris Buschelman   int ierr;
538eb9c0419SKris Buschelman 
539eb9c0419SKris Buschelman   PetscFunctionBegin;
540eb9c0419SKris Buschelman 
541eb9c0419SKris Buschelman   if (!MATSeqAIJ_PtAP) {
542eb9c0419SKris Buschelman     ierr = PetscLogEventRegister(&MATSeqAIJ_PtAP,"MatSeqAIJApplyPtAP",MAT_COOKIE);CHKERRQ(ierr);
543eb9c0419SKris Buschelman   }
544eb9c0419SKris Buschelman 
545eb9c0419SKris Buschelman   if (!MATSeqAIJ_PtAPSymbolic) {
546eb9c0419SKris Buschelman     ierr = PetscLogEventRegister(&MATSeqAIJ_PtAPSymbolic,"MatSeqAIJApplyPtAPSymbolic",MAT_COOKIE);CHKERRQ(ierr);
547eb9c0419SKris Buschelman   }
548eb9c0419SKris Buschelman   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatApplyPtAPSymbolic_seqaij_seqaij",
549eb9c0419SKris Buschelman                                            "MatApplyPtAPSymbolic_SeqAIJ_SeqAIJ",
550eb9c0419SKris Buschelman                                            MatApplyPtAPSymbolic_SeqAIJ_SeqAIJ);CHKERRQ(ierr);
551eb9c0419SKris Buschelman 
5525485867bSBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatApplyPtAPSymbolic_seqaij_aij",
5535485867bSBarry Smith                                            "MatApplyPtAPSymbolic_SeqAIJ_SeqAIJ",
5545485867bSBarry Smith                                            MatApplyPtAPSymbolic_SeqAIJ_SeqAIJ);CHKERRQ(ierr);
5555485867bSBarry Smith 
556eb9c0419SKris Buschelman   if (!MATSeqAIJ_PtAPNumeric) {
557eb9c0419SKris Buschelman     ierr = PetscLogEventRegister(&MATSeqAIJ_PtAPNumeric,"MatSeqAIJApplyPtAPNumeric",MAT_COOKIE);CHKERRQ(ierr);
558eb9c0419SKris Buschelman   }
5595485867bSBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatApplyPtAPNumeric_seqaij_aij",
5605485867bSBarry Smith                                            "MatApplyPtAPNumeric_SeqAIJ_SeqAIJ",
5615485867bSBarry Smith                                            MatApplyPtAPNumeric_SeqAIJ_SeqAIJ);CHKERRQ(ierr);
562eb9c0419SKris Buschelman   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatApplyPtAPNumeric_seqaij_seqaij",
563eb9c0419SKris Buschelman                                            "MatApplyPtAPNumeric_SeqAIJ_SeqAIJ",
564eb9c0419SKris Buschelman                                            MatApplyPtAPNumeric_SeqAIJ_SeqAIJ);CHKERRQ(ierr);
5655c66b693SKris Buschelman   ierr = RegisterMatMatMultRoutines_Private(A);CHKERRQ(ierr);
566eb9c0419SKris Buschelman   PetscFunctionReturn(0);
567eb9c0419SKris Buschelman }
568