xref: /petsc/src/mat/impls/aij/seq/matptap.c (revision eb9c0419a981a97953bed00d7987199f49d3316b)
1*eb9c0419SKris Buschelman /*
2*eb9c0419SKris Buschelman   Defines projective product routines where A is a SeqAIJ matrix
3*eb9c0419SKris Buschelman           C = P^T * A * P
4*eb9c0419SKris Buschelman */
5*eb9c0419SKris Buschelman 
6*eb9c0419SKris Buschelman #include "src/mat/impls/aij/seq/aij.h"
7*eb9c0419SKris Buschelman #include "src/mat/utils/freespace.h"
8*eb9c0419SKris Buschelman 
9*eb9c0419SKris Buschelman int MatSeqAIJPtAP(Mat,Mat,Mat*);
10*eb9c0419SKris Buschelman int MatSeqAIJPtAPSymbolic(Mat,Mat,Mat*);
11*eb9c0419SKris Buschelman int MatSeqAIJPtAPNumeric(Mat,Mat,Mat);
12*eb9c0419SKris Buschelman 
13*eb9c0419SKris Buschelman static int MATSeqAIJ_PtAP         = 0;
14*eb9c0419SKris Buschelman static int MATSeqAIJ_PtAPSymbolic = 0;
15*eb9c0419SKris Buschelman static int MATSeqAIJ_PtAPNumeric  = 0;
16*eb9c0419SKris Buschelman 
17*eb9c0419SKris Buschelman /*
18*eb9c0419SKris Buschelman      MatSeqAIJPtAP - Creates the SeqAIJ matrix product, C,
19*eb9c0419SKris Buschelman            of SeqAIJ matrix A and matrix P:
20*eb9c0419SKris Buschelman                  C = P^T * A * P;
21*eb9c0419SKris Buschelman 
22*eb9c0419SKris Buschelman      Note: C is assumed to be uncreated.
23*eb9c0419SKris Buschelman            If this is not the case, Destroy C before calling this routine.
24*eb9c0419SKris Buschelman */
25*eb9c0419SKris Buschelman #undef __FUNCT__
26*eb9c0419SKris Buschelman #define __FUNCT__ "MatSeqAIJPtAP"
27*eb9c0419SKris Buschelman int MatSeqAIJPtAP(Mat A,Mat P,Mat *C) {
28*eb9c0419SKris Buschelman   int ierr;
29*eb9c0419SKris Buschelman   char funct[80];
30*eb9c0419SKris Buschelman 
31*eb9c0419SKris Buschelman   PetscFunctionBegin;
32*eb9c0419SKris Buschelman 
33*eb9c0419SKris Buschelman   ierr = PetscLogEventBegin(MATSeqAIJ_PtAP,A,P,0,0);CHKERRQ(ierr);
34*eb9c0419SKris Buschelman 
35*eb9c0419SKris Buschelman   ierr = MatSeqAIJPtAPSymbolic(A,P,C);CHKERRQ(ierr);
36*eb9c0419SKris Buschelman 
37*eb9c0419SKris Buschelman   /* Avoid additional error checking included in */
38*eb9c0419SKris Buschelman /*   ierr = MatSeqAIJApplyPtAPNumeric(A,P,*C);CHKERRQ(ierr); */
39*eb9c0419SKris Buschelman 
40*eb9c0419SKris Buschelman   /* Query A for ApplyPtAPNumeric implementation based on types of P */
41*eb9c0419SKris Buschelman   ierr = PetscStrcpy(funct,"MatApplyPtAPNumeric_seqaij_");CHKERRQ(ierr);
42*eb9c0419SKris Buschelman   ierr = PetscStrcat(funct,P->type_name);CHKERRQ(ierr);
43*eb9c0419SKris Buschelman   ierr = PetscTryMethod(A,funct,(Mat,Mat,Mat),(A,P,*C));CHKERRQ(ierr);
44*eb9c0419SKris Buschelman 
45*eb9c0419SKris Buschelman   ierr = PetscLogEventEnd(MATSeqAIJ_PtAP,A,P,0,0);CHKERRQ(ierr);
46*eb9c0419SKris Buschelman 
47*eb9c0419SKris Buschelman   PetscFunctionReturn(0);
48*eb9c0419SKris Buschelman }
49*eb9c0419SKris Buschelman 
50*eb9c0419SKris Buschelman /*
51*eb9c0419SKris Buschelman      MatSeqAIJPtAPSymbolic - Creates the (i,j) structure of the SeqAIJ matrix product, C,
52*eb9c0419SKris Buschelman            of SeqAIJ matrix A and matrix P, according to:
53*eb9c0419SKris Buschelman                  C = P^T * A * P;
54*eb9c0419SKris Buschelman 
55*eb9c0419SKris Buschelman      Note: C is assumed to be uncreated.
56*eb9c0419SKris Buschelman            If this is not the case, Destroy C before calling this routine.
57*eb9c0419SKris Buschelman */
58*eb9c0419SKris Buschelman #undef __FUNCT__
59*eb9c0419SKris Buschelman #define __FUNCT__ "MatSeqAIJPtAPSymbolic"
60*eb9c0419SKris Buschelman int MatSeqAIJPtAPSymbolic(Mat A,Mat P,Mat *C) {
61*eb9c0419SKris Buschelman   int ierr;
62*eb9c0419SKris Buschelman   char funct[80];
63*eb9c0419SKris Buschelman 
64*eb9c0419SKris Buschelman   PetscFunctionBegin;
65*eb9c0419SKris Buschelman 
66*eb9c0419SKris Buschelman   PetscValidPointer(C);
67*eb9c0419SKris Buschelman 
68*eb9c0419SKris Buschelman   PetscValidHeaderSpecific(A,MAT_COOKIE);
69*eb9c0419SKris Buschelman   PetscValidType(A);
70*eb9c0419SKris Buschelman   MatPreallocated(A);
71*eb9c0419SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
72*eb9c0419SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
73*eb9c0419SKris Buschelman 
74*eb9c0419SKris Buschelman   PetscValidHeaderSpecific(P,MAT_COOKIE);
75*eb9c0419SKris Buschelman   PetscValidType(P);
76*eb9c0419SKris Buschelman   MatPreallocated(P);
77*eb9c0419SKris Buschelman   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
78*eb9c0419SKris Buschelman   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
79*eb9c0419SKris Buschelman 
80*eb9c0419SKris Buschelman   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
81*eb9c0419SKris Buschelman   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
82*eb9c0419SKris Buschelman 
83*eb9c0419SKris Buschelman   /* Query A for ApplyPtAP implementation based on types of P */
84*eb9c0419SKris Buschelman   ierr = PetscStrcpy(funct,"MatApplyPtAPSymbolic_seqaij_");CHKERRQ(ierr);
85*eb9c0419SKris Buschelman   ierr = PetscStrcat(funct,P->type_name);CHKERRQ(ierr);
86*eb9c0419SKris Buschelman   ierr = PetscTryMethod(A,funct,(Mat,Mat,Mat*),(A,P,C));CHKERRQ(ierr);
87*eb9c0419SKris Buschelman 
88*eb9c0419SKris Buschelman   PetscFunctionReturn(0);
89*eb9c0419SKris Buschelman }
90*eb9c0419SKris Buschelman 
91*eb9c0419SKris Buschelman EXTERN_C_BEGIN
92*eb9c0419SKris Buschelman #undef __FUNCT__
93*eb9c0419SKris Buschelman #define __FUNCT__ "MatApplyPtAPSymbolic_SeqAIJ_SeqAIJ"
94*eb9c0419SKris Buschelman int MatApplyPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C) {
95*eb9c0419SKris Buschelman   int            ierr;
96*eb9c0419SKris Buschelman   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
97*eb9c0419SKris Buschelman   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
98*eb9c0419SKris Buschelman   int            aishift=a->indexshift,pishift=p->indexshift;
99*eb9c0419SKris Buschelman   int            *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
100*eb9c0419SKris Buschelman   int            *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
101*eb9c0419SKris Buschelman   int            an=A->N,am=A->M,pn=P->N,pm=P->M;
102*eb9c0419SKris Buschelman   int            i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
103*eb9c0419SKris Buschelman   MatScalar      *ca;
104*eb9c0419SKris Buschelman 
105*eb9c0419SKris Buschelman   PetscFunctionBegin;
106*eb9c0419SKris Buschelman 
107*eb9c0419SKris Buschelman   /* some error checking which could be moved into interface layer */
108*eb9c0419SKris Buschelman   if (aishift || pishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported.");
109*eb9c0419SKris Buschelman 
110*eb9c0419SKris Buschelman   /* Start timer */
111*eb9c0419SKris Buschelman   ierr = PetscLogEventBegin(MATSeqAIJ_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
112*eb9c0419SKris Buschelman 
113*eb9c0419SKris Buschelman   /* Get ij structure of P^T */
114*eb9c0419SKris Buschelman   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
115*eb9c0419SKris Buschelman   ptJ=ptj;
116*eb9c0419SKris Buschelman 
117*eb9c0419SKris Buschelman   /* Allocate ci array, arrays for fill computation and */
118*eb9c0419SKris Buschelman   /* free space for accumulating nonzero column info */
119*eb9c0419SKris Buschelman   ierr = PetscMalloc(((pn+1)*1)*sizeof(int),&ci);CHKERRQ(ierr);
120*eb9c0419SKris Buschelman   ci[0] = 0;
121*eb9c0419SKris Buschelman 
122*eb9c0419SKris Buschelman   ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr);
123*eb9c0419SKris Buschelman   ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr);
124*eb9c0419SKris Buschelman   ptasparserow = ptadenserow  + an;
125*eb9c0419SKris Buschelman   denserow     = ptasparserow + an;
126*eb9c0419SKris Buschelman   sparserow    = denserow     + pn;
127*eb9c0419SKris Buschelman 
128*eb9c0419SKris Buschelman   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
129*eb9c0419SKris Buschelman   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
130*eb9c0419SKris Buschelman   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
131*eb9c0419SKris Buschelman   current_space = free_space;
132*eb9c0419SKris Buschelman 
133*eb9c0419SKris Buschelman   /* Determine symbolic info for each row of C: */
134*eb9c0419SKris Buschelman   for (i=0;i<pn;i++) {
135*eb9c0419SKris Buschelman     ptnzi  = pti[i+1] - pti[i];
136*eb9c0419SKris Buschelman     ptanzi = 0;
137*eb9c0419SKris Buschelman     /* Determine symbolic row of PtA: */
138*eb9c0419SKris Buschelman     for (j=0;j<ptnzi;j++) {
139*eb9c0419SKris Buschelman       arow = *ptJ++;
140*eb9c0419SKris Buschelman       anzj = ai[arow+1] - ai[arow];
141*eb9c0419SKris Buschelman       ajj  = aj + ai[arow];
142*eb9c0419SKris Buschelman       for (k=0;k<anzj;k++) {
143*eb9c0419SKris Buschelman         if (!ptadenserow[ajj[k]]) {
144*eb9c0419SKris Buschelman           ptadenserow[ajj[k]]    = -1;
145*eb9c0419SKris Buschelman           ptasparserow[ptanzi++] = ajj[k];
146*eb9c0419SKris Buschelman         }
147*eb9c0419SKris Buschelman       }
148*eb9c0419SKris Buschelman     }
149*eb9c0419SKris Buschelman     /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
150*eb9c0419SKris Buschelman     ptaj = ptasparserow;
151*eb9c0419SKris Buschelman     cnzi   = 0;
152*eb9c0419SKris Buschelman     for (j=0;j<ptanzi;j++) {
153*eb9c0419SKris Buschelman       prow = *ptaj++;
154*eb9c0419SKris Buschelman       pnzj = pi[prow+1] - pi[prow];
155*eb9c0419SKris Buschelman       pjj  = pj + pi[prow];
156*eb9c0419SKris Buschelman       for (k=0;k<pnzj;k++) {
157*eb9c0419SKris Buschelman         if (!denserow[pjj[k]]) {
158*eb9c0419SKris Buschelman           denserow[pjj[k]]  = -1;
159*eb9c0419SKris Buschelman           sparserow[cnzi++] = pjj[k];
160*eb9c0419SKris Buschelman         }
161*eb9c0419SKris Buschelman       }
162*eb9c0419SKris Buschelman     }
163*eb9c0419SKris Buschelman 
164*eb9c0419SKris Buschelman     /* sort sparserow */
165*eb9c0419SKris Buschelman     ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
166*eb9c0419SKris Buschelman 
167*eb9c0419SKris Buschelman     /* If free space is not available, make more free space */
168*eb9c0419SKris Buschelman     /* Double the amount of total space in the list */
169*eb9c0419SKris Buschelman     if (current_space->local_remaining<cnzi) {
170*eb9c0419SKris Buschelman       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
171*eb9c0419SKris Buschelman     }
172*eb9c0419SKris Buschelman 
173*eb9c0419SKris Buschelman     /* Copy data into free space, and zero out denserows */
174*eb9c0419SKris Buschelman     ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr);
175*eb9c0419SKris Buschelman     current_space->array           += cnzi;
176*eb9c0419SKris Buschelman     current_space->local_used      += cnzi;
177*eb9c0419SKris Buschelman     current_space->local_remaining -= cnzi;
178*eb9c0419SKris Buschelman 
179*eb9c0419SKris Buschelman     for (j=0;j<ptanzi;j++) {
180*eb9c0419SKris Buschelman       ptadenserow[ptasparserow[j]] = 0;
181*eb9c0419SKris Buschelman     }
182*eb9c0419SKris Buschelman     for (j=0;j<cnzi;j++) {
183*eb9c0419SKris Buschelman       denserow[sparserow[j]] = 0;
184*eb9c0419SKris Buschelman     }
185*eb9c0419SKris Buschelman     /* Aside: Perhaps we should save the pta info for the numerical factorization. */
186*eb9c0419SKris Buschelman     /*        For now, we will recompute what is needed. */
187*eb9c0419SKris Buschelman     ci[i+1] = ci[i] + cnzi;
188*eb9c0419SKris Buschelman   }
189*eb9c0419SKris Buschelman   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
190*eb9c0419SKris Buschelman   /* Allocate space for cj, initialize cj, and */
191*eb9c0419SKris Buschelman   /* destroy list of free space and other temporary array(s) */
192*eb9c0419SKris Buschelman   ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr);
193*eb9c0419SKris Buschelman   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
194*eb9c0419SKris Buschelman   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
195*eb9c0419SKris Buschelman 
196*eb9c0419SKris Buschelman   /* Allocate space for ca */
197*eb9c0419SKris Buschelman   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
198*eb9c0419SKris Buschelman   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
199*eb9c0419SKris Buschelman 
200*eb9c0419SKris Buschelman   /* put together the new matrix */
201*eb9c0419SKris Buschelman   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
202*eb9c0419SKris Buschelman 
203*eb9c0419SKris Buschelman   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
204*eb9c0419SKris Buschelman   /* Since these are PETSc arrays, change flags to free them as necessary. */
205*eb9c0419SKris Buschelman   c = (Mat_SeqAIJ *)((*C)->data);
206*eb9c0419SKris Buschelman   c->freedata = PETSC_TRUE;
207*eb9c0419SKris Buschelman   c->nonew    = 0;
208*eb9c0419SKris Buschelman 
209*eb9c0419SKris Buschelman   /* Clean up. */
210*eb9c0419SKris Buschelman   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
211*eb9c0419SKris Buschelman 
212*eb9c0419SKris Buschelman   ierr = PetscLogEventEnd(MATSeqAIJ_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
213*eb9c0419SKris Buschelman   PetscFunctionReturn(0);
214*eb9c0419SKris Buschelman }
215*eb9c0419SKris Buschelman EXTERN_C_END
216*eb9c0419SKris Buschelman 
217*eb9c0419SKris Buschelman /*
218*eb9c0419SKris Buschelman      MatSeqAIJPtAPNumeric - Computes the SeqAIJ matrix product, C,
219*eb9c0419SKris Buschelman            of SeqAIJ matrix A and matrix P, according to:
220*eb9c0419SKris Buschelman                  C = P^T * A * P
221*eb9c0419SKris Buschelman      Note: C must have been created by calling MatSeqAIJApplyPtAPSymbolic.
222*eb9c0419SKris Buschelman */
223*eb9c0419SKris Buschelman #undef __FUNCT__
224*eb9c0419SKris Buschelman #define __FUNCT__ "MatSeqAIJPtAPNumeric"
225*eb9c0419SKris Buschelman int MatSeqAIJPtAPNumeric(Mat A,Mat P,Mat C) {
226*eb9c0419SKris Buschelman   int ierr;
227*eb9c0419SKris Buschelman   char funct[80];
228*eb9c0419SKris Buschelman 
229*eb9c0419SKris Buschelman   PetscFunctionBegin;
230*eb9c0419SKris Buschelman 
231*eb9c0419SKris Buschelman   PetscValidHeaderSpecific(A,MAT_COOKIE);
232*eb9c0419SKris Buschelman   PetscValidType(A);
233*eb9c0419SKris Buschelman   MatPreallocated(A);
234*eb9c0419SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
235*eb9c0419SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
236*eb9c0419SKris Buschelman 
237*eb9c0419SKris Buschelman   PetscValidHeaderSpecific(P,MAT_COOKIE);
238*eb9c0419SKris Buschelman   PetscValidType(P);
239*eb9c0419SKris Buschelman   MatPreallocated(P);
240*eb9c0419SKris Buschelman   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
241*eb9c0419SKris Buschelman   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
242*eb9c0419SKris Buschelman 
243*eb9c0419SKris Buschelman   PetscValidHeaderSpecific(C,MAT_COOKIE);
244*eb9c0419SKris Buschelman   PetscValidType(C);
245*eb9c0419SKris Buschelman   MatPreallocated(C);
246*eb9c0419SKris Buschelman   if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
247*eb9c0419SKris Buschelman   if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
248*eb9c0419SKris Buschelman 
249*eb9c0419SKris Buschelman   if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M);
250*eb9c0419SKris Buschelman   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
251*eb9c0419SKris Buschelman   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
252*eb9c0419SKris Buschelman   if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N);
253*eb9c0419SKris Buschelman 
254*eb9c0419SKris Buschelman   /* Query A for ApplyPtAP implementation based on types of P */
255*eb9c0419SKris Buschelman   ierr = PetscStrcpy(funct,"MatApplyPtAPNumeric_seqaij_");CHKERRQ(ierr);
256*eb9c0419SKris Buschelman   ierr = PetscStrcat(funct,P->type_name);CHKERRQ(ierr);
257*eb9c0419SKris Buschelman   ierr = PetscTryMethod(A,funct,(Mat,Mat,Mat),(A,P,C));CHKERRQ(ierr);
258*eb9c0419SKris Buschelman 
259*eb9c0419SKris Buschelman   PetscFunctionReturn(0);
260*eb9c0419SKris Buschelman }
261*eb9c0419SKris Buschelman 
262*eb9c0419SKris Buschelman EXTERN_C_BEGIN
263*eb9c0419SKris Buschelman #undef __FUNCT__
264*eb9c0419SKris Buschelman #define __FUNCT__ "MatApplyPtAPNumeric_SeqAIJ_SeqAIJ"
265*eb9c0419SKris Buschelman int MatApplyPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) {
266*eb9c0419SKris Buschelman   int        ierr,flops=0;
267*eb9c0419SKris Buschelman   Mat_SeqAIJ *a  = (Mat_SeqAIJ *) A->data;
268*eb9c0419SKris Buschelman   Mat_SeqAIJ *p  = (Mat_SeqAIJ *) P->data;
269*eb9c0419SKris Buschelman   Mat_SeqAIJ *c  = (Mat_SeqAIJ *) C->data;
270*eb9c0419SKris Buschelman   int        aishift=a->indexshift,pishift=p->indexshift,cishift=c->indexshift;
271*eb9c0419SKris Buschelman   int        *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
272*eb9c0419SKris Buschelman   int        *ci=c->i,*cj=c->j,*cjj;
273*eb9c0419SKris Buschelman   int        am=A->M,cn=C->N,cm=C->M;
274*eb9c0419SKris Buschelman   int        i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
275*eb9c0419SKris Buschelman   MatScalar  *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
276*eb9c0419SKris Buschelman 
277*eb9c0419SKris Buschelman   PetscFunctionBegin;
278*eb9c0419SKris Buschelman 
279*eb9c0419SKris Buschelman   /* Currently not for shifted matrices! */
280*eb9c0419SKris Buschelman   if (aishift || pishift || cishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported.");
281*eb9c0419SKris Buschelman 
282*eb9c0419SKris Buschelman   ierr = PetscLogEventBegin(MATSeqAIJ_PtAPNumeric,A,P,C,0);CHKERRQ(ierr);
283*eb9c0419SKris Buschelman 
284*eb9c0419SKris Buschelman   /* Allocate temporary array for storage of one row of A*P */
285*eb9c0419SKris Buschelman   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(int)),&apa);CHKERRQ(ierr);
286*eb9c0419SKris Buschelman   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr);
287*eb9c0419SKris Buschelman 
288*eb9c0419SKris Buschelman   apj      = (int *)(apa + cn);
289*eb9c0419SKris Buschelman   apjdense = apj + cn;
290*eb9c0419SKris Buschelman 
291*eb9c0419SKris Buschelman   /* Clear old values in C */
292*eb9c0419SKris Buschelman   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
293*eb9c0419SKris Buschelman 
294*eb9c0419SKris Buschelman   for (i=0;i<am;i++) {
295*eb9c0419SKris Buschelman     /* Form sparse row of A*P */
296*eb9c0419SKris Buschelman     anzi  = ai[i+1] - ai[i];
297*eb9c0419SKris Buschelman     apnzj = 0;
298*eb9c0419SKris Buschelman     for (j=0;j<anzi;j++) {
299*eb9c0419SKris Buschelman       prow = *aj++;
300*eb9c0419SKris Buschelman       pnzj = pi[prow+1] - pi[prow];
301*eb9c0419SKris Buschelman       pjj  = pj + pi[prow];
302*eb9c0419SKris Buschelman       paj  = pa + pi[prow];
303*eb9c0419SKris Buschelman       for (k=0;k<pnzj;k++) {
304*eb9c0419SKris Buschelman         if (!apjdense[pjj[k]]) {
305*eb9c0419SKris Buschelman           apjdense[pjj[k]] = -1;
306*eb9c0419SKris Buschelman           apj[apnzj++]     = pjj[k];
307*eb9c0419SKris Buschelman         }
308*eb9c0419SKris Buschelman         apa[pjj[k]] += (*aa)*paj[k];
309*eb9c0419SKris Buschelman       }
310*eb9c0419SKris Buschelman       flops += 2*pnzj;
311*eb9c0419SKris Buschelman       aa++;
312*eb9c0419SKris Buschelman     }
313*eb9c0419SKris Buschelman 
314*eb9c0419SKris Buschelman     /* Sort the j index array for quick sparse axpy. */
315*eb9c0419SKris Buschelman     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
316*eb9c0419SKris Buschelman 
317*eb9c0419SKris Buschelman     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
318*eb9c0419SKris Buschelman     pnzi = pi[i+1] - pi[i];
319*eb9c0419SKris Buschelman     for (j=0;j<pnzi;j++) {
320*eb9c0419SKris Buschelman       nextap = 0;
321*eb9c0419SKris Buschelman       crow   = *pJ++;
322*eb9c0419SKris Buschelman       cjj    = cj + ci[crow];
323*eb9c0419SKris Buschelman       caj    = ca + ci[crow];
324*eb9c0419SKris Buschelman       /* Perform sparse axpy operation.  Note cjj includes apj. */
325*eb9c0419SKris Buschelman       for (k=0;nextap<apnzj;k++) {
326*eb9c0419SKris Buschelman         if (cjj[k]==apj[nextap]) {
327*eb9c0419SKris Buschelman           caj[k] += (*pA)*apa[apj[nextap++]];
328*eb9c0419SKris Buschelman         }
329*eb9c0419SKris Buschelman       }
330*eb9c0419SKris Buschelman       flops += 2*apnzj;
331*eb9c0419SKris Buschelman       pA++;
332*eb9c0419SKris Buschelman     }
333*eb9c0419SKris Buschelman 
334*eb9c0419SKris Buschelman     /* Zero the current row info for A*P */
335*eb9c0419SKris Buschelman     for (j=0;j<apnzj;j++) {
336*eb9c0419SKris Buschelman       apa[apj[j]]      = 0.;
337*eb9c0419SKris Buschelman       apjdense[apj[j]] = 0;
338*eb9c0419SKris Buschelman     }
339*eb9c0419SKris Buschelman   }
340*eb9c0419SKris Buschelman 
341*eb9c0419SKris Buschelman   /* Assemble the final matrix and clean up */
342*eb9c0419SKris Buschelman   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
343*eb9c0419SKris Buschelman   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
344*eb9c0419SKris Buschelman   ierr = PetscFree(apa);CHKERRQ(ierr);
345*eb9c0419SKris Buschelman   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
346*eb9c0419SKris Buschelman   ierr = PetscLogEventEnd(MATSeqAIJ_PtAPNumeric,A,P,C,0);CHKERRQ(ierr);
347*eb9c0419SKris Buschelman 
348*eb9c0419SKris Buschelman   PetscFunctionReturn(0);
349*eb9c0419SKris Buschelman }
350*eb9c0419SKris Buschelman EXTERN_C_END
351*eb9c0419SKris Buschelman 
352*eb9c0419SKris Buschelman #undef __FUNCT__
353*eb9c0419SKris Buschelman #define __FUNCT__ "RegisterApplyPtAPRoutines_Private"
354*eb9c0419SKris Buschelman int RegisterApplyPtAPRoutines_Private(Mat A) {
355*eb9c0419SKris Buschelman   int ierr;
356*eb9c0419SKris Buschelman 
357*eb9c0419SKris Buschelman   PetscFunctionBegin;
358*eb9c0419SKris Buschelman 
359*eb9c0419SKris Buschelman   if (!MATSeqAIJ_PtAP) {
360*eb9c0419SKris Buschelman     ierr = PetscLogEventRegister(&MATSeqAIJ_PtAP,"MatSeqAIJApplyPtAP",MAT_COOKIE);CHKERRQ(ierr);
361*eb9c0419SKris Buschelman   }
362*eb9c0419SKris Buschelman 
363*eb9c0419SKris Buschelman   if (!MATSeqAIJ_PtAPSymbolic) {
364*eb9c0419SKris Buschelman     ierr = PetscLogEventRegister(&MATSeqAIJ_PtAPSymbolic,"MatSeqAIJApplyPtAPSymbolic",MAT_COOKIE);CHKERRQ(ierr);
365*eb9c0419SKris Buschelman   }
366*eb9c0419SKris Buschelman   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatApplyPtAPSymbolic_seqaij_seqaij",
367*eb9c0419SKris Buschelman                                            "MatApplyPtAPSymbolic_SeqAIJ_SeqAIJ",
368*eb9c0419SKris Buschelman                                            MatApplyPtAPSymbolic_SeqAIJ_SeqAIJ);CHKERRQ(ierr);
369*eb9c0419SKris Buschelman 
370*eb9c0419SKris Buschelman   if (!MATSeqAIJ_PtAPNumeric) {
371*eb9c0419SKris Buschelman     ierr = PetscLogEventRegister(&MATSeqAIJ_PtAPNumeric,"MatSeqAIJApplyPtAPNumeric",MAT_COOKIE);CHKERRQ(ierr);
372*eb9c0419SKris Buschelman   }
373*eb9c0419SKris Buschelman   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatApplyPtAPNumeric_seqaij_seqaij",
374*eb9c0419SKris Buschelman                                            "MatApplyPtAPNumeric_SeqAIJ_SeqAIJ",
375*eb9c0419SKris Buschelman                                            MatApplyPtAPNumeric_SeqAIJ_SeqAIJ);CHKERRQ(ierr);
376*eb9c0419SKris Buschelman   PetscFunctionReturn(0);
377*eb9c0419SKris Buschelman }
378