xref: /petsc/src/mat/impls/aij/seq/matmatmult.c (revision 7d6d1ca649488d223aebf2e1d06d846fbae1f093)
1d50806bdSBarry Smith /*$Id: matmatmult.c,v 1.15 2001/09/07 20:04:44 buschelm Exp $*/
2d50806bdSBarry Smith /*
3d50806bdSBarry Smith   Defines a matrix-matrix product for 2 SeqAIJ matrices
4d50806bdSBarry Smith           C = A * B
5d50806bdSBarry Smith */
6d50806bdSBarry Smith 
7d50806bdSBarry Smith #include "src/mat/impls/aij/seq/aij.h"
8d50806bdSBarry Smith 
9d50806bdSBarry Smith typedef struct _Space *FreeSpaceList;
10d50806bdSBarry Smith typedef struct _Space {
11d50806bdSBarry Smith   FreeSpaceList more_space;
12d50806bdSBarry Smith   int           *array;
13d50806bdSBarry Smith   int           *array_head;
14d50806bdSBarry Smith   int           total_array_size;
15d50806bdSBarry Smith   int           local_used;
16d50806bdSBarry Smith   int           local_remaining;
17d50806bdSBarry Smith } FreeSpace;
18d50806bdSBarry Smith 
19d50806bdSBarry Smith #undef __FUNCT__
20d50806bdSBarry Smith #define __FUNCT__ "GetMoreSpace"
21d50806bdSBarry Smith int GetMoreSpace(int size,FreeSpaceList *list) {
22d50806bdSBarry Smith   FreeSpaceList a;
23d50806bdSBarry Smith   int ierr;
24d50806bdSBarry Smith 
25d50806bdSBarry Smith   PetscFunctionBegin;
26d50806bdSBarry Smith   ierr = PetscMalloc(sizeof(FreeSpace),&a);CHKERRQ(ierr);
27d50806bdSBarry Smith   ierr = PetscMalloc(size*sizeof(int),&(a->array_head));CHKERRQ(ierr);
28d50806bdSBarry Smith   a->array            = a->array_head;
29d50806bdSBarry Smith   a->local_remaining  = size;
30d50806bdSBarry Smith   a->local_used       = 0;
31d50806bdSBarry Smith   a->total_array_size = 0;
32d50806bdSBarry Smith   a->more_space       = NULL;
33d50806bdSBarry Smith 
34d50806bdSBarry Smith   if (*list) {
35d50806bdSBarry Smith     (*list)->more_space = a;
36d50806bdSBarry Smith     a->total_array_size = (*list)->total_array_size;
37d50806bdSBarry Smith   }
38d50806bdSBarry Smith 
39d50806bdSBarry Smith   a->total_array_size += size;
40d50806bdSBarry Smith   *list               =  a;
41d50806bdSBarry Smith   PetscFunctionReturn(0);
42d50806bdSBarry Smith }
43d50806bdSBarry Smith 
44d50806bdSBarry Smith #undef __FUNCT__
45d50806bdSBarry Smith #define __FUNCT__ "MakeSpaceContiguous"
46d50806bdSBarry Smith int MakeSpaceContiguous(int *space,FreeSpaceList *head) {
47d50806bdSBarry Smith   FreeSpaceList a;
48d50806bdSBarry Smith   int           ierr;
49d50806bdSBarry Smith 
50d50806bdSBarry Smith   PetscFunctionBegin;
51d50806bdSBarry Smith   while ((*head)!=NULL) {
52d50806bdSBarry Smith     a     =  (*head)->more_space;
53d50806bdSBarry Smith     ierr  =  PetscMemcpy(space,(*head)->array_head,((*head)->local_used)*sizeof(int));CHKERRQ(ierr);
54d50806bdSBarry Smith     space += (*head)->local_used;
55d50806bdSBarry Smith     ierr  =  PetscFree((*head)->array_head);CHKERRQ(ierr);
56d50806bdSBarry Smith     ierr  =  PetscFree(*head);CHKERRQ(ierr);
57d50806bdSBarry Smith     *head =  a;
58d50806bdSBarry Smith   }
59d50806bdSBarry Smith   PetscFunctionReturn(0);
60d50806bdSBarry Smith }
61d50806bdSBarry Smith 
62d50806bdSBarry Smith static int logkey_matmatmult_symbolic = 0;
63d50806bdSBarry Smith static int logkey_matmatmult_numeric  = 0;
64d50806bdSBarry Smith 
65d50806bdSBarry Smith /*
66d50806bdSBarry Smith      MatMatMult_SeqAIJ_SeqAIJ_Symbolic - Forms the symbolic product of two SeqAIJ matrices
67d50806bdSBarry Smith            C=A*B;
68d50806bdSBarry Smith 
69d50806bdSBarry Smith      Note: C is assumed to be uninitialized.
70d50806bdSBarry Smith            If this is not the case, Destroy C before calling this routine.
71d50806bdSBarry Smith */
72d50806bdSBarry Smith #undef __FUNCT__
73d50806bdSBarry Smith #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ_Symbolic"
74d50806bdSBarry Smith int MatMatMult_SeqAIJ_SeqAIJ_Symbolic(Mat A,Mat B,Mat *C)
75d50806bdSBarry Smith {
76d50806bdSBarry Smith   int            ierr;
77d50806bdSBarry Smith   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
78d50806bdSBarry Smith   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
79d50806bdSBarry Smith   int            aishift=a->indexshift,bishift=b->indexshift;
80d50806bdSBarry Smith   int            *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj;
81d50806bdSBarry Smith   int            *ci,*cj,*densefill,*sparsefill;
82d50806bdSBarry Smith   int            an=A->N,am=A->M,bn=B->N,bm=B->M;
83d50806bdSBarry Smith   int            i,j,k,anzi,brow,bnzj,cnzi;
84d50806bdSBarry Smith   MatScalar      *ca;
85d50806bdSBarry Smith 
86d50806bdSBarry Smith   PetscFunctionBegin;
87d50806bdSBarry Smith   /* some error checking which could be moved into interface layer */
88d50806bdSBarry Smith   if (aishift || bishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported.");
89d50806bdSBarry Smith   if (an!=bm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",an,bm);
90d50806bdSBarry Smith 
91d50806bdSBarry Smith   if (!logkey_matmatmult_symbolic) {
92d50806bdSBarry Smith     ierr = PetscLogEventRegister(&logkey_matmatmult_symbolic,"MatMatMult_Symbolic",MAT_COOKIE);CHKERRQ(ierr);
93d50806bdSBarry Smith   }
94d50806bdSBarry Smith   ierr = PetscLogEventBegin(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr);
95d50806bdSBarry Smith 
96d50806bdSBarry Smith   /* Set up */
97d50806bdSBarry Smith   /* Allocate ci array, arrays for fill computation and */
98d50806bdSBarry Smith   /* free space for accumulating nonzero column info */
99d50806bdSBarry Smith   ierr = PetscMalloc(((am+1)+1)*sizeof(int),&ci);CHKERRQ(ierr);
100d50806bdSBarry Smith   ci[0] = 0;
101d50806bdSBarry Smith 
102d50806bdSBarry Smith   ierr = PetscMalloc((2*bn+1)*sizeof(int),&densefill);CHKERRQ(ierr);
103d50806bdSBarry Smith   ierr = PetscMemzero(densefill,(2*bn+1)*sizeof(int));CHKERRQ(ierr);
104d50806bdSBarry Smith   sparsefill = densefill + bn;
105d50806bdSBarry Smith 
106d50806bdSBarry Smith   /* Initial FreeSpace size is nnz(B)=bi[bm] */
107d50806bdSBarry Smith   ierr          = GetMoreSpace(bi[bm],&free_space);CHKERRQ(ierr);
108d50806bdSBarry Smith   current_space = free_space;
109d50806bdSBarry Smith 
110d50806bdSBarry Smith   /* Determine fill for each row: */
111d50806bdSBarry Smith   for (i=0;i<am;i++) {
112d50806bdSBarry Smith     anzi = ai[i+1] - ai[i];
113d50806bdSBarry Smith     cnzi = 0;
114d50806bdSBarry Smith     for (j=0;j<anzi;j++) {
115d50806bdSBarry Smith       brow = *aj++;
116d50806bdSBarry Smith       bnzj = bi[brow+1] - bi[brow];
117d50806bdSBarry Smith       bjj  = bj + bi[brow];
118d50806bdSBarry Smith       for (k=0;k<bnzj;k++) {
119d50806bdSBarry Smith         /* If column is not marked, mark it in compressed and uncompressed locations. */
120d50806bdSBarry Smith         /* For simplicity, leave uncompressed row unsorted until finished with row, */
121d50806bdSBarry Smith         /* and increment nonzero count for this row. */
122d50806bdSBarry Smith         if (!densefill[bjj[k]]) {
123d50806bdSBarry Smith           densefill[bjj[k]]  = -1;
124d50806bdSBarry Smith           sparsefill[cnzi++] = bjj[k];
125d50806bdSBarry Smith         }
126d50806bdSBarry Smith       }
127d50806bdSBarry Smith     }
128d50806bdSBarry Smith 
129d50806bdSBarry Smith     /* sort sparsefill */
130d50806bdSBarry Smith     ierr = PetscSortInt(cnzi,sparsefill);CHKERRQ(ierr);
131d50806bdSBarry Smith 
132d50806bdSBarry Smith     /* If free space is not available, make more free space */
133d50806bdSBarry Smith     /* Double the amount of total space in the list */
134d50806bdSBarry Smith     if (current_space->local_remaining<cnzi) {
135d50806bdSBarry Smith       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
136d50806bdSBarry Smith     }
137d50806bdSBarry Smith 
138d50806bdSBarry Smith     /* Copy data into free space, and zero out densefill */
139d50806bdSBarry Smith     ierr = PetscMemcpy(current_space->array,sparsefill,cnzi*sizeof(int));CHKERRQ(ierr);
140d50806bdSBarry Smith     current_space->array           += cnzi;
141d50806bdSBarry Smith     current_space->local_used      += cnzi;
142d50806bdSBarry Smith     current_space->local_remaining -= cnzi;
143d50806bdSBarry Smith     for (j=0;j<cnzi;j++) {
144d50806bdSBarry Smith       densefill[sparsefill[j]] = 0;
145d50806bdSBarry Smith     }
146d50806bdSBarry Smith     ci[i+1] = ci[i] + cnzi;
147d50806bdSBarry Smith   }
148d50806bdSBarry Smith 
149d50806bdSBarry Smith   /* nnz is now stored in ci[am], column indices are in the list of free space */
150d50806bdSBarry Smith   /* Allocate space for cj, initialize cj, and */
151d50806bdSBarry Smith   /* destroy list of free space and other temporary array(s) */
152d50806bdSBarry Smith   ierr = PetscMalloc((ci[am]+1)*sizeof(int),&cj);CHKERRQ(ierr);
153d50806bdSBarry Smith   ierr = MakeSpaceContiguous(cj,&free_space);CHKERRQ(ierr);
154d50806bdSBarry Smith   ierr = PetscFree(densefill);CHKERRQ(ierr);
155d50806bdSBarry Smith 
156d50806bdSBarry Smith   /* Allocate space for ca */
157d50806bdSBarry Smith   ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
158d50806bdSBarry Smith   ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr);
159d50806bdSBarry Smith 
160d50806bdSBarry Smith   /* put together the new matrix */
161d50806bdSBarry Smith   ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr);
162d50806bdSBarry Smith 
163d50806bdSBarry Smith   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
164d50806bdSBarry Smith   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
165d50806bdSBarry Smith   c = (Mat_SeqAIJ *)((*C)->data);
166d50806bdSBarry Smith   c->freedata = PETSC_TRUE;
167d50806bdSBarry Smith   c->nonew    = 0;
168d50806bdSBarry Smith 
169d50806bdSBarry Smith   ierr = PetscLogEventEnd(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr);
170d50806bdSBarry Smith   PetscFunctionReturn(0);
171d50806bdSBarry Smith }
172d50806bdSBarry Smith 
173d50806bdSBarry Smith /*
174d50806bdSBarry Smith      MatMatMult_SeqAIJ_SeqAIJ_Numeric - Forms the numeric product of two SeqAIJ matrices
175d50806bdSBarry Smith            C=A*B;
176d50806bdSBarry Smith      Note: C must have been created by calling MatMatMult_SeqAIJ_SeqAIJ_Symbolic.
177d50806bdSBarry Smith */
178d50806bdSBarry Smith #undef __FUNCT__
179d50806bdSBarry Smith #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ_Numeric"
180d50806bdSBarry Smith int MatMatMult_SeqAIJ_SeqAIJ_Numeric(Mat A,Mat B,Mat C)
181d50806bdSBarry Smith {
182d50806bdSBarry Smith   Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
183d50806bdSBarry Smith   Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
184d50806bdSBarry Smith   Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data;
185d50806bdSBarry Smith   int        aishift=a->indexshift,bishift=b->indexshift,cishift=c->indexshift;
186d50806bdSBarry Smith   int        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j;
187d50806bdSBarry Smith   int        an=A->N,am=A->M,bn=B->N,bm=B->M,cn=C->N,cm=C->M;
188d50806bdSBarry Smith   int        ierr,i,j,k,anzi,bnzi,cnzi,brow,flops;
189d50806bdSBarry Smith   MatScalar  *aa=a->a,*ba=b->a,*baj,*ca=c->a,*temp;
190d50806bdSBarry Smith 
191d50806bdSBarry Smith   PetscFunctionBegin;
192d50806bdSBarry Smith 
193d50806bdSBarry Smith   /* This error checking should be unnecessary if the symbolic was performed */
194d50806bdSBarry Smith   if (aishift || bishift || cishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported.");
195d50806bdSBarry Smith   if (am!=cm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",am,cm);
196d50806bdSBarry Smith   if (an!=bm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",an,bm);
197d50806bdSBarry Smith   if (bn!=cn) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",bn,cn);
198d50806bdSBarry Smith 
199d50806bdSBarry Smith   if (!logkey_matmatmult_numeric) {
200d50806bdSBarry Smith     ierr = PetscLogEventRegister(&logkey_matmatmult_numeric,"MatMatMult_Numeric",MAT_COOKIE);CHKERRQ(ierr);
201d50806bdSBarry Smith   }
202d50806bdSBarry Smith   ierr = PetscLogEventBegin(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr);
203d50806bdSBarry Smith   flops = 0;
204d50806bdSBarry Smith   /* Allocate temp accumulation space to avoid searching for nonzero columns in C */
205d50806bdSBarry Smith   ierr = PetscMalloc((cn+1)*sizeof(MatScalar),&temp);CHKERRQ(ierr);
206d50806bdSBarry Smith   ierr = PetscMemzero(temp,cn*sizeof(MatScalar));CHKERRQ(ierr);
207d50806bdSBarry Smith   /* Traverse A row-wise. */
208d50806bdSBarry Smith   /* Build the ith row in C by summing over nonzero columns in A, */
209d50806bdSBarry Smith   /* the rows of B corresponding to nonzeros of A. */
210d50806bdSBarry Smith   for (i=0;i<am;i++) {
211d50806bdSBarry Smith     anzi = ai[i+1] - ai[i];
212d50806bdSBarry Smith     for (j=0;j<anzi;j++) {
213d50806bdSBarry Smith       brow = *aj++;
214d50806bdSBarry Smith       bnzi = bi[brow+1] - bi[brow];
215d50806bdSBarry Smith       bjj  = bj + bi[brow];
216d50806bdSBarry Smith       baj  = ba + bi[brow];
217d50806bdSBarry Smith       for (k=0;k<bnzi;k++) {
218d50806bdSBarry Smith         temp[bjj[k]] += (*aa)*baj[k];
219d50806bdSBarry Smith       }
220d50806bdSBarry Smith       flops += 2*bnzi;
221d50806bdSBarry Smith       aa++;
222d50806bdSBarry Smith     }
223d50806bdSBarry Smith     /* Store row back into C, and re-zero temp */
224d50806bdSBarry Smith     cnzi = ci[i+1] - ci[i];
225d50806bdSBarry Smith     for (j=0;j<cnzi;j++) {
226d50806bdSBarry Smith       ca[j] = temp[cj[j]];
227d50806bdSBarry Smith       temp[cj[j]] = 0.0;
228d50806bdSBarry Smith     }
229d50806bdSBarry Smith     ca += cnzi;
230d50806bdSBarry Smith     cj += cnzi;
231d50806bdSBarry Smith   }
232d50806bdSBarry Smith   /* Free temp */
233d50806bdSBarry Smith   ierr = PetscFree(temp);CHKERRQ(ierr);
234d50806bdSBarry Smith   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
235d50806bdSBarry Smith   ierr = PetscLogEventEnd(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr);
236d50806bdSBarry Smith   PetscFunctionReturn(0);
237d50806bdSBarry Smith }
238d50806bdSBarry Smith 
239d50806bdSBarry Smith #undef __FUNCT__
240d50806bdSBarry Smith #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ"
241d50806bdSBarry Smith int MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat *C) {
242d50806bdSBarry Smith   int ierr;
243d50806bdSBarry Smith 
244d50806bdSBarry Smith   PetscFunctionBegin;
245d50806bdSBarry Smith   ierr = MatMatMult_SeqAIJ_SeqAIJ_Symbolic(A,B,C);CHKERRQ(ierr);
246d50806bdSBarry Smith   ierr = MatMatMult_SeqAIJ_SeqAIJ_Numeric(A,B,*C);CHKERRQ(ierr);
247d50806bdSBarry Smith   PetscFunctionReturn(0);
248d50806bdSBarry Smith }
249d50806bdSBarry Smith 
250d50806bdSBarry Smith static int logkey_matapplyptap_symbolic = 0;
251d50806bdSBarry Smith static int logkey_matapplyptap_numeric  = 0;
252d50806bdSBarry Smith 
253d50806bdSBarry Smith #undef __FUNCT__
254d50806bdSBarry Smith #define __FUNCT__ "MatApplyPtAP_SeqAIJ_Symbolic"
255d50806bdSBarry Smith int MatApplyPtAP_SeqAIJ_Symbolic(Mat A,Mat P,Mat *C) {
256d50806bdSBarry Smith   int ierr;
257d50806bdSBarry Smith   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
258d50806bdSBarry Smith   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
259d50806bdSBarry Smith   int            aishift=a->indexshift,pishift=p->indexshift;
260d50806bdSBarry Smith   int            *pti,*ptj,*ptfill,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
261d50806bdSBarry Smith   int            *ci,*cj,*densefill,*sparsefill,*ptadensefill,*ptasparsefill,*ptaj;
262d50806bdSBarry Smith   int            an=A->N,am=A->M,pn=P->N,pm=P->M;
263d50806bdSBarry Smith   int            i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
264d50806bdSBarry Smith   MatScalar      *ca;
265d50806bdSBarry Smith 
266d50806bdSBarry Smith   PetscFunctionBegin;
267d50806bdSBarry Smith 
268d50806bdSBarry Smith   /* some error checking which could be moved into interface layer */
269d50806bdSBarry Smith   if (aishift || pishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported.");
270d50806bdSBarry Smith   if (pm!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pm,an);
271d50806bdSBarry Smith   if (am!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",am, an);
272d50806bdSBarry Smith 
273d50806bdSBarry Smith   if (!logkey_matapplyptap_symbolic) {
274d50806bdSBarry Smith     ierr = PetscLogEventRegister(&logkey_matapplyptap_symbolic,"MatApplyPtAP_Symbolic",MAT_COOKIE);CHKERRQ(ierr);
275d50806bdSBarry Smith   }
276d50806bdSBarry Smith   ierr = PetscLogEventBegin(logkey_matapplyptap_symbolic,A,P,0,0);CHKERRQ(ierr);
277d50806bdSBarry Smith 
278d50806bdSBarry Smith   /* Create ij structure of P^T */
279d50806bdSBarry Smith   /* Recall in P^T there are pn rows and pi[pm] nonzeros. */
280d50806bdSBarry Smith   ierr = PetscMalloc((pn+1+pi[pm])*sizeof(int),&pti);CHKERRQ(ierr);
281d50806bdSBarry Smith   ierr = PetscMemzero(pti,(pn+1+pi[pm])*sizeof(int));CHKERRQ(ierr);
282d50806bdSBarry Smith   ptj = pti + pn+1;
283d50806bdSBarry Smith 
284d50806bdSBarry Smith   /* Walk through pj and count ## of non-zeros in each row of P^T. */
285*7d6d1ca6SKris Buschelman   for (i=0;i<pi[pm];i++) {
286d50806bdSBarry Smith     pti[pj[i]+1] += 1;
287d50806bdSBarry Smith   }
288d50806bdSBarry Smith   /* Form pti for csr format of P^T. */
289*7d6d1ca6SKris Buschelman   for (i=0;i<pn;i++) {
290d50806bdSBarry Smith     pti[i+1] += pti[i];
291d50806bdSBarry Smith   }
292d50806bdSBarry Smith 
293d50806bdSBarry Smith   /* Allocate temporary space for next insert location in each row of P^T. */
294d50806bdSBarry Smith   ierr = PetscMalloc(pn*sizeof(int),&ptfill);CHKERRQ(ierr);
295d50806bdSBarry Smith   ierr = PetscMemcpy(ptfill,pti,pn*sizeof(int));CHKERRQ(ierr);
296d50806bdSBarry Smith 
297d50806bdSBarry Smith   /* Walk through P row-wise and mark nonzero entries of P^T. */
298d50806bdSBarry Smith   for (i=0;i<pm;i++) {
299d50806bdSBarry Smith     pnzj = pi[i+1] - pi[i];
300d50806bdSBarry Smith     for (j=0;j<pnzj;j++) {
301*7d6d1ca6SKris Buschelman       ptj[ptfill[*pj]] =  i;
302*7d6d1ca6SKris Buschelman       ptfill[*pj++]      += 1;
303d50806bdSBarry Smith     }
304d50806bdSBarry Smith   }
305*7d6d1ca6SKris Buschelman   pj = p->j;
306d50806bdSBarry Smith 
307d50806bdSBarry Smith   /* Clean-up temporary space. */
308d50806bdSBarry Smith   ierr = PetscFree(ptfill);CHKERRQ(ierr);
309d50806bdSBarry Smith 
310d50806bdSBarry Smith   /* Allocate ci array, arrays for fill computation and */
311d50806bdSBarry Smith   /* free space for accumulating nonzero column info */
312d50806bdSBarry Smith   ierr = PetscMalloc(((pn+1)*1)*sizeof(int),&ci);CHKERRQ(ierr);
313d50806bdSBarry Smith   ci[0] = 0;
314d50806bdSBarry Smith 
315d50806bdSBarry Smith   ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadensefill);CHKERRQ(ierr);
316d50806bdSBarry Smith   ierr = PetscMemzero(ptadensefill,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr);
317d50806bdSBarry Smith   ptasparsefill = ptadensefill  + an;
318d50806bdSBarry Smith   densefill     = ptasparsefill + an;
319d50806bdSBarry Smith   sparsefill    = densefill     + pn;
320d50806bdSBarry Smith 
321d50806bdSBarry Smith   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
322d50806bdSBarry Smith   /* Reason: Take pn/pm = 1/2. */
323d50806bdSBarry Smith   /*         P^T*A*P will take A(NxN) and create C(N/2xN/2). */
324d50806bdSBarry Smith   /*         If C has same sparsity pattern as A, nnz(C)~1/2*nnz(A). */
325d50806bdSBarry Smith   /*         Is this reasonable???? */
326d50806bdSBarry Smith   ierr          = GetMoreSpace((ai[am]*pn)/pm,&free_space);
327d50806bdSBarry Smith   current_space = free_space;
328d50806bdSBarry Smith 
329d50806bdSBarry Smith   /* Determine fill for each row of C: */
330d50806bdSBarry Smith   for (i=0;i<pn;i++) {
331d50806bdSBarry Smith     ptnzi  = pti[i+1] - pti[i];
332d50806bdSBarry Smith     ptanzi = 0;
333d50806bdSBarry Smith     /* Determine fill for row of PtA: */
334d50806bdSBarry Smith     for (j=0;j<ptnzi;j++) {
335d50806bdSBarry Smith       arow = *ptj++;
336d50806bdSBarry Smith       anzj = ai[arow+1] - ai[arow];
337d50806bdSBarry Smith       ajj  = aj + ai[arow];
338d50806bdSBarry Smith       for (k=0;k<anzj;k++) {
339d50806bdSBarry Smith         if (!ptadensefill[ajj[k]]) {
340d50806bdSBarry Smith           ptadensefill[ajj[k]]    = -1;
341d50806bdSBarry Smith           ptasparsefill[ptanzi++] = ajj[k];
342d50806bdSBarry Smith         }
343d50806bdSBarry Smith       }
344d50806bdSBarry Smith     }
345d50806bdSBarry Smith     /* Using fill info for row of PtA, determine fill for row of C: */
346d50806bdSBarry Smith     ptaj = ptasparsefill;
347d50806bdSBarry Smith     cnzi   = 0;
348d50806bdSBarry Smith     for (j=0;j<ptanzi;j++) {
349d50806bdSBarry Smith       prow = *ptaj++;
350d50806bdSBarry Smith       pnzj = pi[prow+1] - pi[prow];
351d50806bdSBarry Smith       pjj  = pj + pi[prow];
352d50806bdSBarry Smith       for (k=0;k<pnzj;k++) {
353d50806bdSBarry Smith         if (!densefill[pjj[k]]) {
354d50806bdSBarry Smith           densefill[pjj[k]]  = -1;
355d50806bdSBarry Smith           sparsefill[cnzi++] = pjj[k];
356d50806bdSBarry Smith         }
357d50806bdSBarry Smith       }
358d50806bdSBarry Smith     }
359d50806bdSBarry Smith 
360d50806bdSBarry Smith     /* sort sparsefill */
361d50806bdSBarry Smith     ierr = PetscSortInt(cnzi,sparsefill);CHKERRQ(ierr);
362d50806bdSBarry Smith 
363d50806bdSBarry Smith     /* If free space is not available, make more free space */
364d50806bdSBarry Smith     /* Double the amount of total space in the list */
365d50806bdSBarry Smith     if (current_space->local_remaining<cnzi) {
366d50806bdSBarry Smith       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
367d50806bdSBarry Smith     }
368d50806bdSBarry Smith 
369d50806bdSBarry Smith     /* Copy data into free space, and zero out densefills */
370d50806bdSBarry Smith     ierr = PetscMemcpy(current_space->array,sparsefill,cnzi*sizeof(int));CHKERRQ(ierr);
371d50806bdSBarry Smith     current_space->array           += cnzi;
372d50806bdSBarry Smith     current_space->local_used      += cnzi;
373d50806bdSBarry Smith     current_space->local_remaining -= cnzi;
374d50806bdSBarry Smith 
375d50806bdSBarry Smith     for (j=0;j<ptanzi;j++) {
376d50806bdSBarry Smith       ptadensefill[ptasparsefill[j]] = 0;
377d50806bdSBarry Smith     }
378d50806bdSBarry Smith     for (j=0;j<cnzi;j++) {
379d50806bdSBarry Smith       densefill[sparsefill[j]] = 0;
380d50806bdSBarry Smith     }
381d50806bdSBarry Smith     /* Aside: Perhaps we should save the pta info for the numerical factorization. */
382d50806bdSBarry Smith     /*        For now, we will recompute what is needed. */
383d50806bdSBarry Smith     ci[i+1] = ci[i] + cnzi;
384d50806bdSBarry Smith   }
385d50806bdSBarry Smith   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
386d50806bdSBarry Smith   /* Allocate space for cj, initialize cj, and */
387d50806bdSBarry Smith   /* destroy list of free space and other temporary array(s) */
388d50806bdSBarry Smith   ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr);
389d50806bdSBarry Smith   ierr = MakeSpaceContiguous(cj,&free_space);CHKERRQ(ierr);
390d50806bdSBarry Smith   ierr = PetscFree(ptadensefill);CHKERRQ(ierr);
391d50806bdSBarry Smith 
392d50806bdSBarry Smith   /* Allocate space for ca */
393d50806bdSBarry Smith   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
394d50806bdSBarry Smith   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
395d50806bdSBarry Smith 
396d50806bdSBarry Smith   /* put together the new matrix */
397d50806bdSBarry Smith   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
398d50806bdSBarry Smith 
399d50806bdSBarry Smith   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
400d50806bdSBarry Smith   /* Since these are PETSc arrays, change flags to free them as necessary. */
401d50806bdSBarry Smith   c = (Mat_SeqAIJ *)((*C)->data);
402d50806bdSBarry Smith   c->freedata = PETSC_TRUE;
403d50806bdSBarry Smith   c->nonew    = 0;
404d50806bdSBarry Smith 
405d50806bdSBarry Smith   /* Clean up. */
406d50806bdSBarry Smith   /* Perhaps we should attach the (i,j) info for P^T to P for future use. */
407d50806bdSBarry Smith   /* For now, we won't. */
408d50806bdSBarry Smith   ierr = PetscFree(pti);
409d50806bdSBarry Smith 
410d50806bdSBarry Smith   ierr = PetscLogEventEnd(logkey_matapplyptap_symbolic,A,P,0,0);CHKERRQ(ierr);
411d50806bdSBarry Smith   PetscFunctionReturn(0);
412d50806bdSBarry Smith }
413d50806bdSBarry Smith 
414d50806bdSBarry Smith #undef __FUNCT__
415d50806bdSBarry Smith #define __FUNCT__ "MatApplyPtAP_SeqAIJ_Numeric"
416d50806bdSBarry Smith int MatApplyPtAP_SeqAIJ_Numeric(Mat A,Mat P,Mat C) {
417d50806bdSBarry Smith   int ierr,flops;
418d50806bdSBarry Smith   Mat_SeqAIJ *a  = (Mat_SeqAIJ *) A->data;
419d50806bdSBarry Smith   Mat_SeqAIJ *p  = (Mat_SeqAIJ *) P->data;
420d50806bdSBarry Smith   Mat_SeqAIJ *c  = (Mat_SeqAIJ *) C->data;
421d50806bdSBarry Smith   int        aishift=a->indexshift,pishift=p->indexshift,cishift=c->indexshift;
422d50806bdSBarry Smith   int        *ai=a->i,*aj=a->j,*apj,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj,*ci=c->i,*cj=c->j,*cjj;
423d50806bdSBarry Smith   int        an=A->N,am=A->M,pn=P->N,pm=P->M,cn=C->N,cm=C->M;
424d50806bdSBarry Smith   int        i,j,k,anzi,pnzi,apnzj,pnzj,cnzj,prow,crow;
425d50806bdSBarry Smith   MatScalar  *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
426d50806bdSBarry Smith 
427d50806bdSBarry Smith   PetscFunctionBegin;
428d50806bdSBarry Smith 
429d50806bdSBarry Smith   /* This error checking should be unnecessary if the symbolic was performed */
430d50806bdSBarry Smith   if (aishift || pishift || cishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported.");
431d50806bdSBarry Smith   if (pn!=cm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pn,cm);
432d50806bdSBarry Smith   if (pm!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pm,an);
433d50806bdSBarry Smith   if (am!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",am, an);
434d50806bdSBarry Smith   if (pn!=cn) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pn, cn);
435d50806bdSBarry Smith 
436d50806bdSBarry Smith   if (!logkey_matapplyptap_numeric) {
437d50806bdSBarry Smith     ierr = PetscLogEventRegister(&logkey_matapplyptap_numeric,"MatApplyPtAP_Numeric",MAT_COOKIE);CHKERRQ(ierr);
438d50806bdSBarry Smith   }
439d50806bdSBarry Smith   ierr = PetscLogEventBegin(logkey_matapplyptap_numeric,A,P,C,0);CHKERRQ(ierr);
440d50806bdSBarry Smith   flops = 0;
441d50806bdSBarry Smith 
442d50806bdSBarry Smith   ierr = PetscMalloc(cn*(sizeof(MatScalar)+sizeof(int)),&apa);CHKERRQ(ierr);
443d50806bdSBarry Smith   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+sizeof(int)));CHKERRQ(ierr);
444d50806bdSBarry Smith   apj = (int *)(apa + cn);
445d50806bdSBarry Smith   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
446d50806bdSBarry Smith 
447d50806bdSBarry Smith   for (i=0;i<am;i++) {
448d50806bdSBarry Smith CHKMEMQ;
449d50806bdSBarry Smith     /* Form sparse row of A*P */
450d50806bdSBarry Smith     anzi  = ai[i+1] - ai[i];
451d50806bdSBarry Smith     apnzj = 0;
452d50806bdSBarry Smith     for (j=0;j<anzi;j++) {
453d50806bdSBarry Smith       prow = *aj++;
454d50806bdSBarry Smith       pnzj = pi[prow+1] - pi[prow];
455d50806bdSBarry Smith       pjj  = pj + pi[prow];
456d50806bdSBarry Smith       paj  = pa + pi[prow];
457d50806bdSBarry Smith       for (k=0;k<pnzj;k++) {
458d50806bdSBarry Smith         if (!apa[pjj[k]]) {
459d50806bdSBarry Smith CHKMEMQ;
460d50806bdSBarry Smith           apj[apnzj++]=pjj[k];
461d50806bdSBarry Smith CHKMEMQ;
462d50806bdSBarry Smith         }
463d50806bdSBarry Smith CHKMEMQ;
464d50806bdSBarry Smith         apa[pjj[k]] += (*aa)*paj[k];
465d50806bdSBarry Smith CHKMEMQ;
466d50806bdSBarry Smith       }
467d50806bdSBarry Smith       flops += 2*pnzj;
468d50806bdSBarry Smith       aa++;
469d50806bdSBarry Smith     }
470d50806bdSBarry Smith 
471d50806bdSBarry Smith     /* Sort the j index array for quick sparse axpy. */
472d50806bdSBarry Smith     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
473d50806bdSBarry Smith 
474d50806bdSBarry Smith     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
475d50806bdSBarry Smith     pnzi = pi[i+1] - pi[i];
476d50806bdSBarry Smith     for (j=0;j<pnzi;j++) {
477d50806bdSBarry Smith       int nextap=0;
478d50806bdSBarry Smith       crow = *pJ++;
479d50806bdSBarry Smith       cnzj = ci[crow+1] - ci[crow];
480d50806bdSBarry Smith       cjj  = cj + ci[crow];
481d50806bdSBarry Smith       caj  = ca + ci[crow];
482d50806bdSBarry Smith       /* Perform the sparse axpy operation.  Note cjj includes apj. */
483d50806bdSBarry Smith       for (k=0;k<cnzj;k++) {
484d50806bdSBarry Smith         if (cjj[k]==apj[nextap]) {
485d50806bdSBarry Smith           caj[k] += (*pA)*apa[apj[nextap++]];
486d50806bdSBarry Smith         }
487d50806bdSBarry Smith       }
488d50806bdSBarry Smith       flops += 2*apnzj;
489d50806bdSBarry Smith       pA++;
490d50806bdSBarry Smith     }
491d50806bdSBarry Smith 
492d50806bdSBarry Smith     for (j=0;j<apnzj;j++) {
493d50806bdSBarry Smith CHKMEMQ;
494d50806bdSBarry Smith       apa[apj[j]] = 0.;
495d50806bdSBarry Smith CHKMEMQ;
496d50806bdSBarry Smith     }
497d50806bdSBarry Smith   }
498d50806bdSBarry Smith   ierr = PetscFree(apa);CHKERRQ(ierr);
499d50806bdSBarry Smith   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
500d50806bdSBarry Smith   ierr = PetscLogEventEnd(logkey_matapplyptap_numeric,A,P,C,0);CHKERRQ(ierr);
501d50806bdSBarry Smith   PetscFunctionReturn(0);
502d50806bdSBarry Smith }
503d50806bdSBarry Smith 
504d50806bdSBarry Smith #undef __FUNCT__
505d50806bdSBarry Smith #define __FUNCT__ "MatApplyPtAP_SeqAIJ"
506d50806bdSBarry Smith int MatApplyPtAP_SeqAIJ(Mat A,Mat P,Mat *C) {
507d50806bdSBarry Smith   int ierr;
508d50806bdSBarry Smith 
509d50806bdSBarry Smith   PetscFunctionBegin;
510d50806bdSBarry Smith   ierr = MatApplyPtAP_SeqAIJ_Symbolic(A,P,C);CHKERRQ(ierr);
511d50806bdSBarry Smith   ierr = MatApplyPtAP_SeqAIJ_Numeric(A,P,*C);CHKERRQ(ierr);
512d50806bdSBarry Smith   PetscFunctionReturn(0);
513d50806bdSBarry Smith }
514