xref: /petsc/src/mat/impls/aij/seq/matmatmult.c (revision 2216b3a47cf7e6d0c49a68cbce08e6f75aec1b68)
1d50806bdSBarry Smith /*$Id: matmatmult.c,v 1.15 2001/09/07 20:04:44 buschelm Exp $*/
2d50806bdSBarry Smith /*
3d50806bdSBarry Smith   Defines a matrix-matrix product for 2 SeqAIJ matrices
4d50806bdSBarry Smith           C = A * B
5d50806bdSBarry Smith */
6d50806bdSBarry Smith 
7d50806bdSBarry Smith #include "src/mat/impls/aij/seq/aij.h"
8d50806bdSBarry Smith 
9*2216b3a4SKris Buschelman static int logkey_matmatmult            = 0;
10*2216b3a4SKris Buschelman static int logkey_matmatmult_symbolic   = 0;
11*2216b3a4SKris Buschelman static int logkey_matmatmult_numeric    = 0;
12*2216b3a4SKris Buschelman 
13*2216b3a4SKris Buschelman static int logkey_matapplyptap          = 0;
14*2216b3a4SKris Buschelman static int logkey_matapplyptap_symbolic = 0;
15*2216b3a4SKris Buschelman static int logkey_matapplyptap_numeric  = 0;
16*2216b3a4SKris Buschelman 
17d50806bdSBarry Smith typedef struct _Space *FreeSpaceList;
18d50806bdSBarry Smith typedef struct _Space {
19d50806bdSBarry Smith   FreeSpaceList more_space;
20d50806bdSBarry Smith   int           *array;
21d50806bdSBarry Smith   int           *array_head;
22d50806bdSBarry Smith   int           total_array_size;
23d50806bdSBarry Smith   int           local_used;
24d50806bdSBarry Smith   int           local_remaining;
25d50806bdSBarry Smith } FreeSpace;
26d50806bdSBarry Smith 
27d50806bdSBarry Smith #undef __FUNCT__
28d50806bdSBarry Smith #define __FUNCT__ "GetMoreSpace"
29*2216b3a4SKris Buschelman int GetMoreSpace(int size,FreeSpaceList *list) {
30d50806bdSBarry Smith   FreeSpaceList a;
31d50806bdSBarry Smith   int ierr;
32d50806bdSBarry Smith 
33d50806bdSBarry Smith   PetscFunctionBegin;
34d50806bdSBarry Smith   ierr = PetscMalloc(sizeof(FreeSpace),&a);CHKERRQ(ierr);
35d50806bdSBarry Smith   ierr = PetscMalloc(size*sizeof(int),&(a->array_head));CHKERRQ(ierr);
36d50806bdSBarry Smith   a->array            = a->array_head;
37d50806bdSBarry Smith   a->local_remaining  = size;
38d50806bdSBarry Smith   a->local_used       = 0;
39d50806bdSBarry Smith   a->total_array_size = 0;
40d50806bdSBarry Smith   a->more_space       = NULL;
41d50806bdSBarry Smith 
42d50806bdSBarry Smith   if (*list) {
43d50806bdSBarry Smith     (*list)->more_space = a;
44d50806bdSBarry Smith     a->total_array_size = (*list)->total_array_size;
45d50806bdSBarry Smith   }
46d50806bdSBarry Smith 
47d50806bdSBarry Smith   a->total_array_size += size;
48d50806bdSBarry Smith   *list               =  a;
49d50806bdSBarry Smith   PetscFunctionReturn(0);
50d50806bdSBarry Smith }
51d50806bdSBarry Smith 
52d50806bdSBarry Smith #undef __FUNCT__
53d50806bdSBarry Smith #define __FUNCT__ "MakeSpaceContiguous"
54d50806bdSBarry Smith int MakeSpaceContiguous(int *space,FreeSpaceList *head) {
55d50806bdSBarry Smith   FreeSpaceList a;
56d50806bdSBarry Smith   int           ierr;
57d50806bdSBarry Smith 
58d50806bdSBarry Smith   PetscFunctionBegin;
59d50806bdSBarry Smith   while ((*head)!=NULL) {
60d50806bdSBarry Smith     a     =  (*head)->more_space;
61d50806bdSBarry Smith     ierr  =  PetscMemcpy(space,(*head)->array_head,((*head)->local_used)*sizeof(int));CHKERRQ(ierr);
62d50806bdSBarry Smith     space += (*head)->local_used;
63d50806bdSBarry Smith     ierr  =  PetscFree((*head)->array_head);CHKERRQ(ierr);
64d50806bdSBarry Smith     ierr  =  PetscFree(*head);CHKERRQ(ierr);
65d50806bdSBarry Smith     *head =  a;
66d50806bdSBarry Smith   }
67d50806bdSBarry Smith   PetscFunctionReturn(0);
68d50806bdSBarry Smith }
69d50806bdSBarry Smith 
70d50806bdSBarry Smith /*
71d50806bdSBarry Smith      MatMatMult_SeqAIJ_SeqAIJ_Symbolic - Forms the symbolic product of two SeqAIJ matrices
72d50806bdSBarry Smith            C=A*B;
73d50806bdSBarry Smith 
74d50806bdSBarry Smith      Note: C is assumed to be uninitialized.
75d50806bdSBarry Smith            If this is not the case, Destroy C before calling this routine.
76d50806bdSBarry Smith */
77d50806bdSBarry Smith #undef __FUNCT__
78d50806bdSBarry Smith #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ_Symbolic"
79d50806bdSBarry Smith int MatMatMult_SeqAIJ_SeqAIJ_Symbolic(Mat A,Mat B,Mat *C)
80d50806bdSBarry Smith {
81d50806bdSBarry Smith   int            ierr;
82d50806bdSBarry Smith   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
83d50806bdSBarry Smith   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
84d50806bdSBarry Smith   int            aishift=a->indexshift,bishift=b->indexshift;
85d50806bdSBarry Smith   int            *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj;
86d50806bdSBarry Smith   int            *ci,*cj,*densefill,*sparsefill;
87d50806bdSBarry Smith   int            an=A->N,am=A->M,bn=B->N,bm=B->M;
88d50806bdSBarry Smith   int            i,j,k,anzi,brow,bnzj,cnzi;
89d50806bdSBarry Smith   MatScalar      *ca;
90d50806bdSBarry Smith 
91d50806bdSBarry Smith   PetscFunctionBegin;
92d50806bdSBarry Smith   /* some error checking which could be moved into interface layer */
93d50806bdSBarry Smith   if (aishift || bishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported.");
94d50806bdSBarry Smith   if (an!=bm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",an,bm);
95d50806bdSBarry Smith 
96d50806bdSBarry Smith   if (!logkey_matmatmult_symbolic) {
97d50806bdSBarry Smith     ierr = PetscLogEventRegister(&logkey_matmatmult_symbolic,"MatMatMult_Symbolic",MAT_COOKIE);CHKERRQ(ierr);
98d50806bdSBarry Smith   }
99d50806bdSBarry Smith   ierr = PetscLogEventBegin(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr);
100d50806bdSBarry Smith 
101d50806bdSBarry Smith   /* Set up */
102d50806bdSBarry Smith   /* Allocate ci array, arrays for fill computation and */
103d50806bdSBarry Smith   /* free space for accumulating nonzero column info */
104d50806bdSBarry Smith   ierr = PetscMalloc(((am+1)+1)*sizeof(int),&ci);CHKERRQ(ierr);
105d50806bdSBarry Smith   ci[0] = 0;
106d50806bdSBarry Smith 
107d50806bdSBarry Smith   ierr = PetscMalloc((2*bn+1)*sizeof(int),&densefill);CHKERRQ(ierr);
108d50806bdSBarry Smith   ierr = PetscMemzero(densefill,(2*bn+1)*sizeof(int));CHKERRQ(ierr);
109d50806bdSBarry Smith   sparsefill = densefill + bn;
110d50806bdSBarry Smith 
111d50806bdSBarry Smith   /* Initial FreeSpace size is nnz(B)=bi[bm] */
112d50806bdSBarry Smith   ierr          = GetMoreSpace(bi[bm],&free_space);CHKERRQ(ierr);
113d50806bdSBarry Smith   current_space = free_space;
114d50806bdSBarry Smith 
115d50806bdSBarry Smith   /* Determine fill for each row: */
116d50806bdSBarry Smith   for (i=0;i<am;i++) {
117d50806bdSBarry Smith     anzi = ai[i+1] - ai[i];
118d50806bdSBarry Smith     cnzi = 0;
119d50806bdSBarry Smith     for (j=0;j<anzi;j++) {
120d50806bdSBarry Smith       brow = *aj++;
121d50806bdSBarry Smith       bnzj = bi[brow+1] - bi[brow];
122d50806bdSBarry Smith       bjj  = bj + bi[brow];
123d50806bdSBarry Smith       for (k=0;k<bnzj;k++) {
124d50806bdSBarry Smith         /* If column is not marked, mark it in compressed and uncompressed locations. */
125d50806bdSBarry Smith         /* For simplicity, leave uncompressed row unsorted until finished with row, */
126d50806bdSBarry Smith         /* and increment nonzero count for this row. */
127d50806bdSBarry Smith         if (!densefill[bjj[k]]) {
128d50806bdSBarry Smith           densefill[bjj[k]]  = -1;
129d50806bdSBarry Smith           sparsefill[cnzi++] = bjj[k];
130d50806bdSBarry Smith         }
131d50806bdSBarry Smith       }
132d50806bdSBarry Smith     }
133d50806bdSBarry Smith 
134d50806bdSBarry Smith     /* sort sparsefill */
135d50806bdSBarry Smith     ierr = PetscSortInt(cnzi,sparsefill);CHKERRQ(ierr);
136d50806bdSBarry Smith 
137d50806bdSBarry Smith     /* If free space is not available, make more free space */
138d50806bdSBarry Smith     /* Double the amount of total space in the list */
139d50806bdSBarry Smith     if (current_space->local_remaining<cnzi) {
140d50806bdSBarry Smith       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
141d50806bdSBarry Smith     }
142d50806bdSBarry Smith 
143d50806bdSBarry Smith     /* Copy data into free space, and zero out densefill */
144d50806bdSBarry Smith     ierr = PetscMemcpy(current_space->array,sparsefill,cnzi*sizeof(int));CHKERRQ(ierr);
145d50806bdSBarry Smith     current_space->array           += cnzi;
146d50806bdSBarry Smith     current_space->local_used      += cnzi;
147d50806bdSBarry Smith     current_space->local_remaining -= cnzi;
148d50806bdSBarry Smith     for (j=0;j<cnzi;j++) {
149d50806bdSBarry Smith       densefill[sparsefill[j]] = 0;
150d50806bdSBarry Smith     }
151d50806bdSBarry Smith     ci[i+1] = ci[i] + cnzi;
152d50806bdSBarry Smith   }
153d50806bdSBarry Smith 
154d50806bdSBarry Smith   /* nnz is now stored in ci[am], column indices are in the list of free space */
155d50806bdSBarry Smith   /* Allocate space for cj, initialize cj, and */
156d50806bdSBarry Smith   /* destroy list of free space and other temporary array(s) */
157d50806bdSBarry Smith   ierr = PetscMalloc((ci[am]+1)*sizeof(int),&cj);CHKERRQ(ierr);
158d50806bdSBarry Smith   ierr = MakeSpaceContiguous(cj,&free_space);CHKERRQ(ierr);
159d50806bdSBarry Smith   ierr = PetscFree(densefill);CHKERRQ(ierr);
160d50806bdSBarry Smith 
161d50806bdSBarry Smith   /* Allocate space for ca */
162d50806bdSBarry Smith   ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
163d50806bdSBarry Smith   ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr);
164d50806bdSBarry Smith 
165d50806bdSBarry Smith   /* put together the new matrix */
166d50806bdSBarry Smith   ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr);
167d50806bdSBarry Smith 
168d50806bdSBarry Smith   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
169d50806bdSBarry Smith   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
170d50806bdSBarry Smith   c = (Mat_SeqAIJ *)((*C)->data);
171d50806bdSBarry Smith   c->freedata = PETSC_TRUE;
172d50806bdSBarry Smith   c->nonew    = 0;
173d50806bdSBarry Smith 
174d50806bdSBarry Smith   ierr = PetscLogEventEnd(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr);
175d50806bdSBarry Smith   PetscFunctionReturn(0);
176d50806bdSBarry Smith }
177d50806bdSBarry Smith 
178d50806bdSBarry Smith /*
179d50806bdSBarry Smith      MatMatMult_SeqAIJ_SeqAIJ_Numeric - Forms the numeric product of two SeqAIJ matrices
180d50806bdSBarry Smith            C=A*B;
181d50806bdSBarry Smith      Note: C must have been created by calling MatMatMult_SeqAIJ_SeqAIJ_Symbolic.
182d50806bdSBarry Smith */
183d50806bdSBarry Smith #undef __FUNCT__
184d50806bdSBarry Smith #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ_Numeric"
185d50806bdSBarry Smith int MatMatMult_SeqAIJ_SeqAIJ_Numeric(Mat A,Mat B,Mat C)
186d50806bdSBarry Smith {
187d50806bdSBarry Smith   Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
188d50806bdSBarry Smith   Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
189d50806bdSBarry Smith   Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data;
190d50806bdSBarry Smith   int        aishift=a->indexshift,bishift=b->indexshift,cishift=c->indexshift;
191d50806bdSBarry Smith   int        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j;
192d50806bdSBarry Smith   int        an=A->N,am=A->M,bn=B->N,bm=B->M,cn=C->N,cm=C->M;
193d50806bdSBarry Smith   int        ierr,i,j,k,anzi,bnzi,cnzi,brow,flops;
194d50806bdSBarry Smith   MatScalar  *aa=a->a,*ba=b->a,*baj,*ca=c->a,*temp;
195d50806bdSBarry Smith 
196d50806bdSBarry Smith   PetscFunctionBegin;
197d50806bdSBarry Smith 
198d50806bdSBarry Smith   /* This error checking should be unnecessary if the symbolic was performed */
199d50806bdSBarry Smith   if (aishift || bishift || cishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported.");
200d50806bdSBarry Smith   if (am!=cm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",am,cm);
201d50806bdSBarry Smith   if (an!=bm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",an,bm);
202d50806bdSBarry Smith   if (bn!=cn) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",bn,cn);
203d50806bdSBarry Smith 
204d50806bdSBarry Smith   if (!logkey_matmatmult_numeric) {
205d50806bdSBarry Smith     ierr = PetscLogEventRegister(&logkey_matmatmult_numeric,"MatMatMult_Numeric",MAT_COOKIE);CHKERRQ(ierr);
206d50806bdSBarry Smith   }
207d50806bdSBarry Smith   ierr = PetscLogEventBegin(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr);
208d50806bdSBarry Smith   flops = 0;
209d50806bdSBarry Smith   /* Allocate temp accumulation space to avoid searching for nonzero columns in C */
210d50806bdSBarry Smith   ierr = PetscMalloc((cn+1)*sizeof(MatScalar),&temp);CHKERRQ(ierr);
211d50806bdSBarry Smith   ierr = PetscMemzero(temp,cn*sizeof(MatScalar));CHKERRQ(ierr);
212d50806bdSBarry Smith   /* Traverse A row-wise. */
213d50806bdSBarry Smith   /* Build the ith row in C by summing over nonzero columns in A, */
214d50806bdSBarry Smith   /* the rows of B corresponding to nonzeros of A. */
215d50806bdSBarry Smith   for (i=0;i<am;i++) {
216d50806bdSBarry Smith     anzi = ai[i+1] - ai[i];
217d50806bdSBarry Smith     for (j=0;j<anzi;j++) {
218d50806bdSBarry Smith       brow = *aj++;
219d50806bdSBarry Smith       bnzi = bi[brow+1] - bi[brow];
220d50806bdSBarry Smith       bjj  = bj + bi[brow];
221d50806bdSBarry Smith       baj  = ba + bi[brow];
222d50806bdSBarry Smith       for (k=0;k<bnzi;k++) {
223d50806bdSBarry Smith         temp[bjj[k]] += (*aa)*baj[k];
224d50806bdSBarry Smith       }
225d50806bdSBarry Smith       flops += 2*bnzi;
226d50806bdSBarry Smith       aa++;
227d50806bdSBarry Smith     }
228d50806bdSBarry Smith     /* Store row back into C, and re-zero temp */
229d50806bdSBarry Smith     cnzi = ci[i+1] - ci[i];
230d50806bdSBarry Smith     for (j=0;j<cnzi;j++) {
231d50806bdSBarry Smith       ca[j] = temp[cj[j]];
232d50806bdSBarry Smith       temp[cj[j]] = 0.0;
233d50806bdSBarry Smith     }
234d50806bdSBarry Smith     ca += cnzi;
235d50806bdSBarry Smith     cj += cnzi;
236d50806bdSBarry Smith   }
237716bacf3SKris Buschelman   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
238716bacf3SKris Buschelman   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
239716bacf3SKris Buschelman 
240d50806bdSBarry Smith   /* Free temp */
241d50806bdSBarry Smith   ierr = PetscFree(temp);CHKERRQ(ierr);
242d50806bdSBarry Smith   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
243d50806bdSBarry Smith   ierr = PetscLogEventEnd(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr);
244d50806bdSBarry Smith   PetscFunctionReturn(0);
245d50806bdSBarry Smith }
246d50806bdSBarry Smith 
247d50806bdSBarry Smith #undef __FUNCT__
248d50806bdSBarry Smith #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ"
249d50806bdSBarry Smith int MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat *C) {
250d50806bdSBarry Smith   int ierr;
251d50806bdSBarry Smith 
252d50806bdSBarry Smith   PetscFunctionBegin;
253*2216b3a4SKris Buschelman   if (!logkey_matmatmult) {
254*2216b3a4SKris Buschelman     ierr = PetscLogEventRegister(&logkey_matmatmult,"MatMatMult",MAT_COOKIE);CHKERRQ(ierr);
255*2216b3a4SKris Buschelman   }
256*2216b3a4SKris Buschelman   ierr = PetscLogEventBegin(logkey_matmatmult,A,B,0,0);CHKERRQ(ierr);
257*2216b3a4SKris Buschelman 
258d50806bdSBarry Smith   ierr = MatMatMult_SeqAIJ_SeqAIJ_Symbolic(A,B,C);CHKERRQ(ierr);
259d50806bdSBarry Smith   ierr = MatMatMult_SeqAIJ_SeqAIJ_Numeric(A,B,*C);CHKERRQ(ierr);
260*2216b3a4SKris Buschelman   ierr = PetscLogEventEnd(logkey_matmatmult,A,B,0,0);CHKERRQ(ierr);
261d50806bdSBarry Smith   PetscFunctionReturn(0);
262d50806bdSBarry Smith }
263d50806bdSBarry Smith 
264d50806bdSBarry Smith #undef __FUNCT__
265d50806bdSBarry Smith #define __FUNCT__ "MatApplyPtAP_SeqAIJ_Symbolic"
266d50806bdSBarry Smith int MatApplyPtAP_SeqAIJ_Symbolic(Mat A,Mat P,Mat *C) {
267d50806bdSBarry Smith   int ierr;
268d50806bdSBarry Smith   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
269d50806bdSBarry Smith   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
270d50806bdSBarry Smith   int            aishift=a->indexshift,pishift=p->indexshift;
271d50806bdSBarry Smith   int            *pti,*ptj,*ptfill,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
272d50806bdSBarry Smith   int            *ci,*cj,*densefill,*sparsefill,*ptadensefill,*ptasparsefill,*ptaj;
273d50806bdSBarry Smith   int            an=A->N,am=A->M,pn=P->N,pm=P->M;
274d50806bdSBarry Smith   int            i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
275d50806bdSBarry Smith   MatScalar      *ca;
276d50806bdSBarry Smith 
277d50806bdSBarry Smith   PetscFunctionBegin;
278d50806bdSBarry Smith 
279d50806bdSBarry Smith   /* some error checking which could be moved into interface layer */
280d50806bdSBarry Smith   if (aishift || pishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported.");
281d50806bdSBarry Smith   if (pm!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pm,an);
282d50806bdSBarry Smith   if (am!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",am, an);
283d50806bdSBarry Smith 
284d50806bdSBarry Smith   if (!logkey_matapplyptap_symbolic) {
285d50806bdSBarry Smith     ierr = PetscLogEventRegister(&logkey_matapplyptap_symbolic,"MatApplyPtAP_Symbolic",MAT_COOKIE);CHKERRQ(ierr);
286d50806bdSBarry Smith   }
287d50806bdSBarry Smith   ierr = PetscLogEventBegin(logkey_matapplyptap_symbolic,A,P,0,0);CHKERRQ(ierr);
288d50806bdSBarry Smith 
289d50806bdSBarry Smith   /* Create ij structure of P^T */
290d50806bdSBarry Smith   /* Recall in P^T there are pn rows and pi[pm] nonzeros. */
291d50806bdSBarry Smith   ierr = PetscMalloc((pn+1+pi[pm])*sizeof(int),&pti);CHKERRQ(ierr);
292d50806bdSBarry Smith   ierr = PetscMemzero(pti,(pn+1+pi[pm])*sizeof(int));CHKERRQ(ierr);
293d50806bdSBarry Smith   ptj = pti + pn+1;
294d50806bdSBarry Smith 
295d50806bdSBarry Smith   /* Walk through pj and count ## of non-zeros in each row of P^T. */
2967d6d1ca6SKris Buschelman   for (i=0;i<pi[pm];i++) {
297d50806bdSBarry Smith     pti[pj[i]+1] += 1;
298d50806bdSBarry Smith   }
299d50806bdSBarry Smith   /* Form pti for csr format of P^T. */
3007d6d1ca6SKris Buschelman   for (i=0;i<pn;i++) {
301d50806bdSBarry Smith     pti[i+1] += pti[i];
302d50806bdSBarry Smith   }
303d50806bdSBarry Smith 
304d50806bdSBarry Smith   /* Allocate temporary space for next insert location in each row of P^T. */
305d50806bdSBarry Smith   ierr = PetscMalloc(pn*sizeof(int),&ptfill);CHKERRQ(ierr);
306d50806bdSBarry Smith   ierr = PetscMemcpy(ptfill,pti,pn*sizeof(int));CHKERRQ(ierr);
307d50806bdSBarry Smith 
308d50806bdSBarry Smith   /* Walk through P row-wise and mark nonzero entries of P^T. */
309d50806bdSBarry Smith   for (i=0;i<pm;i++) {
310d50806bdSBarry Smith     pnzj = pi[i+1] - pi[i];
311d50806bdSBarry Smith     for (j=0;j<pnzj;j++) {
3127d6d1ca6SKris Buschelman       ptj[ptfill[*pj]] =  i;
3137d6d1ca6SKris Buschelman       ptfill[*pj++]      += 1;
314d50806bdSBarry Smith     }
315d50806bdSBarry Smith   }
3167d6d1ca6SKris Buschelman   pj = p->j;
317d50806bdSBarry Smith 
318d50806bdSBarry Smith   /* Clean-up temporary space. */
319d50806bdSBarry Smith   ierr = PetscFree(ptfill);CHKERRQ(ierr);
320d50806bdSBarry Smith 
321d50806bdSBarry Smith   /* Allocate ci array, arrays for fill computation and */
322d50806bdSBarry Smith   /* free space for accumulating nonzero column info */
323d50806bdSBarry Smith   ierr = PetscMalloc(((pn+1)*1)*sizeof(int),&ci);CHKERRQ(ierr);
324d50806bdSBarry Smith   ci[0] = 0;
325d50806bdSBarry Smith 
326d50806bdSBarry Smith   ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadensefill);CHKERRQ(ierr);
327d50806bdSBarry Smith   ierr = PetscMemzero(ptadensefill,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr);
328d50806bdSBarry Smith   ptasparsefill = ptadensefill  + an;
329d50806bdSBarry Smith   densefill     = ptasparsefill + an;
330d50806bdSBarry Smith   sparsefill    = densefill     + pn;
331d50806bdSBarry Smith 
332d50806bdSBarry Smith   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
333d50806bdSBarry Smith   /* Reason: Take pn/pm = 1/2. */
334d50806bdSBarry Smith   /*         P^T*A*P will take A(NxN) and create C(N/2xN/2). */
335d50806bdSBarry Smith   /*         If C has same sparsity pattern as A, nnz(C)~1/2*nnz(A). */
336d50806bdSBarry Smith   /*         Is this reasonable???? */
337716bacf3SKris Buschelman   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
338d50806bdSBarry Smith   current_space = free_space;
339d50806bdSBarry Smith 
340d50806bdSBarry Smith   /* Determine fill for each row of C: */
341d50806bdSBarry Smith   for (i=0;i<pn;i++) {
342d50806bdSBarry Smith     ptnzi  = pti[i+1] - pti[i];
343d50806bdSBarry Smith     ptanzi = 0;
344d50806bdSBarry Smith     /* Determine fill for row of PtA: */
345d50806bdSBarry Smith     for (j=0;j<ptnzi;j++) {
346d50806bdSBarry Smith       arow = *ptj++;
347d50806bdSBarry Smith       anzj = ai[arow+1] - ai[arow];
348d50806bdSBarry Smith       ajj  = aj + ai[arow];
349d50806bdSBarry Smith       for (k=0;k<anzj;k++) {
350d50806bdSBarry Smith         if (!ptadensefill[ajj[k]]) {
351d50806bdSBarry Smith           ptadensefill[ajj[k]]    = -1;
352d50806bdSBarry Smith           ptasparsefill[ptanzi++] = ajj[k];
353d50806bdSBarry Smith         }
354d50806bdSBarry Smith       }
355d50806bdSBarry Smith     }
356d50806bdSBarry Smith     /* Using fill info for row of PtA, determine fill for row of C: */
357d50806bdSBarry Smith     ptaj = ptasparsefill;
358d50806bdSBarry Smith     cnzi   = 0;
359d50806bdSBarry Smith     for (j=0;j<ptanzi;j++) {
360d50806bdSBarry Smith       prow = *ptaj++;
361d50806bdSBarry Smith       pnzj = pi[prow+1] - pi[prow];
362d50806bdSBarry Smith       pjj  = pj + pi[prow];
363d50806bdSBarry Smith       for (k=0;k<pnzj;k++) {
364d50806bdSBarry Smith         if (!densefill[pjj[k]]) {
365d50806bdSBarry Smith           densefill[pjj[k]]  = -1;
366d50806bdSBarry Smith           sparsefill[cnzi++] = pjj[k];
367d50806bdSBarry Smith         }
368d50806bdSBarry Smith       }
369d50806bdSBarry Smith     }
370d50806bdSBarry Smith 
371d50806bdSBarry Smith     /* sort sparsefill */
372d50806bdSBarry Smith     ierr = PetscSortInt(cnzi,sparsefill);CHKERRQ(ierr);
373d50806bdSBarry Smith 
374d50806bdSBarry Smith     /* If free space is not available, make more free space */
375d50806bdSBarry Smith     /* Double the amount of total space in the list */
376d50806bdSBarry Smith     if (current_space->local_remaining<cnzi) {
377d50806bdSBarry Smith       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
378d50806bdSBarry Smith     }
379d50806bdSBarry Smith 
380d50806bdSBarry Smith     /* Copy data into free space, and zero out densefills */
381d50806bdSBarry Smith     ierr = PetscMemcpy(current_space->array,sparsefill,cnzi*sizeof(int));CHKERRQ(ierr);
382d50806bdSBarry Smith     current_space->array           += cnzi;
383d50806bdSBarry Smith     current_space->local_used      += cnzi;
384d50806bdSBarry Smith     current_space->local_remaining -= cnzi;
385d50806bdSBarry Smith 
386d50806bdSBarry Smith     for (j=0;j<ptanzi;j++) {
387d50806bdSBarry Smith       ptadensefill[ptasparsefill[j]] = 0;
388d50806bdSBarry Smith     }
389d50806bdSBarry Smith     for (j=0;j<cnzi;j++) {
390d50806bdSBarry Smith       densefill[sparsefill[j]] = 0;
391d50806bdSBarry Smith     }
392d50806bdSBarry Smith     /* Aside: Perhaps we should save the pta info for the numerical factorization. */
393d50806bdSBarry Smith     /*        For now, we will recompute what is needed. */
394d50806bdSBarry Smith     ci[i+1] = ci[i] + cnzi;
395d50806bdSBarry Smith   }
396d50806bdSBarry Smith   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
397d50806bdSBarry Smith   /* Allocate space for cj, initialize cj, and */
398d50806bdSBarry Smith   /* destroy list of free space and other temporary array(s) */
399d50806bdSBarry Smith   ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr);
400d50806bdSBarry Smith   ierr = MakeSpaceContiguous(cj,&free_space);CHKERRQ(ierr);
401d50806bdSBarry Smith   ierr = PetscFree(ptadensefill);CHKERRQ(ierr);
402d50806bdSBarry Smith 
403d50806bdSBarry Smith   /* Allocate space for ca */
404d50806bdSBarry Smith   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
405d50806bdSBarry Smith   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
406d50806bdSBarry Smith 
407d50806bdSBarry Smith   /* put together the new matrix */
408d50806bdSBarry Smith   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
409d50806bdSBarry Smith 
410d50806bdSBarry Smith   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
411d50806bdSBarry Smith   /* Since these are PETSc arrays, change flags to free them as necessary. */
412d50806bdSBarry Smith   c = (Mat_SeqAIJ *)((*C)->data);
413d50806bdSBarry Smith   c->freedata = PETSC_TRUE;
414d50806bdSBarry Smith   c->nonew    = 0;
415d50806bdSBarry Smith 
416d50806bdSBarry Smith   /* Clean up. */
417d50806bdSBarry Smith   /* Perhaps we should attach the (i,j) info for P^T to P for future use. */
418d50806bdSBarry Smith   /* For now, we won't. */
419d50806bdSBarry Smith   ierr = PetscFree(pti);
420d50806bdSBarry Smith 
421d50806bdSBarry Smith   ierr = PetscLogEventEnd(logkey_matapplyptap_symbolic,A,P,0,0);CHKERRQ(ierr);
422d50806bdSBarry Smith   PetscFunctionReturn(0);
423d50806bdSBarry Smith }
424d50806bdSBarry Smith 
425d50806bdSBarry Smith #undef __FUNCT__
426d50806bdSBarry Smith #define __FUNCT__ "MatApplyPtAP_SeqAIJ_Numeric"
427d50806bdSBarry Smith int MatApplyPtAP_SeqAIJ_Numeric(Mat A,Mat P,Mat C) {
428d50806bdSBarry Smith   int ierr,flops;
429d50806bdSBarry Smith   Mat_SeqAIJ *a  = (Mat_SeqAIJ *) A->data;
430d50806bdSBarry Smith   Mat_SeqAIJ *p  = (Mat_SeqAIJ *) P->data;
431d50806bdSBarry Smith   Mat_SeqAIJ *c  = (Mat_SeqAIJ *) C->data;
432d50806bdSBarry Smith   int        aishift=a->indexshift,pishift=p->indexshift,cishift=c->indexshift;
433716bacf3SKris Buschelman   int        *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
434716bacf3SKris Buschelman   int        *ci=c->i,*cj=c->j,*cjj;
435d50806bdSBarry Smith   int        an=A->N,am=A->M,pn=P->N,pm=P->M,cn=C->N,cm=C->M;
436d50806bdSBarry Smith   int        i,j,k,anzi,pnzi,apnzj,pnzj,cnzj,prow,crow;
437d50806bdSBarry Smith   MatScalar  *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
438d50806bdSBarry Smith 
439d50806bdSBarry Smith   PetscFunctionBegin;
440d50806bdSBarry Smith 
441d50806bdSBarry Smith   /* This error checking should be unnecessary if the symbolic was performed */
442d50806bdSBarry Smith   if (aishift || pishift || cishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported.");
443d50806bdSBarry Smith   if (pn!=cm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pn,cm);
444d50806bdSBarry Smith   if (pm!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pm,an);
445d50806bdSBarry Smith   if (am!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",am, an);
446d50806bdSBarry Smith   if (pn!=cn) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pn, cn);
447d50806bdSBarry Smith 
448d50806bdSBarry Smith   if (!logkey_matapplyptap_numeric) {
449d50806bdSBarry Smith     ierr = PetscLogEventRegister(&logkey_matapplyptap_numeric,"MatApplyPtAP_Numeric",MAT_COOKIE);CHKERRQ(ierr);
450d50806bdSBarry Smith   }
451d50806bdSBarry Smith   ierr = PetscLogEventBegin(logkey_matapplyptap_numeric,A,P,C,0);CHKERRQ(ierr);
452d50806bdSBarry Smith   flops = 0;
453d50806bdSBarry Smith 
454716bacf3SKris Buschelman   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(int)),&apa);CHKERRQ(ierr);
455716bacf3SKris Buschelman   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr);
456d50806bdSBarry Smith   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
457d50806bdSBarry Smith 
458716bacf3SKris Buschelman   apj      = (int *)(apa + cn);
459716bacf3SKris Buschelman   apjdense = apj + cn;
460716bacf3SKris Buschelman 
461d50806bdSBarry Smith   for (i=0;i<am;i++) {
462d50806bdSBarry Smith     /* Form sparse row of A*P */
463d50806bdSBarry Smith     anzi  = ai[i+1] - ai[i];
464d50806bdSBarry Smith     apnzj = 0;
465d50806bdSBarry Smith     for (j=0;j<anzi;j++) {
466d50806bdSBarry Smith       prow = *aj++;
467d50806bdSBarry Smith       pnzj = pi[prow+1] - pi[prow];
468d50806bdSBarry Smith       pjj  = pj + pi[prow];
469d50806bdSBarry Smith       paj  = pa + pi[prow];
470d50806bdSBarry Smith       for (k=0;k<pnzj;k++) {
471716bacf3SKris Buschelman         if (!apjdense[pjj[k]]) {
472716bacf3SKris Buschelman           apjdense[pjj[k]] = -1;
473d50806bdSBarry Smith           apj[apnzj++]     = pjj[k];
474d50806bdSBarry Smith         }
475d50806bdSBarry Smith         apa[pjj[k]] += (*aa)*paj[k];
476d50806bdSBarry Smith       }
477d50806bdSBarry Smith       flops += 2*pnzj;
478d50806bdSBarry Smith       aa++;
479d50806bdSBarry Smith     }
480d50806bdSBarry Smith 
481d50806bdSBarry Smith     /* Sort the j index array for quick sparse axpy. */
482d50806bdSBarry Smith     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
483d50806bdSBarry Smith 
484d50806bdSBarry Smith     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
485d50806bdSBarry Smith     pnzi = pi[i+1] - pi[i];
486d50806bdSBarry Smith     for (j=0;j<pnzi;j++) {
487d50806bdSBarry Smith       int nextap=0;
488d50806bdSBarry Smith       crow = *pJ++;
489d50806bdSBarry Smith       cnzj = ci[crow+1] - ci[crow];
490d50806bdSBarry Smith       cjj  = cj + ci[crow];
491d50806bdSBarry Smith       caj  = ca + ci[crow];
492d50806bdSBarry Smith       /* Perform the sparse axpy operation.  Note cjj includes apj. */
493716bacf3SKris Buschelman       for (k=0;nextap<apnzj;k++) {
494d50806bdSBarry Smith         if (cjj[k]==apj[nextap]) {
495d50806bdSBarry Smith           caj[k] += (*pA)*apa[apj[nextap++]];
496d50806bdSBarry Smith         }
497d50806bdSBarry Smith       }
498d50806bdSBarry Smith       flops += 2*apnzj;
499d50806bdSBarry Smith       pA++;
500d50806bdSBarry Smith     }
501d50806bdSBarry Smith 
502716bacf3SKris Buschelman     /* Zero the current row info for A*P */
503d50806bdSBarry Smith     for (j=0;j<apnzj;j++) {
504d50806bdSBarry Smith       apa[apj[j]]      = 0.;
505716bacf3SKris Buschelman       apjdense[apj[j]] = 0;
506d50806bdSBarry Smith     }
507d50806bdSBarry Smith   }
508*2216b3a4SKris Buschelman 
509*2216b3a4SKris Buschelman   /* Assemble the final matrix and clean up */
510*2216b3a4SKris Buschelman   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
511*2216b3a4SKris Buschelman   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
512d50806bdSBarry Smith   ierr = PetscFree(apa);CHKERRQ(ierr);
513d50806bdSBarry Smith   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
514d50806bdSBarry Smith   ierr = PetscLogEventEnd(logkey_matapplyptap_numeric,A,P,C,0);CHKERRQ(ierr);
515*2216b3a4SKris Buschelman 
516d50806bdSBarry Smith   PetscFunctionReturn(0);
517d50806bdSBarry Smith }
518d50806bdSBarry Smith 
519d50806bdSBarry Smith #undef __FUNCT__
520d50806bdSBarry Smith #define __FUNCT__ "MatApplyPtAP_SeqAIJ"
521d50806bdSBarry Smith int MatApplyPtAP_SeqAIJ(Mat A,Mat P,Mat *C) {
522d50806bdSBarry Smith   int ierr;
523d50806bdSBarry Smith 
524d50806bdSBarry Smith   PetscFunctionBegin;
525716bacf3SKris Buschelman   if (!logkey_matapplyptap) {
526716bacf3SKris Buschelman     ierr = PetscLogEventRegister(&logkey_matapplyptap,"MatApplyPtAP",MAT_COOKIE);CHKERRQ(ierr);
527716bacf3SKris Buschelman   }
528*2216b3a4SKris Buschelman   ierr = PetscLogEventBegin(logkey_matapplyptap,A,P,0,0);CHKERRQ(ierr);
529d50806bdSBarry Smith   ierr = MatApplyPtAP_SeqAIJ_Symbolic(A,P,C);CHKERRQ(ierr);
530d50806bdSBarry Smith   ierr = MatApplyPtAP_SeqAIJ_Numeric(A,P,*C);CHKERRQ(ierr);
531*2216b3a4SKris Buschelman   ierr = PetscLogEventEnd(logkey_matapplyptap,A,P,0,0);CHKERRQ(ierr);
532d50806bdSBarry Smith   PetscFunctionReturn(0);
533d50806bdSBarry Smith }
534