xref: /petsc/src/mat/impls/aij/seq/matmatmult.c (revision 7a3c3d58f0dc30013810dc66cc68344148a68c77)
1be1d678aSKris Buschelman 
2d50806bdSBarry Smith /*
32499ec78SHong Zhang   Defines matrix-matrix product routines for pairs of SeqAIJ matrices
4d50806bdSBarry Smith           C = A * B
5d50806bdSBarry Smith */
6d50806bdSBarry Smith 
7c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/
8c6db04a5SJed Brown #include <../src/mat/utils/freespace.h>
9c6db04a5SJed Brown #include <petscbt.h>
10af0996ceSBarry Smith #include <petsc/private/isimpl.h>
1107475bc1SBarry Smith #include <../src/mat/impls/dense/seq/dense.h>
127bab7c10SHong Zhang 
13df97dc6dSFande Kong PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
14df97dc6dSFande Kong {
15df97dc6dSFande Kong   PetscErrorCode ierr;
16df97dc6dSFande Kong 
17df97dc6dSFande Kong   PetscFunctionBegin;
18df97dc6dSFande Kong   if (C->ops->matmultnumeric) {
19df97dc6dSFande Kong     ierr = (*C->ops->matmultnumeric)(A,B,C);CHKERRQ(ierr);
20df97dc6dSFande Kong   } else {
21df97dc6dSFande Kong     ierr = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted(A,B,C);CHKERRQ(ierr);
22df97dc6dSFande Kong   }
23df97dc6dSFande Kong   PetscFunctionReturn(0);
24df97dc6dSFande Kong }
25df97dc6dSFande Kong 
264222ddf1SHong Zhang /* Modified from MatCreateSeqAIJWithArrays() */
27e4e71118SStefano Zampini PETSC_INTERN PetscErrorCode MatSetSeqAIJWithArrays_private(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],MatType mtype,Mat mat)
28df97dc6dSFande Kong {
29df97dc6dSFande Kong   PetscErrorCode ierr;
304222ddf1SHong Zhang   PetscInt       ii;
314222ddf1SHong Zhang   Mat_SeqAIJ     *aij;
32e4e71118SStefano Zampini   PetscBool      isseqaij;
335c66b693SKris Buschelman 
345c66b693SKris Buschelman   PetscFunctionBegin;
354222ddf1SHong Zhang   if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
364222ddf1SHong Zhang   ierr = MatSetSizes(mat,m,n,m,n);CHKERRQ(ierr);
374222ddf1SHong Zhang 
38e4e71118SStefano Zampini   if (!mtype) {
39e4e71118SStefano Zampini     ierr = PetscObjectBaseTypeCompare((PetscObject)mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
40e4e71118SStefano Zampini     if (!isseqaij) { ierr = MatSetType(mat,MATSEQAIJ);CHKERRQ(ierr); }
41e4e71118SStefano Zampini   } else {
42e4e71118SStefano Zampini     ierr = MatSetType(mat,mtype);CHKERRQ(ierr);
43e4e71118SStefano Zampini   }
444222ddf1SHong Zhang   ierr = MatSeqAIJSetPreallocation_SeqAIJ(mat,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
454222ddf1SHong Zhang   aij  = (Mat_SeqAIJ*)(mat)->data;
464222ddf1SHong Zhang   ierr = PetscMalloc1(m,&aij->imax);CHKERRQ(ierr);
474222ddf1SHong Zhang   ierr = PetscMalloc1(m,&aij->ilen);CHKERRQ(ierr);
484222ddf1SHong Zhang 
494222ddf1SHong Zhang   aij->i            = i;
504222ddf1SHong Zhang   aij->j            = j;
514222ddf1SHong Zhang   aij->a            = a;
524222ddf1SHong Zhang   aij->singlemalloc = PETSC_FALSE;
534222ddf1SHong Zhang   aij->nonew        = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
544222ddf1SHong Zhang   aij->free_a       = PETSC_FALSE;
554222ddf1SHong Zhang   aij->free_ij      = PETSC_FALSE;
564222ddf1SHong Zhang 
574222ddf1SHong Zhang   for (ii=0; ii<m; ii++) {
584222ddf1SHong Zhang     aij->ilen[ii] = aij->imax[ii] = i[ii+1] - i[ii];
5925023028SHong Zhang   }
604222ddf1SHong Zhang 
615c66b693SKris Buschelman   PetscFunctionReturn(0);
625c66b693SKris Buschelman }
631c24bd37SHong Zhang 
644222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat C)
654222ddf1SHong Zhang {
664222ddf1SHong Zhang   PetscErrorCode      ierr;
674222ddf1SHong Zhang   Mat_Product         *product = C->product;
684222ddf1SHong Zhang   MatProductAlgorithm alg;
694222ddf1SHong Zhang   PetscBool           flg;
704222ddf1SHong Zhang 
714222ddf1SHong Zhang   PetscFunctionBegin;
724222ddf1SHong Zhang   if (product) {
734222ddf1SHong Zhang     alg = product->alg;
744222ddf1SHong Zhang   } else {
754222ddf1SHong Zhang     alg = "sorted";
764222ddf1SHong Zhang   }
774222ddf1SHong Zhang   /* sorted */
784222ddf1SHong Zhang   ierr = PetscStrcmp(alg,"sorted",&flg);CHKERRQ(ierr);
794222ddf1SHong Zhang   if (flg) {
804222ddf1SHong Zhang     ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_Sorted(A,B,fill,C);CHKERRQ(ierr);
814222ddf1SHong Zhang     PetscFunctionReturn(0);
824222ddf1SHong Zhang   }
834222ddf1SHong Zhang 
844222ddf1SHong Zhang   /* scalable */
854222ddf1SHong Zhang   ierr = PetscStrcmp(alg,"scalable",&flg);CHKERRQ(ierr);
864222ddf1SHong Zhang   if (flg) {
874222ddf1SHong Zhang     ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(A,B,fill,C);CHKERRQ(ierr);
884222ddf1SHong Zhang     PetscFunctionReturn(0);
894222ddf1SHong Zhang   }
904222ddf1SHong Zhang 
914222ddf1SHong Zhang   /* scalable_fast */
924222ddf1SHong Zhang   ierr = PetscStrcmp(alg,"scalable_fast",&flg);CHKERRQ(ierr);
934222ddf1SHong Zhang   if (flg) {
944222ddf1SHong Zhang     ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_fast(A,B,fill,C);CHKERRQ(ierr);
954222ddf1SHong Zhang     PetscFunctionReturn(0);
964222ddf1SHong Zhang   }
974222ddf1SHong Zhang 
984222ddf1SHong Zhang   /* heap */
994222ddf1SHong Zhang   ierr = PetscStrcmp(alg,"heap",&flg);CHKERRQ(ierr);
1004222ddf1SHong Zhang   if (flg) {
1014222ddf1SHong Zhang     ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_Heap(A,B,fill,C);CHKERRQ(ierr);
1024222ddf1SHong Zhang     PetscFunctionReturn(0);
1034222ddf1SHong Zhang   }
1044222ddf1SHong Zhang 
1054222ddf1SHong Zhang   /* btheap */
1064222ddf1SHong Zhang   ierr = PetscStrcmp(alg,"btheap",&flg);CHKERRQ(ierr);
1074222ddf1SHong Zhang   if (flg) {
1084222ddf1SHong Zhang     ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_BTHeap(A,B,fill,C);CHKERRQ(ierr);
1094222ddf1SHong Zhang     PetscFunctionReturn(0);
1104222ddf1SHong Zhang   }
1114222ddf1SHong Zhang 
1124222ddf1SHong Zhang   /* llcondensed */
1134222ddf1SHong Zhang   ierr = PetscStrcmp(alg,"llcondensed",&flg);CHKERRQ(ierr);
1144222ddf1SHong Zhang   if (flg) {
1154222ddf1SHong Zhang     ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_LLCondensed(A,B,fill,C);CHKERRQ(ierr);
1164222ddf1SHong Zhang     PetscFunctionReturn(0);
1174222ddf1SHong Zhang   }
1184222ddf1SHong Zhang 
1194222ddf1SHong Zhang   /* rowmerge */
1204222ddf1SHong Zhang   ierr = PetscStrcmp(alg,"rowmerge",&flg);CHKERRQ(ierr);
1214222ddf1SHong Zhang   if (flg) {
1224222ddf1SHong Zhang     ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_RowMerge(A,B,fill,C);CHKERRQ(ierr);
1234222ddf1SHong Zhang     PetscFunctionReturn(0);
1244222ddf1SHong Zhang   }
1254222ddf1SHong Zhang 
1264222ddf1SHong Zhang #if defined(PETSC_HAVE_HYPRE)
1274222ddf1SHong Zhang   ierr = PetscStrcmp(alg,"hypre",&flg);CHKERRQ(ierr);
1284222ddf1SHong Zhang   if (flg) {
1294222ddf1SHong Zhang     ierr = MatMatMultSymbolic_AIJ_AIJ_wHYPRE(A,B,fill,C);CHKERRQ(ierr);
1304222ddf1SHong Zhang     PetscFunctionReturn(0);
1314222ddf1SHong Zhang   }
1324222ddf1SHong Zhang #endif
1334222ddf1SHong Zhang 
1344222ddf1SHong Zhang   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product Algorithm is not supported");
1354222ddf1SHong Zhang   PetscFunctionReturn(0);
1364222ddf1SHong Zhang }
1374222ddf1SHong Zhang 
1384222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_LLCondensed(Mat A,Mat B,PetscReal fill,Mat C)
139b561aa9dSHong Zhang {
140b561aa9dSHong Zhang   PetscErrorCode     ierr;
141b561aa9dSHong Zhang   Mat_SeqAIJ         *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
142971236abSHong Zhang   PetscInt           *ai=a->i,*bi=b->i,*ci,*cj;
143c1ba5575SJed Brown   PetscInt           am =A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
144b561aa9dSHong Zhang   PetscReal          afill;
145eca6b86aSHong Zhang   PetscInt           i,j,anzi,brow,bnzj,cnzi,*bj,*aj,*lnk,ndouble=0,Crmax;
146eca6b86aSHong Zhang   PetscTable         ta;
147fb908031SHong Zhang   PetscBT            lnkbt;
1480298fd71SBarry Smith   PetscFreeSpaceList free_space=NULL,current_space=NULL;
149b561aa9dSHong Zhang 
150b561aa9dSHong Zhang   PetscFunctionBegin;
151bd958071SHong Zhang   /* Get ci and cj */
152bd958071SHong Zhang   /*---------------*/
153fb908031SHong Zhang   /* Allocate ci array, arrays for fill computation and */
154fb908031SHong Zhang   /* free space for accumulating nonzero column info */
155854ce69bSBarry Smith   ierr  = PetscMalloc1(am+2,&ci);CHKERRQ(ierr);
156fb908031SHong Zhang   ci[0] = 0;
157fb908031SHong Zhang 
158fb908031SHong Zhang   /* create and initialize a linked list */
159c373ccc6SHong Zhang   ierr = PetscTableCreate(bn,bn,&ta);CHKERRQ(ierr);
160c373ccc6SHong Zhang   MatRowMergeMax_SeqAIJ(b,bm,ta);
161eca6b86aSHong Zhang   ierr = PetscTableGetCount(ta,&Crmax);CHKERRQ(ierr);
162eca6b86aSHong Zhang   ierr = PetscTableDestroy(&ta);CHKERRQ(ierr);
163eca6b86aSHong Zhang 
164eca6b86aSHong Zhang   ierr = PetscLLCondensedCreate(Crmax,bn,&lnk,&lnkbt);CHKERRQ(ierr);
165fb908031SHong Zhang 
166fb908031SHong Zhang   /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
167f91af8c7SBarry Smith   ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space);CHKERRQ(ierr);
1682205254eSKarl Rupp 
169fb908031SHong Zhang   current_space = free_space;
170fb908031SHong Zhang 
171fb908031SHong Zhang   /* Determine ci and cj */
172fb908031SHong Zhang   for (i=0; i<am; i++) {
173fb908031SHong Zhang     anzi = ai[i+1] - ai[i];
174fb908031SHong Zhang     aj   = a->j + ai[i];
175fb908031SHong Zhang     for (j=0; j<anzi; j++) {
176fb908031SHong Zhang       brow = aj[j];
177fb908031SHong Zhang       bnzj = bi[brow+1] - bi[brow];
178fb908031SHong Zhang       bj   = b->j + bi[brow];
179fb908031SHong Zhang       /* add non-zero cols of B into the sorted linked list lnk */
180fb908031SHong Zhang       ierr = PetscLLCondensedAddSorted(bnzj,bj,lnk,lnkbt);CHKERRQ(ierr);
181fb908031SHong Zhang     }
182fb908031SHong Zhang     cnzi = lnk[0];
183fb908031SHong Zhang 
184fb908031SHong Zhang     /* If free space is not available, make more free space */
185fb908031SHong Zhang     /* Double the amount of total space in the list */
186fb908031SHong Zhang     if (current_space->local_remaining<cnzi) {
187f91af8c7SBarry Smith       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
188fb908031SHong Zhang       ndouble++;
189fb908031SHong Zhang     }
190fb908031SHong Zhang 
191fb908031SHong Zhang     /* Copy data into free space, then initialize lnk */
192fb908031SHong Zhang     ierr = PetscLLCondensedClean(bn,cnzi,current_space->array,lnk,lnkbt);CHKERRQ(ierr);
1932205254eSKarl Rupp 
194fb908031SHong Zhang     current_space->array           += cnzi;
195fb908031SHong Zhang     current_space->local_used      += cnzi;
196fb908031SHong Zhang     current_space->local_remaining -= cnzi;
1972205254eSKarl Rupp 
198fb908031SHong Zhang     ci[i+1] = ci[i] + cnzi;
199fb908031SHong Zhang   }
200fb908031SHong Zhang 
201fb908031SHong Zhang   /* Column indices are in the list of free space */
202fb908031SHong Zhang   /* Allocate space for cj, initialize cj, and */
203fb908031SHong Zhang   /* destroy list of free space and other temporary array(s) */
204854ce69bSBarry Smith   ierr = PetscMalloc1(ci[am]+1,&cj);CHKERRQ(ierr);
205fb908031SHong Zhang   ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
206fb908031SHong Zhang   ierr = PetscLLCondensedDestroy(lnk,lnkbt);CHKERRQ(ierr);
207b561aa9dSHong Zhang 
20826be0446SHong Zhang   /* put together the new symbolic matrix */
209e4e71118SStefano Zampini   ierr = MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C);CHKERRQ(ierr);
2104222ddf1SHong Zhang   ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
21158c24d83SHong Zhang 
21258c24d83SHong Zhang   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
21358c24d83SHong Zhang   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
2144222ddf1SHong Zhang   c                         = (Mat_SeqAIJ*)(C->data);
215aa1aec99SHong Zhang   c->free_a                 = PETSC_FALSE;
216e6b907acSBarry Smith   c->free_ij                = PETSC_TRUE;
21758c24d83SHong Zhang   c->nonew                  = 0;
2184222ddf1SHong Zhang 
2194222ddf1SHong Zhang   /* fast, needs non-scalable O(bn) array 'abdense' */
2204222ddf1SHong Zhang   C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted;
2210b7e3e3dSHong Zhang 
2227212da7cSHong Zhang   /* set MatInfo */
2237212da7cSHong Zhang   afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5;
224f2b054eeSHong Zhang   if (afill < 1.0) afill = 1.0;
2257212da7cSHong Zhang   c->maxnz                     = ci[am];
2267212da7cSHong Zhang   c->nz                        = ci[am];
2274222ddf1SHong Zhang   C->info.mallocs           = ndouble;
2284222ddf1SHong Zhang   C->info.fill_ratio_given  = fill;
2294222ddf1SHong Zhang   C->info.fill_ratio_needed = afill;
2307212da7cSHong Zhang 
2317212da7cSHong Zhang #if defined(PETSC_USE_INFO)
2327212da7cSHong Zhang   if (ci[am]) {
2334222ddf1SHong Zhang     ierr = PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);CHKERRQ(ierr);
2344222ddf1SHong Zhang     ierr = PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);CHKERRQ(ierr);
235f2b054eeSHong Zhang   } else {
2364222ddf1SHong Zhang     ierr = PetscInfo(C,"Empty matrix product\n");CHKERRQ(ierr);
237be0fcf8dSHong Zhang   }
238f2b054eeSHong Zhang #endif
23958c24d83SHong Zhang   PetscFunctionReturn(0);
24058c24d83SHong Zhang }
241d50806bdSBarry Smith 
242df97dc6dSFande Kong PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted(Mat A,Mat B,Mat C)
243d50806bdSBarry Smith {
244dfbe8321SBarry Smith   PetscErrorCode ierr;
245d13dce4bSSatish Balay   PetscLogDouble flops=0.0;
246d50806bdSBarry Smith   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)A->data;
247d50806bdSBarry Smith   Mat_SeqAIJ     *b   = (Mat_SeqAIJ*)B->data;
248d50806bdSBarry Smith   Mat_SeqAIJ     *c   = (Mat_SeqAIJ*)C->data;
24938baddfdSBarry Smith   PetscInt       *ai  =a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j;
250c58ca2e3SHong Zhang   PetscInt       am   =A->rmap->n,cm=C->rmap->n;
251519eb980SHong Zhang   PetscInt       i,j,k,anzi,bnzi,cnzi,brow;
252aa1aec99SHong Zhang   PetscScalar    *aa=a->a,*ba=b->a,*baj,*ca,valtmp;
253aa1aec99SHong Zhang   PetscScalar    *ab_dense;
254d50806bdSBarry Smith 
255d50806bdSBarry Smith   PetscFunctionBegin;
256aa1aec99SHong Zhang   if (!c->a) { /* first call of MatMatMultNumeric_SeqAIJ_SeqAIJ, allocate ca and matmult_abdense */
257854ce69bSBarry Smith     ierr      = PetscMalloc1(ci[cm]+1,&ca);CHKERRQ(ierr);
258aa1aec99SHong Zhang     c->a      = ca;
259aa1aec99SHong Zhang     c->free_a = PETSC_TRUE;
260aa1aec99SHong Zhang   } else {
261aa1aec99SHong Zhang     ca        = c->a;
262d90d86d0SMatthew G. Knepley   }
263d90d86d0SMatthew G. Knepley   if (!c->matmult_abdense) {
2641795a4d1SJed Brown     ierr = PetscCalloc1(B->cmap->N,&ab_dense);CHKERRQ(ierr);
265d90d86d0SMatthew G. Knepley     c->matmult_abdense = ab_dense;
266d90d86d0SMatthew G. Knepley   } else {
267aa1aec99SHong Zhang     ab_dense = c->matmult_abdense;
268aa1aec99SHong Zhang   }
269aa1aec99SHong Zhang 
270c124e916SHong Zhang   /* clean old values in C */
271580bdb30SBarry Smith   ierr = PetscArrayzero(ca,ci[cm]);CHKERRQ(ierr);
272d50806bdSBarry Smith   /* Traverse A row-wise. */
273d50806bdSBarry Smith   /* Build the ith row in C by summing over nonzero columns in A, */
274d50806bdSBarry Smith   /* the rows of B corresponding to nonzeros of A. */
275d50806bdSBarry Smith   for (i=0; i<am; i++) {
276d50806bdSBarry Smith     anzi = ai[i+1] - ai[i];
277d50806bdSBarry Smith     for (j=0; j<anzi; j++) {
278519eb980SHong Zhang       brow = aj[j];
279d50806bdSBarry Smith       bnzi = bi[brow+1] - bi[brow];
280d50806bdSBarry Smith       bjj  = bj + bi[brow];
281d50806bdSBarry Smith       baj  = ba + bi[brow];
282519eb980SHong Zhang       /* perform dense axpy */
28336ec6d2dSHong Zhang       valtmp = aa[j];
284519eb980SHong Zhang       for (k=0; k<bnzi; k++) {
28536ec6d2dSHong Zhang         ab_dense[bjj[k]] += valtmp*baj[k];
286519eb980SHong Zhang       }
287519eb980SHong Zhang       flops += 2*bnzi;
288519eb980SHong Zhang     }
289c58ca2e3SHong Zhang     aj += anzi; aa += anzi;
290c58ca2e3SHong Zhang 
291c58ca2e3SHong Zhang     cnzi = ci[i+1] - ci[i];
292519eb980SHong Zhang     for (k=0; k<cnzi; k++) {
293519eb980SHong Zhang       ca[k]          += ab_dense[cj[k]];
294519eb980SHong Zhang       ab_dense[cj[k]] = 0.0; /* zero ab_dense */
295519eb980SHong Zhang     }
296519eb980SHong Zhang     flops += cnzi;
297519eb980SHong Zhang     cj    += cnzi; ca += cnzi;
298519eb980SHong Zhang   }
299c58ca2e3SHong Zhang   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
300c58ca2e3SHong Zhang   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
301c58ca2e3SHong Zhang   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
302c58ca2e3SHong Zhang   PetscFunctionReturn(0);
303c58ca2e3SHong Zhang }
304c58ca2e3SHong Zhang 
30525023028SHong Zhang PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,Mat C)
306c58ca2e3SHong Zhang {
307c58ca2e3SHong Zhang   PetscErrorCode ierr;
308c58ca2e3SHong Zhang   PetscLogDouble flops=0.0;
309c58ca2e3SHong Zhang   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)A->data;
310c58ca2e3SHong Zhang   Mat_SeqAIJ     *b   = (Mat_SeqAIJ*)B->data;
311c58ca2e3SHong Zhang   Mat_SeqAIJ     *c   = (Mat_SeqAIJ*)C->data;
312c58ca2e3SHong Zhang   PetscInt       *ai  = a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j;
313c58ca2e3SHong Zhang   PetscInt       am   = A->rmap->N,cm=C->rmap->N;
314c58ca2e3SHong Zhang   PetscInt       i,j,k,anzi,bnzi,cnzi,brow;
31536ec6d2dSHong Zhang   PetscScalar    *aa=a->a,*ba=b->a,*baj,*ca=c->a,valtmp;
316c58ca2e3SHong Zhang   PetscInt       nextb;
317c58ca2e3SHong Zhang 
318c58ca2e3SHong Zhang   PetscFunctionBegin;
319cf742fccSHong Zhang   if (!ca) { /* first call of MatMatMultNumeric_SeqAIJ_SeqAIJ, allocate ca and matmult_abdense */
320cf742fccSHong Zhang     ierr      = PetscMalloc1(ci[cm]+1,&ca);CHKERRQ(ierr);
321cf742fccSHong Zhang     c->a      = ca;
322cf742fccSHong Zhang     c->free_a = PETSC_TRUE;
323cf742fccSHong Zhang   }
324cf742fccSHong Zhang 
325c58ca2e3SHong Zhang   /* clean old values in C */
326580bdb30SBarry Smith   ierr = PetscArrayzero(ca,ci[cm]);CHKERRQ(ierr);
327c58ca2e3SHong Zhang   /* Traverse A row-wise. */
328c58ca2e3SHong Zhang   /* Build the ith row in C by summing over nonzero columns in A, */
329c58ca2e3SHong Zhang   /* the rows of B corresponding to nonzeros of A. */
330519eb980SHong Zhang   for (i=0; i<am; i++) {
331519eb980SHong Zhang     anzi = ai[i+1] - ai[i];
332519eb980SHong Zhang     cnzi = ci[i+1] - ci[i];
333519eb980SHong Zhang     for (j=0; j<anzi; j++) {
334519eb980SHong Zhang       brow = aj[j];
335519eb980SHong Zhang       bnzi = bi[brow+1] - bi[brow];
336519eb980SHong Zhang       bjj  = bj + bi[brow];
337519eb980SHong Zhang       baj  = ba + bi[brow];
338519eb980SHong Zhang       /* perform sparse axpy */
33936ec6d2dSHong Zhang       valtmp = aa[j];
340c124e916SHong Zhang       nextb  = 0;
341c124e916SHong Zhang       for (k=0; nextb<bnzi; k++) {
342c124e916SHong Zhang         if (cj[k] == bjj[nextb]) { /* ccol == bcol */
34336ec6d2dSHong Zhang           ca[k] += valtmp*baj[nextb++];
344c124e916SHong Zhang         }
345d50806bdSBarry Smith       }
346d50806bdSBarry Smith       flops += 2*bnzi;
347d50806bdSBarry Smith     }
348519eb980SHong Zhang     aj += anzi; aa += anzi;
349519eb980SHong Zhang     cj += cnzi; ca += cnzi;
350d50806bdSBarry Smith   }
351c58ca2e3SHong Zhang 
352716bacf3SKris Buschelman   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
353716bacf3SKris Buschelman   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
354d50806bdSBarry Smith   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
355d50806bdSBarry Smith   PetscFunctionReturn(0);
356d50806bdSBarry Smith }
357bc011b1eSHong Zhang 
3584222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_fast(Mat A,Mat B,PetscReal fill,Mat C)
35925296bd5SBarry Smith {
36025296bd5SBarry Smith   PetscErrorCode     ierr;
36125296bd5SBarry Smith   Mat_SeqAIJ         *a  = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
36225296bd5SBarry Smith   PetscInt           *ai = a->i,*bi=b->i,*ci,*cj;
3633c50cad2SHong Zhang   PetscInt           am  = A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
36425296bd5SBarry Smith   MatScalar          *ca;
36525296bd5SBarry Smith   PetscReal          afill;
366eca6b86aSHong Zhang   PetscInt           i,j,anzi,brow,bnzj,cnzi,*bj,*aj,*lnk,ndouble=0,Crmax;
367eca6b86aSHong Zhang   PetscTable         ta;
3680298fd71SBarry Smith   PetscFreeSpaceList free_space=NULL,current_space=NULL;
36925296bd5SBarry Smith 
37025296bd5SBarry Smith   PetscFunctionBegin;
3713c50cad2SHong Zhang   /* Get ci and cj - same as MatMatMultSymbolic_SeqAIJ_SeqAIJ except using PetscLLxxx_fast() */
3723c50cad2SHong Zhang   /*-----------------------------------------------------------------------------------------*/
3733c50cad2SHong Zhang   /* Allocate arrays for fill computation and free space for accumulating nonzero column */
374854ce69bSBarry Smith   ierr  = PetscMalloc1(am+2,&ci);CHKERRQ(ierr);
3753c50cad2SHong Zhang   ci[0] = 0;
3763c50cad2SHong Zhang 
3773c50cad2SHong Zhang   /* create and initialize a linked list */
378c373ccc6SHong Zhang   ierr = PetscTableCreate(bn,bn,&ta);CHKERRQ(ierr);
379c373ccc6SHong Zhang   MatRowMergeMax_SeqAIJ(b,bm,ta);
380eca6b86aSHong Zhang   ierr = PetscTableGetCount(ta,&Crmax);CHKERRQ(ierr);
381eca6b86aSHong Zhang   ierr = PetscTableDestroy(&ta);CHKERRQ(ierr);
382eca6b86aSHong Zhang 
383eca6b86aSHong Zhang   ierr = PetscLLCondensedCreate_fast(Crmax,&lnk);CHKERRQ(ierr);
3843c50cad2SHong Zhang 
3853c50cad2SHong Zhang   /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
386f91af8c7SBarry Smith   ierr          = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space);CHKERRQ(ierr);
3873c50cad2SHong Zhang   current_space = free_space;
3883c50cad2SHong Zhang 
3893c50cad2SHong Zhang   /* Determine ci and cj */
3903c50cad2SHong Zhang   for (i=0; i<am; i++) {
3913c50cad2SHong Zhang     anzi = ai[i+1] - ai[i];
3923c50cad2SHong Zhang     aj   = a->j + ai[i];
3933c50cad2SHong Zhang     for (j=0; j<anzi; j++) {
3943c50cad2SHong Zhang       brow = aj[j];
3953c50cad2SHong Zhang       bnzj = bi[brow+1] - bi[brow];
3963c50cad2SHong Zhang       bj   = b->j + bi[brow];
3973c50cad2SHong Zhang       /* add non-zero cols of B into the sorted linked list lnk */
3983c50cad2SHong Zhang       ierr = PetscLLCondensedAddSorted_fast(bnzj,bj,lnk);CHKERRQ(ierr);
3993c50cad2SHong Zhang     }
4003c50cad2SHong Zhang     cnzi = lnk[1];
4013c50cad2SHong Zhang 
4023c50cad2SHong Zhang     /* If free space is not available, make more free space */
4033c50cad2SHong Zhang     /* Double the amount of total space in the list */
4043c50cad2SHong Zhang     if (current_space->local_remaining<cnzi) {
405f91af8c7SBarry Smith       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4063c50cad2SHong Zhang       ndouble++;
4073c50cad2SHong Zhang     }
4083c50cad2SHong Zhang 
4093c50cad2SHong Zhang     /* Copy data into free space, then initialize lnk */
4103c50cad2SHong Zhang     ierr = PetscLLCondensedClean_fast(cnzi,current_space->array,lnk);CHKERRQ(ierr);
4112205254eSKarl Rupp 
4123c50cad2SHong Zhang     current_space->array           += cnzi;
4133c50cad2SHong Zhang     current_space->local_used      += cnzi;
4143c50cad2SHong Zhang     current_space->local_remaining -= cnzi;
4152205254eSKarl Rupp 
4163c50cad2SHong Zhang     ci[i+1] = ci[i] + cnzi;
4173c50cad2SHong Zhang   }
4183c50cad2SHong Zhang 
4193c50cad2SHong Zhang   /* Column indices are in the list of free space */
4203c50cad2SHong Zhang   /* Allocate space for cj, initialize cj, and */
4213c50cad2SHong Zhang   /* destroy list of free space and other temporary array(s) */
422854ce69bSBarry Smith   ierr = PetscMalloc1(ci[am]+1,&cj);CHKERRQ(ierr);
4233c50cad2SHong Zhang   ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
4243c50cad2SHong Zhang   ierr = PetscLLCondensedDestroy_fast(lnk);CHKERRQ(ierr);
42525296bd5SBarry Smith 
42625296bd5SBarry Smith   /* Allocate space for ca */
427580bdb30SBarry Smith   ierr = PetscCalloc1(ci[am]+1,&ca);CHKERRQ(ierr);
42825296bd5SBarry Smith 
42925296bd5SBarry Smith   /* put together the new symbolic matrix */
430e4e71118SStefano Zampini   ierr = MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,ca,((PetscObject)A)->type_name,C);CHKERRQ(ierr);
4314222ddf1SHong Zhang   ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
43225296bd5SBarry Smith 
43325296bd5SBarry Smith   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
43425296bd5SBarry Smith   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
4354222ddf1SHong Zhang   c          = (Mat_SeqAIJ*)(C->data);
43625296bd5SBarry Smith   c->free_a  = PETSC_TRUE;
43725296bd5SBarry Smith   c->free_ij = PETSC_TRUE;
43825296bd5SBarry Smith   c->nonew   = 0;
4392205254eSKarl Rupp 
4404222ddf1SHong Zhang   /* slower, less memory */
4414222ddf1SHong Zhang   C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable;
44225296bd5SBarry Smith 
44325296bd5SBarry Smith   /* set MatInfo */
44425296bd5SBarry Smith   afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5;
44525296bd5SBarry Smith   if (afill < 1.0) afill = 1.0;
44625296bd5SBarry Smith   c->maxnz                     = ci[am];
44725296bd5SBarry Smith   c->nz                        = ci[am];
4484222ddf1SHong Zhang   C->info.mallocs           = ndouble;
4494222ddf1SHong Zhang   C->info.fill_ratio_given  = fill;
4504222ddf1SHong Zhang   C->info.fill_ratio_needed = afill;
45125296bd5SBarry Smith 
45225296bd5SBarry Smith #if defined(PETSC_USE_INFO)
45325296bd5SBarry Smith   if (ci[am]) {
4544222ddf1SHong Zhang     ierr = PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);CHKERRQ(ierr);
4554222ddf1SHong Zhang     ierr = PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);CHKERRQ(ierr);
45625296bd5SBarry Smith   } else {
4574222ddf1SHong Zhang     ierr = PetscInfo(C,"Empty matrix product\n");CHKERRQ(ierr);
45825296bd5SBarry Smith   }
45925296bd5SBarry Smith #endif
46025296bd5SBarry Smith   PetscFunctionReturn(0);
46125296bd5SBarry Smith }
46225296bd5SBarry Smith 
4634222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,PetscReal fill,Mat C)
464e9e4536cSHong Zhang {
465e9e4536cSHong Zhang   PetscErrorCode     ierr;
466e9e4536cSHong Zhang   Mat_SeqAIJ         *a  = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
467bf9555e6SHong Zhang   PetscInt           *ai = a->i,*bi=b->i,*ci,*cj;
46825c41797SHong Zhang   PetscInt           am  = A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
469e9e4536cSHong Zhang   MatScalar          *ca;
470e9e4536cSHong Zhang   PetscReal          afill;
471eca6b86aSHong Zhang   PetscInt           i,j,anzi,brow,bnzj,cnzi,*bj,*aj,*lnk,ndouble=0,Crmax;
472eca6b86aSHong Zhang   PetscTable         ta;
4730298fd71SBarry Smith   PetscFreeSpaceList free_space=NULL,current_space=NULL;
474e9e4536cSHong Zhang 
475e9e4536cSHong Zhang   PetscFunctionBegin;
476bd958071SHong Zhang   /* Get ci and cj - same as MatMatMultSymbolic_SeqAIJ_SeqAIJ except using PetscLLxxx_Scalalbe() */
477bd958071SHong Zhang   /*---------------------------------------------------------------------------------------------*/
478bd958071SHong Zhang   /* Allocate arrays for fill computation and free space for accumulating nonzero column */
479854ce69bSBarry Smith   ierr  = PetscMalloc1(am+2,&ci);CHKERRQ(ierr);
480bd958071SHong Zhang   ci[0] = 0;
481bd958071SHong Zhang 
482bd958071SHong Zhang   /* create and initialize a linked list */
483c373ccc6SHong Zhang   ierr = PetscTableCreate(bn,bn,&ta);CHKERRQ(ierr);
484c373ccc6SHong Zhang   MatRowMergeMax_SeqAIJ(b,bm,ta);
485eca6b86aSHong Zhang   ierr = PetscTableGetCount(ta,&Crmax);CHKERRQ(ierr);
486eca6b86aSHong Zhang   ierr = PetscTableDestroy(&ta);CHKERRQ(ierr);
487eca6b86aSHong Zhang   ierr = PetscLLCondensedCreate_Scalable(Crmax,&lnk);CHKERRQ(ierr);
488bd958071SHong Zhang 
489bd958071SHong Zhang   /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
490f91af8c7SBarry Smith   ierr          = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space);CHKERRQ(ierr);
491bd958071SHong Zhang   current_space = free_space;
492bd958071SHong Zhang 
493bd958071SHong Zhang   /* Determine ci and cj */
494bd958071SHong Zhang   for (i=0; i<am; i++) {
495bd958071SHong Zhang     anzi = ai[i+1] - ai[i];
496bd958071SHong Zhang     aj   = a->j + ai[i];
497bd958071SHong Zhang     for (j=0; j<anzi; j++) {
498bd958071SHong Zhang       brow = aj[j];
499bd958071SHong Zhang       bnzj = bi[brow+1] - bi[brow];
500bd958071SHong Zhang       bj   = b->j + bi[brow];
501bd958071SHong Zhang       /* add non-zero cols of B into the sorted linked list lnk */
502bd958071SHong Zhang       ierr = PetscLLCondensedAddSorted_Scalable(bnzj,bj,lnk);CHKERRQ(ierr);
503bd958071SHong Zhang     }
504bd958071SHong Zhang     cnzi = lnk[0];
505bd958071SHong Zhang 
506bd958071SHong Zhang     /* If free space is not available, make more free space */
507bd958071SHong Zhang     /* Double the amount of total space in the list */
508bd958071SHong Zhang     if (current_space->local_remaining<cnzi) {
509f91af8c7SBarry Smith       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
510bd958071SHong Zhang       ndouble++;
511bd958071SHong Zhang     }
512bd958071SHong Zhang 
513bd958071SHong Zhang     /* Copy data into free space, then initialize lnk */
514bd958071SHong Zhang     ierr = PetscLLCondensedClean_Scalable(cnzi,current_space->array,lnk);CHKERRQ(ierr);
5152205254eSKarl Rupp 
516bd958071SHong Zhang     current_space->array           += cnzi;
517bd958071SHong Zhang     current_space->local_used      += cnzi;
518bd958071SHong Zhang     current_space->local_remaining -= cnzi;
5192205254eSKarl Rupp 
520bd958071SHong Zhang     ci[i+1] = ci[i] + cnzi;
521bd958071SHong Zhang   }
522bd958071SHong Zhang 
523bd958071SHong Zhang   /* Column indices are in the list of free space */
524bd958071SHong Zhang   /* Allocate space for cj, initialize cj, and */
525bd958071SHong Zhang   /* destroy list of free space and other temporary array(s) */
526854ce69bSBarry Smith   ierr = PetscMalloc1(ci[am]+1,&cj);CHKERRQ(ierr);
527bd958071SHong Zhang   ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
528bd958071SHong Zhang   ierr = PetscLLCondensedDestroy_Scalable(lnk);CHKERRQ(ierr);
529e9e4536cSHong Zhang 
530e9e4536cSHong Zhang   /* Allocate space for ca */
531bd958071SHong Zhang   /*-----------------------*/
532580bdb30SBarry Smith   ierr = PetscCalloc1(ci[am]+1,&ca);CHKERRQ(ierr);
533e9e4536cSHong Zhang 
534e9e4536cSHong Zhang   /* put together the new symbolic matrix */
535e4e71118SStefano Zampini   ierr = MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,ca,((PetscObject)A)->type_name,C);CHKERRQ(ierr);
5364222ddf1SHong Zhang   ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
537e9e4536cSHong Zhang 
538e9e4536cSHong Zhang   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
539e9e4536cSHong Zhang   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
5404222ddf1SHong Zhang   c          = (Mat_SeqAIJ*)(C->data);
541e9e4536cSHong Zhang   c->free_a  = PETSC_TRUE;
542e9e4536cSHong Zhang   c->free_ij = PETSC_TRUE;
543e9e4536cSHong Zhang   c->nonew   = 0;
5442205254eSKarl Rupp 
5454222ddf1SHong Zhang   /* slower, less memory */
5464222ddf1SHong Zhang   C->ops->matmultnumeric    = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable;
547e9e4536cSHong Zhang 
548e9e4536cSHong Zhang   /* set MatInfo */
549e9e4536cSHong Zhang   afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5;
550e9e4536cSHong Zhang   if (afill < 1.0) afill = 1.0;
551e9e4536cSHong Zhang   c->maxnz                     = ci[am];
552e9e4536cSHong Zhang   c->nz                        = ci[am];
5534222ddf1SHong Zhang   C->info.mallocs           = ndouble;
5544222ddf1SHong Zhang   C->info.fill_ratio_given  = fill;
5554222ddf1SHong Zhang   C->info.fill_ratio_needed = afill;
556e9e4536cSHong Zhang 
557e9e4536cSHong Zhang #if defined(PETSC_USE_INFO)
558e9e4536cSHong Zhang   if (ci[am]) {
5594222ddf1SHong Zhang     ierr = PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);CHKERRQ(ierr);
5604222ddf1SHong Zhang     ierr = PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);CHKERRQ(ierr);
561e9e4536cSHong Zhang   } else {
5624222ddf1SHong Zhang     ierr = PetscInfo(C,"Empty matrix product\n");CHKERRQ(ierr);
563e9e4536cSHong Zhang   }
564e9e4536cSHong Zhang #endif
565e9e4536cSHong Zhang   PetscFunctionReturn(0);
566e9e4536cSHong Zhang }
567e9e4536cSHong Zhang 
5684222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Heap(Mat A,Mat B,PetscReal fill,Mat C)
5690ced3a2bSJed Brown {
5700ced3a2bSJed Brown   PetscErrorCode     ierr;
5710ced3a2bSJed Brown   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
5720ced3a2bSJed Brown   const PetscInt     *ai=a->i,*bi=b->i,*aj=a->j,*bj=b->j;
5730ced3a2bSJed Brown   PetscInt           *ci,*cj,*bb;
5740ced3a2bSJed Brown   PetscInt           am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
5750ced3a2bSJed Brown   PetscReal          afill;
5760ced3a2bSJed Brown   PetscInt           i,j,col,ndouble = 0;
5770298fd71SBarry Smith   PetscFreeSpaceList free_space=NULL,current_space=NULL;
5780ced3a2bSJed Brown   PetscHeap          h;
5790ced3a2bSJed Brown 
5800ced3a2bSJed Brown   PetscFunctionBegin;
581cd093f1dSJed Brown   /* Get ci and cj - by merging sorted rows using a heap */
5820ced3a2bSJed Brown   /*---------------------------------------------------------------------------------------------*/
5830ced3a2bSJed Brown   /* Allocate arrays for fill computation and free space for accumulating nonzero column */
584854ce69bSBarry Smith   ierr  = PetscMalloc1(am+2,&ci);CHKERRQ(ierr);
5850ced3a2bSJed Brown   ci[0] = 0;
5860ced3a2bSJed Brown 
5870ced3a2bSJed Brown   /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
588f91af8c7SBarry Smith   ierr          = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space);CHKERRQ(ierr);
5890ced3a2bSJed Brown   current_space = free_space;
5900ced3a2bSJed Brown 
5910ced3a2bSJed Brown   ierr = PetscHeapCreate(a->rmax,&h);CHKERRQ(ierr);
592785e854fSJed Brown   ierr = PetscMalloc1(a->rmax,&bb);CHKERRQ(ierr);
5930ced3a2bSJed Brown 
5940ced3a2bSJed Brown   /* Determine ci and cj */
5950ced3a2bSJed Brown   for (i=0; i<am; i++) {
5960ced3a2bSJed Brown     const PetscInt anzi  = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */
5970ced3a2bSJed Brown     const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */
5980ced3a2bSJed Brown     ci[i+1] = ci[i];
5990ced3a2bSJed Brown     /* Populate the min heap */
6000ced3a2bSJed Brown     for (j=0; j<anzi; j++) {
6010ced3a2bSJed Brown       bb[j] = bi[acol[j]];         /* bb points at the start of the row */
6020ced3a2bSJed Brown       if (bb[j] < bi[acol[j]+1]) { /* Add if row is nonempty */
6030ced3a2bSJed Brown         ierr = PetscHeapAdd(h,j,bj[bb[j]++]);CHKERRQ(ierr);
6040ced3a2bSJed Brown       }
6050ced3a2bSJed Brown     }
6060ced3a2bSJed Brown     /* Pick off the min element, adding it to free space */
6070ced3a2bSJed Brown     ierr = PetscHeapPop(h,&j,&col);CHKERRQ(ierr);
6080ced3a2bSJed Brown     while (j >= 0) {
6090ced3a2bSJed Brown       if (current_space->local_remaining < 1) { /* double the size, but don't exceed 16 MiB */
610f91af8c7SBarry Smith         ierr = PetscFreeSpaceGet(PetscMin(PetscIntMultTruncate(2,current_space->total_array_size),16 << 20),&current_space);CHKERRQ(ierr);
6110ced3a2bSJed Brown         ndouble++;
6120ced3a2bSJed Brown       }
6130ced3a2bSJed Brown       *(current_space->array++) = col;
6140ced3a2bSJed Brown       current_space->local_used++;
6150ced3a2bSJed Brown       current_space->local_remaining--;
6160ced3a2bSJed Brown       ci[i+1]++;
6170ced3a2bSJed Brown 
6180ced3a2bSJed Brown       /* stash if anything else remains in this row of B */
6190ced3a2bSJed Brown       if (bb[j] < bi[acol[j]+1]) {ierr = PetscHeapStash(h,j,bj[bb[j]++]);CHKERRQ(ierr);}
6200ced3a2bSJed Brown       while (1) {               /* pop and stash any other rows of B that also had an entry in this column */
6210ced3a2bSJed Brown         PetscInt j2,col2;
6220ced3a2bSJed Brown         ierr = PetscHeapPeek(h,&j2,&col2);CHKERRQ(ierr);
6230ced3a2bSJed Brown         if (col2 != col) break;
6240ced3a2bSJed Brown         ierr = PetscHeapPop(h,&j2,&col2);CHKERRQ(ierr);
6250ced3a2bSJed Brown         if (bb[j2] < bi[acol[j2]+1]) {ierr = PetscHeapStash(h,j2,bj[bb[j2]++]);CHKERRQ(ierr);}
6260ced3a2bSJed Brown       }
6270ced3a2bSJed Brown       /* Put any stashed elements back into the min heap */
6280ced3a2bSJed Brown       ierr = PetscHeapUnstash(h);CHKERRQ(ierr);
6290ced3a2bSJed Brown       ierr = PetscHeapPop(h,&j,&col);CHKERRQ(ierr);
6300ced3a2bSJed Brown     }
6310ced3a2bSJed Brown   }
6320ced3a2bSJed Brown   ierr = PetscFree(bb);CHKERRQ(ierr);
6330ced3a2bSJed Brown   ierr = PetscHeapDestroy(&h);CHKERRQ(ierr);
6340ced3a2bSJed Brown 
6350ced3a2bSJed Brown   /* Column indices are in the list of free space */
6360ced3a2bSJed Brown   /* Allocate space for cj, initialize cj, and */
6370ced3a2bSJed Brown   /* destroy list of free space and other temporary array(s) */
638785e854fSJed Brown   ierr = PetscMalloc1(ci[am],&cj);CHKERRQ(ierr);
6390ced3a2bSJed Brown   ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
6400ced3a2bSJed Brown 
6410ced3a2bSJed Brown   /* put together the new symbolic matrix */
642e4e71118SStefano Zampini   ierr = MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C);CHKERRQ(ierr);
6434222ddf1SHong Zhang   ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
6440ced3a2bSJed Brown 
6450ced3a2bSJed Brown   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
6460ced3a2bSJed Brown   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
6474222ddf1SHong Zhang   c          = (Mat_SeqAIJ*)(C->data);
6480ced3a2bSJed Brown   c->free_a  = PETSC_TRUE;
6490ced3a2bSJed Brown   c->free_ij = PETSC_TRUE;
6500ced3a2bSJed Brown   c->nonew   = 0;
65126fbe8dcSKarl Rupp 
6524222ddf1SHong Zhang   C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted;
6530ced3a2bSJed Brown 
6540ced3a2bSJed Brown   /* set MatInfo */
6550ced3a2bSJed Brown   afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5;
6560ced3a2bSJed Brown   if (afill < 1.0) afill = 1.0;
6570ced3a2bSJed Brown   c->maxnz                     = ci[am];
6580ced3a2bSJed Brown   c->nz                        = ci[am];
6594222ddf1SHong Zhang   C->info.mallocs           = ndouble;
6604222ddf1SHong Zhang   C->info.fill_ratio_given  = fill;
6614222ddf1SHong Zhang   C->info.fill_ratio_needed = afill;
6620ced3a2bSJed Brown 
6630ced3a2bSJed Brown #if defined(PETSC_USE_INFO)
6640ced3a2bSJed Brown   if (ci[am]) {
6654222ddf1SHong Zhang     ierr = PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);CHKERRQ(ierr);
6664222ddf1SHong Zhang     ierr = PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);CHKERRQ(ierr);
6670ced3a2bSJed Brown   } else {
6684222ddf1SHong Zhang     ierr = PetscInfo(C,"Empty matrix product\n");CHKERRQ(ierr);
6690ced3a2bSJed Brown   }
6700ced3a2bSJed Brown #endif
6710ced3a2bSJed Brown   PetscFunctionReturn(0);
6720ced3a2bSJed Brown }
673e9e4536cSHong Zhang 
6744222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_BTHeap(Mat A,Mat B,PetscReal fill,Mat C)
6758a07c6f1SJed Brown {
6768a07c6f1SJed Brown   PetscErrorCode     ierr;
6778a07c6f1SJed Brown   Mat_SeqAIJ         *a  = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
6788a07c6f1SJed Brown   const PetscInt     *ai = a->i,*bi=b->i,*aj=a->j,*bj=b->j;
6798a07c6f1SJed Brown   PetscInt           *ci,*cj,*bb;
6808a07c6f1SJed Brown   PetscInt           am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
6818a07c6f1SJed Brown   PetscReal          afill;
6828a07c6f1SJed Brown   PetscInt           i,j,col,ndouble = 0;
6830298fd71SBarry Smith   PetscFreeSpaceList free_space=NULL,current_space=NULL;
6848a07c6f1SJed Brown   PetscHeap          h;
6858a07c6f1SJed Brown   PetscBT            bt;
6868a07c6f1SJed Brown 
6878a07c6f1SJed Brown   PetscFunctionBegin;
688cd093f1dSJed Brown   /* Get ci and cj - using a heap for the sorted rows, but use BT so that each index is only added once */
6898a07c6f1SJed Brown   /*---------------------------------------------------------------------------------------------*/
6908a07c6f1SJed Brown   /* Allocate arrays for fill computation and free space for accumulating nonzero column */
691854ce69bSBarry Smith   ierr  = PetscMalloc1(am+2,&ci);CHKERRQ(ierr);
6928a07c6f1SJed Brown   ci[0] = 0;
6938a07c6f1SJed Brown 
6948a07c6f1SJed Brown   /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
695f91af8c7SBarry Smith   ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space);CHKERRQ(ierr);
6962205254eSKarl Rupp 
6978a07c6f1SJed Brown   current_space = free_space;
6988a07c6f1SJed Brown 
6998a07c6f1SJed Brown   ierr = PetscHeapCreate(a->rmax,&h);CHKERRQ(ierr);
700785e854fSJed Brown   ierr = PetscMalloc1(a->rmax,&bb);CHKERRQ(ierr);
7018a07c6f1SJed Brown   ierr = PetscBTCreate(bn,&bt);CHKERRQ(ierr);
7028a07c6f1SJed Brown 
7038a07c6f1SJed Brown   /* Determine ci and cj */
7048a07c6f1SJed Brown   for (i=0; i<am; i++) {
7058a07c6f1SJed Brown     const PetscInt anzi  = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */
7068a07c6f1SJed Brown     const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */
7078a07c6f1SJed Brown     const PetscInt *fptr = current_space->array; /* Save beginning of the row so we can clear the BT later */
7088a07c6f1SJed Brown     ci[i+1] = ci[i];
7098a07c6f1SJed Brown     /* Populate the min heap */
7108a07c6f1SJed Brown     for (j=0; j<anzi; j++) {
7118a07c6f1SJed Brown       PetscInt brow = acol[j];
7128a07c6f1SJed Brown       for (bb[j] = bi[brow]; bb[j] < bi[brow+1]; bb[j]++) {
7138a07c6f1SJed Brown         PetscInt bcol = bj[bb[j]];
7148a07c6f1SJed Brown         if (!PetscBTLookupSet(bt,bcol)) { /* new entry */
7158a07c6f1SJed Brown           ierr = PetscHeapAdd(h,j,bcol);CHKERRQ(ierr);
7168a07c6f1SJed Brown           bb[j]++;
7178a07c6f1SJed Brown           break;
7188a07c6f1SJed Brown         }
7198a07c6f1SJed Brown       }
7208a07c6f1SJed Brown     }
7218a07c6f1SJed Brown     /* Pick off the min element, adding it to free space */
7228a07c6f1SJed Brown     ierr = PetscHeapPop(h,&j,&col);CHKERRQ(ierr);
7238a07c6f1SJed Brown     while (j >= 0) {
7248a07c6f1SJed Brown       if (current_space->local_remaining < 1) { /* double the size, but don't exceed 16 MiB */
7250298fd71SBarry Smith         fptr = NULL;                      /* need PetscBTMemzero */
726f91af8c7SBarry Smith         ierr = PetscFreeSpaceGet(PetscMin(PetscIntMultTruncate(2,current_space->total_array_size),16 << 20),&current_space);CHKERRQ(ierr);
7278a07c6f1SJed Brown         ndouble++;
7288a07c6f1SJed Brown       }
7298a07c6f1SJed Brown       *(current_space->array++) = col;
7308a07c6f1SJed Brown       current_space->local_used++;
7318a07c6f1SJed Brown       current_space->local_remaining--;
7328a07c6f1SJed Brown       ci[i+1]++;
7338a07c6f1SJed Brown 
7348a07c6f1SJed Brown       /* stash if anything else remains in this row of B */
7358a07c6f1SJed Brown       for (; bb[j] < bi[acol[j]+1]; bb[j]++) {
7368a07c6f1SJed Brown         PetscInt bcol = bj[bb[j]];
7378a07c6f1SJed Brown         if (!PetscBTLookupSet(bt,bcol)) { /* new entry */
7388a07c6f1SJed Brown           ierr = PetscHeapAdd(h,j,bcol);CHKERRQ(ierr);
7398a07c6f1SJed Brown           bb[j]++;
7408a07c6f1SJed Brown           break;
7418a07c6f1SJed Brown         }
7428a07c6f1SJed Brown       }
7438a07c6f1SJed Brown       ierr = PetscHeapPop(h,&j,&col);CHKERRQ(ierr);
7448a07c6f1SJed Brown     }
7458a07c6f1SJed Brown     if (fptr) {                 /* Clear the bits for this row */
7468a07c6f1SJed Brown       for (; fptr<current_space->array; fptr++) {ierr = PetscBTClear(bt,*fptr);CHKERRQ(ierr);}
7478a07c6f1SJed Brown     } else {                    /* We reallocated so we don't remember (easily) how to clear only the bits we changed */
7488a07c6f1SJed Brown       ierr = PetscBTMemzero(bn,bt);CHKERRQ(ierr);
7498a07c6f1SJed Brown     }
7508a07c6f1SJed Brown   }
7518a07c6f1SJed Brown   ierr = PetscFree(bb);CHKERRQ(ierr);
7528a07c6f1SJed Brown   ierr = PetscHeapDestroy(&h);CHKERRQ(ierr);
7538a07c6f1SJed Brown   ierr = PetscBTDestroy(&bt);CHKERRQ(ierr);
7548a07c6f1SJed Brown 
7558a07c6f1SJed Brown   /* Column indices are in the list of free space */
7568a07c6f1SJed Brown   /* Allocate space for cj, initialize cj, and */
7578a07c6f1SJed Brown   /* destroy list of free space and other temporary array(s) */
758785e854fSJed Brown   ierr = PetscMalloc1(ci[am],&cj);CHKERRQ(ierr);
7598a07c6f1SJed Brown   ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
7608a07c6f1SJed Brown 
7618a07c6f1SJed Brown   /* put together the new symbolic matrix */
762e4e71118SStefano Zampini   ierr = MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C);CHKERRQ(ierr);
7634222ddf1SHong Zhang   ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
7648a07c6f1SJed Brown 
7658a07c6f1SJed Brown   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
7668a07c6f1SJed Brown   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
7674222ddf1SHong Zhang   c          = (Mat_SeqAIJ*)(C->data);
7688a07c6f1SJed Brown   c->free_a  = PETSC_TRUE;
7698a07c6f1SJed Brown   c->free_ij = PETSC_TRUE;
7708a07c6f1SJed Brown   c->nonew   = 0;
77126fbe8dcSKarl Rupp 
7724222ddf1SHong Zhang   C->ops->matmultnumeric        = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted;
7738a07c6f1SJed Brown 
7748a07c6f1SJed Brown   /* set MatInfo */
7758a07c6f1SJed Brown   afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5;
7768a07c6f1SJed Brown   if (afill < 1.0) afill = 1.0;
7778a07c6f1SJed Brown   c->maxnz                     = ci[am];
7788a07c6f1SJed Brown   c->nz                        = ci[am];
7794222ddf1SHong Zhang   C->info.mallocs           = ndouble;
7804222ddf1SHong Zhang   C->info.fill_ratio_given  = fill;
7814222ddf1SHong Zhang   C->info.fill_ratio_needed = afill;
7828a07c6f1SJed Brown 
7838a07c6f1SJed Brown #if defined(PETSC_USE_INFO)
7848a07c6f1SJed Brown   if (ci[am]) {
7854222ddf1SHong Zhang     ierr = PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);CHKERRQ(ierr);
7864222ddf1SHong Zhang     ierr = PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);CHKERRQ(ierr);
7878a07c6f1SJed Brown   } else {
7884222ddf1SHong Zhang     ierr = PetscInfo(C,"Empty matrix product\n");CHKERRQ(ierr);
7898a07c6f1SJed Brown   }
7908a07c6f1SJed Brown #endif
7918a07c6f1SJed Brown   PetscFunctionReturn(0);
7928a07c6f1SJed Brown }
7938a07c6f1SJed Brown 
794d7ed1a4dSandi selinger 
7954222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_RowMerge(Mat A,Mat B,PetscReal fill,Mat C)
796d7ed1a4dSandi selinger {
797d7ed1a4dSandi selinger   PetscErrorCode     ierr;
798d7ed1a4dSandi selinger   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
799d7ed1a4dSandi selinger   const PetscInt     *ai=a->i,*bi=b->i,*aj=a->j,*bj=b->j,*inputi,*inputj,*inputcol,*inputcol_L1;
800d7ed1a4dSandi selinger   PetscInt           *ci,*cj,*outputj,worki_L1[9],worki_L2[9];
801d7ed1a4dSandi selinger   PetscInt           c_maxmem,a_maxrownnz=0,a_rownnz;
802d7ed1a4dSandi selinger   const PetscInt     workcol[8]={0,1,2,3,4,5,6,7};
803d7ed1a4dSandi selinger   const PetscInt     am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
804d7ed1a4dSandi selinger   const PetscInt     *brow_ptr[8],*brow_end[8];
805d7ed1a4dSandi selinger   PetscInt           window[8];
806d7ed1a4dSandi selinger   PetscInt           window_min,old_window_min,ci_nnz,outputi_nnz=0,L1_nrows,L2_nrows;
807d7ed1a4dSandi selinger   PetscInt           i,k,ndouble=0,L1_rowsleft,rowsleft;
808d7ed1a4dSandi selinger   PetscReal          afill;
809f83700f2Sandi selinger   PetscInt           *workj_L1,*workj_L2,*workj_L3;
8107660c4dbSandi selinger   PetscInt           L1_nnz,L2_nnz;
811d7ed1a4dSandi selinger 
812d7ed1a4dSandi selinger   /* Step 1: Get upper bound on memory required for allocation.
813d7ed1a4dSandi selinger              Because of the way virtual memory works,
814d7ed1a4dSandi selinger              only the memory pages that are actually needed will be physically allocated. */
815d7ed1a4dSandi selinger   PetscFunctionBegin;
816d7ed1a4dSandi selinger   ierr = PetscMalloc1(am+1,&ci);CHKERRQ(ierr);
817d7ed1a4dSandi selinger   for (i=0; i<am; i++) {
818d7ed1a4dSandi selinger     const PetscInt anzi  = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */
819d7ed1a4dSandi selinger     const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */
820d7ed1a4dSandi selinger     a_rownnz = 0;
821d7ed1a4dSandi selinger     for (k=0; k<anzi; ++k) {
822d7ed1a4dSandi selinger       a_rownnz += bi[acol[k]+1] - bi[acol[k]];
823d7ed1a4dSandi selinger       if (a_rownnz > bn) {
824d7ed1a4dSandi selinger         a_rownnz = bn;
825d7ed1a4dSandi selinger         break;
826d7ed1a4dSandi selinger       }
827d7ed1a4dSandi selinger     }
828d7ed1a4dSandi selinger     a_maxrownnz = PetscMax(a_maxrownnz, a_rownnz);
829d7ed1a4dSandi selinger   }
830d7ed1a4dSandi selinger   /* temporary work areas for merging rows */
831d7ed1a4dSandi selinger   ierr = PetscMalloc1(a_maxrownnz*8,&workj_L1);CHKERRQ(ierr);
832f83700f2Sandi selinger   ierr = PetscMalloc1(a_maxrownnz*8,&workj_L2);CHKERRQ(ierr);
833f83700f2Sandi selinger   ierr = PetscMalloc1(a_maxrownnz,&workj_L3);CHKERRQ(ierr);
834d7ed1a4dSandi selinger 
8357660c4dbSandi selinger   /* This should be enough for almost all matrices. If not, memory is reallocated later. */
8367660c4dbSandi selinger   c_maxmem = 8*(ai[am]+bi[bm]);
837d7ed1a4dSandi selinger   /* Step 2: Populate pattern for C */
838d7ed1a4dSandi selinger   ierr  = PetscMalloc1(c_maxmem,&cj);CHKERRQ(ierr);
839d7ed1a4dSandi selinger 
840d7ed1a4dSandi selinger   ci_nnz       = 0;
841d7ed1a4dSandi selinger   ci[0]        = 0;
842d7ed1a4dSandi selinger   worki_L1[0]  = 0;
843d7ed1a4dSandi selinger   worki_L2[0]  = 0;
844d7ed1a4dSandi selinger   for (i=0; i<am; i++) {
845d7ed1a4dSandi selinger     const PetscInt anzi  = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */
846d7ed1a4dSandi selinger     const PetscInt *acol = aj + ai[i];      /* column indices of nonzero entries in this row */
847d7ed1a4dSandi selinger     rowsleft             = anzi;
848d7ed1a4dSandi selinger     inputcol_L1          = acol;
849d7ed1a4dSandi selinger     L2_nnz               = 0;
8507660c4dbSandi selinger     L2_nrows             = 1;  /* Number of rows to be merged on Level 3. output of L3 already exists -> initial value 1   */
8517660c4dbSandi selinger     worki_L2[1]          = 0;
85208fe1b3cSKarl Rupp     outputi_nnz          = 0;
853d7ed1a4dSandi selinger 
854d7ed1a4dSandi selinger     /* If the number of indices in C so far + the max number of columns in the next row > c_maxmem  -> allocate more memory */
855d7ed1a4dSandi selinger     while (ci_nnz+a_maxrownnz > c_maxmem) {
856d7ed1a4dSandi selinger       c_maxmem *= 2;
857d7ed1a4dSandi selinger       ndouble++;
858d7ed1a4dSandi selinger       ierr = PetscRealloc(sizeof(PetscInt)*c_maxmem,&cj);CHKERRQ(ierr);
859d7ed1a4dSandi selinger     }
860d7ed1a4dSandi selinger 
861d7ed1a4dSandi selinger     while (rowsleft) {
862d7ed1a4dSandi selinger       L1_rowsleft = PetscMin(64, rowsleft); /* In the inner loop max 64 rows of B can be merged */
863d7ed1a4dSandi selinger       L1_nrows    = 0;
8647660c4dbSandi selinger       L1_nnz      = 0;
865d7ed1a4dSandi selinger       inputcol    = inputcol_L1;
866d7ed1a4dSandi selinger       inputi      = bi;
867d7ed1a4dSandi selinger       inputj      = bj;
868d7ed1a4dSandi selinger 
869d7ed1a4dSandi selinger       /* The following macro is used to specialize for small rows in A.
870d7ed1a4dSandi selinger          This helps with compiler unrolling, improving performance substantially.
871f83700f2Sandi selinger           Input:  inputj   inputi  inputcol  bn
872d7ed1a4dSandi selinger           Output: outputj  outputi_nnz                       */
873d7ed1a4dSandi selinger        #define MatMatMultSymbolic_RowMergeMacro(ANNZ)                        \
874d7ed1a4dSandi selinger          window_min  = bn;                                                   \
8757660c4dbSandi selinger          outputi_nnz = 0;                                                    \
8767660c4dbSandi selinger          for (k=0; k<ANNZ; ++k) {                                            \
877d7ed1a4dSandi selinger            brow_ptr[k] = inputj + inputi[inputcol[k]];                       \
878d7ed1a4dSandi selinger            brow_end[k] = inputj + inputi[inputcol[k]+1];                     \
879d7ed1a4dSandi selinger            window[k]   = (brow_ptr[k] != brow_end[k]) ? *brow_ptr[k] : bn;   \
880d7ed1a4dSandi selinger            window_min  = PetscMin(window[k], window_min);                    \
881d7ed1a4dSandi selinger          }                                                                   \
882d7ed1a4dSandi selinger          while (window_min < bn) {                                           \
883d7ed1a4dSandi selinger            outputj[outputi_nnz++] = window_min;                              \
884d7ed1a4dSandi selinger            /* advance front and compute new minimum */                       \
885d7ed1a4dSandi selinger            old_window_min = window_min;                                      \
886d7ed1a4dSandi selinger            window_min = bn;                                                  \
887d7ed1a4dSandi selinger            for (k=0; k<ANNZ; ++k) {                                          \
888d7ed1a4dSandi selinger              if (window[k] == old_window_min) {                              \
889d7ed1a4dSandi selinger                brow_ptr[k]++;                                                \
890d7ed1a4dSandi selinger                window[k] = (brow_ptr[k] != brow_end[k]) ? *brow_ptr[k] : bn; \
891d7ed1a4dSandi selinger              }                                                               \
892d7ed1a4dSandi selinger              window_min = PetscMin(window[k], window_min);                   \
893d7ed1a4dSandi selinger            }                                                                 \
894d7ed1a4dSandi selinger          }
895d7ed1a4dSandi selinger 
896d7ed1a4dSandi selinger       /************** L E V E L  1 ***************/
897d7ed1a4dSandi selinger       /* Merge up to 8 rows of B to L1 work array*/
898d7ed1a4dSandi selinger       while (L1_rowsleft) {
8997660c4dbSandi selinger         outputi_nnz = 0;
9007660c4dbSandi selinger         if (anzi > 8)  outputj = workj_L1 + L1_nnz;     /* Level 1 rowmerge*/
9017660c4dbSandi selinger         else           outputj = cj + ci_nnz;           /* Merge directly to C */
9027660c4dbSandi selinger 
903d7ed1a4dSandi selinger         switch (L1_rowsleft) {
904d7ed1a4dSandi selinger         case 1:  brow_ptr[0] = inputj + inputi[inputcol[0]];
905d7ed1a4dSandi selinger                  brow_end[0] = inputj + inputi[inputcol[0]+1];
906d7ed1a4dSandi selinger                  for (; brow_ptr[0] != brow_end[0]; ++brow_ptr[0]) outputj[outputi_nnz++] = *brow_ptr[0]; /* copy row in b over */
907d7ed1a4dSandi selinger                  inputcol    += L1_rowsleft;
908d7ed1a4dSandi selinger                  rowsleft    -= L1_rowsleft;
909d7ed1a4dSandi selinger                  L1_rowsleft  = 0;
910d7ed1a4dSandi selinger                  break;
911d7ed1a4dSandi selinger         case 2:  MatMatMultSymbolic_RowMergeMacro(2);
912d7ed1a4dSandi selinger                  inputcol    += L1_rowsleft;
913d7ed1a4dSandi selinger                  rowsleft    -= L1_rowsleft;
914d7ed1a4dSandi selinger                  L1_rowsleft  = 0;
915d7ed1a4dSandi selinger                  break;
916d7ed1a4dSandi selinger         case 3: MatMatMultSymbolic_RowMergeMacro(3);
917d7ed1a4dSandi selinger                  inputcol    += L1_rowsleft;
918d7ed1a4dSandi selinger                  rowsleft    -= L1_rowsleft;
919d7ed1a4dSandi selinger                  L1_rowsleft  = 0;
920d7ed1a4dSandi selinger                  break;
921d7ed1a4dSandi selinger         case 4:  MatMatMultSymbolic_RowMergeMacro(4);
922d7ed1a4dSandi selinger                  inputcol    += L1_rowsleft;
923d7ed1a4dSandi selinger                  rowsleft    -= L1_rowsleft;
924d7ed1a4dSandi selinger                  L1_rowsleft  = 0;
925d7ed1a4dSandi selinger                  break;
926d7ed1a4dSandi selinger         case 5:  MatMatMultSymbolic_RowMergeMacro(5);
927d7ed1a4dSandi selinger                  inputcol    += L1_rowsleft;
928d7ed1a4dSandi selinger                  rowsleft    -= L1_rowsleft;
929d7ed1a4dSandi selinger                  L1_rowsleft  = 0;
930d7ed1a4dSandi selinger                  break;
931d7ed1a4dSandi selinger         case 6:  MatMatMultSymbolic_RowMergeMacro(6);
932d7ed1a4dSandi selinger                  inputcol    += L1_rowsleft;
933d7ed1a4dSandi selinger                  rowsleft    -= L1_rowsleft;
934d7ed1a4dSandi selinger                  L1_rowsleft  = 0;
935d7ed1a4dSandi selinger                  break;
936d7ed1a4dSandi selinger         case 7:  MatMatMultSymbolic_RowMergeMacro(7);
937d7ed1a4dSandi selinger                  inputcol    += L1_rowsleft;
938d7ed1a4dSandi selinger                  rowsleft    -= L1_rowsleft;
939d7ed1a4dSandi selinger                  L1_rowsleft  = 0;
940d7ed1a4dSandi selinger                  break;
941d7ed1a4dSandi selinger         default: MatMatMultSymbolic_RowMergeMacro(8);
942d7ed1a4dSandi selinger                  inputcol    += 8;
943d7ed1a4dSandi selinger                  rowsleft    -= 8;
944d7ed1a4dSandi selinger                  L1_rowsleft -= 8;
945d7ed1a4dSandi selinger                  break;
946d7ed1a4dSandi selinger         }
947d7ed1a4dSandi selinger         inputcol_L1           = inputcol;
9487660c4dbSandi selinger         L1_nnz               += outputi_nnz;
9497660c4dbSandi selinger         worki_L1[++L1_nrows]  = L1_nnz;
950d7ed1a4dSandi selinger       }
951d7ed1a4dSandi selinger 
952d7ed1a4dSandi selinger       /********************** L E V E L  2 ************************/
953d7ed1a4dSandi selinger       /* Merge from L1 work array to either C or to L2 work array */
954d7ed1a4dSandi selinger       if (anzi > 8) {
955d7ed1a4dSandi selinger         inputi      = worki_L1;
956d7ed1a4dSandi selinger         inputj      = workj_L1;
957d7ed1a4dSandi selinger         inputcol    = workcol;
958d7ed1a4dSandi selinger         outputi_nnz = 0;
959d7ed1a4dSandi selinger 
960d7ed1a4dSandi selinger         if (anzi <= 64) outputj = cj + ci_nnz;        /* Merge from L1 work array to C */
961d7ed1a4dSandi selinger         else            outputj = workj_L2 + L2_nnz;  /* Merge from L1 work array to L2 work array */
962d7ed1a4dSandi selinger 
963d7ed1a4dSandi selinger         switch (L1_nrows) {
964d7ed1a4dSandi selinger         case 1:  brow_ptr[0] = inputj + inputi[inputcol[0]];
965d7ed1a4dSandi selinger                  brow_end[0] = inputj + inputi[inputcol[0]+1];
966d7ed1a4dSandi selinger                  for (; brow_ptr[0] != brow_end[0]; ++brow_ptr[0]) outputj[outputi_nnz++] = *brow_ptr[0]; /* copy row in b over */
967d7ed1a4dSandi selinger                  break;
968d7ed1a4dSandi selinger         case 2:  MatMatMultSymbolic_RowMergeMacro(2); break;
969d7ed1a4dSandi selinger         case 3:  MatMatMultSymbolic_RowMergeMacro(3); break;
970d7ed1a4dSandi selinger         case 4:  MatMatMultSymbolic_RowMergeMacro(4); break;
971d7ed1a4dSandi selinger         case 5:  MatMatMultSymbolic_RowMergeMacro(5); break;
972d7ed1a4dSandi selinger         case 6:  MatMatMultSymbolic_RowMergeMacro(6); break;
973d7ed1a4dSandi selinger         case 7:  MatMatMultSymbolic_RowMergeMacro(7); break;
974d7ed1a4dSandi selinger         case 8:  MatMatMultSymbolic_RowMergeMacro(8); break;
975d7ed1a4dSandi selinger         default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatMatMult logic error: Not merging 1-8 rows from L1 work array!");
976d7ed1a4dSandi selinger         }
977d7ed1a4dSandi selinger         L2_nnz               += outputi_nnz;
978d7ed1a4dSandi selinger         worki_L2[++L2_nrows]  = L2_nnz;
979d7ed1a4dSandi selinger 
9807660c4dbSandi selinger         /************************ L E V E L  3 **********************/
9817660c4dbSandi selinger         /* Merge from L2 work array to either C or to L2 work array */
982d7ed1a4dSandi selinger         if (anzi > 64 && (L2_nrows == 8 || rowsleft == 0)) {
983d7ed1a4dSandi selinger           inputi      = worki_L2;
984d7ed1a4dSandi selinger           inputj      = workj_L2;
985d7ed1a4dSandi selinger           inputcol    = workcol;
986d7ed1a4dSandi selinger           outputi_nnz = 0;
987f83700f2Sandi selinger           if (rowsleft) outputj = workj_L3;
988d7ed1a4dSandi selinger           else          outputj = cj + ci_nnz;
989d7ed1a4dSandi selinger           switch (L2_nrows) {
990d7ed1a4dSandi selinger           case 1:  brow_ptr[0] = inputj + inputi[inputcol[0]];
991d7ed1a4dSandi selinger                    brow_end[0] = inputj + inputi[inputcol[0]+1];
992d7ed1a4dSandi selinger                    for (; brow_ptr[0] != brow_end[0]; ++brow_ptr[0]) outputj[outputi_nnz++] = *brow_ptr[0]; /* copy row in b over */
993d7ed1a4dSandi selinger                    break;
994d7ed1a4dSandi selinger           case 2:  MatMatMultSymbolic_RowMergeMacro(2); break;
995d7ed1a4dSandi selinger           case 3:  MatMatMultSymbolic_RowMergeMacro(3); break;
996d7ed1a4dSandi selinger           case 4:  MatMatMultSymbolic_RowMergeMacro(4); break;
997d7ed1a4dSandi selinger           case 5:  MatMatMultSymbolic_RowMergeMacro(5); break;
998d7ed1a4dSandi selinger           case 6:  MatMatMultSymbolic_RowMergeMacro(6); break;
999d7ed1a4dSandi selinger           case 7:  MatMatMultSymbolic_RowMergeMacro(7); break;
1000d7ed1a4dSandi selinger           case 8:  MatMatMultSymbolic_RowMergeMacro(8); break;
1001d7ed1a4dSandi selinger           default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatMatMult logic error: Not merging 1-8 rows from L2 work array!");
1002d7ed1a4dSandi selinger           }
1003d7ed1a4dSandi selinger           L2_nrows    = 1;
10047660c4dbSandi selinger           L2_nnz      = outputi_nnz;
10057660c4dbSandi selinger           worki_L2[1] = outputi_nnz;
10067660c4dbSandi selinger           /* Copy to workj_L2 */
1007d7ed1a4dSandi selinger           if (rowsleft) {
10087660c4dbSandi selinger             for (k=0; k<outputi_nnz; ++k)  workj_L2[k] = outputj[k];
1009d7ed1a4dSandi selinger           }
1010d7ed1a4dSandi selinger         }
1011d7ed1a4dSandi selinger       }
1012d7ed1a4dSandi selinger     }  /* while (rowsleft) */
1013d7ed1a4dSandi selinger #undef MatMatMultSymbolic_RowMergeMacro
1014d7ed1a4dSandi selinger 
1015d7ed1a4dSandi selinger     /* terminate current row */
1016d7ed1a4dSandi selinger     ci_nnz += outputi_nnz;
1017d7ed1a4dSandi selinger     ci[i+1] = ci_nnz;
1018d7ed1a4dSandi selinger   }
1019d7ed1a4dSandi selinger 
1020d7ed1a4dSandi selinger   /* Step 3: Create the new symbolic matrix */
1021e4e71118SStefano Zampini   ierr = MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C);CHKERRQ(ierr);
10224222ddf1SHong Zhang   ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
1023d7ed1a4dSandi selinger 
1024d7ed1a4dSandi selinger   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
1025d7ed1a4dSandi selinger   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
10264222ddf1SHong Zhang   c          = (Mat_SeqAIJ*)(C->data);
1027d7ed1a4dSandi selinger   c->free_a  = PETSC_TRUE;
1028d7ed1a4dSandi selinger   c->free_ij = PETSC_TRUE;
1029d7ed1a4dSandi selinger   c->nonew   = 0;
1030d7ed1a4dSandi selinger 
10314222ddf1SHong Zhang   C->ops->matmultnumeric        = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted;
1032d7ed1a4dSandi selinger 
1033d7ed1a4dSandi selinger   /* set MatInfo */
1034d7ed1a4dSandi selinger   afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5;
1035d7ed1a4dSandi selinger   if (afill < 1.0) afill = 1.0;
1036d7ed1a4dSandi selinger   c->maxnz                     = ci[am];
1037d7ed1a4dSandi selinger   c->nz                        = ci[am];
10384222ddf1SHong Zhang   C->info.mallocs           = ndouble;
10394222ddf1SHong Zhang   C->info.fill_ratio_given  = fill;
10404222ddf1SHong Zhang   C->info.fill_ratio_needed = afill;
1041d7ed1a4dSandi selinger 
1042d7ed1a4dSandi selinger #if defined(PETSC_USE_INFO)
1043d7ed1a4dSandi selinger   if (ci[am]) {
10444222ddf1SHong Zhang     ierr = PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);CHKERRQ(ierr);
10454222ddf1SHong Zhang     ierr = PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);CHKERRQ(ierr);
1046d7ed1a4dSandi selinger   } else {
10474222ddf1SHong Zhang     ierr = PetscInfo(C,"Empty matrix product\n");CHKERRQ(ierr);
1048d7ed1a4dSandi selinger   }
1049d7ed1a4dSandi selinger #endif
1050d7ed1a4dSandi selinger 
1051d7ed1a4dSandi selinger   /* Step 4: Free temporary work areas */
1052d7ed1a4dSandi selinger   ierr = PetscFree(workj_L1);CHKERRQ(ierr);
1053d7ed1a4dSandi selinger   ierr = PetscFree(workj_L2);CHKERRQ(ierr);
1054f83700f2Sandi selinger   ierr = PetscFree(workj_L3);CHKERRQ(ierr);
1055d7ed1a4dSandi selinger   PetscFunctionReturn(0);
1056d7ed1a4dSandi selinger }
1057d7ed1a4dSandi selinger 
1058cd093f1dSJed Brown /* concatenate unique entries and then sort */
10594222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Sorted(Mat A,Mat B,PetscReal fill,Mat C)
1060cd093f1dSJed Brown {
1061cd093f1dSJed Brown   PetscErrorCode     ierr;
1062cd093f1dSJed Brown   Mat_SeqAIJ         *a  = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
1063cd093f1dSJed Brown   const PetscInt     *ai = a->i,*bi=b->i,*aj=a->j,*bj=b->j;
1064cd093f1dSJed Brown   PetscInt           *ci,*cj;
1065cd093f1dSJed Brown   PetscInt           am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
1066cd093f1dSJed Brown   PetscReal          afill;
1067cd093f1dSJed Brown   PetscInt           i,j,ndouble = 0;
1068cd093f1dSJed Brown   PetscSegBuffer     seg,segrow;
1069cd093f1dSJed Brown   char               *seen;
1070cd093f1dSJed Brown 
1071cd093f1dSJed Brown   PetscFunctionBegin;
1072854ce69bSBarry Smith   ierr  = PetscMalloc1(am+1,&ci);CHKERRQ(ierr);
1073cd093f1dSJed Brown   ci[0] = 0;
1074cd093f1dSJed Brown 
1075cd093f1dSJed Brown   /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
1076cd093f1dSJed Brown   ierr = PetscSegBufferCreate(sizeof(PetscInt),(PetscInt)(fill*(ai[am]+bi[bm])),&seg);CHKERRQ(ierr);
1077cd093f1dSJed Brown   ierr = PetscSegBufferCreate(sizeof(PetscInt),100,&segrow);CHKERRQ(ierr);
1078580bdb30SBarry Smith   ierr = PetscCalloc1(bn,&seen);CHKERRQ(ierr);
1079cd093f1dSJed Brown 
1080cd093f1dSJed Brown   /* Determine ci and cj */
1081cd093f1dSJed Brown   for (i=0; i<am; i++) {
1082cd093f1dSJed Brown     const PetscInt anzi  = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */
1083cd093f1dSJed Brown     const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */
1084cd093f1dSJed Brown     PetscInt packlen = 0,*PETSC_RESTRICT crow;
1085cd093f1dSJed Brown     /* Pack segrow */
1086cd093f1dSJed Brown     for (j=0; j<anzi; j++) {
1087cd093f1dSJed Brown       PetscInt brow = acol[j],bjstart = bi[brow],bjend = bi[brow+1],k;
1088cd093f1dSJed Brown       for (k=bjstart; k<bjend; k++) {
1089cd093f1dSJed Brown         PetscInt bcol = bj[k];
1090cd093f1dSJed Brown         if (!seen[bcol]) { /* new entry */
1091cd093f1dSJed Brown           PetscInt *PETSC_RESTRICT slot;
1092cd093f1dSJed Brown           ierr = PetscSegBufferGetInts(segrow,1,&slot);CHKERRQ(ierr);
1093cd093f1dSJed Brown           *slot = bcol;
1094cd093f1dSJed Brown           seen[bcol] = 1;
1095cd093f1dSJed Brown           packlen++;
1096cd093f1dSJed Brown         }
1097cd093f1dSJed Brown       }
1098cd093f1dSJed Brown     }
1099cd093f1dSJed Brown     ierr = PetscSegBufferGetInts(seg,packlen,&crow);CHKERRQ(ierr);
1100cd093f1dSJed Brown     ierr = PetscSegBufferExtractTo(segrow,crow);CHKERRQ(ierr);
1101cd093f1dSJed Brown     ierr = PetscSortInt(packlen,crow);CHKERRQ(ierr);
1102cd093f1dSJed Brown     ci[i+1] = ci[i] + packlen;
1103cd093f1dSJed Brown     for (j=0; j<packlen; j++) seen[crow[j]] = 0;
1104cd093f1dSJed Brown   }
1105cd093f1dSJed Brown   ierr = PetscSegBufferDestroy(&segrow);CHKERRQ(ierr);
1106cd093f1dSJed Brown   ierr = PetscFree(seen);CHKERRQ(ierr);
1107cd093f1dSJed Brown 
1108cd093f1dSJed Brown   /* Column indices are in the segmented buffer */
1109cd093f1dSJed Brown   ierr = PetscSegBufferExtractAlloc(seg,&cj);CHKERRQ(ierr);
1110cd093f1dSJed Brown   ierr = PetscSegBufferDestroy(&seg);CHKERRQ(ierr);
1111cd093f1dSJed Brown 
1112cd093f1dSJed Brown   /* put together the new symbolic matrix */
1113e4e71118SStefano Zampini   ierr = MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C);CHKERRQ(ierr);
11144222ddf1SHong Zhang   ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
1115cd093f1dSJed Brown 
1116cd093f1dSJed Brown   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
1117cd093f1dSJed Brown   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
11184222ddf1SHong Zhang   c          = (Mat_SeqAIJ*)(C->data);
1119cd093f1dSJed Brown   c->free_a  = PETSC_TRUE;
1120cd093f1dSJed Brown   c->free_ij = PETSC_TRUE;
1121cd093f1dSJed Brown   c->nonew   = 0;
1122cd093f1dSJed Brown 
11234222ddf1SHong Zhang   C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted;
1124cd093f1dSJed Brown 
1125cd093f1dSJed Brown   /* set MatInfo */
1126cd093f1dSJed Brown   afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5;
1127cd093f1dSJed Brown   if (afill < 1.0) afill = 1.0;
1128cd093f1dSJed Brown   c->maxnz                     = ci[am];
1129cd093f1dSJed Brown   c->nz                        = ci[am];
11304222ddf1SHong Zhang   C->info.mallocs           = ndouble;
11314222ddf1SHong Zhang   C->info.fill_ratio_given  = fill;
11324222ddf1SHong Zhang   C->info.fill_ratio_needed = afill;
1133cd093f1dSJed Brown 
1134cd093f1dSJed Brown #if defined(PETSC_USE_INFO)
1135cd093f1dSJed Brown   if (ci[am]) {
11364222ddf1SHong Zhang     ierr = PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);CHKERRQ(ierr);
11374222ddf1SHong Zhang     ierr = PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);CHKERRQ(ierr);
1138cd093f1dSJed Brown   } else {
11394222ddf1SHong Zhang     ierr = PetscInfo(C,"Empty matrix product\n");CHKERRQ(ierr);
1140cd093f1dSJed Brown   }
1141cd093f1dSJed Brown #endif
1142cd093f1dSJed Brown   PetscFunctionReturn(0);
1143cd093f1dSJed Brown }
1144cd093f1dSJed Brown 
11452128a86cSHong Zhang PetscErrorCode MatDestroy_SeqAIJ_MatMatMultTrans(Mat A)
11462128a86cSHong Zhang {
11472128a86cSHong Zhang   PetscErrorCode      ierr;
11484c7df5ccSHong Zhang   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)A->data;
114940192850SHong Zhang   Mat_MatMatTransMult *abt=a->abt;
11502128a86cSHong Zhang 
11512128a86cSHong Zhang   PetscFunctionBegin;
115240192850SHong Zhang   ierr = (abt->destroy)(A);CHKERRQ(ierr);
115340192850SHong Zhang   ierr = MatTransposeColoringDestroy(&abt->matcoloring);CHKERRQ(ierr);
115440192850SHong Zhang   ierr = MatDestroy(&abt->Bt_den);CHKERRQ(ierr);
115540192850SHong Zhang   ierr = MatDestroy(&abt->ABt_den);CHKERRQ(ierr);
115640192850SHong Zhang   ierr = PetscFree(abt);CHKERRQ(ierr);
11572128a86cSHong Zhang   PetscFunctionReturn(0);
11582128a86cSHong Zhang }
11592128a86cSHong Zhang 
11604222ddf1SHong Zhang PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat C)
1161bc011b1eSHong Zhang {
11625df89d91SHong Zhang   PetscErrorCode      ierr;
116381d82fe4SHong Zhang   Mat                 Bt;
116481d82fe4SHong Zhang   PetscInt            *bti,*btj;
116540192850SHong Zhang   Mat_MatMatTransMult *abt;
11664c7df5ccSHong Zhang   Mat_SeqAIJ          *c;
11674222ddf1SHong Zhang   Mat_Product         *product = C->product;
11684222ddf1SHong Zhang   MatProductAlgorithm alg = product->alg;
1169d2853540SHong Zhang 
117081d82fe4SHong Zhang   PetscFunctionBegin;
117181d82fe4SHong Zhang   /* create symbolic Bt */
117281d82fe4SHong Zhang   ierr = MatGetSymbolicTranspose_SeqAIJ(B,&bti,&btj);CHKERRQ(ierr);
11730298fd71SBarry Smith   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,B->cmap->n,B->rmap->n,bti,btj,NULL,&Bt);CHKERRQ(ierr);
117433d57670SJed Brown   ierr = MatSetBlockSizes(Bt,PetscAbs(A->cmap->bs),PetscAbs(B->cmap->bs));CHKERRQ(ierr);
117504b858e0SBarry Smith   ierr = MatSetType(Bt,((PetscObject)A)->type_name);CHKERRQ(ierr);
117681d82fe4SHong Zhang 
117781d82fe4SHong Zhang   /* get symbolic C=A*Bt */
11784222ddf1SHong Zhang   ierr = MatProductSetAlgorithm(C,"sorted");CHKERRQ(ierr); /* set algorithm for C = A*Bt */
117981d82fe4SHong Zhang   ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,Bt,fill,C);CHKERRQ(ierr);
11804222ddf1SHong Zhang   ierr = MatProductSetAlgorithm(C,alg);CHKERRQ(ierr); /* resume original algorithm for ABt product */
118181d82fe4SHong Zhang 
11822128a86cSHong Zhang   /* create a supporting struct for reuse intermidiate dense matrices with matcoloring */
1183b00a9115SJed Brown   ierr   = PetscNew(&abt);CHKERRQ(ierr);
11844222ddf1SHong Zhang   c      = (Mat_SeqAIJ*)C->data;
118540192850SHong Zhang   c->abt = abt;
11862128a86cSHong Zhang 
118740192850SHong Zhang   abt->usecoloring = PETSC_FALSE;
11884222ddf1SHong Zhang   abt->destroy     = C->ops->destroy;
11894222ddf1SHong Zhang   C->ops->destroy  = MatDestroy_SeqAIJ_MatMatMultTrans;
11904222ddf1SHong Zhang   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
11912128a86cSHong Zhang 
11924222ddf1SHong Zhang   abt->usecoloring = PETSC_FALSE;
11934222ddf1SHong Zhang   ierr = PetscStrcmp(product->alg,"color",&abt->usecoloring);CHKERRQ(ierr);
119440192850SHong Zhang   if (abt->usecoloring) {
1195b9af6bddSHong Zhang     /* Create MatTransposeColoring from symbolic C=A*B^T */
1196b9af6bddSHong Zhang     MatTransposeColoring matcoloring;
1197335efc43SPeter Brune     MatColoring          coloring;
11982128a86cSHong Zhang     ISColoring           iscoloring;
11992128a86cSHong Zhang     Mat                  Bt_dense,C_dense;
1200e8354b3eSHong Zhang 
12014222ddf1SHong Zhang     /* inode causes memory problem */
12024222ddf1SHong Zhang     ierr = MatSetOption(C,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
12034222ddf1SHong Zhang 
12044222ddf1SHong Zhang     ierr = MatColoringCreate(C,&coloring);CHKERRQ(ierr);
1205335efc43SPeter Brune     ierr = MatColoringSetDistance(coloring,2);CHKERRQ(ierr);
1206335efc43SPeter Brune     ierr = MatColoringSetType(coloring,MATCOLORINGSL);CHKERRQ(ierr);
1207335efc43SPeter Brune     ierr = MatColoringSetFromOptions(coloring);CHKERRQ(ierr);
1208335efc43SPeter Brune     ierr = MatColoringApply(coloring,&iscoloring);CHKERRQ(ierr);
1209335efc43SPeter Brune     ierr = MatColoringDestroy(&coloring);CHKERRQ(ierr);
12104222ddf1SHong Zhang     ierr = MatTransposeColoringCreate(C,iscoloring,&matcoloring);CHKERRQ(ierr);
12112205254eSKarl Rupp 
121240192850SHong Zhang     abt->matcoloring = matcoloring;
12132205254eSKarl Rupp 
12142128a86cSHong Zhang     ierr = ISColoringDestroy(&iscoloring);CHKERRQ(ierr);
12152128a86cSHong Zhang 
12162128a86cSHong Zhang     /* Create Bt_dense and C_dense = A*Bt_dense */
12172128a86cSHong Zhang     ierr = MatCreate(PETSC_COMM_SELF,&Bt_dense);CHKERRQ(ierr);
12182128a86cSHong Zhang     ierr = MatSetSizes(Bt_dense,A->cmap->n,matcoloring->ncolors,A->cmap->n,matcoloring->ncolors);CHKERRQ(ierr);
12192128a86cSHong Zhang     ierr = MatSetType(Bt_dense,MATSEQDENSE);CHKERRQ(ierr);
12200298fd71SBarry Smith     ierr = MatSeqDenseSetPreallocation(Bt_dense,NULL);CHKERRQ(ierr);
12212205254eSKarl Rupp 
12222128a86cSHong Zhang     Bt_dense->assembled = PETSC_TRUE;
122340192850SHong Zhang     abt->Bt_den         = Bt_dense;
12242128a86cSHong Zhang 
12252128a86cSHong Zhang     ierr = MatCreate(PETSC_COMM_SELF,&C_dense);CHKERRQ(ierr);
12262128a86cSHong Zhang     ierr = MatSetSizes(C_dense,A->rmap->n,matcoloring->ncolors,A->rmap->n,matcoloring->ncolors);CHKERRQ(ierr);
12272128a86cSHong Zhang     ierr = MatSetType(C_dense,MATSEQDENSE);CHKERRQ(ierr);
12280298fd71SBarry Smith     ierr = MatSeqDenseSetPreallocation(C_dense,NULL);CHKERRQ(ierr);
12292205254eSKarl Rupp 
12302128a86cSHong Zhang     Bt_dense->assembled = PETSC_TRUE;
123140192850SHong Zhang     abt->ABt_den  = C_dense;
1232f94ccd6cSHong Zhang 
1233f94ccd6cSHong Zhang #if defined(PETSC_USE_INFO)
12341ce71dffSSatish Balay     {
12354222ddf1SHong Zhang       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
12364222ddf1SHong Zhang       ierr = PetscInfo7(C,"Use coloring of C=A*B^T; B^T: %D %D, Bt_dense: %D,%D; Cnz %D / (cm*ncolors %D) = %g\n",B->cmap->n,B->rmap->n,Bt_dense->rmap->n,Bt_dense->cmap->n,c->nz,A->rmap->n*matcoloring->ncolors,(PetscReal)(c->nz)/(A->rmap->n*matcoloring->ncolors));CHKERRQ(ierr);
12371ce71dffSSatish Balay     }
1238f94ccd6cSHong Zhang #endif
12392128a86cSHong Zhang   }
1240e99be685SHong Zhang   /* clean up */
1241e99be685SHong Zhang   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
1242e99be685SHong Zhang   ierr = MatRestoreSymbolicTranspose_SeqAIJ(B,&bti,&btj);CHKERRQ(ierr);
12435df89d91SHong Zhang   PetscFunctionReturn(0);
12445df89d91SHong Zhang }
12455df89d91SHong Zhang 
12466fc122caSHong Zhang PetscErrorCode MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
12475df89d91SHong Zhang {
12485df89d91SHong Zhang   PetscErrorCode      ierr;
12495df89d91SHong Zhang   Mat_SeqAIJ          *a   =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data;
1250e2cac8caSJed Brown   PetscInt            *ai  =a->i,*aj=a->j,*bi=b->i,*bj=b->j,anzi,bnzj,nexta,nextb,*acol,*bcol,brow;
125181d82fe4SHong Zhang   PetscInt            cm   =C->rmap->n,*ci=c->i,*cj=c->j,i,j,cnzi,*ccol;
12525df89d91SHong Zhang   PetscLogDouble      flops=0.0;
1253aa1aec99SHong Zhang   MatScalar           *aa  =a->a,*aval,*ba=b->a,*bval,*ca,*cval;
125440192850SHong Zhang   Mat_MatMatTransMult *abt = c->abt;
12555df89d91SHong Zhang 
12565df89d91SHong Zhang   PetscFunctionBegin;
125758ed3ceaSHong Zhang   /* clear old values in C */
125858ed3ceaSHong Zhang   if (!c->a) {
1259580bdb30SBarry Smith     ierr      = PetscCalloc1(ci[cm]+1,&ca);CHKERRQ(ierr);
126058ed3ceaSHong Zhang     c->a      = ca;
126158ed3ceaSHong Zhang     c->free_a = PETSC_TRUE;
126258ed3ceaSHong Zhang   } else {
126358ed3ceaSHong Zhang     ca =  c->a;
1264580bdb30SBarry Smith     ierr = PetscArrayzero(ca,ci[cm]+1);CHKERRQ(ierr);
126558ed3ceaSHong Zhang   }
126658ed3ceaSHong Zhang 
126740192850SHong Zhang   if (abt->usecoloring) {
126840192850SHong Zhang     MatTransposeColoring matcoloring = abt->matcoloring;
126940192850SHong Zhang     Mat                  Bt_dense,C_dense = abt->ABt_den;
1270c8db22f6SHong Zhang 
1271b9af6bddSHong Zhang     /* Get Bt_dense by Apply MatTransposeColoring to B */
127240192850SHong Zhang     Bt_dense = abt->Bt_den;
1273b9af6bddSHong Zhang     ierr = MatTransColoringApplySpToDen(matcoloring,B,Bt_dense);CHKERRQ(ierr);
1274c8db22f6SHong Zhang 
1275c8db22f6SHong Zhang     /* C_dense = A*Bt_dense */
12762128a86cSHong Zhang     ierr = MatMatMultNumeric_SeqAIJ_SeqDense(A,Bt_dense,C_dense);CHKERRQ(ierr);
1277c8db22f6SHong Zhang 
12782128a86cSHong Zhang     /* Recover C from C_dense */
1279b9af6bddSHong Zhang     ierr = MatTransColoringApplyDenToSp(matcoloring,C_dense,C);CHKERRQ(ierr);
1280c8db22f6SHong Zhang     PetscFunctionReturn(0);
1281c8db22f6SHong Zhang   }
1282c8db22f6SHong Zhang 
12831fa1209cSHong Zhang   for (i=0; i<cm; i++) {
128481d82fe4SHong Zhang     anzi = ai[i+1] - ai[i];
128581d82fe4SHong Zhang     acol = aj + ai[i];
12866973769bSHong Zhang     aval = aa + ai[i];
12871fa1209cSHong Zhang     cnzi = ci[i+1] - ci[i];
12881fa1209cSHong Zhang     ccol = cj + ci[i];
12896973769bSHong Zhang     cval = ca + ci[i];
12901fa1209cSHong Zhang     for (j=0; j<cnzi; j++) {
129181d82fe4SHong Zhang       brow = ccol[j];
129281d82fe4SHong Zhang       bnzj = bi[brow+1] - bi[brow];
129381d82fe4SHong Zhang       bcol = bj + bi[brow];
12946973769bSHong Zhang       bval = ba + bi[brow];
12956973769bSHong Zhang 
129681d82fe4SHong Zhang       /* perform sparse inner-product c(i,j)=A[i,:]*B[j,:]^T */
129781d82fe4SHong Zhang       nexta = 0; nextb = 0;
129881d82fe4SHong Zhang       while (nexta<anzi && nextb<bnzj) {
12997b6d5e96SMark Adams         while (nexta < anzi && acol[nexta] < bcol[nextb]) nexta++;
130081d82fe4SHong Zhang         if (nexta == anzi) break;
13017b6d5e96SMark Adams         while (nextb < bnzj && acol[nexta] > bcol[nextb]) nextb++;
130281d82fe4SHong Zhang         if (nextb == bnzj) break;
130381d82fe4SHong Zhang         if (acol[nexta] == bcol[nextb]) {
13046973769bSHong Zhang           cval[j] += aval[nexta]*bval[nextb];
130581d82fe4SHong Zhang           nexta++; nextb++;
130681d82fe4SHong Zhang           flops += 2;
13071fa1209cSHong Zhang         }
13081fa1209cSHong Zhang       }
130981d82fe4SHong Zhang     }
131081d82fe4SHong Zhang   }
131181d82fe4SHong Zhang   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
131281d82fe4SHong Zhang   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
131381d82fe4SHong Zhang   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
13145df89d91SHong Zhang   PetscFunctionReturn(0);
13155df89d91SHong Zhang }
13165df89d91SHong Zhang 
13176d373c3eSHong Zhang PetscErrorCode MatDestroy_SeqAIJ_MatTransMatMult(Mat A)
13186d373c3eSHong Zhang {
13196d373c3eSHong Zhang   PetscErrorCode      ierr;
13206d373c3eSHong Zhang   Mat_SeqAIJ          *a = (Mat_SeqAIJ*)A->data;
13216d373c3eSHong Zhang   Mat_MatTransMatMult *atb = a->atb;
13226d373c3eSHong Zhang 
13236d373c3eSHong Zhang   PetscFunctionBegin;
13246473ade5SStefano Zampini   if (atb) {
13256d373c3eSHong Zhang     ierr = MatDestroy(&atb->At);CHKERRQ(ierr);
13266473ade5SStefano Zampini     ierr = (*atb->destroy)(A);CHKERRQ(ierr);
13276473ade5SStefano Zampini   }
13286d373c3eSHong Zhang   ierr = PetscFree(atb);CHKERRQ(ierr);
13296d373c3eSHong Zhang   PetscFunctionReturn(0);
13306d373c3eSHong Zhang }
13316d373c3eSHong Zhang 
13324222ddf1SHong Zhang PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat C)
13335df89d91SHong Zhang {
1334bc011b1eSHong Zhang   PetscErrorCode      ierr;
1335bc011b1eSHong Zhang   Mat                 At;
133638baddfdSBarry Smith   PetscInt            *ati,*atj;
13374222ddf1SHong Zhang   Mat_Product         *product = C->product;
13384222ddf1SHong Zhang   MatProductAlgorithm alg;
13394222ddf1SHong Zhang   PetscBool           flg;
1340bc011b1eSHong Zhang 
1341bc011b1eSHong Zhang   PetscFunctionBegin;
13424222ddf1SHong Zhang   if (product) {
13434222ddf1SHong Zhang     alg = product->alg;
13444222ddf1SHong Zhang   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"!product, not supported yet");
13454222ddf1SHong Zhang 
13464222ddf1SHong Zhang   /* outerproduct */
13474222ddf1SHong Zhang   ierr = PetscStrcmp(alg,"outerproduct",&flg);CHKERRQ(ierr);
13484222ddf1SHong Zhang   if (flg) {
1349bc011b1eSHong Zhang     /* create symbolic At */
1350bc011b1eSHong Zhang     ierr = MatGetSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr);
13510298fd71SBarry Smith     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,A->cmap->n,A->rmap->n,ati,atj,NULL,&At);CHKERRQ(ierr);
135233d57670SJed Brown     ierr = MatSetBlockSizes(At,PetscAbs(A->cmap->bs),PetscAbs(B->cmap->bs));CHKERRQ(ierr);
135304b858e0SBarry Smith     ierr = MatSetType(At,((PetscObject)A)->type_name);CHKERRQ(ierr);
1354bc011b1eSHong Zhang 
1355bc011b1eSHong Zhang     /* get symbolic C=At*B */
1356*7a3c3d58SStefano Zampini     ierr = MatProductSetAlgorithm(C,"sorted");CHKERRQ(ierr);
1357bc011b1eSHong Zhang     ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(At,B,fill,C);CHKERRQ(ierr);
1358bc011b1eSHong Zhang 
1359bc011b1eSHong Zhang     /* clean up */
13606bf464f9SBarry Smith     ierr = MatDestroy(&At);CHKERRQ(ierr);
1361bc011b1eSHong Zhang     ierr = MatRestoreSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr);
13626d373c3eSHong Zhang 
13634222ddf1SHong Zhang     C->ops->mattransposemultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ; /* outerproduct */
1364*7a3c3d58SStefano Zampini     ierr = MatProductSetAlgorithm(C,"outerproduct");CHKERRQ(ierr);
13654222ddf1SHong Zhang     PetscFunctionReturn(0);
13664222ddf1SHong Zhang   }
13674222ddf1SHong Zhang 
13684222ddf1SHong Zhang   /* matmatmult */
13694222ddf1SHong Zhang   ierr = PetscStrcmp(alg,"at*b",&flg);CHKERRQ(ierr);
13704222ddf1SHong Zhang   if (flg) {
13714222ddf1SHong Zhang     Mat_MatTransMatMult *atb;
13724222ddf1SHong Zhang     Mat_SeqAIJ          *c;
13734222ddf1SHong Zhang 
13744222ddf1SHong Zhang     ierr = PetscNew(&atb);CHKERRQ(ierr);
13754222ddf1SHong Zhang     ierr = MatTranspose_SeqAIJ(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
1376*7a3c3d58SStefano Zampini     ierr = MatProductSetAlgorithm(C,"sorted");CHKERRQ(ierr);
13774222ddf1SHong Zhang     ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(At,B,fill,C);CHKERRQ(ierr);
13784222ddf1SHong Zhang 
13794222ddf1SHong Zhang     c               = (Mat_SeqAIJ*)C->data;
13804222ddf1SHong Zhang     c->atb          = atb;
13814222ddf1SHong Zhang     atb->At         = At;
13824222ddf1SHong Zhang     atb->destroy    = C->ops->destroy;
13834222ddf1SHong Zhang     atb->updateAt   = PETSC_FALSE; /* because At is computed here */
13844222ddf1SHong Zhang     C->ops->destroy = MatDestroy_SeqAIJ_MatTransMatMult;
13854222ddf1SHong Zhang 
13864222ddf1SHong Zhang     C->ops->mattransposemultnumeric = NULL; /* see MatProductNumeric_AtB_SeqAIJ_SeqAIJ */
1387*7a3c3d58SStefano Zampini     ierr = MatProductSetAlgorithm(C,"at*b");CHKERRQ(ierr);
13884222ddf1SHong Zhang     PetscFunctionReturn(0);
13894222ddf1SHong Zhang   }
13904222ddf1SHong Zhang 
13914222ddf1SHong Zhang   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product Algorithm is not supported");
1392bc011b1eSHong Zhang   PetscFunctionReturn(0);
1393bc011b1eSHong Zhang }
1394bc011b1eSHong Zhang 
139575648e8dSHong Zhang PetscErrorCode MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
1396bc011b1eSHong Zhang {
1397bc011b1eSHong Zhang   PetscErrorCode ierr;
13980fbc74f4SHong Zhang   Mat_SeqAIJ     *a   =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data;
1399d0f46423SBarry Smith   PetscInt       am   =A->rmap->n,anzi,*ai=a->i,*aj=a->j,*bi=b->i,*bj,bnzi,nextb;
1400d0f46423SBarry Smith   PetscInt       cm   =C->rmap->n,*ci=c->i,*cj=c->j,crow,*cjj,i,j,k;
1401d13dce4bSSatish Balay   PetscLogDouble flops=0.0;
1402aa1aec99SHong Zhang   MatScalar      *aa  =a->a,*ba,*ca,*caj;
1403bc011b1eSHong Zhang 
1404bc011b1eSHong Zhang   PetscFunctionBegin;
1405aa1aec99SHong Zhang   if (!c->a) {
1406580bdb30SBarry Smith     ierr = PetscCalloc1(ci[cm]+1,&ca);CHKERRQ(ierr);
14072205254eSKarl Rupp 
1408aa1aec99SHong Zhang     c->a      = ca;
1409aa1aec99SHong Zhang     c->free_a = PETSC_TRUE;
1410aa1aec99SHong Zhang   } else {
1411aa1aec99SHong Zhang     ca   = c->a;
1412580bdb30SBarry Smith     ierr = PetscArrayzero(ca,ci[cm]);CHKERRQ(ierr);
1413aa1aec99SHong Zhang   }
1414bc011b1eSHong Zhang 
1415bc011b1eSHong Zhang   /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */
1416bc011b1eSHong Zhang   for (i=0; i<am; i++) {
1417bc011b1eSHong Zhang     bj   = b->j + bi[i];
1418bc011b1eSHong Zhang     ba   = b->a + bi[i];
1419bc011b1eSHong Zhang     bnzi = bi[i+1] - bi[i];
1420bc011b1eSHong Zhang     anzi = ai[i+1] - ai[i];
1421bc011b1eSHong Zhang     for (j=0; j<anzi; j++) {
1422bc011b1eSHong Zhang       nextb = 0;
14230fbc74f4SHong Zhang       crow  = *aj++;
1424bc011b1eSHong Zhang       cjj   = cj + ci[crow];
1425bc011b1eSHong Zhang       caj   = ca + ci[crow];
1426bc011b1eSHong Zhang       /* perform sparse axpy operation.  Note cjj includes bj. */
1427bc011b1eSHong Zhang       for (k=0; nextb<bnzi; k++) {
14280fbc74f4SHong Zhang         if (cjj[k] == *(bj+nextb)) { /* ccol == bcol */
14290fbc74f4SHong Zhang           caj[k] += (*aa)*(*(ba+nextb));
1430bc011b1eSHong Zhang           nextb++;
1431bc011b1eSHong Zhang         }
1432bc011b1eSHong Zhang       }
1433bc011b1eSHong Zhang       flops += 2*bnzi;
14340fbc74f4SHong Zhang       aa++;
1435bc011b1eSHong Zhang     }
1436bc011b1eSHong Zhang   }
1437bc011b1eSHong Zhang 
1438bc011b1eSHong Zhang   /* Assemble the final matrix and clean up */
1439bc011b1eSHong Zhang   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1440bc011b1eSHong Zhang   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1441bc011b1eSHong Zhang   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
1442bc011b1eSHong Zhang   PetscFunctionReturn(0);
1443bc011b1eSHong Zhang }
14449123193aSHong Zhang 
14454222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqDense(Mat A,Mat B,PetscReal fill,Mat C)
14469123193aSHong Zhang {
14479123193aSHong Zhang   PetscErrorCode ierr;
14489123193aSHong Zhang 
14499123193aSHong Zhang   PetscFunctionBegin;
14505a586d82SBarry Smith   ierr = MatMatMultSymbolic_SeqDense_SeqDense(A,B,0.0,C);CHKERRQ(ierr);
14512205254eSKarl Rupp 
14524222ddf1SHong Zhang   C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqDense;
14539123193aSHong Zhang   PetscFunctionReturn(0);
14549123193aSHong Zhang }
14559123193aSHong Zhang 
145687753ddeSHong Zhang PetscErrorCode MatMatMultNumericAdd_SeqAIJ_SeqDense(Mat A,Mat B,Mat C)
14579123193aSHong Zhang {
1458f32d5d43SBarry Smith   Mat_SeqAIJ        *a=(Mat_SeqAIJ*)A->data;
1459612438f1SStefano Zampini   Mat_SeqDense      *bd=(Mat_SeqDense*)B->data;
14601ca9667aSStefano Zampini   Mat_SeqDense      *cd=(Mat_SeqDense*)C->data;
14619123193aSHong Zhang   PetscErrorCode    ierr;
14621ca9667aSStefano Zampini   PetscScalar       *c,r1,r2,r3,r4,*c1,*c2,*c3,*c4;
1463a4af7ca8SStefano Zampini   const PetscScalar *aa,*b,*b1,*b2,*b3,*b4,*av;
1464daf57211SHong Zhang   const PetscInt    *aj;
1465612438f1SStefano Zampini   PetscInt          cm=C->rmap->n,cn=B->cmap->n,bm=bd->lda,am=A->rmap->n;
14661ca9667aSStefano Zampini   PetscInt          clda=cd->lda;
14671ca9667aSStefano Zampini   PetscInt          am4=4*clda,bm4=4*bm,col,i,j,n;
14689123193aSHong Zhang 
14699123193aSHong Zhang   PetscFunctionBegin;
1470f32d5d43SBarry Smith   if (!cm || !cn) PetscFunctionReturn(0);
1471a4af7ca8SStefano Zampini   ierr = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr);
14728c778c55SBarry Smith   ierr = MatDenseGetArray(C,&c);CHKERRQ(ierr);
1473a4af7ca8SStefano Zampini   ierr = MatDenseGetArrayRead(B,&b);CHKERRQ(ierr);
1474f32d5d43SBarry Smith   b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm;
14751ca9667aSStefano Zampini   c1 = c; c2 = c1 + clda; c3 = c2 + clda; c4 = c3 + clda;
14761ca9667aSStefano Zampini   for (col=0; col<(cn/4)*4; col += 4) {  /* over columns of C */
14771ca9667aSStefano Zampini     for (i=0; i<am; i++) {        /* over rows of A in those columns */
1478f32d5d43SBarry Smith       r1 = r2 = r3 = r4 = 0.0;
1479f32d5d43SBarry Smith       n  = a->i[i+1] - a->i[i];
1480f32d5d43SBarry Smith       aj = a->j + a->i[i];
1481a4af7ca8SStefano Zampini       aa = av + a->i[i];
1482f32d5d43SBarry Smith       for (j=0; j<n; j++) {
14831ca9667aSStefano Zampini         const PetscScalar aatmp = aa[j];
14841ca9667aSStefano Zampini         const PetscInt    ajtmp = aj[j];
1485730858b9SHong Zhang         r1 += aatmp*b1[ajtmp];
1486730858b9SHong Zhang         r2 += aatmp*b2[ajtmp];
1487730858b9SHong Zhang         r3 += aatmp*b3[ajtmp];
1488730858b9SHong Zhang         r4 += aatmp*b4[ajtmp];
14899123193aSHong Zhang       }
149087753ddeSHong Zhang       c1[i] += r1;
149187753ddeSHong Zhang       c2[i] += r2;
149287753ddeSHong Zhang       c3[i] += r3;
149387753ddeSHong Zhang       c4[i] += r4;
1494f32d5d43SBarry Smith     }
1495730858b9SHong Zhang     b1 += bm4; b2 += bm4; b3 += bm4; b4 += bm4;
1496730858b9SHong Zhang     c1 += am4; c2 += am4; c3 += am4; c4 += am4;
1497f32d5d43SBarry Smith   }
1498f32d5d43SBarry Smith   for (; col<cn; col++) {   /* over extra columns of C */
1499f32d5d43SBarry Smith     for (i=0; i<am; i++) {  /* over rows of C in those columns */
1500f32d5d43SBarry Smith       r1 = 0.0;
1501f32d5d43SBarry Smith       n  = a->i[i+1] - a->i[i];
1502f32d5d43SBarry Smith       aj = a->j + a->i[i];
1503a4af7ca8SStefano Zampini       aa = av + a->i[i];
1504f32d5d43SBarry Smith       for (j=0; j<n; j++) {
1505730858b9SHong Zhang         r1 += aa[j]*b1[aj[j]];
1506f32d5d43SBarry Smith       }
150787753ddeSHong Zhang       c1[i] += r1;
1508f32d5d43SBarry Smith     }
1509f32d5d43SBarry Smith     b1 += bm;
15101ca9667aSStefano Zampini     c1 += clda;
1511f32d5d43SBarry Smith   }
1512dc0b31edSSatish Balay   ierr = PetscLogFlops(cn*(2.0*a->nz));CHKERRQ(ierr);
15138c778c55SBarry Smith   ierr = MatDenseRestoreArray(C,&c);CHKERRQ(ierr);
1514a4af7ca8SStefano Zampini   ierr = MatDenseRestoreArrayRead(B,&b);CHKERRQ(ierr);
1515a4af7ca8SStefano Zampini   ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr);
1516f32d5d43SBarry Smith   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1517f32d5d43SBarry Smith   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1518f32d5d43SBarry Smith   PetscFunctionReturn(0);
1519f32d5d43SBarry Smith }
1520f32d5d43SBarry Smith 
152187753ddeSHong Zhang PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqDense(Mat A,Mat B,Mat C)
1522f32d5d43SBarry Smith {
1523f32d5d43SBarry Smith   PetscErrorCode ierr;
1524f32d5d43SBarry Smith 
1525f32d5d43SBarry Smith   PetscFunctionBegin;
152687753ddeSHong Zhang   if (B->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in A %D not equal rows in B %D\n",A->cmap->n,B->rmap->n);
152787753ddeSHong Zhang   if (A->rmap->n != C->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows in C %D not equal rows in A %D\n",C->rmap->n,A->rmap->n);
152887753ddeSHong Zhang   if (B->cmap->n != C->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in B %D not equal columns in C %D\n",B->cmap->n,C->cmap->n);
15294324174eSBarry Smith 
153087753ddeSHong Zhang   ierr = MatZeroEntries(C);CHKERRQ(ierr);
153187753ddeSHong Zhang   ierr = MatMatMultNumericAdd_SeqAIJ_SeqDense(A,B,C);CHKERRQ(ierr);
15329123193aSHong Zhang   PetscFunctionReturn(0);
15339123193aSHong Zhang }
1534b1683b59SHong Zhang 
15354222ddf1SHong Zhang /* ------------------------------------------------------- */
15364222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense_AB(Mat C)
15374222ddf1SHong Zhang {
15384222ddf1SHong Zhang   PetscFunctionBegin;
15394222ddf1SHong Zhang   C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqDense;
15404222ddf1SHong Zhang   C->ops->productsymbolic = MatProductSymbolic_AB;
15414222ddf1SHong Zhang   /* dense mat may not call MatProductSymbolic(), thus set C->ops->productnumeric here */
15424222ddf1SHong Zhang   C->ops->productnumeric  = MatProductNumeric_AB;
15434222ddf1SHong Zhang   PetscFunctionReturn(0);
15444222ddf1SHong Zhang }
15454222ddf1SHong Zhang 
15464222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense_AtB(Mat C)
15474222ddf1SHong Zhang {
15484222ddf1SHong Zhang   PetscFunctionBegin;
15494222ddf1SHong Zhang   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_SeqAIJ_SeqDense;
15504222ddf1SHong Zhang   C->ops->productsymbolic          = MatProductSymbolic_AtB;
15514222ddf1SHong Zhang   C->ops->productnumeric           = MatProductNumeric_AtB;
15524222ddf1SHong Zhang   PetscFunctionReturn(0);
15534222ddf1SHong Zhang }
15544222ddf1SHong Zhang 
15554222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense(Mat C)
15564222ddf1SHong Zhang {
15574222ddf1SHong Zhang   PetscErrorCode ierr;
15584222ddf1SHong Zhang   Mat_Product    *product = C->product;
15594222ddf1SHong Zhang 
15604222ddf1SHong Zhang   PetscFunctionBegin;
15614222ddf1SHong Zhang   switch (product->type) {
15624222ddf1SHong Zhang   case MATPRODUCT_AB:
15634222ddf1SHong Zhang     ierr = MatProductSetFromOptions_SeqAIJ_SeqDense_AB(C);CHKERRQ(ierr);
15644222ddf1SHong Zhang     break;
15654222ddf1SHong Zhang   case MATPRODUCT_AtB:
15664222ddf1SHong Zhang     ierr = MatProductSetFromOptions_SeqAIJ_SeqDense_AtB(C);CHKERRQ(ierr);
15674222ddf1SHong Zhang     break;
15684222ddf1SHong Zhang   case MATPRODUCT_PtAP:
15694222ddf1SHong Zhang     ierr = MatProductSetFromOptions_SeqDense(C);CHKERRQ(ierr);
15704222ddf1SHong Zhang     break;
15714222ddf1SHong Zhang   default:
15724222ddf1SHong Zhang     /* Use MatProduct_Basic() if there is no specific implementation */
15734222ddf1SHong Zhang     C->ops->productsymbolic = MatProductSymbolic_Basic;
15744222ddf1SHong Zhang   }
15754222ddf1SHong Zhang   PetscFunctionReturn(0);
15764222ddf1SHong Zhang }
15774222ddf1SHong Zhang /* ------------------------------------------------------- */
15784222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqXBAIJ_SeqDense_AB(Mat C)
15794222ddf1SHong Zhang {
15804222ddf1SHong Zhang   PetscErrorCode ierr;
15814222ddf1SHong Zhang   Mat_Product    *product = C->product;
15824222ddf1SHong Zhang   Mat            A = product->A;
15834222ddf1SHong Zhang   PetscBool      baij;
15844222ddf1SHong Zhang 
15854222ddf1SHong Zhang   PetscFunctionBegin;
15864222ddf1SHong Zhang   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&baij);CHKERRQ(ierr);
15874222ddf1SHong Zhang   if (!baij) { /* A is seqsbaij */
15884222ddf1SHong Zhang     PetscBool sbaij;
15894222ddf1SHong Zhang     ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&sbaij);CHKERRQ(ierr);
15904222ddf1SHong Zhang     if (!sbaij) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_ARG_WRONGSTATE,"Mat must be either seqbaij or seqsbaij format");
15914222ddf1SHong Zhang 
15924222ddf1SHong Zhang     C->ops->matmultsymbolic = MatMatMultSymbolic_SeqSBAIJ_SeqDense;
15934222ddf1SHong Zhang   } else { /* A is seqbaij */
15944222ddf1SHong Zhang     C->ops->matmultsymbolic = MatMatMultSymbolic_SeqBAIJ_SeqDense;
15954222ddf1SHong Zhang   }
15964222ddf1SHong Zhang 
15974222ddf1SHong Zhang   C->ops->productsymbolic = MatProductSymbolic_AB;
15984222ddf1SHong Zhang   C->ops->productnumeric  = MatProductNumeric_AB;
15994222ddf1SHong Zhang   PetscFunctionReturn(0);
16004222ddf1SHong Zhang }
16014222ddf1SHong Zhang 
16024222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqXBAIJ_SeqDense(Mat C)
16034222ddf1SHong Zhang {
16044222ddf1SHong Zhang   PetscErrorCode ierr;
16054222ddf1SHong Zhang   Mat_Product    *product = C->product;
16064222ddf1SHong Zhang 
16074222ddf1SHong Zhang   PetscFunctionBegin;
16084222ddf1SHong Zhang   if (product->type == MATPRODUCT_AB) {
16094222ddf1SHong Zhang     ierr = MatProductSetFromOptions_SeqXBAIJ_SeqDense_AB(C);CHKERRQ(ierr);
1610544a5e07SHong Zhang   } else SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for SeqXBAIJ and SeqDense matrices",MatProductTypes[product->type]);
16114222ddf1SHong Zhang   PetscFunctionReturn(0);
16124222ddf1SHong Zhang }
16134222ddf1SHong Zhang /* ------------------------------------------------------- */
16144222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqDense_SeqAIJ_AB(Mat C)
16154222ddf1SHong Zhang {
16164222ddf1SHong Zhang   PetscFunctionBegin;
16174222ddf1SHong Zhang   C->ops->matmultsymbolic = MatMatMultSymbolic_SeqDense_SeqAIJ;
16184222ddf1SHong Zhang   C->ops->productsymbolic = MatProductSymbolic_AB;
16194222ddf1SHong Zhang   C->ops->productnumeric  = MatProductNumeric_AB;
16204222ddf1SHong Zhang   PetscFunctionReturn(0);
16214222ddf1SHong Zhang }
16224222ddf1SHong Zhang 
16234222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqDense_SeqAIJ(Mat C)
16244222ddf1SHong Zhang {
16254222ddf1SHong Zhang   PetscErrorCode ierr;
16264222ddf1SHong Zhang   Mat_Product    *product = C->product;
16274222ddf1SHong Zhang 
16284222ddf1SHong Zhang   PetscFunctionBegin;
16294222ddf1SHong Zhang   if (product->type == MATPRODUCT_AB) {
16304222ddf1SHong Zhang     ierr = MatProductSetFromOptions_SeqDense_SeqAIJ_AB(C);CHKERRQ(ierr);
1631544a5e07SHong Zhang   } else SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for SeqDense and SeqAIJ matrices",MatProductTypes[product->type]);
16324222ddf1SHong Zhang   PetscFunctionReturn(0);
16334222ddf1SHong Zhang }
16344222ddf1SHong Zhang /* ------------------------------------------------------- */
16354222ddf1SHong Zhang 
1636b9af6bddSHong Zhang PetscErrorCode  MatTransColoringApplySpToDen_SeqAIJ(MatTransposeColoring coloring,Mat B,Mat Btdense)
1637c8db22f6SHong Zhang {
1638c8db22f6SHong Zhang   PetscErrorCode ierr;
16392f5f1f90SHong Zhang   Mat_SeqAIJ     *b       = (Mat_SeqAIJ*)B->data;
16402f5f1f90SHong Zhang   Mat_SeqDense   *btdense = (Mat_SeqDense*)Btdense->data;
16412f5f1f90SHong Zhang   PetscInt       *bi      = b->i,*bj=b->j;
16422f5f1f90SHong Zhang   PetscInt       m        = Btdense->rmap->n,n=Btdense->cmap->n,j,k,l,col,anz,*btcol,brow,ncolumns;
16432f5f1f90SHong Zhang   MatScalar      *btval,*btval_den,*ba=b->a;
16442f5f1f90SHong Zhang   PetscInt       *columns=coloring->columns,*colorforcol=coloring->colorforcol,ncolors=coloring->ncolors;
1645c8db22f6SHong Zhang 
1646c8db22f6SHong Zhang   PetscFunctionBegin;
16472f5f1f90SHong Zhang   btval_den=btdense->v;
1648580bdb30SBarry Smith   ierr     = PetscArrayzero(btval_den,m*n);CHKERRQ(ierr);
16492f5f1f90SHong Zhang   for (k=0; k<ncolors; k++) {
16502f5f1f90SHong Zhang     ncolumns = coloring->ncolumns[k];
16512f5f1f90SHong Zhang     for (l=0; l<ncolumns; l++) { /* insert a row of B to a column of Btdense */
1652d2853540SHong Zhang       col   = *(columns + colorforcol[k] + l);
16532f5f1f90SHong Zhang       btcol = bj + bi[col];
16542f5f1f90SHong Zhang       btval = ba + bi[col];
16552f5f1f90SHong Zhang       anz   = bi[col+1] - bi[col];
1656d2853540SHong Zhang       for (j=0; j<anz; j++) {
16572f5f1f90SHong Zhang         brow            = btcol[j];
16582f5f1f90SHong Zhang         btval_den[brow] = btval[j];
1659c8db22f6SHong Zhang       }
1660c8db22f6SHong Zhang     }
16612f5f1f90SHong Zhang     btval_den += m;
1662c8db22f6SHong Zhang   }
1663c8db22f6SHong Zhang   PetscFunctionReturn(0);
1664c8db22f6SHong Zhang }
1665c8db22f6SHong Zhang 
1666b9af6bddSHong Zhang PetscErrorCode MatTransColoringApplyDenToSp_SeqAIJ(MatTransposeColoring matcoloring,Mat Cden,Mat Csp)
16678972f759SHong Zhang {
16688972f759SHong Zhang   PetscErrorCode    ierr;
1669b2d2b04fSHong Zhang   Mat_SeqAIJ        *csp = (Mat_SeqAIJ*)Csp->data;
16701683a169SBarry Smith   const PetscScalar *ca_den,*ca_den_ptr;
16711683a169SBarry Smith   PetscScalar       *ca=csp->a;
1672f99a636bSHong Zhang   PetscInt          k,l,m=Cden->rmap->n,ncolors=matcoloring->ncolors;
1673e88777f2SHong Zhang   PetscInt          brows=matcoloring->brows,*den2sp=matcoloring->den2sp;
1674077f23c2SHong Zhang   PetscInt          nrows,*row,*idx;
1675077f23c2SHong Zhang   PetscInt          *rows=matcoloring->rows,*colorforrow=matcoloring->colorforrow;
16768972f759SHong Zhang 
16778972f759SHong Zhang   PetscFunctionBegin;
16781683a169SBarry Smith   ierr   = MatDenseGetArrayRead(Cden,&ca_den);CHKERRQ(ierr);
1679f99a636bSHong Zhang 
1680077f23c2SHong Zhang   if (brows > 0) {
1681077f23c2SHong Zhang     PetscInt *lstart,row_end,row_start;
1682980ae229SHong Zhang     lstart = matcoloring->lstart;
1683580bdb30SBarry Smith     ierr = PetscArrayzero(lstart,ncolors);CHKERRQ(ierr);
1684eeb4fd02SHong Zhang 
1685077f23c2SHong Zhang     row_end = brows;
1686eeb4fd02SHong Zhang     if (row_end > m) row_end = m;
1687077f23c2SHong Zhang     for (row_start=0; row_start<m; row_start+=brows) { /* loop over row blocks of Csp */
1688077f23c2SHong Zhang       ca_den_ptr = ca_den;
1689980ae229SHong Zhang       for (k=0; k<ncolors; k++) { /* loop over colors (columns of Cden) */
1690eeb4fd02SHong Zhang         nrows = matcoloring->nrows[k];
1691eeb4fd02SHong Zhang         row   = rows  + colorforrow[k];
1692eeb4fd02SHong Zhang         idx   = den2sp + colorforrow[k];
1693eeb4fd02SHong Zhang         for (l=lstart[k]; l<nrows; l++) {
1694eeb4fd02SHong Zhang           if (row[l] >= row_end) {
1695eeb4fd02SHong Zhang             lstart[k] = l;
1696eeb4fd02SHong Zhang             break;
1697eeb4fd02SHong Zhang           } else {
1698077f23c2SHong Zhang             ca[idx[l]] = ca_den_ptr[row[l]];
1699eeb4fd02SHong Zhang           }
1700eeb4fd02SHong Zhang         }
1701077f23c2SHong Zhang         ca_den_ptr += m;
1702eeb4fd02SHong Zhang       }
1703077f23c2SHong Zhang       row_end += brows;
1704eeb4fd02SHong Zhang       if (row_end > m) row_end = m;
1705eeb4fd02SHong Zhang     }
1706077f23c2SHong Zhang   } else { /* non-blocked impl: loop over columns of Csp - slow if Csp is large */
1707077f23c2SHong Zhang     ca_den_ptr = ca_den;
1708077f23c2SHong Zhang     for (k=0; k<ncolors; k++) {
1709077f23c2SHong Zhang       nrows = matcoloring->nrows[k];
1710077f23c2SHong Zhang       row   = rows  + colorforrow[k];
1711077f23c2SHong Zhang       idx   = den2sp + colorforrow[k];
1712077f23c2SHong Zhang       for (l=0; l<nrows; l++) {
1713077f23c2SHong Zhang         ca[idx[l]] = ca_den_ptr[row[l]];
1714077f23c2SHong Zhang       }
1715077f23c2SHong Zhang       ca_den_ptr += m;
1716077f23c2SHong Zhang     }
1717f99a636bSHong Zhang   }
1718f99a636bSHong Zhang 
17191683a169SBarry Smith   ierr = MatDenseRestoreArrayRead(Cden,&ca_den);CHKERRQ(ierr);
1720f99a636bSHong Zhang #if defined(PETSC_USE_INFO)
1721077f23c2SHong Zhang   if (matcoloring->brows > 0) {
1722f99a636bSHong Zhang     ierr = PetscInfo1(Csp,"Loop over %D row blocks for den2sp\n",brows);CHKERRQ(ierr);
1723e88777f2SHong Zhang   } else {
1724077f23c2SHong Zhang     ierr = PetscInfo(Csp,"Loop over colors/columns of Cden, inefficient for large sparse matrix product \n");CHKERRQ(ierr);
1725e88777f2SHong Zhang   }
1726f99a636bSHong Zhang #endif
17278972f759SHong Zhang   PetscFunctionReturn(0);
17288972f759SHong Zhang }
17298972f759SHong Zhang 
1730b9af6bddSHong Zhang PetscErrorCode MatTransposeColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatTransposeColoring c)
1731b1683b59SHong Zhang {
1732b1683b59SHong Zhang   PetscErrorCode ierr;
1733e88777f2SHong Zhang   PetscInt       i,n,nrows,Nbs,j,k,m,ncols,col,cm;
17341a83f524SJed Brown   const PetscInt *is,*ci,*cj,*row_idx;
1735b28d80bdSHong Zhang   PetscInt       nis = iscoloring->n,*rowhit,bs = 1;
1736b1683b59SHong Zhang   IS             *isa;
1737b28d80bdSHong Zhang   Mat_SeqAIJ     *csp = (Mat_SeqAIJ*)mat->data;
1738e88777f2SHong Zhang   PetscInt       *colorforrow,*rows,*rows_i,*idxhit,*spidx,*den2sp,*den2sp_i;
1739e88777f2SHong Zhang   PetscInt       *colorforcol,*columns,*columns_i,brows;
1740e88777f2SHong Zhang   PetscBool      flg;
1741b1683b59SHong Zhang 
1742b1683b59SHong Zhang   PetscFunctionBegin;
1743071fcb05SBarry Smith   ierr = ISColoringGetIS(iscoloring,PETSC_USE_POINTER,PETSC_IGNORE,&isa);CHKERRQ(ierr);
1744e99be685SHong Zhang 
17457ecccc15SHong Zhang   /* bs >1 is not being tested yet! */
1746e88777f2SHong Zhang   Nbs       = mat->cmap->N/bs;
1747b1683b59SHong Zhang   c->M      = mat->rmap->N/bs;  /* set total rows, columns and local rows */
1748e88777f2SHong Zhang   c->N      = Nbs;
1749e88777f2SHong Zhang   c->m      = c->M;
1750b1683b59SHong Zhang   c->rstart = 0;
1751e88777f2SHong Zhang   c->brows  = 100;
1752b1683b59SHong Zhang 
1753b1683b59SHong Zhang   c->ncolors = nis;
1754dcca6d9dSJed Brown   ierr = PetscMalloc3(nis,&c->ncolumns,nis,&c->nrows,nis+1,&colorforrow);CHKERRQ(ierr);
1755854ce69bSBarry Smith   ierr = PetscMalloc1(csp->nz+1,&rows);CHKERRQ(ierr);
1756854ce69bSBarry Smith   ierr = PetscMalloc1(csp->nz+1,&den2sp);CHKERRQ(ierr);
1757e88777f2SHong Zhang 
1758e88777f2SHong Zhang   brows = c->brows;
1759c5929fdfSBarry Smith   ierr = PetscOptionsGetInt(NULL,NULL,"-matden2sp_brows",&brows,&flg);CHKERRQ(ierr);
1760e88777f2SHong Zhang   if (flg) c->brows = brows;
1761eeb4fd02SHong Zhang   if (brows > 0) {
1762854ce69bSBarry Smith     ierr = PetscMalloc1(nis+1,&c->lstart);CHKERRQ(ierr);
1763e88777f2SHong Zhang   }
17642205254eSKarl Rupp 
1765d2853540SHong Zhang   colorforrow[0] = 0;
1766d2853540SHong Zhang   rows_i         = rows;
1767f99a636bSHong Zhang   den2sp_i       = den2sp;
1768b1683b59SHong Zhang 
1769854ce69bSBarry Smith   ierr = PetscMalloc1(nis+1,&colorforcol);CHKERRQ(ierr);
1770854ce69bSBarry Smith   ierr = PetscMalloc1(Nbs+1,&columns);CHKERRQ(ierr);
17712205254eSKarl Rupp 
1772d2853540SHong Zhang   colorforcol[0] = 0;
1773d2853540SHong Zhang   columns_i      = columns;
1774d2853540SHong Zhang 
1775077f23c2SHong Zhang   /* get column-wise storage of mat */
1776077f23c2SHong Zhang   ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
1777b1683b59SHong Zhang 
1778b28d80bdSHong Zhang   cm   = c->m;
1779854ce69bSBarry Smith   ierr = PetscMalloc1(cm+1,&rowhit);CHKERRQ(ierr);
1780854ce69bSBarry Smith   ierr = PetscMalloc1(cm+1,&idxhit);CHKERRQ(ierr);
1781f99a636bSHong Zhang   for (i=0; i<nis; i++) { /* loop over color */
1782b1683b59SHong Zhang     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
1783b1683b59SHong Zhang     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
17842205254eSKarl Rupp 
1785b1683b59SHong Zhang     c->ncolumns[i] = n;
1786b1683b59SHong Zhang     if (n) {
1787580bdb30SBarry Smith       ierr = PetscArraycpy(columns_i,is,n);CHKERRQ(ierr);
1788b1683b59SHong Zhang     }
1789d2853540SHong Zhang     colorforcol[i+1] = colorforcol[i] + n;
1790d2853540SHong Zhang     columns_i       += n;
1791b1683b59SHong Zhang 
1792b1683b59SHong Zhang     /* fast, crude version requires O(N*N) work */
1793580bdb30SBarry Smith     ierr = PetscArrayzero(rowhit,cm);CHKERRQ(ierr);
1794e99be685SHong Zhang 
1795b7caf3d6SHong Zhang     for (j=0; j<n; j++) { /* loop over columns*/
1796b1683b59SHong Zhang       col     = is[j];
1797d2853540SHong Zhang       row_idx = cj + ci[col];
1798b1683b59SHong Zhang       m       = ci[col+1] - ci[col];
1799b7caf3d6SHong Zhang       for (k=0; k<m; k++) { /* loop over columns marking them in rowhit */
1800e99be685SHong Zhang         idxhit[*row_idx]   = spidx[ci[col] + k];
1801d2853540SHong Zhang         rowhit[*row_idx++] = col + 1;
1802b1683b59SHong Zhang       }
1803b1683b59SHong Zhang     }
1804b1683b59SHong Zhang     /* count the number of hits */
1805b1683b59SHong Zhang     nrows = 0;
1806e8354b3eSHong Zhang     for (j=0; j<cm; j++) {
1807b1683b59SHong Zhang       if (rowhit[j]) nrows++;
1808b1683b59SHong Zhang     }
1809b1683b59SHong Zhang     c->nrows[i]      = nrows;
1810d2853540SHong Zhang     colorforrow[i+1] = colorforrow[i] + nrows;
1811d2853540SHong Zhang 
1812b1683b59SHong Zhang     nrows = 0;
1813b7caf3d6SHong Zhang     for (j=0; j<cm; j++) { /* loop over rows */
1814b1683b59SHong Zhang       if (rowhit[j]) {
1815d2853540SHong Zhang         rows_i[nrows]   = j;
181612b89a43SHong Zhang         den2sp_i[nrows] = idxhit[j];
1817b1683b59SHong Zhang         nrows++;
1818b1683b59SHong Zhang       }
1819b1683b59SHong Zhang     }
1820e88777f2SHong Zhang     den2sp_i += nrows;
1821077f23c2SHong Zhang 
1822b1683b59SHong Zhang     ierr    = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr);
1823f99a636bSHong Zhang     rows_i += nrows;
1824b1683b59SHong Zhang   }
18250298fd71SBarry Smith   ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
1826b28d80bdSHong Zhang   ierr = PetscFree(rowhit);CHKERRQ(ierr);
1827071fcb05SBarry Smith   ierr = ISColoringRestoreIS(iscoloring,PETSC_USE_POINTER,&isa);CHKERRQ(ierr);
1828d2853540SHong Zhang   if (csp->nz != colorforrow[nis]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"csp->nz %d != colorforrow[nis] %d",csp->nz,colorforrow[nis]);
1829b28d80bdSHong Zhang 
1830d2853540SHong Zhang   c->colorforrow = colorforrow;
1831d2853540SHong Zhang   c->rows        = rows;
1832f99a636bSHong Zhang   c->den2sp      = den2sp;
1833d2853540SHong Zhang   c->colorforcol = colorforcol;
1834d2853540SHong Zhang   c->columns     = columns;
18352205254eSKarl Rupp 
1836f94ccd6cSHong Zhang   ierr = PetscFree(idxhit);CHKERRQ(ierr);
1837b1683b59SHong Zhang   PetscFunctionReturn(0);
1838b1683b59SHong Zhang }
1839d013fc79Sandi selinger 
18404222ddf1SHong Zhang /* --------------------------------------------------------------- */
18414222ddf1SHong Zhang static PetscErrorCode MatProductNumeric_AtB_SeqAIJ_SeqAIJ(Mat C)
1842df97dc6dSFande Kong {
18434222ddf1SHong Zhang   PetscErrorCode ierr;
18444222ddf1SHong Zhang   Mat_Product    *product = C->product;
18454222ddf1SHong Zhang   Mat            A=product->A,B=product->B;
18464222ddf1SHong Zhang 
1847df97dc6dSFande Kong   PetscFunctionBegin;
18484222ddf1SHong Zhang   if (C->ops->mattransposemultnumeric) {
18494222ddf1SHong Zhang     /* Alg: "outerproduct" */
18504222ddf1SHong Zhang     ierr = (C->ops->mattransposemultnumeric)(A,B,C);CHKERRQ(ierr);
18514222ddf1SHong Zhang   } else {
18524222ddf1SHong Zhang     /* Alg: "matmatmult" -- C = At*B */
18534222ddf1SHong Zhang     Mat_SeqAIJ          *c = (Mat_SeqAIJ*)C->data;
18544222ddf1SHong Zhang     Mat_MatTransMatMult *atb = c->atb;
18554222ddf1SHong Zhang     Mat                 At = atb->At;
18564222ddf1SHong Zhang 
18574222ddf1SHong Zhang     if (atb->updateAt) { /* At is computed in MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ() */
18584222ddf1SHong Zhang       ierr = MatTranspose_SeqAIJ(A,MAT_REUSE_MATRIX,&At);CHKERRQ(ierr);
18594222ddf1SHong Zhang     }
18604222ddf1SHong Zhang     ierr = MatMatMultNumeric_SeqAIJ_SeqAIJ(At,B,C);CHKERRQ(ierr);
18614222ddf1SHong Zhang     atb->updateAt = PETSC_TRUE;
18624222ddf1SHong Zhang   }
1863df97dc6dSFande Kong   PetscFunctionReturn(0);
1864df97dc6dSFande Kong }
1865df97dc6dSFande Kong 
18664222ddf1SHong Zhang static PetscErrorCode MatProductSymbolic_AtB_SeqAIJ_SeqAIJ(Mat C)
1867d013fc79Sandi selinger {
1868d013fc79Sandi selinger   PetscErrorCode ierr;
18694222ddf1SHong Zhang   Mat_Product    *product = C->product;
18704222ddf1SHong Zhang   Mat            A=product->A,B=product->B;
18714222ddf1SHong Zhang   PetscReal      fill=product->fill;
1872d013fc79Sandi selinger 
1873d013fc79Sandi selinger   PetscFunctionBegin;
18744222ddf1SHong Zhang   ierr = MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr);
18752869b61bSandi selinger 
18764222ddf1SHong Zhang   C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJ_SeqAIJ;
18774222ddf1SHong Zhang   PetscFunctionReturn(0);
18782869b61bSandi selinger }
1879d013fc79Sandi selinger 
18804222ddf1SHong Zhang /* --------------------------------------------------------------- */
18814222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqAIJ_AB(Mat C)
18824222ddf1SHong Zhang {
18834222ddf1SHong Zhang   PetscErrorCode ierr;
18844222ddf1SHong Zhang   Mat_Product    *product = C->product;
18854222ddf1SHong Zhang   PetscInt       alg = 0; /* default algorithm */
18864222ddf1SHong Zhang   PetscBool      flg = PETSC_FALSE;
18874222ddf1SHong Zhang #if !defined(PETSC_HAVE_HYPRE)
18884222ddf1SHong Zhang   const char     *algTypes[7] = {"sorted","scalable","scalable_fast","heap","btheap","llcondensed","rowmerge"};
18894222ddf1SHong Zhang   PetscInt       nalg = 7;
18904222ddf1SHong Zhang #else
18914222ddf1SHong Zhang   const char     *algTypes[8] = {"sorted","scalable","scalable_fast","heap","btheap","llcondensed","rowmerge","hypre"};
18924222ddf1SHong Zhang   PetscInt       nalg = 8;
18934222ddf1SHong Zhang #endif
18944222ddf1SHong Zhang 
18954222ddf1SHong Zhang   PetscFunctionBegin;
18964222ddf1SHong Zhang   /* Set default algorithm */
18974222ddf1SHong Zhang   ierr = PetscStrcmp(C->product->alg,"default",&flg);CHKERRQ(ierr);
18984222ddf1SHong Zhang   if (flg) {
18994222ddf1SHong Zhang     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
1900d013fc79Sandi selinger   }
1901d013fc79Sandi selinger 
19024222ddf1SHong Zhang   /* Get runtime option */
19034222ddf1SHong Zhang   if (product->api_user) {
19044222ddf1SHong Zhang     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");CHKERRQ(ierr);
19054222ddf1SHong Zhang     ierr = PetscOptionsEList("-matmatmult_via","Algorithmic approach","MatMatMult",algTypes,nalg,algTypes[0],&alg,&flg);CHKERRQ(ierr);
19064222ddf1SHong Zhang     ierr = PetscOptionsEnd();CHKERRQ(ierr);
19074222ddf1SHong Zhang   } else {
19084222ddf1SHong Zhang     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr);
19094222ddf1SHong Zhang     ierr = PetscOptionsEList("-matproduct_ab_via","Algorithmic approach","MatProduct_AB",algTypes,nalg,algTypes[0],&alg,&flg);CHKERRQ(ierr);
19104222ddf1SHong Zhang     ierr = PetscOptionsEnd();CHKERRQ(ierr);
1911d013fc79Sandi selinger   }
19124222ddf1SHong Zhang   if (flg) {
19134222ddf1SHong Zhang     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
1914d013fc79Sandi selinger   }
1915d013fc79Sandi selinger 
19164222ddf1SHong Zhang   C->ops->productsymbolic = MatProductSymbolic_AB;
19174222ddf1SHong Zhang   C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
19184222ddf1SHong Zhang   PetscFunctionReturn(0);
19194222ddf1SHong Zhang }
1920d013fc79Sandi selinger 
19214222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqAIJ_AtB(Mat C)
19224222ddf1SHong Zhang {
19234222ddf1SHong Zhang   PetscErrorCode ierr;
19244222ddf1SHong Zhang   Mat_Product    *product = C->product;
19254222ddf1SHong Zhang   PetscInt       alg = 0; /* default algorithm */
19264222ddf1SHong Zhang   PetscBool      flg = PETSC_FALSE;
19274222ddf1SHong Zhang   const char     *algTypes[2] = {"at*b","outerproduct"};
19284222ddf1SHong Zhang   PetscInt       nalg = 2;
1929d013fc79Sandi selinger 
19304222ddf1SHong Zhang   PetscFunctionBegin;
19314222ddf1SHong Zhang   /* Set default algorithm */
19324222ddf1SHong Zhang   ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr);
19334222ddf1SHong Zhang   if (flg) {
19344222ddf1SHong Zhang     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
19354222ddf1SHong Zhang   }
1936d013fc79Sandi selinger 
19374222ddf1SHong Zhang   /* Get runtime option */
19384222ddf1SHong Zhang   if (product->api_user) {
19394222ddf1SHong Zhang     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatTransposeMatMult","Mat");CHKERRQ(ierr);
19404222ddf1SHong Zhang     ierr = PetscOptionsEList("-mattransposematmult_via","Algorithmic approach","MatTransposeMatMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
19414222ddf1SHong Zhang     ierr = PetscOptionsEnd();CHKERRQ(ierr);
19424222ddf1SHong Zhang   } else {
19434222ddf1SHong Zhang     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AtB","Mat");CHKERRQ(ierr);
19444222ddf1SHong Zhang     ierr = PetscOptionsEList("-matproduct_atb_via","Algorithmic approach","MatProduct_AtB",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
19454222ddf1SHong Zhang     ierr = PetscOptionsEnd();CHKERRQ(ierr);
19464222ddf1SHong Zhang   }
19474222ddf1SHong Zhang   if (flg) {
19484222ddf1SHong Zhang     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
19494222ddf1SHong Zhang   }
1950d013fc79Sandi selinger 
19514222ddf1SHong Zhang   C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJ_SeqAIJ;
19524222ddf1SHong Zhang   PetscFunctionReturn(0);
19534222ddf1SHong Zhang }
19544222ddf1SHong Zhang 
19554222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqAIJ_ABt(Mat C)
19564222ddf1SHong Zhang {
19574222ddf1SHong Zhang   PetscErrorCode ierr;
19584222ddf1SHong Zhang   Mat_Product    *product = C->product;
19594222ddf1SHong Zhang   PetscInt       alg = 0; /* default algorithm */
19604222ddf1SHong Zhang   PetscBool      flg = PETSC_FALSE;
19614222ddf1SHong Zhang   const char     *algTypes[2] = {"default","color"};
19624222ddf1SHong Zhang   PetscInt       nalg = 2;
19634222ddf1SHong Zhang 
19644222ddf1SHong Zhang   PetscFunctionBegin;
19654222ddf1SHong Zhang   /* Set default algorithm */
19664222ddf1SHong Zhang   ierr = PetscStrcmp(C->product->alg,"default",&flg);CHKERRQ(ierr);
19674222ddf1SHong Zhang   if (!flg) {
19684222ddf1SHong Zhang     alg = 1;
19694222ddf1SHong Zhang     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
19704222ddf1SHong Zhang   }
19714222ddf1SHong Zhang 
19724222ddf1SHong Zhang   /* Get runtime option */
19734222ddf1SHong Zhang   if (product->api_user) {
19744222ddf1SHong Zhang     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
19754222ddf1SHong Zhang     ierr = PetscOptionsEList("-matmattransmult_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
19764222ddf1SHong Zhang     ierr = PetscOptionsEnd();CHKERRQ(ierr);
19774222ddf1SHong Zhang   } else {
19784222ddf1SHong Zhang     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr);
19794222ddf1SHong Zhang     ierr = PetscOptionsEList("-matproduct_abt_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
19804222ddf1SHong Zhang     ierr = PetscOptionsEnd();CHKERRQ(ierr);
19814222ddf1SHong Zhang   }
19824222ddf1SHong Zhang   if (flg) {
19834222ddf1SHong Zhang     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
19844222ddf1SHong Zhang   }
19854222ddf1SHong Zhang 
19864222ddf1SHong Zhang   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
19874222ddf1SHong Zhang   C->ops->productsymbolic          = MatProductSymbolic_ABt;
19884222ddf1SHong Zhang   PetscFunctionReturn(0);
19894222ddf1SHong Zhang }
19904222ddf1SHong Zhang 
19914222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqAIJ_PtAP(Mat C)
19924222ddf1SHong Zhang {
19934222ddf1SHong Zhang   PetscErrorCode ierr;
19944222ddf1SHong Zhang   Mat_Product    *product = C->product;
19954222ddf1SHong Zhang   PetscBool      flg = PETSC_FALSE;
19964222ddf1SHong Zhang   PetscInt       alg = 0; /* default algorithm -- alg=1 should be default!!! */
19974222ddf1SHong Zhang #if !defined(PETSC_HAVE_HYPRE)
19984222ddf1SHong Zhang   const char      *algTypes[2] = {"scalable","rap"};
19994222ddf1SHong Zhang   PetscInt        nalg = 2;
20004222ddf1SHong Zhang #else
20014222ddf1SHong Zhang   const char      *algTypes[3] = {"scalable","rap","hypre"};
20024222ddf1SHong Zhang   PetscInt        nalg = 3;
20034222ddf1SHong Zhang #endif
20044222ddf1SHong Zhang 
20054222ddf1SHong Zhang   PetscFunctionBegin;
20064222ddf1SHong Zhang   /* Set default algorithm */
20074222ddf1SHong Zhang   ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr);
20084222ddf1SHong Zhang   if (flg) {
20094222ddf1SHong Zhang     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
20104222ddf1SHong Zhang   }
20114222ddf1SHong Zhang 
20124222ddf1SHong Zhang   /* Get runtime option */
20134222ddf1SHong Zhang   if (product->api_user) {
20144222ddf1SHong Zhang     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");CHKERRQ(ierr);
20154222ddf1SHong Zhang     ierr = PetscOptionsEList("-matptap_via","Algorithmic approach","MatPtAP",algTypes,nalg,algTypes[0],&alg,&flg);CHKERRQ(ierr);
20164222ddf1SHong Zhang     ierr = PetscOptionsEnd();CHKERRQ(ierr);
20174222ddf1SHong Zhang   } else {
20184222ddf1SHong Zhang     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
20194222ddf1SHong Zhang     ierr = PetscOptionsEList("-matproduct_ptap_via","Algorithmic approach","MatProduct_PtAP",algTypes,nalg,algTypes[0],&alg,&flg);CHKERRQ(ierr);
20204222ddf1SHong Zhang     ierr = PetscOptionsEnd();CHKERRQ(ierr);
20214222ddf1SHong Zhang   }
20224222ddf1SHong Zhang   if (flg) {
20234222ddf1SHong Zhang     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
20244222ddf1SHong Zhang   }
20254222ddf1SHong Zhang 
20264222ddf1SHong Zhang   C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqAIJ;
20274222ddf1SHong Zhang   PetscFunctionReturn(0);
20284222ddf1SHong Zhang }
20294222ddf1SHong Zhang 
20304222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqAIJ_RARt(Mat C)
20314222ddf1SHong Zhang {
20324222ddf1SHong Zhang   PetscErrorCode ierr;
20334222ddf1SHong Zhang   Mat_Product    *product = C->product;
20344222ddf1SHong Zhang   PetscBool      flg = PETSC_FALSE;
20354222ddf1SHong Zhang   PetscInt       alg = 0; /* default algorithm */
20364222ddf1SHong Zhang   const char     *algTypes[3] = {"r*a*rt","r*art","coloring_rart"};
20374222ddf1SHong Zhang   PetscInt        nalg = 3;
20384222ddf1SHong Zhang 
20394222ddf1SHong Zhang   PetscFunctionBegin;
20404222ddf1SHong Zhang   /* Set default algorithm */
20414222ddf1SHong Zhang   ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr);
20424222ddf1SHong Zhang   if (flg) {
20434222ddf1SHong Zhang     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
20444222ddf1SHong Zhang   }
20454222ddf1SHong Zhang 
20464222ddf1SHong Zhang   /* Get runtime option */
20474222ddf1SHong Zhang   if (product->api_user) {
20484222ddf1SHong Zhang     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatRARt","Mat");CHKERRQ(ierr);
20494222ddf1SHong Zhang     ierr = PetscOptionsEList("-matrart_via","Algorithmic approach","MatRARt",algTypes,nalg,algTypes[0],&alg,&flg);CHKERRQ(ierr);
20504222ddf1SHong Zhang     ierr = PetscOptionsEnd();CHKERRQ(ierr);
20514222ddf1SHong Zhang   } else {
20524222ddf1SHong Zhang     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_RARt","Mat");CHKERRQ(ierr);
20534222ddf1SHong Zhang     ierr = PetscOptionsEList("-matproduct_rart_via","Algorithmic approach","MatProduct_RARt",algTypes,nalg,algTypes[0],&alg,&flg);CHKERRQ(ierr);
20544222ddf1SHong Zhang     ierr = PetscOptionsEnd();CHKERRQ(ierr);
20554222ddf1SHong Zhang   }
20564222ddf1SHong Zhang   if (flg) {
20574222ddf1SHong Zhang     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
20584222ddf1SHong Zhang   }
20594222ddf1SHong Zhang 
20604222ddf1SHong Zhang   C->ops->productsymbolic = MatProductSymbolic_RARt_SeqAIJ_SeqAIJ;
20614222ddf1SHong Zhang   PetscFunctionReturn(0);
20624222ddf1SHong Zhang }
20634222ddf1SHong Zhang 
20644222ddf1SHong Zhang /* ABC = A*B*C = A*(B*C); ABC's algorithm must be chosen from AB's algorithm */
20654222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqAIJ_ABC(Mat C)
20664222ddf1SHong Zhang {
20674222ddf1SHong Zhang   PetscErrorCode ierr;
20684222ddf1SHong Zhang   Mat_Product    *product = C->product;
20694222ddf1SHong Zhang   PetscInt       alg = 0; /* default algorithm */
20704222ddf1SHong Zhang   PetscBool      flg = PETSC_FALSE;
20714222ddf1SHong Zhang   const char     *algTypes[7] = {"sorted","scalable","scalable_fast","heap","btheap","llcondensed","rowmerge"};
20724222ddf1SHong Zhang   PetscInt       nalg = 7;
20734222ddf1SHong Zhang 
20744222ddf1SHong Zhang   PetscFunctionBegin;
20754222ddf1SHong Zhang   /* Set default algorithm */
20764222ddf1SHong Zhang   ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr);
20774222ddf1SHong Zhang   if (flg) {
20784222ddf1SHong Zhang     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
20794222ddf1SHong Zhang   }
20804222ddf1SHong Zhang 
20814222ddf1SHong Zhang   /* Get runtime option */
20824222ddf1SHong Zhang   if (product->api_user) {
20834222ddf1SHong Zhang     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMatMult","Mat");CHKERRQ(ierr);
20844222ddf1SHong Zhang     ierr = PetscOptionsEList("-matmatmatmult_via","Algorithmic approach","MatMatMatMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
20854222ddf1SHong Zhang     ierr = PetscOptionsEnd();CHKERRQ(ierr);
20864222ddf1SHong Zhang   } else {
20874222ddf1SHong Zhang     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABC","Mat");CHKERRQ(ierr);
20884222ddf1SHong Zhang     ierr = PetscOptionsEList("-matproduct_abc_via","Algorithmic approach","MatProduct_ABC",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
20894222ddf1SHong Zhang     ierr = PetscOptionsEnd();CHKERRQ(ierr);
20904222ddf1SHong Zhang   }
20914222ddf1SHong Zhang   if (flg) {
20924222ddf1SHong Zhang     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
20934222ddf1SHong Zhang   }
20944222ddf1SHong Zhang 
20954222ddf1SHong Zhang   C->ops->matmatmultsymbolic = MatMatMatMultSymbolic_SeqAIJ_SeqAIJ_SeqAIJ;
20964222ddf1SHong Zhang   C->ops->productsymbolic    = MatProductSymbolic_ABC;
20974222ddf1SHong Zhang   PetscFunctionReturn(0);
20984222ddf1SHong Zhang }
20994222ddf1SHong Zhang 
21004222ddf1SHong Zhang PetscErrorCode MatProductSetFromOptions_SeqAIJ(Mat C)
21014222ddf1SHong Zhang {
21024222ddf1SHong Zhang   PetscErrorCode ierr;
21034222ddf1SHong Zhang   Mat_Product    *product = C->product;
21044222ddf1SHong Zhang 
21054222ddf1SHong Zhang   PetscFunctionBegin;
21064222ddf1SHong Zhang   switch (product->type) {
21074222ddf1SHong Zhang   case MATPRODUCT_AB:
21084222ddf1SHong Zhang     ierr = MatProductSetFromOptions_SeqAIJ_AB(C);CHKERRQ(ierr);
21094222ddf1SHong Zhang     break;
21104222ddf1SHong Zhang   case MATPRODUCT_AtB:
21114222ddf1SHong Zhang     ierr = MatProductSetFromOptions_SeqAIJ_AtB(C);CHKERRQ(ierr);
21124222ddf1SHong Zhang     break;
21134222ddf1SHong Zhang   case MATPRODUCT_ABt:
21144222ddf1SHong Zhang     ierr = MatProductSetFromOptions_SeqAIJ_ABt(C);CHKERRQ(ierr);
21154222ddf1SHong Zhang     break;
21164222ddf1SHong Zhang   case MATPRODUCT_PtAP:
21174222ddf1SHong Zhang     ierr = MatProductSetFromOptions_SeqAIJ_PtAP(C);CHKERRQ(ierr);
21184222ddf1SHong Zhang     break;
21194222ddf1SHong Zhang   case MATPRODUCT_RARt:
21204222ddf1SHong Zhang     ierr = MatProductSetFromOptions_SeqAIJ_RARt(C);CHKERRQ(ierr);
21214222ddf1SHong Zhang     break;
21224222ddf1SHong Zhang   case MATPRODUCT_ABC:
21234222ddf1SHong Zhang     ierr = MatProductSetFromOptions_SeqAIJ_ABC(C);CHKERRQ(ierr);
21244222ddf1SHong Zhang     break;
2125544a5e07SHong Zhang   default: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatProduct type %s is not supported for SeqAIJ and SeqAIJ matrices",MatProductTypes[product->type]);
21264222ddf1SHong Zhang   }
2127d013fc79Sandi selinger   PetscFunctionReturn(0);
2128d013fc79Sandi selinger }
2129