xref: /petsc/src/mat/impls/aij/seq/matmatmult.c (revision 544a5e07e08964aab8a0b04553d4936e9399006f)
1be1d678aSKris Buschelman 
2d50806bdSBarry Smith /*
32499ec78SHong Zhang   Defines matrix-matrix product routines for pairs of SeqAIJ matrices
4d50806bdSBarry Smith           C = A * B
5d50806bdSBarry Smith */
6d50806bdSBarry Smith 
7c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/
8c6db04a5SJed Brown #include <../src/mat/utils/freespace.h>
9c6db04a5SJed Brown #include <petscbt.h>
10af0996ceSBarry Smith #include <petsc/private/isimpl.h>
1107475bc1SBarry Smith #include <../src/mat/impls/dense/seq/dense.h>
127bab7c10SHong Zhang 
13df97dc6dSFande Kong PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
14df97dc6dSFande Kong {
15df97dc6dSFande Kong   PetscErrorCode ierr;
16df97dc6dSFande Kong 
17df97dc6dSFande Kong   PetscFunctionBegin;
18df97dc6dSFande Kong   if (C->ops->matmultnumeric) {
19df97dc6dSFande Kong     ierr = (*C->ops->matmultnumeric)(A,B,C);CHKERRQ(ierr);
20df97dc6dSFande Kong   } else {
21df97dc6dSFande Kong     ierr = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted(A,B,C);CHKERRQ(ierr);
22df97dc6dSFande Kong   }
23df97dc6dSFande Kong   PetscFunctionReturn(0);
24df97dc6dSFande Kong }
25df97dc6dSFande Kong 
264222ddf1SHong Zhang /* Modified from MatCreateSeqAIJWithArrays() */
274222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatSetSeqAIJWithArrays_private(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat mat)
28df97dc6dSFande Kong {
29df97dc6dSFande Kong   PetscErrorCode ierr;
304222ddf1SHong Zhang   PetscInt       ii;
314222ddf1SHong Zhang   Mat_SeqAIJ     *aij;
325c66b693SKris Buschelman 
335c66b693SKris Buschelman   PetscFunctionBegin;
344222ddf1SHong Zhang   if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
354222ddf1SHong Zhang   ierr = MatSetSizes(mat,m,n,m,n);CHKERRQ(ierr);
364222ddf1SHong Zhang 
374222ddf1SHong Zhang   ierr = MatSetType(mat,MATSEQAIJ);CHKERRQ(ierr);
384222ddf1SHong Zhang   ierr = MatSeqAIJSetPreallocation_SeqAIJ(mat,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
394222ddf1SHong Zhang   aij  = (Mat_SeqAIJ*)(mat)->data;
404222ddf1SHong Zhang   ierr = PetscMalloc1(m,&aij->imax);CHKERRQ(ierr);
414222ddf1SHong Zhang   ierr = PetscMalloc1(m,&aij->ilen);CHKERRQ(ierr);
424222ddf1SHong Zhang 
434222ddf1SHong Zhang   aij->i            = i;
444222ddf1SHong Zhang   aij->j            = j;
454222ddf1SHong Zhang   aij->a            = a;
464222ddf1SHong Zhang   aij->singlemalloc = PETSC_FALSE;
474222ddf1SHong Zhang   aij->nonew        = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
484222ddf1SHong Zhang   aij->free_a       = PETSC_FALSE;
494222ddf1SHong Zhang   aij->free_ij      = PETSC_FALSE;
504222ddf1SHong Zhang 
514222ddf1SHong Zhang   for (ii=0; ii<m; ii++) {
524222ddf1SHong Zhang     aij->ilen[ii] = aij->imax[ii] = i[ii+1] - i[ii];
5325023028SHong Zhang   }
544222ddf1SHong Zhang 
554222ddf1SHong Zhang   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
564222ddf1SHong Zhang   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
575c66b693SKris Buschelman   PetscFunctionReturn(0);
585c66b693SKris Buschelman }
591c24bd37SHong Zhang 
604222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat C)
614222ddf1SHong Zhang {
624222ddf1SHong Zhang   PetscErrorCode      ierr;
634222ddf1SHong Zhang   Mat_Product         *product = C->product;
644222ddf1SHong Zhang   MatProductAlgorithm alg;
654222ddf1SHong Zhang   PetscBool           flg;
664222ddf1SHong Zhang 
674222ddf1SHong Zhang   PetscFunctionBegin;
684222ddf1SHong Zhang   if (product) {
694222ddf1SHong Zhang     alg = product->alg;
704222ddf1SHong Zhang   } else {
714222ddf1SHong Zhang     alg = "sorted";
724222ddf1SHong Zhang   }
734222ddf1SHong Zhang 
744222ddf1SHong Zhang   /* sorted */
754222ddf1SHong Zhang   ierr = PetscStrcmp(alg,"sorted",&flg);CHKERRQ(ierr);
764222ddf1SHong Zhang   if (flg) {
774222ddf1SHong Zhang     ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_Sorted(A,B,fill,C);CHKERRQ(ierr);
784222ddf1SHong Zhang     PetscFunctionReturn(0);
794222ddf1SHong Zhang   }
804222ddf1SHong Zhang 
814222ddf1SHong Zhang   /* scalable */
824222ddf1SHong Zhang   ierr = PetscStrcmp(alg,"scalable",&flg);CHKERRQ(ierr);
834222ddf1SHong Zhang   if (flg) {
844222ddf1SHong Zhang     ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(A,B,fill,C);CHKERRQ(ierr);
854222ddf1SHong Zhang     PetscFunctionReturn(0);
864222ddf1SHong Zhang   }
874222ddf1SHong Zhang 
884222ddf1SHong Zhang   /* scalable_fast */
894222ddf1SHong Zhang   ierr = PetscStrcmp(alg,"scalable_fast",&flg);CHKERRQ(ierr);
904222ddf1SHong Zhang   if (flg) {
914222ddf1SHong Zhang     ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_fast(A,B,fill,C);CHKERRQ(ierr);
924222ddf1SHong Zhang     PetscFunctionReturn(0);
934222ddf1SHong Zhang   }
944222ddf1SHong Zhang 
954222ddf1SHong Zhang   /* heap */
964222ddf1SHong Zhang   ierr = PetscStrcmp(alg,"heap",&flg);CHKERRQ(ierr);
974222ddf1SHong Zhang   if (flg) {
984222ddf1SHong Zhang     ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_Heap(A,B,fill,C);CHKERRQ(ierr);
994222ddf1SHong Zhang     PetscFunctionReturn(0);
1004222ddf1SHong Zhang   }
1014222ddf1SHong Zhang 
1024222ddf1SHong Zhang   /* btheap */
1034222ddf1SHong Zhang   ierr = PetscStrcmp(alg,"btheap",&flg);CHKERRQ(ierr);
1044222ddf1SHong Zhang   if (flg) {
1054222ddf1SHong Zhang     ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_BTHeap(A,B,fill,C);CHKERRQ(ierr);
1064222ddf1SHong Zhang     PetscFunctionReturn(0);
1074222ddf1SHong Zhang   }
1084222ddf1SHong Zhang 
1094222ddf1SHong Zhang   /* llcondensed */
1104222ddf1SHong Zhang   ierr = PetscStrcmp(alg,"llcondensed",&flg);CHKERRQ(ierr);
1114222ddf1SHong Zhang   if (flg) {
1124222ddf1SHong Zhang     ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_LLCondensed(A,B,fill,C);CHKERRQ(ierr);
1134222ddf1SHong Zhang     PetscFunctionReturn(0);
1144222ddf1SHong Zhang   }
1154222ddf1SHong Zhang 
1164222ddf1SHong Zhang   /* rowmerge */
1174222ddf1SHong Zhang   ierr = PetscStrcmp(alg,"rowmerge",&flg);CHKERRQ(ierr);
1184222ddf1SHong Zhang   if (flg) {
1194222ddf1SHong Zhang     ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_RowMerge(A,B,fill,C);CHKERRQ(ierr);
1204222ddf1SHong Zhang     PetscFunctionReturn(0);
1214222ddf1SHong Zhang   }
1224222ddf1SHong Zhang 
1234222ddf1SHong Zhang #if defined(PETSC_HAVE_HYPRE)
1244222ddf1SHong Zhang   ierr = PetscStrcmp(alg,"hypre",&flg);CHKERRQ(ierr);
1254222ddf1SHong Zhang   if (flg) {
1264222ddf1SHong Zhang     ierr = MatMatMultSymbolic_AIJ_AIJ_wHYPRE(A,B,fill,C);CHKERRQ(ierr);
1274222ddf1SHong Zhang     PetscFunctionReturn(0);
1284222ddf1SHong Zhang   }
1294222ddf1SHong Zhang #endif
1304222ddf1SHong Zhang 
1314222ddf1SHong Zhang   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product Algorithm is not supported");
1324222ddf1SHong Zhang   PetscFunctionReturn(0);
1334222ddf1SHong Zhang }
1344222ddf1SHong Zhang 
1354222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_LLCondensed(Mat A,Mat B,PetscReal fill,Mat C)
136b561aa9dSHong Zhang {
137b561aa9dSHong Zhang   PetscErrorCode     ierr;
138b561aa9dSHong Zhang   Mat_SeqAIJ         *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
139971236abSHong Zhang   PetscInt           *ai=a->i,*bi=b->i,*ci,*cj;
140c1ba5575SJed Brown   PetscInt           am =A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
141b561aa9dSHong Zhang   PetscReal          afill;
142eca6b86aSHong Zhang   PetscInt           i,j,anzi,brow,bnzj,cnzi,*bj,*aj,*lnk,ndouble=0,Crmax;
143eca6b86aSHong Zhang   PetscTable         ta;
144fb908031SHong Zhang   PetscBT            lnkbt;
1450298fd71SBarry Smith   PetscFreeSpaceList free_space=NULL,current_space=NULL;
146b561aa9dSHong Zhang 
147b561aa9dSHong Zhang   PetscFunctionBegin;
148bd958071SHong Zhang   /* Get ci and cj */
149bd958071SHong Zhang   /*---------------*/
150fb908031SHong Zhang   /* Allocate ci array, arrays for fill computation and */
151fb908031SHong Zhang   /* free space for accumulating nonzero column info */
152854ce69bSBarry Smith   ierr  = PetscMalloc1(am+2,&ci);CHKERRQ(ierr);
153fb908031SHong Zhang   ci[0] = 0;
154fb908031SHong Zhang 
155fb908031SHong Zhang   /* create and initialize a linked list */
156c373ccc6SHong Zhang   ierr = PetscTableCreate(bn,bn,&ta);CHKERRQ(ierr);
157c373ccc6SHong Zhang   MatRowMergeMax_SeqAIJ(b,bm,ta);
158eca6b86aSHong Zhang   ierr = PetscTableGetCount(ta,&Crmax);CHKERRQ(ierr);
159eca6b86aSHong Zhang   ierr = PetscTableDestroy(&ta);CHKERRQ(ierr);
160eca6b86aSHong Zhang 
161eca6b86aSHong Zhang   ierr = PetscLLCondensedCreate(Crmax,bn,&lnk,&lnkbt);CHKERRQ(ierr);
162fb908031SHong Zhang 
163fb908031SHong Zhang   /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
164f91af8c7SBarry Smith   ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space);CHKERRQ(ierr);
1652205254eSKarl Rupp 
166fb908031SHong Zhang   current_space = free_space;
167fb908031SHong Zhang 
168fb908031SHong Zhang   /* Determine ci and cj */
169fb908031SHong Zhang   for (i=0; i<am; i++) {
170fb908031SHong Zhang     anzi = ai[i+1] - ai[i];
171fb908031SHong Zhang     aj   = a->j + ai[i];
172fb908031SHong Zhang     for (j=0; j<anzi; j++) {
173fb908031SHong Zhang       brow = aj[j];
174fb908031SHong Zhang       bnzj = bi[brow+1] - bi[brow];
175fb908031SHong Zhang       bj   = b->j + bi[brow];
176fb908031SHong Zhang       /* add non-zero cols of B into the sorted linked list lnk */
177fb908031SHong Zhang       ierr = PetscLLCondensedAddSorted(bnzj,bj,lnk,lnkbt);CHKERRQ(ierr);
178fb908031SHong Zhang     }
179fb908031SHong Zhang     cnzi = lnk[0];
180fb908031SHong Zhang 
181fb908031SHong Zhang     /* If free space is not available, make more free space */
182fb908031SHong Zhang     /* Double the amount of total space in the list */
183fb908031SHong Zhang     if (current_space->local_remaining<cnzi) {
184f91af8c7SBarry Smith       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
185fb908031SHong Zhang       ndouble++;
186fb908031SHong Zhang     }
187fb908031SHong Zhang 
188fb908031SHong Zhang     /* Copy data into free space, then initialize lnk */
189fb908031SHong Zhang     ierr = PetscLLCondensedClean(bn,cnzi,current_space->array,lnk,lnkbt);CHKERRQ(ierr);
1902205254eSKarl Rupp 
191fb908031SHong Zhang     current_space->array           += cnzi;
192fb908031SHong Zhang     current_space->local_used      += cnzi;
193fb908031SHong Zhang     current_space->local_remaining -= cnzi;
1942205254eSKarl Rupp 
195fb908031SHong Zhang     ci[i+1] = ci[i] + cnzi;
196fb908031SHong Zhang   }
197fb908031SHong Zhang 
198fb908031SHong Zhang   /* Column indices are in the list of free space */
199fb908031SHong Zhang   /* Allocate space for cj, initialize cj, and */
200fb908031SHong Zhang   /* destroy list of free space and other temporary array(s) */
201854ce69bSBarry Smith   ierr = PetscMalloc1(ci[am]+1,&cj);CHKERRQ(ierr);
202fb908031SHong Zhang   ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
203fb908031SHong Zhang   ierr = PetscLLCondensedDestroy(lnk,lnkbt);CHKERRQ(ierr);
204b561aa9dSHong Zhang 
20526be0446SHong Zhang   /* put together the new symbolic matrix */
2064222ddf1SHong Zhang   ierr = MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,C);CHKERRQ(ierr);
2074222ddf1SHong Zhang   ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
2084222ddf1SHong Zhang   ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
20958c24d83SHong Zhang 
21058c24d83SHong Zhang   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
21158c24d83SHong Zhang   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
2124222ddf1SHong Zhang   c                         = (Mat_SeqAIJ*)(C->data);
213aa1aec99SHong Zhang   c->free_a                 = PETSC_FALSE;
214e6b907acSBarry Smith   c->free_ij                = PETSC_TRUE;
21558c24d83SHong Zhang   c->nonew                  = 0;
2164222ddf1SHong Zhang 
2174222ddf1SHong Zhang   /* fast, needs non-scalable O(bn) array 'abdense' */
2184222ddf1SHong Zhang   C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted;
2190b7e3e3dSHong Zhang 
2207212da7cSHong Zhang   /* set MatInfo */
2217212da7cSHong Zhang   afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5;
222f2b054eeSHong Zhang   if (afill < 1.0) afill = 1.0;
2237212da7cSHong Zhang   c->maxnz                     = ci[am];
2247212da7cSHong Zhang   c->nz                        = ci[am];
2254222ddf1SHong Zhang   C->info.mallocs           = ndouble;
2264222ddf1SHong Zhang   C->info.fill_ratio_given  = fill;
2274222ddf1SHong Zhang   C->info.fill_ratio_needed = afill;
2287212da7cSHong Zhang 
2297212da7cSHong Zhang #if defined(PETSC_USE_INFO)
2307212da7cSHong Zhang   if (ci[am]) {
2314222ddf1SHong Zhang     ierr = PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);CHKERRQ(ierr);
2324222ddf1SHong Zhang     ierr = PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);CHKERRQ(ierr);
233f2b054eeSHong Zhang   } else {
2344222ddf1SHong Zhang     ierr = PetscInfo(C,"Empty matrix product\n");CHKERRQ(ierr);
235be0fcf8dSHong Zhang   }
236f2b054eeSHong Zhang #endif
23758c24d83SHong Zhang   PetscFunctionReturn(0);
23858c24d83SHong Zhang }
239d50806bdSBarry Smith 
240df97dc6dSFande Kong PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted(Mat A,Mat B,Mat C)
241d50806bdSBarry Smith {
242dfbe8321SBarry Smith   PetscErrorCode ierr;
243d13dce4bSSatish Balay   PetscLogDouble flops=0.0;
244d50806bdSBarry Smith   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)A->data;
245d50806bdSBarry Smith   Mat_SeqAIJ     *b   = (Mat_SeqAIJ*)B->data;
246d50806bdSBarry Smith   Mat_SeqAIJ     *c   = (Mat_SeqAIJ*)C->data;
24738baddfdSBarry Smith   PetscInt       *ai  =a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j;
248c58ca2e3SHong Zhang   PetscInt       am   =A->rmap->n,cm=C->rmap->n;
249519eb980SHong Zhang   PetscInt       i,j,k,anzi,bnzi,cnzi,brow;
250aa1aec99SHong Zhang   PetscScalar    *aa=a->a,*ba=b->a,*baj,*ca,valtmp;
251aa1aec99SHong Zhang   PetscScalar    *ab_dense;
252d50806bdSBarry Smith 
253d50806bdSBarry Smith   PetscFunctionBegin;
254aa1aec99SHong Zhang   if (!c->a) { /* first call of MatMatMultNumeric_SeqAIJ_SeqAIJ, allocate ca and matmult_abdense */
255854ce69bSBarry Smith     ierr      = PetscMalloc1(ci[cm]+1,&ca);CHKERRQ(ierr);
256aa1aec99SHong Zhang     c->a      = ca;
257aa1aec99SHong Zhang     c->free_a = PETSC_TRUE;
258aa1aec99SHong Zhang   } else {
259aa1aec99SHong Zhang     ca        = c->a;
260d90d86d0SMatthew G. Knepley   }
261d90d86d0SMatthew G. Knepley   if (!c->matmult_abdense) {
2621795a4d1SJed Brown     ierr = PetscCalloc1(B->cmap->N,&ab_dense);CHKERRQ(ierr);
263d90d86d0SMatthew G. Knepley     c->matmult_abdense = ab_dense;
264d90d86d0SMatthew G. Knepley   } else {
265aa1aec99SHong Zhang     ab_dense = c->matmult_abdense;
266aa1aec99SHong Zhang   }
267aa1aec99SHong Zhang 
268c124e916SHong Zhang   /* clean old values in C */
269580bdb30SBarry Smith   ierr = PetscArrayzero(ca,ci[cm]);CHKERRQ(ierr);
270d50806bdSBarry Smith   /* Traverse A row-wise. */
271d50806bdSBarry Smith   /* Build the ith row in C by summing over nonzero columns in A, */
272d50806bdSBarry Smith   /* the rows of B corresponding to nonzeros of A. */
273d50806bdSBarry Smith   for (i=0; i<am; i++) {
274d50806bdSBarry Smith     anzi = ai[i+1] - ai[i];
275d50806bdSBarry Smith     for (j=0; j<anzi; j++) {
276519eb980SHong Zhang       brow = aj[j];
277d50806bdSBarry Smith       bnzi = bi[brow+1] - bi[brow];
278d50806bdSBarry Smith       bjj  = bj + bi[brow];
279d50806bdSBarry Smith       baj  = ba + bi[brow];
280519eb980SHong Zhang       /* perform dense axpy */
28136ec6d2dSHong Zhang       valtmp = aa[j];
282519eb980SHong Zhang       for (k=0; k<bnzi; k++) {
28336ec6d2dSHong Zhang         ab_dense[bjj[k]] += valtmp*baj[k];
284519eb980SHong Zhang       }
285519eb980SHong Zhang       flops += 2*bnzi;
286519eb980SHong Zhang     }
287c58ca2e3SHong Zhang     aj += anzi; aa += anzi;
288c58ca2e3SHong Zhang 
289c58ca2e3SHong Zhang     cnzi = ci[i+1] - ci[i];
290519eb980SHong Zhang     for (k=0; k<cnzi; k++) {
291519eb980SHong Zhang       ca[k]          += ab_dense[cj[k]];
292519eb980SHong Zhang       ab_dense[cj[k]] = 0.0; /* zero ab_dense */
293519eb980SHong Zhang     }
294519eb980SHong Zhang     flops += cnzi;
295519eb980SHong Zhang     cj    += cnzi; ca += cnzi;
296519eb980SHong Zhang   }
297c58ca2e3SHong Zhang   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
298c58ca2e3SHong Zhang   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
299c58ca2e3SHong Zhang   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
300c58ca2e3SHong Zhang   PetscFunctionReturn(0);
301c58ca2e3SHong Zhang }
302c58ca2e3SHong Zhang 
30325023028SHong Zhang PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,Mat C)
304c58ca2e3SHong Zhang {
305c58ca2e3SHong Zhang   PetscErrorCode ierr;
306c58ca2e3SHong Zhang   PetscLogDouble flops=0.0;
307c58ca2e3SHong Zhang   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)A->data;
308c58ca2e3SHong Zhang   Mat_SeqAIJ     *b   = (Mat_SeqAIJ*)B->data;
309c58ca2e3SHong Zhang   Mat_SeqAIJ     *c   = (Mat_SeqAIJ*)C->data;
310c58ca2e3SHong Zhang   PetscInt       *ai  = a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j;
311c58ca2e3SHong Zhang   PetscInt       am   = A->rmap->N,cm=C->rmap->N;
312c58ca2e3SHong Zhang   PetscInt       i,j,k,anzi,bnzi,cnzi,brow;
31336ec6d2dSHong Zhang   PetscScalar    *aa=a->a,*ba=b->a,*baj,*ca=c->a,valtmp;
314c58ca2e3SHong Zhang   PetscInt       nextb;
315c58ca2e3SHong Zhang 
316c58ca2e3SHong Zhang   PetscFunctionBegin;
317cf742fccSHong Zhang   if (!ca) { /* first call of MatMatMultNumeric_SeqAIJ_SeqAIJ, allocate ca and matmult_abdense */
318cf742fccSHong Zhang     ierr      = PetscMalloc1(ci[cm]+1,&ca);CHKERRQ(ierr);
319cf742fccSHong Zhang     c->a      = ca;
320cf742fccSHong Zhang     c->free_a = PETSC_TRUE;
321cf742fccSHong Zhang   }
322cf742fccSHong Zhang 
323c58ca2e3SHong Zhang   /* clean old values in C */
324580bdb30SBarry Smith   ierr = PetscArrayzero(ca,ci[cm]);CHKERRQ(ierr);
325c58ca2e3SHong Zhang   /* Traverse A row-wise. */
326c58ca2e3SHong Zhang   /* Build the ith row in C by summing over nonzero columns in A, */
327c58ca2e3SHong Zhang   /* the rows of B corresponding to nonzeros of A. */
328519eb980SHong Zhang   for (i=0; i<am; i++) {
329519eb980SHong Zhang     anzi = ai[i+1] - ai[i];
330519eb980SHong Zhang     cnzi = ci[i+1] - ci[i];
331519eb980SHong Zhang     for (j=0; j<anzi; j++) {
332519eb980SHong Zhang       brow = aj[j];
333519eb980SHong Zhang       bnzi = bi[brow+1] - bi[brow];
334519eb980SHong Zhang       bjj  = bj + bi[brow];
335519eb980SHong Zhang       baj  = ba + bi[brow];
336519eb980SHong Zhang       /* perform sparse axpy */
33736ec6d2dSHong Zhang       valtmp = aa[j];
338c124e916SHong Zhang       nextb  = 0;
339c124e916SHong Zhang       for (k=0; nextb<bnzi; k++) {
340c124e916SHong Zhang         if (cj[k] == bjj[nextb]) { /* ccol == bcol */
34136ec6d2dSHong Zhang           ca[k] += valtmp*baj[nextb++];
342c124e916SHong Zhang         }
343d50806bdSBarry Smith       }
344d50806bdSBarry Smith       flops += 2*bnzi;
345d50806bdSBarry Smith     }
346519eb980SHong Zhang     aj += anzi; aa += anzi;
347519eb980SHong Zhang     cj += cnzi; ca += cnzi;
348d50806bdSBarry Smith   }
349c58ca2e3SHong Zhang 
350716bacf3SKris Buschelman   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
351716bacf3SKris Buschelman   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
352d50806bdSBarry Smith   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
353d50806bdSBarry Smith   PetscFunctionReturn(0);
354d50806bdSBarry Smith }
355bc011b1eSHong Zhang 
3564222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_fast(Mat A,Mat B,PetscReal fill,Mat C)
35725296bd5SBarry Smith {
35825296bd5SBarry Smith   PetscErrorCode     ierr;
35925296bd5SBarry Smith   Mat_SeqAIJ         *a  = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
36025296bd5SBarry Smith   PetscInt           *ai = a->i,*bi=b->i,*ci,*cj;
3613c50cad2SHong Zhang   PetscInt           am  = A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
36225296bd5SBarry Smith   MatScalar          *ca;
36325296bd5SBarry Smith   PetscReal          afill;
364eca6b86aSHong Zhang   PetscInt           i,j,anzi,brow,bnzj,cnzi,*bj,*aj,*lnk,ndouble=0,Crmax;
365eca6b86aSHong Zhang   PetscTable         ta;
3660298fd71SBarry Smith   PetscFreeSpaceList free_space=NULL,current_space=NULL;
36725296bd5SBarry Smith 
36825296bd5SBarry Smith   PetscFunctionBegin;
3693c50cad2SHong Zhang   /* Get ci and cj - same as MatMatMultSymbolic_SeqAIJ_SeqAIJ except using PetscLLxxx_fast() */
3703c50cad2SHong Zhang   /*-----------------------------------------------------------------------------------------*/
3713c50cad2SHong Zhang   /* Allocate arrays for fill computation and free space for accumulating nonzero column */
372854ce69bSBarry Smith   ierr  = PetscMalloc1(am+2,&ci);CHKERRQ(ierr);
3733c50cad2SHong Zhang   ci[0] = 0;
3743c50cad2SHong Zhang 
3753c50cad2SHong Zhang   /* create and initialize a linked list */
376c373ccc6SHong Zhang   ierr = PetscTableCreate(bn,bn,&ta);CHKERRQ(ierr);
377c373ccc6SHong Zhang   MatRowMergeMax_SeqAIJ(b,bm,ta);
378eca6b86aSHong Zhang   ierr = PetscTableGetCount(ta,&Crmax);CHKERRQ(ierr);
379eca6b86aSHong Zhang   ierr = PetscTableDestroy(&ta);CHKERRQ(ierr);
380eca6b86aSHong Zhang 
381eca6b86aSHong Zhang   ierr = PetscLLCondensedCreate_fast(Crmax,&lnk);CHKERRQ(ierr);
3823c50cad2SHong Zhang 
3833c50cad2SHong Zhang   /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
384f91af8c7SBarry Smith   ierr          = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space);CHKERRQ(ierr);
3853c50cad2SHong Zhang   current_space = free_space;
3863c50cad2SHong Zhang 
3873c50cad2SHong Zhang   /* Determine ci and cj */
3883c50cad2SHong Zhang   for (i=0; i<am; i++) {
3893c50cad2SHong Zhang     anzi = ai[i+1] - ai[i];
3903c50cad2SHong Zhang     aj   = a->j + ai[i];
3913c50cad2SHong Zhang     for (j=0; j<anzi; j++) {
3923c50cad2SHong Zhang       brow = aj[j];
3933c50cad2SHong Zhang       bnzj = bi[brow+1] - bi[brow];
3943c50cad2SHong Zhang       bj   = b->j + bi[brow];
3953c50cad2SHong Zhang       /* add non-zero cols of B into the sorted linked list lnk */
3963c50cad2SHong Zhang       ierr = PetscLLCondensedAddSorted_fast(bnzj,bj,lnk);CHKERRQ(ierr);
3973c50cad2SHong Zhang     }
3983c50cad2SHong Zhang     cnzi = lnk[1];
3993c50cad2SHong Zhang 
4003c50cad2SHong Zhang     /* If free space is not available, make more free space */
4013c50cad2SHong Zhang     /* Double the amount of total space in the list */
4023c50cad2SHong Zhang     if (current_space->local_remaining<cnzi) {
403f91af8c7SBarry Smith       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4043c50cad2SHong Zhang       ndouble++;
4053c50cad2SHong Zhang     }
4063c50cad2SHong Zhang 
4073c50cad2SHong Zhang     /* Copy data into free space, then initialize lnk */
4083c50cad2SHong Zhang     ierr = PetscLLCondensedClean_fast(cnzi,current_space->array,lnk);CHKERRQ(ierr);
4092205254eSKarl Rupp 
4103c50cad2SHong Zhang     current_space->array           += cnzi;
4113c50cad2SHong Zhang     current_space->local_used      += cnzi;
4123c50cad2SHong Zhang     current_space->local_remaining -= cnzi;
4132205254eSKarl Rupp 
4143c50cad2SHong Zhang     ci[i+1] = ci[i] + cnzi;
4153c50cad2SHong Zhang   }
4163c50cad2SHong Zhang 
4173c50cad2SHong Zhang   /* Column indices are in the list of free space */
4183c50cad2SHong Zhang   /* Allocate space for cj, initialize cj, and */
4193c50cad2SHong Zhang   /* destroy list of free space and other temporary array(s) */
420854ce69bSBarry Smith   ierr = PetscMalloc1(ci[am]+1,&cj);CHKERRQ(ierr);
4213c50cad2SHong Zhang   ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
4223c50cad2SHong Zhang   ierr = PetscLLCondensedDestroy_fast(lnk);CHKERRQ(ierr);
42325296bd5SBarry Smith 
42425296bd5SBarry Smith   /* Allocate space for ca */
425580bdb30SBarry Smith   ierr = PetscCalloc1(ci[am]+1,&ca);CHKERRQ(ierr);
42625296bd5SBarry Smith 
42725296bd5SBarry Smith   /* put together the new symbolic matrix */
4284222ddf1SHong Zhang   ierr = MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,ca,C);CHKERRQ(ierr);
4294222ddf1SHong Zhang   ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
4304222ddf1SHong Zhang   ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
43125296bd5SBarry Smith 
43225296bd5SBarry Smith   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
43325296bd5SBarry Smith   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
4344222ddf1SHong Zhang   c          = (Mat_SeqAIJ*)(C->data);
43525296bd5SBarry Smith   c->free_a  = PETSC_TRUE;
43625296bd5SBarry Smith   c->free_ij = PETSC_TRUE;
43725296bd5SBarry Smith   c->nonew   = 0;
4382205254eSKarl Rupp 
4394222ddf1SHong Zhang   /* slower, less memory */
4404222ddf1SHong Zhang   C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable;
44125296bd5SBarry Smith 
44225296bd5SBarry Smith   /* set MatInfo */
44325296bd5SBarry Smith   afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5;
44425296bd5SBarry Smith   if (afill < 1.0) afill = 1.0;
44525296bd5SBarry Smith   c->maxnz                     = ci[am];
44625296bd5SBarry Smith   c->nz                        = ci[am];
4474222ddf1SHong Zhang   C->info.mallocs           = ndouble;
4484222ddf1SHong Zhang   C->info.fill_ratio_given  = fill;
4494222ddf1SHong Zhang   C->info.fill_ratio_needed = afill;
45025296bd5SBarry Smith 
45125296bd5SBarry Smith #if defined(PETSC_USE_INFO)
45225296bd5SBarry Smith   if (ci[am]) {
4534222ddf1SHong Zhang     ierr = PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);CHKERRQ(ierr);
4544222ddf1SHong Zhang     ierr = PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);CHKERRQ(ierr);
45525296bd5SBarry Smith   } else {
4564222ddf1SHong Zhang     ierr = PetscInfo(C,"Empty matrix product\n");CHKERRQ(ierr);
45725296bd5SBarry Smith   }
45825296bd5SBarry Smith #endif
45925296bd5SBarry Smith   PetscFunctionReturn(0);
46025296bd5SBarry Smith }
46125296bd5SBarry Smith 
4624222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,PetscReal fill,Mat C)
463e9e4536cSHong Zhang {
464e9e4536cSHong Zhang   PetscErrorCode     ierr;
465e9e4536cSHong Zhang   Mat_SeqAIJ         *a  = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
466bf9555e6SHong Zhang   PetscInt           *ai = a->i,*bi=b->i,*ci,*cj;
46725c41797SHong Zhang   PetscInt           am  = A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
468e9e4536cSHong Zhang   MatScalar          *ca;
469e9e4536cSHong Zhang   PetscReal          afill;
470eca6b86aSHong Zhang   PetscInt           i,j,anzi,brow,bnzj,cnzi,*bj,*aj,*lnk,ndouble=0,Crmax;
471eca6b86aSHong Zhang   PetscTable         ta;
4720298fd71SBarry Smith   PetscFreeSpaceList free_space=NULL,current_space=NULL;
473e9e4536cSHong Zhang 
474e9e4536cSHong Zhang   PetscFunctionBegin;
475bd958071SHong Zhang   /* Get ci and cj - same as MatMatMultSymbolic_SeqAIJ_SeqAIJ except using PetscLLxxx_Scalalbe() */
476bd958071SHong Zhang   /*---------------------------------------------------------------------------------------------*/
477bd958071SHong Zhang   /* Allocate arrays for fill computation and free space for accumulating nonzero column */
478854ce69bSBarry Smith   ierr  = PetscMalloc1(am+2,&ci);CHKERRQ(ierr);
479bd958071SHong Zhang   ci[0] = 0;
480bd958071SHong Zhang 
481bd958071SHong Zhang   /* create and initialize a linked list */
482c373ccc6SHong Zhang   ierr = PetscTableCreate(bn,bn,&ta);CHKERRQ(ierr);
483c373ccc6SHong Zhang   MatRowMergeMax_SeqAIJ(b,bm,ta);
484eca6b86aSHong Zhang   ierr = PetscTableGetCount(ta,&Crmax);CHKERRQ(ierr);
485eca6b86aSHong Zhang   ierr = PetscTableDestroy(&ta);CHKERRQ(ierr);
486eca6b86aSHong Zhang   ierr = PetscLLCondensedCreate_Scalable(Crmax,&lnk);CHKERRQ(ierr);
487bd958071SHong Zhang 
488bd958071SHong Zhang   /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
489f91af8c7SBarry Smith   ierr          = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space);CHKERRQ(ierr);
490bd958071SHong Zhang   current_space = free_space;
491bd958071SHong Zhang 
492bd958071SHong Zhang   /* Determine ci and cj */
493bd958071SHong Zhang   for (i=0; i<am; i++) {
494bd958071SHong Zhang     anzi = ai[i+1] - ai[i];
495bd958071SHong Zhang     aj   = a->j + ai[i];
496bd958071SHong Zhang     for (j=0; j<anzi; j++) {
497bd958071SHong Zhang       brow = aj[j];
498bd958071SHong Zhang       bnzj = bi[brow+1] - bi[brow];
499bd958071SHong Zhang       bj   = b->j + bi[brow];
500bd958071SHong Zhang       /* add non-zero cols of B into the sorted linked list lnk */
501bd958071SHong Zhang       ierr = PetscLLCondensedAddSorted_Scalable(bnzj,bj,lnk);CHKERRQ(ierr);
502bd958071SHong Zhang     }
503bd958071SHong Zhang     cnzi = lnk[0];
504bd958071SHong Zhang 
505bd958071SHong Zhang     /* If free space is not available, make more free space */
506bd958071SHong Zhang     /* Double the amount of total space in the list */
507bd958071SHong Zhang     if (current_space->local_remaining<cnzi) {
508f91af8c7SBarry Smith       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
509bd958071SHong Zhang       ndouble++;
510bd958071SHong Zhang     }
511bd958071SHong Zhang 
512bd958071SHong Zhang     /* Copy data into free space, then initialize lnk */
513bd958071SHong Zhang     ierr = PetscLLCondensedClean_Scalable(cnzi,current_space->array,lnk);CHKERRQ(ierr);
5142205254eSKarl Rupp 
515bd958071SHong Zhang     current_space->array           += cnzi;
516bd958071SHong Zhang     current_space->local_used      += cnzi;
517bd958071SHong Zhang     current_space->local_remaining -= cnzi;
5182205254eSKarl Rupp 
519bd958071SHong Zhang     ci[i+1] = ci[i] + cnzi;
520bd958071SHong Zhang   }
521bd958071SHong Zhang 
522bd958071SHong Zhang   /* Column indices are in the list of free space */
523bd958071SHong Zhang   /* Allocate space for cj, initialize cj, and */
524bd958071SHong Zhang   /* destroy list of free space and other temporary array(s) */
525854ce69bSBarry Smith   ierr = PetscMalloc1(ci[am]+1,&cj);CHKERRQ(ierr);
526bd958071SHong Zhang   ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
527bd958071SHong Zhang   ierr = PetscLLCondensedDestroy_Scalable(lnk);CHKERRQ(ierr);
528e9e4536cSHong Zhang 
529e9e4536cSHong Zhang   /* Allocate space for ca */
530bd958071SHong Zhang   /*-----------------------*/
531580bdb30SBarry Smith   ierr = PetscCalloc1(ci[am]+1,&ca);CHKERRQ(ierr);
532e9e4536cSHong Zhang 
533e9e4536cSHong Zhang   /* put together the new symbolic matrix */
5344222ddf1SHong Zhang   ierr = MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,ca,C);CHKERRQ(ierr);
5354222ddf1SHong Zhang   ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
5364222ddf1SHong Zhang   ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
537e9e4536cSHong Zhang 
538e9e4536cSHong Zhang   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
539e9e4536cSHong Zhang   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
5404222ddf1SHong Zhang   c          = (Mat_SeqAIJ*)(C->data);
541e9e4536cSHong Zhang   c->free_a  = PETSC_TRUE;
542e9e4536cSHong Zhang   c->free_ij = PETSC_TRUE;
543e9e4536cSHong Zhang   c->nonew   = 0;
5442205254eSKarl Rupp 
5454222ddf1SHong Zhang   /* slower, less memory */
5464222ddf1SHong Zhang   C->ops->matmultnumeric    = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable;
547e9e4536cSHong Zhang 
548e9e4536cSHong Zhang   /* set MatInfo */
549e9e4536cSHong Zhang   afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5;
550e9e4536cSHong Zhang   if (afill < 1.0) afill = 1.0;
551e9e4536cSHong Zhang   c->maxnz                     = ci[am];
552e9e4536cSHong Zhang   c->nz                        = ci[am];
5534222ddf1SHong Zhang   C->info.mallocs           = ndouble;
5544222ddf1SHong Zhang   C->info.fill_ratio_given  = fill;
5554222ddf1SHong Zhang   C->info.fill_ratio_needed = afill;
556e9e4536cSHong Zhang 
557e9e4536cSHong Zhang #if defined(PETSC_USE_INFO)
558e9e4536cSHong Zhang   if (ci[am]) {
5594222ddf1SHong Zhang     ierr = PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);CHKERRQ(ierr);
5604222ddf1SHong Zhang     ierr = PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);CHKERRQ(ierr);
561e9e4536cSHong Zhang   } else {
5624222ddf1SHong Zhang     ierr = PetscInfo(C,"Empty matrix product\n");CHKERRQ(ierr);
563e9e4536cSHong Zhang   }
564e9e4536cSHong Zhang #endif
565e9e4536cSHong Zhang   PetscFunctionReturn(0);
566e9e4536cSHong Zhang }
567e9e4536cSHong Zhang 
5684222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Heap(Mat A,Mat B,PetscReal fill,Mat C)
5690ced3a2bSJed Brown {
5700ced3a2bSJed Brown   PetscErrorCode     ierr;
5710ced3a2bSJed Brown   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
5720ced3a2bSJed Brown   const PetscInt     *ai=a->i,*bi=b->i,*aj=a->j,*bj=b->j;
5730ced3a2bSJed Brown   PetscInt           *ci,*cj,*bb;
5740ced3a2bSJed Brown   PetscInt           am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
5750ced3a2bSJed Brown   PetscReal          afill;
5760ced3a2bSJed Brown   PetscInt           i,j,col,ndouble = 0;
5770298fd71SBarry Smith   PetscFreeSpaceList free_space=NULL,current_space=NULL;
5780ced3a2bSJed Brown   PetscHeap          h;
5790ced3a2bSJed Brown 
5800ced3a2bSJed Brown   PetscFunctionBegin;
581cd093f1dSJed Brown   /* Get ci and cj - by merging sorted rows using a heap */
5820ced3a2bSJed Brown   /*---------------------------------------------------------------------------------------------*/
5830ced3a2bSJed Brown   /* Allocate arrays for fill computation and free space for accumulating nonzero column */
584854ce69bSBarry Smith   ierr  = PetscMalloc1(am+2,&ci);CHKERRQ(ierr);
5850ced3a2bSJed Brown   ci[0] = 0;
5860ced3a2bSJed Brown 
5870ced3a2bSJed Brown   /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
588f91af8c7SBarry Smith   ierr          = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space);CHKERRQ(ierr);
5890ced3a2bSJed Brown   current_space = free_space;
5900ced3a2bSJed Brown 
5910ced3a2bSJed Brown   ierr = PetscHeapCreate(a->rmax,&h);CHKERRQ(ierr);
592785e854fSJed Brown   ierr = PetscMalloc1(a->rmax,&bb);CHKERRQ(ierr);
5930ced3a2bSJed Brown 
5940ced3a2bSJed Brown   /* Determine ci and cj */
5950ced3a2bSJed Brown   for (i=0; i<am; i++) {
5960ced3a2bSJed Brown     const PetscInt anzi  = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */
5970ced3a2bSJed Brown     const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */
5980ced3a2bSJed Brown     ci[i+1] = ci[i];
5990ced3a2bSJed Brown     /* Populate the min heap */
6000ced3a2bSJed Brown     for (j=0; j<anzi; j++) {
6010ced3a2bSJed Brown       bb[j] = bi[acol[j]];         /* bb points at the start of the row */
6020ced3a2bSJed Brown       if (bb[j] < bi[acol[j]+1]) { /* Add if row is nonempty */
6030ced3a2bSJed Brown         ierr = PetscHeapAdd(h,j,bj[bb[j]++]);CHKERRQ(ierr);
6040ced3a2bSJed Brown       }
6050ced3a2bSJed Brown     }
6060ced3a2bSJed Brown     /* Pick off the min element, adding it to free space */
6070ced3a2bSJed Brown     ierr = PetscHeapPop(h,&j,&col);CHKERRQ(ierr);
6080ced3a2bSJed Brown     while (j >= 0) {
6090ced3a2bSJed Brown       if (current_space->local_remaining < 1) { /* double the size, but don't exceed 16 MiB */
610f91af8c7SBarry Smith         ierr = PetscFreeSpaceGet(PetscMin(PetscIntMultTruncate(2,current_space->total_array_size),16 << 20),&current_space);CHKERRQ(ierr);
6110ced3a2bSJed Brown         ndouble++;
6120ced3a2bSJed Brown       }
6130ced3a2bSJed Brown       *(current_space->array++) = col;
6140ced3a2bSJed Brown       current_space->local_used++;
6150ced3a2bSJed Brown       current_space->local_remaining--;
6160ced3a2bSJed Brown       ci[i+1]++;
6170ced3a2bSJed Brown 
6180ced3a2bSJed Brown       /* stash if anything else remains in this row of B */
6190ced3a2bSJed Brown       if (bb[j] < bi[acol[j]+1]) {ierr = PetscHeapStash(h,j,bj[bb[j]++]);CHKERRQ(ierr);}
6200ced3a2bSJed Brown       while (1) {               /* pop and stash any other rows of B that also had an entry in this column */
6210ced3a2bSJed Brown         PetscInt j2,col2;
6220ced3a2bSJed Brown         ierr = PetscHeapPeek(h,&j2,&col2);CHKERRQ(ierr);
6230ced3a2bSJed Brown         if (col2 != col) break;
6240ced3a2bSJed Brown         ierr = PetscHeapPop(h,&j2,&col2);CHKERRQ(ierr);
6250ced3a2bSJed Brown         if (bb[j2] < bi[acol[j2]+1]) {ierr = PetscHeapStash(h,j2,bj[bb[j2]++]);CHKERRQ(ierr);}
6260ced3a2bSJed Brown       }
6270ced3a2bSJed Brown       /* Put any stashed elements back into the min heap */
6280ced3a2bSJed Brown       ierr = PetscHeapUnstash(h);CHKERRQ(ierr);
6290ced3a2bSJed Brown       ierr = PetscHeapPop(h,&j,&col);CHKERRQ(ierr);
6300ced3a2bSJed Brown     }
6310ced3a2bSJed Brown   }
6320ced3a2bSJed Brown   ierr = PetscFree(bb);CHKERRQ(ierr);
6330ced3a2bSJed Brown   ierr = PetscHeapDestroy(&h);CHKERRQ(ierr);
6340ced3a2bSJed Brown 
6350ced3a2bSJed Brown   /* Column indices are in the list of free space */
6360ced3a2bSJed Brown   /* Allocate space for cj, initialize cj, and */
6370ced3a2bSJed Brown   /* destroy list of free space and other temporary array(s) */
638785e854fSJed Brown   ierr = PetscMalloc1(ci[am],&cj);CHKERRQ(ierr);
6390ced3a2bSJed Brown   ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
6400ced3a2bSJed Brown 
6410ced3a2bSJed Brown   /* put together the new symbolic matrix */
6424222ddf1SHong Zhang   ierr = MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,C);CHKERRQ(ierr);
6434222ddf1SHong Zhang   ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
6444222ddf1SHong Zhang   ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
6450ced3a2bSJed Brown 
6460ced3a2bSJed Brown   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
6470ced3a2bSJed Brown   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
6484222ddf1SHong Zhang   c          = (Mat_SeqAIJ*)(C->data);
6490ced3a2bSJed Brown   c->free_a  = PETSC_TRUE;
6500ced3a2bSJed Brown   c->free_ij = PETSC_TRUE;
6510ced3a2bSJed Brown   c->nonew   = 0;
65226fbe8dcSKarl Rupp 
6534222ddf1SHong Zhang   C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted;
6540ced3a2bSJed Brown 
6550ced3a2bSJed Brown   /* set MatInfo */
6560ced3a2bSJed Brown   afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5;
6570ced3a2bSJed Brown   if (afill < 1.0) afill = 1.0;
6580ced3a2bSJed Brown   c->maxnz                     = ci[am];
6590ced3a2bSJed Brown   c->nz                        = ci[am];
6604222ddf1SHong Zhang   C->info.mallocs           = ndouble;
6614222ddf1SHong Zhang   C->info.fill_ratio_given  = fill;
6624222ddf1SHong Zhang   C->info.fill_ratio_needed = afill;
6630ced3a2bSJed Brown 
6640ced3a2bSJed Brown #if defined(PETSC_USE_INFO)
6650ced3a2bSJed Brown   if (ci[am]) {
6664222ddf1SHong Zhang     ierr = PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);CHKERRQ(ierr);
6674222ddf1SHong Zhang     ierr = PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);CHKERRQ(ierr);
6680ced3a2bSJed Brown   } else {
6694222ddf1SHong Zhang     ierr = PetscInfo(C,"Empty matrix product\n");CHKERRQ(ierr);
6700ced3a2bSJed Brown   }
6710ced3a2bSJed Brown #endif
6720ced3a2bSJed Brown   PetscFunctionReturn(0);
6730ced3a2bSJed Brown }
674e9e4536cSHong Zhang 
6754222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_BTHeap(Mat A,Mat B,PetscReal fill,Mat C)
6768a07c6f1SJed Brown {
6778a07c6f1SJed Brown   PetscErrorCode     ierr;
6788a07c6f1SJed Brown   Mat_SeqAIJ         *a  = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
6798a07c6f1SJed Brown   const PetscInt     *ai = a->i,*bi=b->i,*aj=a->j,*bj=b->j;
6808a07c6f1SJed Brown   PetscInt           *ci,*cj,*bb;
6818a07c6f1SJed Brown   PetscInt           am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
6828a07c6f1SJed Brown   PetscReal          afill;
6838a07c6f1SJed Brown   PetscInt           i,j,col,ndouble = 0;
6840298fd71SBarry Smith   PetscFreeSpaceList free_space=NULL,current_space=NULL;
6858a07c6f1SJed Brown   PetscHeap          h;
6868a07c6f1SJed Brown   PetscBT            bt;
6878a07c6f1SJed Brown 
6888a07c6f1SJed Brown   PetscFunctionBegin;
689cd093f1dSJed Brown   /* Get ci and cj - using a heap for the sorted rows, but use BT so that each index is only added once */
6908a07c6f1SJed Brown   /*---------------------------------------------------------------------------------------------*/
6918a07c6f1SJed Brown   /* Allocate arrays for fill computation and free space for accumulating nonzero column */
692854ce69bSBarry Smith   ierr  = PetscMalloc1(am+2,&ci);CHKERRQ(ierr);
6938a07c6f1SJed Brown   ci[0] = 0;
6948a07c6f1SJed Brown 
6958a07c6f1SJed Brown   /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
696f91af8c7SBarry Smith   ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space);CHKERRQ(ierr);
6972205254eSKarl Rupp 
6988a07c6f1SJed Brown   current_space = free_space;
6998a07c6f1SJed Brown 
7008a07c6f1SJed Brown   ierr = PetscHeapCreate(a->rmax,&h);CHKERRQ(ierr);
701785e854fSJed Brown   ierr = PetscMalloc1(a->rmax,&bb);CHKERRQ(ierr);
7028a07c6f1SJed Brown   ierr = PetscBTCreate(bn,&bt);CHKERRQ(ierr);
7038a07c6f1SJed Brown 
7048a07c6f1SJed Brown   /* Determine ci and cj */
7058a07c6f1SJed Brown   for (i=0; i<am; i++) {
7068a07c6f1SJed Brown     const PetscInt anzi  = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */
7078a07c6f1SJed Brown     const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */
7088a07c6f1SJed Brown     const PetscInt *fptr = current_space->array; /* Save beginning of the row so we can clear the BT later */
7098a07c6f1SJed Brown     ci[i+1] = ci[i];
7108a07c6f1SJed Brown     /* Populate the min heap */
7118a07c6f1SJed Brown     for (j=0; j<anzi; j++) {
7128a07c6f1SJed Brown       PetscInt brow = acol[j];
7138a07c6f1SJed Brown       for (bb[j] = bi[brow]; bb[j] < bi[brow+1]; bb[j]++) {
7148a07c6f1SJed Brown         PetscInt bcol = bj[bb[j]];
7158a07c6f1SJed Brown         if (!PetscBTLookupSet(bt,bcol)) { /* new entry */
7168a07c6f1SJed Brown           ierr = PetscHeapAdd(h,j,bcol);CHKERRQ(ierr);
7178a07c6f1SJed Brown           bb[j]++;
7188a07c6f1SJed Brown           break;
7198a07c6f1SJed Brown         }
7208a07c6f1SJed Brown       }
7218a07c6f1SJed Brown     }
7228a07c6f1SJed Brown     /* Pick off the min element, adding it to free space */
7238a07c6f1SJed Brown     ierr = PetscHeapPop(h,&j,&col);CHKERRQ(ierr);
7248a07c6f1SJed Brown     while (j >= 0) {
7258a07c6f1SJed Brown       if (current_space->local_remaining < 1) { /* double the size, but don't exceed 16 MiB */
7260298fd71SBarry Smith         fptr = NULL;                      /* need PetscBTMemzero */
727f91af8c7SBarry Smith         ierr = PetscFreeSpaceGet(PetscMin(PetscIntMultTruncate(2,current_space->total_array_size),16 << 20),&current_space);CHKERRQ(ierr);
7288a07c6f1SJed Brown         ndouble++;
7298a07c6f1SJed Brown       }
7308a07c6f1SJed Brown       *(current_space->array++) = col;
7318a07c6f1SJed Brown       current_space->local_used++;
7328a07c6f1SJed Brown       current_space->local_remaining--;
7338a07c6f1SJed Brown       ci[i+1]++;
7348a07c6f1SJed Brown 
7358a07c6f1SJed Brown       /* stash if anything else remains in this row of B */
7368a07c6f1SJed Brown       for (; bb[j] < bi[acol[j]+1]; bb[j]++) {
7378a07c6f1SJed Brown         PetscInt bcol = bj[bb[j]];
7388a07c6f1SJed Brown         if (!PetscBTLookupSet(bt,bcol)) { /* new entry */
7398a07c6f1SJed Brown           ierr = PetscHeapAdd(h,j,bcol);CHKERRQ(ierr);
7408a07c6f1SJed Brown           bb[j]++;
7418a07c6f1SJed Brown           break;
7428a07c6f1SJed Brown         }
7438a07c6f1SJed Brown       }
7448a07c6f1SJed Brown       ierr = PetscHeapPop(h,&j,&col);CHKERRQ(ierr);
7458a07c6f1SJed Brown     }
7468a07c6f1SJed Brown     if (fptr) {                 /* Clear the bits for this row */
7478a07c6f1SJed Brown       for (; fptr<current_space->array; fptr++) {ierr = PetscBTClear(bt,*fptr);CHKERRQ(ierr);}
7488a07c6f1SJed Brown     } else {                    /* We reallocated so we don't remember (easily) how to clear only the bits we changed */
7498a07c6f1SJed Brown       ierr = PetscBTMemzero(bn,bt);CHKERRQ(ierr);
7508a07c6f1SJed Brown     }
7518a07c6f1SJed Brown   }
7528a07c6f1SJed Brown   ierr = PetscFree(bb);CHKERRQ(ierr);
7538a07c6f1SJed Brown   ierr = PetscHeapDestroy(&h);CHKERRQ(ierr);
7548a07c6f1SJed Brown   ierr = PetscBTDestroy(&bt);CHKERRQ(ierr);
7558a07c6f1SJed Brown 
7568a07c6f1SJed Brown   /* Column indices are in the list of free space */
7578a07c6f1SJed Brown   /* Allocate space for cj, initialize cj, and */
7588a07c6f1SJed Brown   /* destroy list of free space and other temporary array(s) */
759785e854fSJed Brown   ierr = PetscMalloc1(ci[am],&cj);CHKERRQ(ierr);
7608a07c6f1SJed Brown   ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
7618a07c6f1SJed Brown 
7628a07c6f1SJed Brown   /* put together the new symbolic matrix */
7634222ddf1SHong Zhang   ierr = MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,C);CHKERRQ(ierr);
7644222ddf1SHong Zhang   ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
7654222ddf1SHong Zhang   ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
7668a07c6f1SJed Brown 
7678a07c6f1SJed Brown   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
7688a07c6f1SJed Brown   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
7694222ddf1SHong Zhang   c          = (Mat_SeqAIJ*)(C->data);
7708a07c6f1SJed Brown   c->free_a  = PETSC_TRUE;
7718a07c6f1SJed Brown   c->free_ij = PETSC_TRUE;
7728a07c6f1SJed Brown   c->nonew   = 0;
77326fbe8dcSKarl Rupp 
7744222ddf1SHong Zhang   C->ops->matmultnumeric        = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted;
7758a07c6f1SJed Brown 
7768a07c6f1SJed Brown   /* set MatInfo */
7778a07c6f1SJed Brown   afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5;
7788a07c6f1SJed Brown   if (afill < 1.0) afill = 1.0;
7798a07c6f1SJed Brown   c->maxnz                     = ci[am];
7808a07c6f1SJed Brown   c->nz                        = ci[am];
7814222ddf1SHong Zhang   C->info.mallocs           = ndouble;
7824222ddf1SHong Zhang   C->info.fill_ratio_given  = fill;
7834222ddf1SHong Zhang   C->info.fill_ratio_needed = afill;
7848a07c6f1SJed Brown 
7858a07c6f1SJed Brown #if defined(PETSC_USE_INFO)
7868a07c6f1SJed Brown   if (ci[am]) {
7874222ddf1SHong Zhang     ierr = PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);CHKERRQ(ierr);
7884222ddf1SHong Zhang     ierr = PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);CHKERRQ(ierr);
7898a07c6f1SJed Brown   } else {
7904222ddf1SHong Zhang     ierr = PetscInfo(C,"Empty matrix product\n");CHKERRQ(ierr);
7918a07c6f1SJed Brown   }
7928a07c6f1SJed Brown #endif
7938a07c6f1SJed Brown   PetscFunctionReturn(0);
7948a07c6f1SJed Brown }
7958a07c6f1SJed Brown 
796d7ed1a4dSandi selinger 
7974222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_RowMerge(Mat A,Mat B,PetscReal fill,Mat C)
798d7ed1a4dSandi selinger {
799d7ed1a4dSandi selinger   PetscErrorCode     ierr;
800d7ed1a4dSandi selinger   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
801d7ed1a4dSandi selinger   const PetscInt     *ai=a->i,*bi=b->i,*aj=a->j,*bj=b->j,*inputi,*inputj,*inputcol,*inputcol_L1;
802d7ed1a4dSandi selinger   PetscInt           *ci,*cj,*outputj,worki_L1[9],worki_L2[9];
803d7ed1a4dSandi selinger   PetscInt           c_maxmem,a_maxrownnz=0,a_rownnz;
804d7ed1a4dSandi selinger   const PetscInt     workcol[8]={0,1,2,3,4,5,6,7};
805d7ed1a4dSandi selinger   const PetscInt     am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
806d7ed1a4dSandi selinger   const PetscInt     *brow_ptr[8],*brow_end[8];
807d7ed1a4dSandi selinger   PetscInt           window[8];
808d7ed1a4dSandi selinger   PetscInt           window_min,old_window_min,ci_nnz,outputi_nnz=0,L1_nrows,L2_nrows;
809d7ed1a4dSandi selinger   PetscInt           i,k,ndouble=0,L1_rowsleft,rowsleft;
810d7ed1a4dSandi selinger   PetscReal          afill;
811f83700f2Sandi selinger   PetscInt           *workj_L1,*workj_L2,*workj_L3;
8127660c4dbSandi selinger   PetscInt           L1_nnz,L2_nnz;
813d7ed1a4dSandi selinger 
814d7ed1a4dSandi selinger   /* Step 1: Get upper bound on memory required for allocation.
815d7ed1a4dSandi selinger              Because of the way virtual memory works,
816d7ed1a4dSandi selinger              only the memory pages that are actually needed will be physically allocated. */
817d7ed1a4dSandi selinger   PetscFunctionBegin;
818d7ed1a4dSandi selinger   ierr = PetscMalloc1(am+1,&ci);CHKERRQ(ierr);
819d7ed1a4dSandi selinger   for (i=0; i<am; i++) {
820d7ed1a4dSandi selinger     const PetscInt anzi  = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */
821d7ed1a4dSandi selinger     const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */
822d7ed1a4dSandi selinger     a_rownnz = 0;
823d7ed1a4dSandi selinger     for (k=0; k<anzi; ++k) {
824d7ed1a4dSandi selinger       a_rownnz += bi[acol[k]+1] - bi[acol[k]];
825d7ed1a4dSandi selinger       if (a_rownnz > bn) {
826d7ed1a4dSandi selinger         a_rownnz = bn;
827d7ed1a4dSandi selinger         break;
828d7ed1a4dSandi selinger       }
829d7ed1a4dSandi selinger     }
830d7ed1a4dSandi selinger     a_maxrownnz = PetscMax(a_maxrownnz, a_rownnz);
831d7ed1a4dSandi selinger   }
832d7ed1a4dSandi selinger   /* temporary work areas for merging rows */
833d7ed1a4dSandi selinger   ierr = PetscMalloc1(a_maxrownnz*8,&workj_L1);CHKERRQ(ierr);
834f83700f2Sandi selinger   ierr = PetscMalloc1(a_maxrownnz*8,&workj_L2);CHKERRQ(ierr);
835f83700f2Sandi selinger   ierr = PetscMalloc1(a_maxrownnz,&workj_L3);CHKERRQ(ierr);
836d7ed1a4dSandi selinger 
8377660c4dbSandi selinger   /* This should be enough for almost all matrices. If not, memory is reallocated later. */
8387660c4dbSandi selinger   c_maxmem = 8*(ai[am]+bi[bm]);
839d7ed1a4dSandi selinger   /* Step 2: Populate pattern for C */
840d7ed1a4dSandi selinger   ierr  = PetscMalloc1(c_maxmem,&cj);CHKERRQ(ierr);
841d7ed1a4dSandi selinger 
842d7ed1a4dSandi selinger   ci_nnz       = 0;
843d7ed1a4dSandi selinger   ci[0]        = 0;
844d7ed1a4dSandi selinger   worki_L1[0]  = 0;
845d7ed1a4dSandi selinger   worki_L2[0]  = 0;
846d7ed1a4dSandi selinger   for (i=0; i<am; i++) {
847d7ed1a4dSandi selinger     const PetscInt anzi  = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */
848d7ed1a4dSandi selinger     const PetscInt *acol = aj + ai[i];      /* column indices of nonzero entries in this row */
849d7ed1a4dSandi selinger     rowsleft             = anzi;
850d7ed1a4dSandi selinger     inputcol_L1          = acol;
851d7ed1a4dSandi selinger     L2_nnz               = 0;
8527660c4dbSandi selinger     L2_nrows             = 1;  /* Number of rows to be merged on Level 3. output of L3 already exists -> initial value 1   */
8537660c4dbSandi selinger     worki_L2[1]          = 0;
85408fe1b3cSKarl Rupp     outputi_nnz          = 0;
855d7ed1a4dSandi selinger 
856d7ed1a4dSandi selinger     /* If the number of indices in C so far + the max number of columns in the next row > c_maxmem  -> allocate more memory */
857d7ed1a4dSandi selinger     while (ci_nnz+a_maxrownnz > c_maxmem) {
858d7ed1a4dSandi selinger       c_maxmem *= 2;
859d7ed1a4dSandi selinger       ndouble++;
860d7ed1a4dSandi selinger       ierr = PetscRealloc(sizeof(PetscInt)*c_maxmem,&cj);CHKERRQ(ierr);
861d7ed1a4dSandi selinger     }
862d7ed1a4dSandi selinger 
863d7ed1a4dSandi selinger     while (rowsleft) {
864d7ed1a4dSandi selinger       L1_rowsleft = PetscMin(64, rowsleft); /* In the inner loop max 64 rows of B can be merged */
865d7ed1a4dSandi selinger       L1_nrows    = 0;
8667660c4dbSandi selinger       L1_nnz      = 0;
867d7ed1a4dSandi selinger       inputcol    = inputcol_L1;
868d7ed1a4dSandi selinger       inputi      = bi;
869d7ed1a4dSandi selinger       inputj      = bj;
870d7ed1a4dSandi selinger 
871d7ed1a4dSandi selinger       /* The following macro is used to specialize for small rows in A.
872d7ed1a4dSandi selinger          This helps with compiler unrolling, improving performance substantially.
873f83700f2Sandi selinger           Input:  inputj   inputi  inputcol  bn
874d7ed1a4dSandi selinger           Output: outputj  outputi_nnz                       */
875d7ed1a4dSandi selinger        #define MatMatMultSymbolic_RowMergeMacro(ANNZ)                        \
876d7ed1a4dSandi selinger          window_min  = bn;                                                   \
8777660c4dbSandi selinger          outputi_nnz = 0;                                                    \
8787660c4dbSandi selinger          for (k=0; k<ANNZ; ++k) {                                            \
879d7ed1a4dSandi selinger            brow_ptr[k] = inputj + inputi[inputcol[k]];                       \
880d7ed1a4dSandi selinger            brow_end[k] = inputj + inputi[inputcol[k]+1];                     \
881d7ed1a4dSandi selinger            window[k]   = (brow_ptr[k] != brow_end[k]) ? *brow_ptr[k] : bn;   \
882d7ed1a4dSandi selinger            window_min  = PetscMin(window[k], window_min);                    \
883d7ed1a4dSandi selinger          }                                                                   \
884d7ed1a4dSandi selinger          while (window_min < bn) {                                           \
885d7ed1a4dSandi selinger            outputj[outputi_nnz++] = window_min;                              \
886d7ed1a4dSandi selinger            /* advance front and compute new minimum */                       \
887d7ed1a4dSandi selinger            old_window_min = window_min;                                      \
888d7ed1a4dSandi selinger            window_min = bn;                                                  \
889d7ed1a4dSandi selinger            for (k=0; k<ANNZ; ++k) {                                          \
890d7ed1a4dSandi selinger              if (window[k] == old_window_min) {                              \
891d7ed1a4dSandi selinger                brow_ptr[k]++;                                                \
892d7ed1a4dSandi selinger                window[k] = (brow_ptr[k] != brow_end[k]) ? *brow_ptr[k] : bn; \
893d7ed1a4dSandi selinger              }                                                               \
894d7ed1a4dSandi selinger              window_min = PetscMin(window[k], window_min);                   \
895d7ed1a4dSandi selinger            }                                                                 \
896d7ed1a4dSandi selinger          }
897d7ed1a4dSandi selinger 
898d7ed1a4dSandi selinger       /************** L E V E L  1 ***************/
899d7ed1a4dSandi selinger       /* Merge up to 8 rows of B to L1 work array*/
900d7ed1a4dSandi selinger       while (L1_rowsleft) {
9017660c4dbSandi selinger         outputi_nnz = 0;
9027660c4dbSandi selinger         if (anzi > 8)  outputj = workj_L1 + L1_nnz;     /* Level 1 rowmerge*/
9037660c4dbSandi selinger         else           outputj = cj + ci_nnz;           /* Merge directly to C */
9047660c4dbSandi selinger 
905d7ed1a4dSandi selinger         switch (L1_rowsleft) {
906d7ed1a4dSandi selinger         case 1:  brow_ptr[0] = inputj + inputi[inputcol[0]];
907d7ed1a4dSandi selinger                  brow_end[0] = inputj + inputi[inputcol[0]+1];
908d7ed1a4dSandi selinger                  for (; brow_ptr[0] != brow_end[0]; ++brow_ptr[0]) outputj[outputi_nnz++] = *brow_ptr[0]; /* copy row in b over */
909d7ed1a4dSandi selinger                  inputcol    += L1_rowsleft;
910d7ed1a4dSandi selinger                  rowsleft    -= L1_rowsleft;
911d7ed1a4dSandi selinger                  L1_rowsleft  = 0;
912d7ed1a4dSandi selinger                  break;
913d7ed1a4dSandi selinger         case 2:  MatMatMultSymbolic_RowMergeMacro(2);
914d7ed1a4dSandi selinger                  inputcol    += L1_rowsleft;
915d7ed1a4dSandi selinger                  rowsleft    -= L1_rowsleft;
916d7ed1a4dSandi selinger                  L1_rowsleft  = 0;
917d7ed1a4dSandi selinger                  break;
918d7ed1a4dSandi selinger         case 3: MatMatMultSymbolic_RowMergeMacro(3);
919d7ed1a4dSandi selinger                  inputcol    += L1_rowsleft;
920d7ed1a4dSandi selinger                  rowsleft    -= L1_rowsleft;
921d7ed1a4dSandi selinger                  L1_rowsleft  = 0;
922d7ed1a4dSandi selinger                  break;
923d7ed1a4dSandi selinger         case 4:  MatMatMultSymbolic_RowMergeMacro(4);
924d7ed1a4dSandi selinger                  inputcol    += L1_rowsleft;
925d7ed1a4dSandi selinger                  rowsleft    -= L1_rowsleft;
926d7ed1a4dSandi selinger                  L1_rowsleft  = 0;
927d7ed1a4dSandi selinger                  break;
928d7ed1a4dSandi selinger         case 5:  MatMatMultSymbolic_RowMergeMacro(5);
929d7ed1a4dSandi selinger                  inputcol    += L1_rowsleft;
930d7ed1a4dSandi selinger                  rowsleft    -= L1_rowsleft;
931d7ed1a4dSandi selinger                  L1_rowsleft  = 0;
932d7ed1a4dSandi selinger                  break;
933d7ed1a4dSandi selinger         case 6:  MatMatMultSymbolic_RowMergeMacro(6);
934d7ed1a4dSandi selinger                  inputcol    += L1_rowsleft;
935d7ed1a4dSandi selinger                  rowsleft    -= L1_rowsleft;
936d7ed1a4dSandi selinger                  L1_rowsleft  = 0;
937d7ed1a4dSandi selinger                  break;
938d7ed1a4dSandi selinger         case 7:  MatMatMultSymbolic_RowMergeMacro(7);
939d7ed1a4dSandi selinger                  inputcol    += L1_rowsleft;
940d7ed1a4dSandi selinger                  rowsleft    -= L1_rowsleft;
941d7ed1a4dSandi selinger                  L1_rowsleft  = 0;
942d7ed1a4dSandi selinger                  break;
943d7ed1a4dSandi selinger         default: MatMatMultSymbolic_RowMergeMacro(8);
944d7ed1a4dSandi selinger                  inputcol    += 8;
945d7ed1a4dSandi selinger                  rowsleft    -= 8;
946d7ed1a4dSandi selinger                  L1_rowsleft -= 8;
947d7ed1a4dSandi selinger                  break;
948d7ed1a4dSandi selinger         }
949d7ed1a4dSandi selinger         inputcol_L1           = inputcol;
9507660c4dbSandi selinger         L1_nnz               += outputi_nnz;
9517660c4dbSandi selinger         worki_L1[++L1_nrows]  = L1_nnz;
952d7ed1a4dSandi selinger       }
953d7ed1a4dSandi selinger 
954d7ed1a4dSandi selinger       /********************** L E V E L  2 ************************/
955d7ed1a4dSandi selinger       /* Merge from L1 work array to either C or to L2 work array */
956d7ed1a4dSandi selinger       if (anzi > 8) {
957d7ed1a4dSandi selinger         inputi      = worki_L1;
958d7ed1a4dSandi selinger         inputj      = workj_L1;
959d7ed1a4dSandi selinger         inputcol    = workcol;
960d7ed1a4dSandi selinger         outputi_nnz = 0;
961d7ed1a4dSandi selinger 
962d7ed1a4dSandi selinger         if (anzi <= 64) outputj = cj + ci_nnz;        /* Merge from L1 work array to C */
963d7ed1a4dSandi selinger         else            outputj = workj_L2 + L2_nnz;  /* Merge from L1 work array to L2 work array */
964d7ed1a4dSandi selinger 
965d7ed1a4dSandi selinger         switch (L1_nrows) {
966d7ed1a4dSandi selinger         case 1:  brow_ptr[0] = inputj + inputi[inputcol[0]];
967d7ed1a4dSandi selinger                  brow_end[0] = inputj + inputi[inputcol[0]+1];
968d7ed1a4dSandi selinger                  for (; brow_ptr[0] != brow_end[0]; ++brow_ptr[0]) outputj[outputi_nnz++] = *brow_ptr[0]; /* copy row in b over */
969d7ed1a4dSandi selinger                  break;
970d7ed1a4dSandi selinger         case 2:  MatMatMultSymbolic_RowMergeMacro(2); break;
971d7ed1a4dSandi selinger         case 3:  MatMatMultSymbolic_RowMergeMacro(3); break;
972d7ed1a4dSandi selinger         case 4:  MatMatMultSymbolic_RowMergeMacro(4); break;
973d7ed1a4dSandi selinger         case 5:  MatMatMultSymbolic_RowMergeMacro(5); break;
974d7ed1a4dSandi selinger         case 6:  MatMatMultSymbolic_RowMergeMacro(6); break;
975d7ed1a4dSandi selinger         case 7:  MatMatMultSymbolic_RowMergeMacro(7); break;
976d7ed1a4dSandi selinger         case 8:  MatMatMultSymbolic_RowMergeMacro(8); break;
977d7ed1a4dSandi selinger         default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatMatMult logic error: Not merging 1-8 rows from L1 work array!");
978d7ed1a4dSandi selinger         }
979d7ed1a4dSandi selinger         L2_nnz               += outputi_nnz;
980d7ed1a4dSandi selinger         worki_L2[++L2_nrows]  = L2_nnz;
981d7ed1a4dSandi selinger 
9827660c4dbSandi selinger         /************************ L E V E L  3 **********************/
9837660c4dbSandi selinger         /* Merge from L2 work array to either C or to L2 work array */
984d7ed1a4dSandi selinger         if (anzi > 64 && (L2_nrows == 8 || rowsleft == 0)) {
985d7ed1a4dSandi selinger           inputi      = worki_L2;
986d7ed1a4dSandi selinger           inputj      = workj_L2;
987d7ed1a4dSandi selinger           inputcol    = workcol;
988d7ed1a4dSandi selinger           outputi_nnz = 0;
989f83700f2Sandi selinger           if (rowsleft) outputj = workj_L3;
990d7ed1a4dSandi selinger           else          outputj = cj + ci_nnz;
991d7ed1a4dSandi selinger           switch (L2_nrows) {
992d7ed1a4dSandi selinger           case 1:  brow_ptr[0] = inputj + inputi[inputcol[0]];
993d7ed1a4dSandi selinger                    brow_end[0] = inputj + inputi[inputcol[0]+1];
994d7ed1a4dSandi selinger                    for (; brow_ptr[0] != brow_end[0]; ++brow_ptr[0]) outputj[outputi_nnz++] = *brow_ptr[0]; /* copy row in b over */
995d7ed1a4dSandi selinger                    break;
996d7ed1a4dSandi selinger           case 2:  MatMatMultSymbolic_RowMergeMacro(2); break;
997d7ed1a4dSandi selinger           case 3:  MatMatMultSymbolic_RowMergeMacro(3); break;
998d7ed1a4dSandi selinger           case 4:  MatMatMultSymbolic_RowMergeMacro(4); break;
999d7ed1a4dSandi selinger           case 5:  MatMatMultSymbolic_RowMergeMacro(5); break;
1000d7ed1a4dSandi selinger           case 6:  MatMatMultSymbolic_RowMergeMacro(6); break;
1001d7ed1a4dSandi selinger           case 7:  MatMatMultSymbolic_RowMergeMacro(7); break;
1002d7ed1a4dSandi selinger           case 8:  MatMatMultSymbolic_RowMergeMacro(8); break;
1003d7ed1a4dSandi selinger           default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatMatMult logic error: Not merging 1-8 rows from L2 work array!");
1004d7ed1a4dSandi selinger           }
1005d7ed1a4dSandi selinger           L2_nrows    = 1;
10067660c4dbSandi selinger           L2_nnz      = outputi_nnz;
10077660c4dbSandi selinger           worki_L2[1] = outputi_nnz;
10087660c4dbSandi selinger           /* Copy to workj_L2 */
1009d7ed1a4dSandi selinger           if (rowsleft) {
10107660c4dbSandi selinger             for (k=0; k<outputi_nnz; ++k)  workj_L2[k] = outputj[k];
1011d7ed1a4dSandi selinger           }
1012d7ed1a4dSandi selinger         }
1013d7ed1a4dSandi selinger       }
1014d7ed1a4dSandi selinger     }  /* while (rowsleft) */
1015d7ed1a4dSandi selinger #undef MatMatMultSymbolic_RowMergeMacro
1016d7ed1a4dSandi selinger 
1017d7ed1a4dSandi selinger     /* terminate current row */
1018d7ed1a4dSandi selinger     ci_nnz += outputi_nnz;
1019d7ed1a4dSandi selinger     ci[i+1] = ci_nnz;
1020d7ed1a4dSandi selinger   }
1021d7ed1a4dSandi selinger 
1022d7ed1a4dSandi selinger   /* Step 3: Create the new symbolic matrix */
10234222ddf1SHong Zhang   ierr = MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,C);CHKERRQ(ierr);
10244222ddf1SHong Zhang   ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
10254222ddf1SHong Zhang   ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1026d7ed1a4dSandi selinger 
1027d7ed1a4dSandi selinger   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
1028d7ed1a4dSandi selinger   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
10294222ddf1SHong Zhang   c          = (Mat_SeqAIJ*)(C->data);
1030d7ed1a4dSandi selinger   c->free_a  = PETSC_TRUE;
1031d7ed1a4dSandi selinger   c->free_ij = PETSC_TRUE;
1032d7ed1a4dSandi selinger   c->nonew   = 0;
1033d7ed1a4dSandi selinger 
10344222ddf1SHong Zhang   C->ops->matmultnumeric        = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted;
1035d7ed1a4dSandi selinger 
1036d7ed1a4dSandi selinger   /* set MatInfo */
1037d7ed1a4dSandi selinger   afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5;
1038d7ed1a4dSandi selinger   if (afill < 1.0) afill = 1.0;
1039d7ed1a4dSandi selinger   c->maxnz                     = ci[am];
1040d7ed1a4dSandi selinger   c->nz                        = ci[am];
10414222ddf1SHong Zhang   C->info.mallocs           = ndouble;
10424222ddf1SHong Zhang   C->info.fill_ratio_given  = fill;
10434222ddf1SHong Zhang   C->info.fill_ratio_needed = afill;
1044d7ed1a4dSandi selinger 
1045d7ed1a4dSandi selinger #if defined(PETSC_USE_INFO)
1046d7ed1a4dSandi selinger   if (ci[am]) {
10474222ddf1SHong Zhang     ierr = PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);CHKERRQ(ierr);
10484222ddf1SHong Zhang     ierr = PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);CHKERRQ(ierr);
1049d7ed1a4dSandi selinger   } else {
10504222ddf1SHong Zhang     ierr = PetscInfo(C,"Empty matrix product\n");CHKERRQ(ierr);
1051d7ed1a4dSandi selinger   }
1052d7ed1a4dSandi selinger #endif
1053d7ed1a4dSandi selinger 
1054d7ed1a4dSandi selinger   /* Step 4: Free temporary work areas */
1055d7ed1a4dSandi selinger   ierr = PetscFree(workj_L1);CHKERRQ(ierr);
1056d7ed1a4dSandi selinger   ierr = PetscFree(workj_L2);CHKERRQ(ierr);
1057f83700f2Sandi selinger   ierr = PetscFree(workj_L3);CHKERRQ(ierr);
1058d7ed1a4dSandi selinger   PetscFunctionReturn(0);
1059d7ed1a4dSandi selinger }
1060d7ed1a4dSandi selinger 
1061cd093f1dSJed Brown /* concatenate unique entries and then sort */
10624222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Sorted(Mat A,Mat B,PetscReal fill,Mat C)
1063cd093f1dSJed Brown {
1064cd093f1dSJed Brown   PetscErrorCode     ierr;
1065cd093f1dSJed Brown   Mat_SeqAIJ         *a  = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
1066cd093f1dSJed Brown   const PetscInt     *ai = a->i,*bi=b->i,*aj=a->j,*bj=b->j;
1067cd093f1dSJed Brown   PetscInt           *ci,*cj;
1068cd093f1dSJed Brown   PetscInt           am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
1069cd093f1dSJed Brown   PetscReal          afill;
1070cd093f1dSJed Brown   PetscInt           i,j,ndouble = 0;
1071cd093f1dSJed Brown   PetscSegBuffer     seg,segrow;
1072cd093f1dSJed Brown   char               *seen;
1073cd093f1dSJed Brown 
1074cd093f1dSJed Brown   PetscFunctionBegin;
1075854ce69bSBarry Smith   ierr  = PetscMalloc1(am+1,&ci);CHKERRQ(ierr);
1076cd093f1dSJed Brown   ci[0] = 0;
1077cd093f1dSJed Brown 
1078cd093f1dSJed Brown   /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
1079cd093f1dSJed Brown   ierr = PetscSegBufferCreate(sizeof(PetscInt),(PetscInt)(fill*(ai[am]+bi[bm])),&seg);CHKERRQ(ierr);
1080cd093f1dSJed Brown   ierr = PetscSegBufferCreate(sizeof(PetscInt),100,&segrow);CHKERRQ(ierr);
1081580bdb30SBarry Smith   ierr = PetscCalloc1(bn,&seen);CHKERRQ(ierr);
1082cd093f1dSJed Brown 
1083cd093f1dSJed Brown   /* Determine ci and cj */
1084cd093f1dSJed Brown   for (i=0; i<am; i++) {
1085cd093f1dSJed Brown     const PetscInt anzi  = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */
1086cd093f1dSJed Brown     const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */
1087cd093f1dSJed Brown     PetscInt packlen = 0,*PETSC_RESTRICT crow;
1088cd093f1dSJed Brown     /* Pack segrow */
1089cd093f1dSJed Brown     for (j=0; j<anzi; j++) {
1090cd093f1dSJed Brown       PetscInt brow = acol[j],bjstart = bi[brow],bjend = bi[brow+1],k;
1091cd093f1dSJed Brown       for (k=bjstart; k<bjend; k++) {
1092cd093f1dSJed Brown         PetscInt bcol = bj[k];
1093cd093f1dSJed Brown         if (!seen[bcol]) { /* new entry */
1094cd093f1dSJed Brown           PetscInt *PETSC_RESTRICT slot;
1095cd093f1dSJed Brown           ierr = PetscSegBufferGetInts(segrow,1,&slot);CHKERRQ(ierr);
1096cd093f1dSJed Brown           *slot = bcol;
1097cd093f1dSJed Brown           seen[bcol] = 1;
1098cd093f1dSJed Brown           packlen++;
1099cd093f1dSJed Brown         }
1100cd093f1dSJed Brown       }
1101cd093f1dSJed Brown     }
1102cd093f1dSJed Brown     ierr = PetscSegBufferGetInts(seg,packlen,&crow);CHKERRQ(ierr);
1103cd093f1dSJed Brown     ierr = PetscSegBufferExtractTo(segrow,crow);CHKERRQ(ierr);
1104cd093f1dSJed Brown     ierr = PetscSortInt(packlen,crow);CHKERRQ(ierr);
1105cd093f1dSJed Brown     ci[i+1] = ci[i] + packlen;
1106cd093f1dSJed Brown     for (j=0; j<packlen; j++) seen[crow[j]] = 0;
1107cd093f1dSJed Brown   }
1108cd093f1dSJed Brown   ierr = PetscSegBufferDestroy(&segrow);CHKERRQ(ierr);
1109cd093f1dSJed Brown   ierr = PetscFree(seen);CHKERRQ(ierr);
1110cd093f1dSJed Brown 
1111cd093f1dSJed Brown   /* Column indices are in the segmented buffer */
1112cd093f1dSJed Brown   ierr = PetscSegBufferExtractAlloc(seg,&cj);CHKERRQ(ierr);
1113cd093f1dSJed Brown   ierr = PetscSegBufferDestroy(&seg);CHKERRQ(ierr);
1114cd093f1dSJed Brown 
1115cd093f1dSJed Brown   /* put together the new symbolic matrix */
11164222ddf1SHong Zhang   ierr = MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,C);CHKERRQ(ierr);
11174222ddf1SHong Zhang   ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
11184222ddf1SHong Zhang   ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1119cd093f1dSJed Brown 
1120cd093f1dSJed Brown   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
1121cd093f1dSJed Brown   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
11224222ddf1SHong Zhang   c          = (Mat_SeqAIJ*)(C->data);
1123cd093f1dSJed Brown   c->free_a  = PETSC_TRUE;
1124cd093f1dSJed Brown   c->free_ij = PETSC_TRUE;
1125cd093f1dSJed Brown   c->nonew   = 0;
1126cd093f1dSJed Brown 
11274222ddf1SHong Zhang   C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted;
1128cd093f1dSJed Brown 
1129cd093f1dSJed Brown   /* set MatInfo */
1130cd093f1dSJed Brown   afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5;
1131cd093f1dSJed Brown   if (afill < 1.0) afill = 1.0;
1132cd093f1dSJed Brown   c->maxnz                     = ci[am];
1133cd093f1dSJed Brown   c->nz                        = ci[am];
11344222ddf1SHong Zhang   C->info.mallocs           = ndouble;
11354222ddf1SHong Zhang   C->info.fill_ratio_given  = fill;
11364222ddf1SHong Zhang   C->info.fill_ratio_needed = afill;
1137cd093f1dSJed Brown 
1138cd093f1dSJed Brown #if defined(PETSC_USE_INFO)
1139cd093f1dSJed Brown   if (ci[am]) {
11404222ddf1SHong Zhang     ierr = PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);CHKERRQ(ierr);
11414222ddf1SHong Zhang     ierr = PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);CHKERRQ(ierr);
1142cd093f1dSJed Brown   } else {
11434222ddf1SHong Zhang     ierr = PetscInfo(C,"Empty matrix product\n");CHKERRQ(ierr);
1144cd093f1dSJed Brown   }
1145cd093f1dSJed Brown #endif
1146cd093f1dSJed Brown   PetscFunctionReturn(0);
1147cd093f1dSJed Brown }
1148cd093f1dSJed Brown 
11492128a86cSHong Zhang PetscErrorCode MatDestroy_SeqAIJ_MatMatMultTrans(Mat A)
11502128a86cSHong Zhang {
11512128a86cSHong Zhang   PetscErrorCode      ierr;
11524c7df5ccSHong Zhang   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)A->data;
115340192850SHong Zhang   Mat_MatMatTransMult *abt=a->abt;
11542128a86cSHong Zhang 
11552128a86cSHong Zhang   PetscFunctionBegin;
115640192850SHong Zhang   ierr = (abt->destroy)(A);CHKERRQ(ierr);
115740192850SHong Zhang   ierr = MatTransposeColoringDestroy(&abt->matcoloring);CHKERRQ(ierr);
115840192850SHong Zhang   ierr = MatDestroy(&abt->Bt_den);CHKERRQ(ierr);
115940192850SHong Zhang   ierr = MatDestroy(&abt->ABt_den);CHKERRQ(ierr);
116040192850SHong Zhang   ierr = PetscFree(abt);CHKERRQ(ierr);
11612128a86cSHong Zhang   PetscFunctionReturn(0);
11622128a86cSHong Zhang }
11632128a86cSHong Zhang 
11644222ddf1SHong Zhang PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat C)
1165bc011b1eSHong Zhang {
11665df89d91SHong Zhang   PetscErrorCode      ierr;
116781d82fe4SHong Zhang   Mat                 Bt;
116881d82fe4SHong Zhang   PetscInt            *bti,*btj;
116940192850SHong Zhang   Mat_MatMatTransMult *abt;
11704c7df5ccSHong Zhang   Mat_SeqAIJ          *c;
11714222ddf1SHong Zhang   Mat_Product         *product = C->product;
11724222ddf1SHong Zhang   MatProductAlgorithm alg = product->alg;
1173d2853540SHong Zhang 
117481d82fe4SHong Zhang   PetscFunctionBegin;
117581d82fe4SHong Zhang   /* create symbolic Bt */
117681d82fe4SHong Zhang   ierr = MatGetSymbolicTranspose_SeqAIJ(B,&bti,&btj);CHKERRQ(ierr);
11770298fd71SBarry Smith   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,B->cmap->n,B->rmap->n,bti,btj,NULL,&Bt);CHKERRQ(ierr);
117833d57670SJed Brown   ierr = MatSetBlockSizes(Bt,PetscAbs(A->cmap->bs),PetscAbs(B->cmap->bs));CHKERRQ(ierr);
117904b858e0SBarry Smith   ierr = MatSetType(Bt,((PetscObject)A)->type_name);CHKERRQ(ierr);
118081d82fe4SHong Zhang 
118181d82fe4SHong Zhang   /* get symbolic C=A*Bt */
11824222ddf1SHong Zhang   ierr = MatProductSetAlgorithm(C,"sorted");CHKERRQ(ierr); /* set algorithm for C = A*Bt */
118381d82fe4SHong Zhang   ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,Bt,fill,C);CHKERRQ(ierr);
11844222ddf1SHong Zhang   ierr = MatProductSetAlgorithm(C,alg);CHKERRQ(ierr); /* resume original algorithm for ABt product */
118581d82fe4SHong Zhang 
11862128a86cSHong Zhang   /* create a supporting struct for reuse intermidiate dense matrices with matcoloring */
1187b00a9115SJed Brown   ierr   = PetscNew(&abt);CHKERRQ(ierr);
11884222ddf1SHong Zhang   c      = (Mat_SeqAIJ*)C->data;
118940192850SHong Zhang   c->abt = abt;
11902128a86cSHong Zhang 
119140192850SHong Zhang   abt->usecoloring = PETSC_FALSE;
11924222ddf1SHong Zhang   abt->destroy     = C->ops->destroy;
11934222ddf1SHong Zhang   C->ops->destroy  = MatDestroy_SeqAIJ_MatMatMultTrans;
11944222ddf1SHong Zhang   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
11952128a86cSHong Zhang 
11964222ddf1SHong Zhang   abt->usecoloring = PETSC_FALSE;
11974222ddf1SHong Zhang   ierr = PetscStrcmp(product->alg,"color",&abt->usecoloring);CHKERRQ(ierr);
119840192850SHong Zhang   if (abt->usecoloring) {
1199b9af6bddSHong Zhang     /* Create MatTransposeColoring from symbolic C=A*B^T */
1200b9af6bddSHong Zhang     MatTransposeColoring matcoloring;
1201335efc43SPeter Brune     MatColoring          coloring;
12022128a86cSHong Zhang     ISColoring           iscoloring;
12032128a86cSHong Zhang     Mat                  Bt_dense,C_dense;
1204e8354b3eSHong Zhang 
12054222ddf1SHong Zhang     /* inode causes memory problem */
12064222ddf1SHong Zhang     ierr = MatSetOption(C,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
12074222ddf1SHong Zhang 
12084222ddf1SHong Zhang     ierr = MatColoringCreate(C,&coloring);CHKERRQ(ierr);
1209335efc43SPeter Brune     ierr = MatColoringSetDistance(coloring,2);CHKERRQ(ierr);
1210335efc43SPeter Brune     ierr = MatColoringSetType(coloring,MATCOLORINGSL);CHKERRQ(ierr);
1211335efc43SPeter Brune     ierr = MatColoringSetFromOptions(coloring);CHKERRQ(ierr);
1212335efc43SPeter Brune     ierr = MatColoringApply(coloring,&iscoloring);CHKERRQ(ierr);
1213335efc43SPeter Brune     ierr = MatColoringDestroy(&coloring);CHKERRQ(ierr);
12144222ddf1SHong Zhang     ierr = MatTransposeColoringCreate(C,iscoloring,&matcoloring);CHKERRQ(ierr);
12152205254eSKarl Rupp 
121640192850SHong Zhang     abt->matcoloring = matcoloring;
12172205254eSKarl Rupp 
12182128a86cSHong Zhang     ierr = ISColoringDestroy(&iscoloring);CHKERRQ(ierr);
12192128a86cSHong Zhang 
12202128a86cSHong Zhang     /* Create Bt_dense and C_dense = A*Bt_dense */
12212128a86cSHong Zhang     ierr = MatCreate(PETSC_COMM_SELF,&Bt_dense);CHKERRQ(ierr);
12222128a86cSHong Zhang     ierr = MatSetSizes(Bt_dense,A->cmap->n,matcoloring->ncolors,A->cmap->n,matcoloring->ncolors);CHKERRQ(ierr);
12232128a86cSHong Zhang     ierr = MatSetType(Bt_dense,MATSEQDENSE);CHKERRQ(ierr);
12240298fd71SBarry Smith     ierr = MatSeqDenseSetPreallocation(Bt_dense,NULL);CHKERRQ(ierr);
12252205254eSKarl Rupp 
12262128a86cSHong Zhang     Bt_dense->assembled = PETSC_TRUE;
122740192850SHong Zhang     abt->Bt_den         = Bt_dense;
12282128a86cSHong Zhang 
12292128a86cSHong Zhang     ierr = MatCreate(PETSC_COMM_SELF,&C_dense);CHKERRQ(ierr);
12302128a86cSHong Zhang     ierr = MatSetSizes(C_dense,A->rmap->n,matcoloring->ncolors,A->rmap->n,matcoloring->ncolors);CHKERRQ(ierr);
12312128a86cSHong Zhang     ierr = MatSetType(C_dense,MATSEQDENSE);CHKERRQ(ierr);
12320298fd71SBarry Smith     ierr = MatSeqDenseSetPreallocation(C_dense,NULL);CHKERRQ(ierr);
12332205254eSKarl Rupp 
12342128a86cSHong Zhang     Bt_dense->assembled = PETSC_TRUE;
123540192850SHong Zhang     abt->ABt_den  = C_dense;
1236f94ccd6cSHong Zhang 
1237f94ccd6cSHong Zhang #if defined(PETSC_USE_INFO)
12381ce71dffSSatish Balay     {
12394222ddf1SHong Zhang       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
12404222ddf1SHong Zhang       ierr = PetscInfo7(C,"Use coloring of C=A*B^T; B^T: %D %D, Bt_dense: %D,%D; Cnz %D / (cm*ncolors %D) = %g\n",B->cmap->n,B->rmap->n,Bt_dense->rmap->n,Bt_dense->cmap->n,c->nz,A->rmap->n*matcoloring->ncolors,(PetscReal)(c->nz)/(A->rmap->n*matcoloring->ncolors));CHKERRQ(ierr);
12411ce71dffSSatish Balay     }
1242f94ccd6cSHong Zhang #endif
12432128a86cSHong Zhang   }
1244e99be685SHong Zhang   /* clean up */
1245e99be685SHong Zhang   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
1246e99be685SHong Zhang   ierr = MatRestoreSymbolicTranspose_SeqAIJ(B,&bti,&btj);CHKERRQ(ierr);
12475df89d91SHong Zhang   PetscFunctionReturn(0);
12485df89d91SHong Zhang }
12495df89d91SHong Zhang 
12506fc122caSHong Zhang PetscErrorCode MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
12515df89d91SHong Zhang {
12525df89d91SHong Zhang   PetscErrorCode      ierr;
12535df89d91SHong Zhang   Mat_SeqAIJ          *a   =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data;
1254e2cac8caSJed Brown   PetscInt            *ai  =a->i,*aj=a->j,*bi=b->i,*bj=b->j,anzi,bnzj,nexta,nextb,*acol,*bcol,brow;
125581d82fe4SHong Zhang   PetscInt            cm   =C->rmap->n,*ci=c->i,*cj=c->j,i,j,cnzi,*ccol;
12565df89d91SHong Zhang   PetscLogDouble      flops=0.0;
1257aa1aec99SHong Zhang   MatScalar           *aa  =a->a,*aval,*ba=b->a,*bval,*ca,*cval;
125840192850SHong Zhang   Mat_MatMatTransMult *abt = c->abt;
12595df89d91SHong Zhang 
12605df89d91SHong Zhang   PetscFunctionBegin;
126158ed3ceaSHong Zhang   /* clear old values in C */
126258ed3ceaSHong Zhang   if (!c->a) {
1263580bdb30SBarry Smith     ierr      = PetscCalloc1(ci[cm]+1,&ca);CHKERRQ(ierr);
126458ed3ceaSHong Zhang     c->a      = ca;
126558ed3ceaSHong Zhang     c->free_a = PETSC_TRUE;
126658ed3ceaSHong Zhang   } else {
126758ed3ceaSHong Zhang     ca =  c->a;
1268580bdb30SBarry Smith     ierr = PetscArrayzero(ca,ci[cm]+1);CHKERRQ(ierr);
126958ed3ceaSHong Zhang   }
127058ed3ceaSHong Zhang 
127140192850SHong Zhang   if (abt->usecoloring) {
127240192850SHong Zhang     MatTransposeColoring matcoloring = abt->matcoloring;
127340192850SHong Zhang     Mat                  Bt_dense,C_dense = abt->ABt_den;
1274c8db22f6SHong Zhang 
1275b9af6bddSHong Zhang     /* Get Bt_dense by Apply MatTransposeColoring to B */
127640192850SHong Zhang     Bt_dense = abt->Bt_den;
1277b9af6bddSHong Zhang     ierr = MatTransColoringApplySpToDen(matcoloring,B,Bt_dense);CHKERRQ(ierr);
1278c8db22f6SHong Zhang 
1279c8db22f6SHong Zhang     /* C_dense = A*Bt_dense */
12802128a86cSHong Zhang     ierr = MatMatMultNumeric_SeqAIJ_SeqDense(A,Bt_dense,C_dense);CHKERRQ(ierr);
1281c8db22f6SHong Zhang 
12822128a86cSHong Zhang     /* Recover C from C_dense */
1283b9af6bddSHong Zhang     ierr = MatTransColoringApplyDenToSp(matcoloring,C_dense,C);CHKERRQ(ierr);
1284c8db22f6SHong Zhang     PetscFunctionReturn(0);
1285c8db22f6SHong Zhang   }
1286c8db22f6SHong Zhang 
12871fa1209cSHong Zhang   for (i=0; i<cm; i++) {
128881d82fe4SHong Zhang     anzi = ai[i+1] - ai[i];
128981d82fe4SHong Zhang     acol = aj + ai[i];
12906973769bSHong Zhang     aval = aa + ai[i];
12911fa1209cSHong Zhang     cnzi = ci[i+1] - ci[i];
12921fa1209cSHong Zhang     ccol = cj + ci[i];
12936973769bSHong Zhang     cval = ca + ci[i];
12941fa1209cSHong Zhang     for (j=0; j<cnzi; j++) {
129581d82fe4SHong Zhang       brow = ccol[j];
129681d82fe4SHong Zhang       bnzj = bi[brow+1] - bi[brow];
129781d82fe4SHong Zhang       bcol = bj + bi[brow];
12986973769bSHong Zhang       bval = ba + bi[brow];
12996973769bSHong Zhang 
130081d82fe4SHong Zhang       /* perform sparse inner-product c(i,j)=A[i,:]*B[j,:]^T */
130181d82fe4SHong Zhang       nexta = 0; nextb = 0;
130281d82fe4SHong Zhang       while (nexta<anzi && nextb<bnzj) {
13037b6d5e96SMark Adams         while (nexta < anzi && acol[nexta] < bcol[nextb]) nexta++;
130481d82fe4SHong Zhang         if (nexta == anzi) break;
13057b6d5e96SMark Adams         while (nextb < bnzj && acol[nexta] > bcol[nextb]) nextb++;
130681d82fe4SHong Zhang         if (nextb == bnzj) break;
130781d82fe4SHong Zhang         if (acol[nexta] == bcol[nextb]) {
13086973769bSHong Zhang           cval[j] += aval[nexta]*bval[nextb];
130981d82fe4SHong Zhang           nexta++; nextb++;
131081d82fe4SHong Zhang           flops += 2;
13111fa1209cSHong Zhang         }
13121fa1209cSHong Zhang       }
131381d82fe4SHong Zhang     }
131481d82fe4SHong Zhang   }
131581d82fe4SHong Zhang   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
131681d82fe4SHong Zhang   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
131781d82fe4SHong Zhang   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
13185df89d91SHong Zhang   PetscFunctionReturn(0);
13195df89d91SHong Zhang }
13205df89d91SHong Zhang 
13216d373c3eSHong Zhang PetscErrorCode MatDestroy_SeqAIJ_MatTransMatMult(Mat A)
13226d373c3eSHong Zhang {
13236d373c3eSHong Zhang   PetscErrorCode      ierr;
13246d373c3eSHong Zhang   Mat_SeqAIJ          *a = (Mat_SeqAIJ*)A->data;
13256d373c3eSHong Zhang   Mat_MatTransMatMult *atb = a->atb;
13266d373c3eSHong Zhang 
13276d373c3eSHong Zhang   PetscFunctionBegin;
13286473ade5SStefano Zampini   if (atb) {
13296d373c3eSHong Zhang     ierr = MatDestroy(&atb->At);CHKERRQ(ierr);
13306473ade5SStefano Zampini     ierr = (*atb->destroy)(A);CHKERRQ(ierr);
13316473ade5SStefano Zampini   }
13326d373c3eSHong Zhang   ierr = PetscFree(atb);CHKERRQ(ierr);
13336d373c3eSHong Zhang   PetscFunctionReturn(0);
13346d373c3eSHong Zhang }
13356d373c3eSHong Zhang 
13364222ddf1SHong Zhang PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat C)
13375df89d91SHong Zhang {
1338bc011b1eSHong Zhang   PetscErrorCode      ierr;
1339bc011b1eSHong Zhang   Mat                 At;
134038baddfdSBarry Smith   PetscInt            *ati,*atj;
13414222ddf1SHong Zhang   Mat_Product         *product = C->product;
13424222ddf1SHong Zhang   MatProductAlgorithm alg;
13434222ddf1SHong Zhang   PetscBool           flg;
1344bc011b1eSHong Zhang 
1345bc011b1eSHong Zhang   PetscFunctionBegin;
13464222ddf1SHong Zhang   if (product) {
13474222ddf1SHong Zhang     alg = product->alg;
13484222ddf1SHong Zhang   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"!product, not supported yet");
13494222ddf1SHong Zhang 
13504222ddf1SHong Zhang   /* outerproduct */
13514222ddf1SHong Zhang   ierr = PetscStrcmp(alg,"outerproduct",&flg);CHKERRQ(ierr);
13524222ddf1SHong Zhang   if (flg) {
1353bc011b1eSHong Zhang     /* create symbolic At */
1354bc011b1eSHong Zhang     ierr = MatGetSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr);
13550298fd71SBarry Smith     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,A->cmap->n,A->rmap->n,ati,atj,NULL,&At);CHKERRQ(ierr);
135633d57670SJed Brown     ierr = MatSetBlockSizes(At,PetscAbs(A->cmap->bs),PetscAbs(B->cmap->bs));CHKERRQ(ierr);
135704b858e0SBarry Smith     ierr = MatSetType(At,((PetscObject)A)->type_name);CHKERRQ(ierr);
1358bc011b1eSHong Zhang 
1359bc011b1eSHong Zhang     /* get symbolic C=At*B */
13604222ddf1SHong Zhang     product->alg = "sorted";
1361bc011b1eSHong Zhang     ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(At,B,fill,C);CHKERRQ(ierr);
1362bc011b1eSHong Zhang 
1363bc011b1eSHong Zhang     /* clean up */
13646bf464f9SBarry Smith     ierr = MatDestroy(&At);CHKERRQ(ierr);
1365bc011b1eSHong Zhang     ierr = MatRestoreSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr);
13666d373c3eSHong Zhang 
13674222ddf1SHong Zhang     C->ops->mattransposemultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ; /* outerproduct */
13684222ddf1SHong Zhang     PetscFunctionReturn(0);
13694222ddf1SHong Zhang   }
13704222ddf1SHong Zhang 
13714222ddf1SHong Zhang   /* matmatmult */
13724222ddf1SHong Zhang   ierr = PetscStrcmp(alg,"at*b",&flg);CHKERRQ(ierr);
13734222ddf1SHong Zhang   if (flg) {
13744222ddf1SHong Zhang     Mat_MatTransMatMult *atb;
13754222ddf1SHong Zhang     Mat_SeqAIJ          *c;
13764222ddf1SHong Zhang 
13774222ddf1SHong Zhang     ierr = PetscNew(&atb);CHKERRQ(ierr);
13784222ddf1SHong Zhang     ierr = MatTranspose_SeqAIJ(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
13794222ddf1SHong Zhang     product->alg = "sorted";
13804222ddf1SHong Zhang     ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(At,B,fill,C);CHKERRQ(ierr);
13814222ddf1SHong Zhang 
13824222ddf1SHong Zhang     c               = (Mat_SeqAIJ*)C->data;
13834222ddf1SHong Zhang     c->atb          = atb;
13844222ddf1SHong Zhang     atb->At         = At;
13854222ddf1SHong Zhang     atb->destroy    = C->ops->destroy;
13864222ddf1SHong Zhang     atb->updateAt   = PETSC_FALSE; /* because At is computed here */
13874222ddf1SHong Zhang     C->ops->destroy = MatDestroy_SeqAIJ_MatTransMatMult;
13884222ddf1SHong Zhang 
13894222ddf1SHong Zhang     C->ops->mattransposemultnumeric = NULL; /* see MatProductNumeric_AtB_SeqAIJ_SeqAIJ */
13904222ddf1SHong Zhang     PetscFunctionReturn(0);
13914222ddf1SHong Zhang   }
13924222ddf1SHong Zhang 
13934222ddf1SHong Zhang   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product Algorithm is not supported");
1394bc011b1eSHong Zhang   PetscFunctionReturn(0);
1395bc011b1eSHong Zhang }
1396bc011b1eSHong Zhang 
139775648e8dSHong Zhang PetscErrorCode MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
1398bc011b1eSHong Zhang {
1399bc011b1eSHong Zhang   PetscErrorCode ierr;
14000fbc74f4SHong Zhang   Mat_SeqAIJ     *a   =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data;
1401d0f46423SBarry Smith   PetscInt       am   =A->rmap->n,anzi,*ai=a->i,*aj=a->j,*bi=b->i,*bj,bnzi,nextb;
1402d0f46423SBarry Smith   PetscInt       cm   =C->rmap->n,*ci=c->i,*cj=c->j,crow,*cjj,i,j,k;
1403d13dce4bSSatish Balay   PetscLogDouble flops=0.0;
1404aa1aec99SHong Zhang   MatScalar      *aa  =a->a,*ba,*ca,*caj;
1405bc011b1eSHong Zhang 
1406bc011b1eSHong Zhang   PetscFunctionBegin;
1407aa1aec99SHong Zhang   if (!c->a) {
1408580bdb30SBarry Smith     ierr = PetscCalloc1(ci[cm]+1,&ca);CHKERRQ(ierr);
14092205254eSKarl Rupp 
1410aa1aec99SHong Zhang     c->a      = ca;
1411aa1aec99SHong Zhang     c->free_a = PETSC_TRUE;
1412aa1aec99SHong Zhang   } else {
1413aa1aec99SHong Zhang     ca   = c->a;
1414580bdb30SBarry Smith     ierr = PetscArrayzero(ca,ci[cm]);CHKERRQ(ierr);
1415aa1aec99SHong Zhang   }
1416bc011b1eSHong Zhang 
1417bc011b1eSHong Zhang   /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */
1418bc011b1eSHong Zhang   for (i=0; i<am; i++) {
1419bc011b1eSHong Zhang     bj   = b->j + bi[i];
1420bc011b1eSHong Zhang     ba   = b->a + bi[i];
1421bc011b1eSHong Zhang     bnzi = bi[i+1] - bi[i];
1422bc011b1eSHong Zhang     anzi = ai[i+1] - ai[i];
1423bc011b1eSHong Zhang     for (j=0; j<anzi; j++) {
1424bc011b1eSHong Zhang       nextb = 0;
14250fbc74f4SHong Zhang       crow  = *aj++;
1426bc011b1eSHong Zhang       cjj   = cj + ci[crow];
1427bc011b1eSHong Zhang       caj   = ca + ci[crow];
1428bc011b1eSHong Zhang       /* perform sparse axpy operation.  Note cjj includes bj. */
1429bc011b1eSHong Zhang       for (k=0; nextb<bnzi; k++) {
14300fbc74f4SHong Zhang         if (cjj[k] == *(bj+nextb)) { /* ccol == bcol */
14310fbc74f4SHong Zhang           caj[k] += (*aa)*(*(ba+nextb));
1432bc011b1eSHong Zhang           nextb++;
1433bc011b1eSHong Zhang         }
1434bc011b1eSHong Zhang       }
1435bc011b1eSHong Zhang       flops += 2*bnzi;
14360fbc74f4SHong Zhang       aa++;
1437bc011b1eSHong Zhang     }
1438bc011b1eSHong Zhang   }
1439bc011b1eSHong Zhang 
1440bc011b1eSHong Zhang   /* Assemble the final matrix and clean up */
1441bc011b1eSHong Zhang   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1442bc011b1eSHong Zhang   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1443bc011b1eSHong Zhang   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
1444bc011b1eSHong Zhang   PetscFunctionReturn(0);
1445bc011b1eSHong Zhang }
14469123193aSHong Zhang 
14474222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqDense(Mat A,Mat B,PetscReal fill,Mat C)
14489123193aSHong Zhang {
14499123193aSHong Zhang   PetscErrorCode ierr;
14509123193aSHong Zhang 
14519123193aSHong Zhang   PetscFunctionBegin;
14525a586d82SBarry Smith   ierr = MatMatMultSymbolic_SeqDense_SeqDense(A,B,0.0,C);CHKERRQ(ierr);
14532205254eSKarl Rupp 
14544222ddf1SHong Zhang   C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqDense;
14559123193aSHong Zhang   PetscFunctionReturn(0);
14569123193aSHong Zhang }
14579123193aSHong Zhang 
145887753ddeSHong Zhang PetscErrorCode MatMatMultNumericAdd_SeqAIJ_SeqDense(Mat A,Mat B,Mat C)
14599123193aSHong Zhang {
1460f32d5d43SBarry Smith   Mat_SeqAIJ        *a=(Mat_SeqAIJ*)A->data;
1461612438f1SStefano Zampini   Mat_SeqDense      *bd = (Mat_SeqDense*)B->data;
14629123193aSHong Zhang   PetscErrorCode    ierr;
1463a4af7ca8SStefano Zampini   PetscScalar       *c,r1,r2,r3,r4,*c1,*c2,*c3,*c4,aatmp;
1464a4af7ca8SStefano Zampini   const PetscScalar *aa,*b,*b1,*b2,*b3,*b4,*av;
1465daf57211SHong Zhang   const PetscInt    *aj;
1466612438f1SStefano Zampini   PetscInt          cm=C->rmap->n,cn=B->cmap->n,bm=bd->lda,am=A->rmap->n;
1467daf57211SHong Zhang   PetscInt          am4=4*am,bm4=4*bm,col,i,j,n,ajtmp;
14689123193aSHong Zhang 
14699123193aSHong Zhang   PetscFunctionBegin;
1470f32d5d43SBarry Smith   if (!cm || !cn) PetscFunctionReturn(0);
1471a4af7ca8SStefano Zampini   ierr = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr);
14728c778c55SBarry Smith   ierr = MatDenseGetArray(C,&c);CHKERRQ(ierr);
1473a4af7ca8SStefano Zampini   ierr = MatDenseGetArrayRead(B,&b);CHKERRQ(ierr);
1474f32d5d43SBarry Smith   b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm;
1475730858b9SHong Zhang   c1 = c; c2 = c1 + am; c3 = c2 + am; c4 = c3 + am;
1476f32d5d43SBarry Smith   for (col=0; col<cn-4; col += 4) {  /* over columns of C */
1477f32d5d43SBarry Smith     for (i=0; i<am; i++) {        /* over rows of C in those columns */
1478f32d5d43SBarry Smith       r1 = r2 = r3 = r4 = 0.0;
1479f32d5d43SBarry Smith       n  = a->i[i+1] - a->i[i];
1480f32d5d43SBarry Smith       aj = a->j + a->i[i];
1481a4af7ca8SStefano Zampini       aa = av + a->i[i];
1482f32d5d43SBarry Smith       for (j=0; j<n; j++) {
1483730858b9SHong Zhang         aatmp = aa[j]; ajtmp = aj[j];
1484730858b9SHong Zhang         r1 += aatmp*b1[ajtmp];
1485730858b9SHong Zhang         r2 += aatmp*b2[ajtmp];
1486730858b9SHong Zhang         r3 += aatmp*b3[ajtmp];
1487730858b9SHong Zhang         r4 += aatmp*b4[ajtmp];
14889123193aSHong Zhang       }
148987753ddeSHong Zhang       c1[i] += r1;
149087753ddeSHong Zhang       c2[i] += r2;
149187753ddeSHong Zhang       c3[i] += r3;
149287753ddeSHong Zhang       c4[i] += r4;
1493f32d5d43SBarry Smith     }
1494730858b9SHong Zhang     b1 += bm4; b2 += bm4; b3 += bm4; b4 += bm4;
1495730858b9SHong Zhang     c1 += am4; c2 += am4; c3 += am4; c4 += am4;
1496f32d5d43SBarry Smith   }
1497f32d5d43SBarry Smith   for (; col<cn; col++) {   /* over extra columns of C */
1498f32d5d43SBarry Smith     for (i=0; i<am; i++) {  /* over rows of C in those columns */
1499f32d5d43SBarry Smith       r1 = 0.0;
1500f32d5d43SBarry Smith       n  = a->i[i+1] - a->i[i];
1501f32d5d43SBarry Smith       aj = a->j + a->i[i];
1502a4af7ca8SStefano Zampini       aa = av + a->i[i];
1503f32d5d43SBarry Smith       for (j=0; j<n; j++) {
1504730858b9SHong Zhang         r1 += aa[j]*b1[aj[j]];
1505f32d5d43SBarry Smith       }
150687753ddeSHong Zhang       c1[i] += r1;
1507f32d5d43SBarry Smith     }
1508f32d5d43SBarry Smith     b1 += bm;
1509730858b9SHong Zhang     c1 += am;
1510f32d5d43SBarry Smith   }
1511dc0b31edSSatish Balay   ierr = PetscLogFlops(cn*(2.0*a->nz));CHKERRQ(ierr);
15128c778c55SBarry Smith   ierr = MatDenseRestoreArray(C,&c);CHKERRQ(ierr);
1513a4af7ca8SStefano Zampini   ierr = MatDenseRestoreArrayRead(B,&b);CHKERRQ(ierr);
1514a4af7ca8SStefano Zampini   ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr);
1515f32d5d43SBarry Smith   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1516f32d5d43SBarry Smith   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1517f32d5d43SBarry Smith   PetscFunctionReturn(0);
1518f32d5d43SBarry Smith }
1519f32d5d43SBarry Smith 
152087753ddeSHong Zhang PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqDense(Mat A,Mat B,Mat C)
1521f32d5d43SBarry Smith {
1522f32d5d43SBarry Smith   PetscErrorCode ierr;
1523f32d5d43SBarry Smith 
1524f32d5d43SBarry Smith   PetscFunctionBegin;
152587753ddeSHong Zhang   if (B->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in A %D not equal rows in B %D\n",A->cmap->n,B->rmap->n);
152687753ddeSHong Zhang   if (A->rmap->n != C->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows in C %D not equal rows in A %D\n",C->rmap->n,A->rmap->n);
152787753ddeSHong Zhang   if (B->cmap->n != C->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in B %D not equal columns in C %D\n",B->cmap->n,C->cmap->n);
15284324174eSBarry Smith 
152987753ddeSHong Zhang   ierr = MatZeroEntries(C);CHKERRQ(ierr);
153087753ddeSHong Zhang   ierr = MatMatMultNumericAdd_SeqAIJ_SeqDense(A,B,C);CHKERRQ(ierr);
15319123193aSHong Zhang   PetscFunctionReturn(0);
15329123193aSHong Zhang }
1533b1683b59SHong Zhang 
15344222ddf1SHong Zhang /* ------------------------------------------------------- */
15354222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense_AB(Mat C)
15364222ddf1SHong Zhang {
15374222ddf1SHong Zhang   PetscFunctionBegin;
15384222ddf1SHong Zhang   C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqDense;
15394222ddf1SHong Zhang   C->ops->productsymbolic = MatProductSymbolic_AB;
15404222ddf1SHong Zhang   /* dense mat may not call MatProductSymbolic(), thus set C->ops->productnumeric here */
15414222ddf1SHong Zhang   C->ops->productnumeric  = MatProductNumeric_AB;
15424222ddf1SHong Zhang   PetscFunctionReturn(0);
15434222ddf1SHong Zhang }
15444222ddf1SHong Zhang 
15454222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense_AtB(Mat C)
15464222ddf1SHong Zhang {
15474222ddf1SHong Zhang   PetscFunctionBegin;
15484222ddf1SHong Zhang   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_SeqAIJ_SeqDense;
15494222ddf1SHong Zhang   C->ops->productsymbolic          = MatProductSymbolic_AtB;
15504222ddf1SHong Zhang   C->ops->productnumeric           = MatProductNumeric_AtB;
15514222ddf1SHong Zhang   PetscFunctionReturn(0);
15524222ddf1SHong Zhang }
15534222ddf1SHong Zhang 
15544222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense(Mat C)
15554222ddf1SHong Zhang {
15564222ddf1SHong Zhang   PetscErrorCode ierr;
15574222ddf1SHong Zhang   Mat_Product    *product = C->product;
15584222ddf1SHong Zhang 
15594222ddf1SHong Zhang   PetscFunctionBegin;
15604222ddf1SHong Zhang   switch (product->type) {
15614222ddf1SHong Zhang   case MATPRODUCT_AB:
15624222ddf1SHong Zhang     ierr = MatProductSetFromOptions_SeqAIJ_SeqDense_AB(C);CHKERRQ(ierr);
15634222ddf1SHong Zhang     break;
15644222ddf1SHong Zhang   case MATPRODUCT_AtB:
15654222ddf1SHong Zhang     ierr = MatProductSetFromOptions_SeqAIJ_SeqDense_AtB(C);CHKERRQ(ierr);
15664222ddf1SHong Zhang     break;
15674222ddf1SHong Zhang   case MATPRODUCT_PtAP:
15684222ddf1SHong Zhang     ierr = MatProductSetFromOptions_SeqDense(C);CHKERRQ(ierr);
15694222ddf1SHong Zhang     break;
15704222ddf1SHong Zhang   default:
15714222ddf1SHong Zhang     /* Use MatProduct_Basic() if there is no specific implementation */
15724222ddf1SHong Zhang     C->ops->productsymbolic = MatProductSymbolic_Basic;
15734222ddf1SHong Zhang   }
15744222ddf1SHong Zhang   PetscFunctionReturn(0);
15754222ddf1SHong Zhang }
15764222ddf1SHong Zhang /* ------------------------------------------------------- */
15774222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqXBAIJ_SeqDense_AB(Mat C)
15784222ddf1SHong Zhang {
15794222ddf1SHong Zhang   PetscErrorCode ierr;
15804222ddf1SHong Zhang   Mat_Product    *product = C->product;
15814222ddf1SHong Zhang   Mat            A = product->A;
15824222ddf1SHong Zhang   PetscBool      baij;
15834222ddf1SHong Zhang 
15844222ddf1SHong Zhang   PetscFunctionBegin;
15854222ddf1SHong Zhang   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&baij);CHKERRQ(ierr);
15864222ddf1SHong Zhang   if (!baij) { /* A is seqsbaij */
15874222ddf1SHong Zhang     PetscBool sbaij;
15884222ddf1SHong Zhang     ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&sbaij);CHKERRQ(ierr);
15894222ddf1SHong Zhang     if (!sbaij) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_ARG_WRONGSTATE,"Mat must be either seqbaij or seqsbaij format");
15904222ddf1SHong Zhang 
15914222ddf1SHong Zhang     C->ops->matmultsymbolic = MatMatMultSymbolic_SeqSBAIJ_SeqDense;
15924222ddf1SHong Zhang   } else { /* A is seqbaij */
15934222ddf1SHong Zhang     C->ops->matmultsymbolic = MatMatMultSymbolic_SeqBAIJ_SeqDense;
15944222ddf1SHong Zhang   }
15954222ddf1SHong Zhang 
15964222ddf1SHong Zhang   C->ops->productsymbolic = MatProductSymbolic_AB;
15974222ddf1SHong Zhang   C->ops->productnumeric  = MatProductNumeric_AB;
15984222ddf1SHong Zhang   PetscFunctionReturn(0);
15994222ddf1SHong Zhang }
16004222ddf1SHong Zhang 
16014222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqXBAIJ_SeqDense(Mat C)
16024222ddf1SHong Zhang {
16034222ddf1SHong Zhang   PetscErrorCode ierr;
16044222ddf1SHong Zhang   Mat_Product    *product = C->product;
16054222ddf1SHong Zhang 
16064222ddf1SHong Zhang   PetscFunctionBegin;
16074222ddf1SHong Zhang   if (product->type == MATPRODUCT_AB) {
16084222ddf1SHong Zhang     ierr = MatProductSetFromOptions_SeqXBAIJ_SeqDense_AB(C);CHKERRQ(ierr);
1609*544a5e07SHong Zhang   } else SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for SeqXBAIJ and SeqDense matrices",MatProductTypes[product->type]);
16104222ddf1SHong Zhang   PetscFunctionReturn(0);
16114222ddf1SHong Zhang }
16124222ddf1SHong Zhang /* ------------------------------------------------------- */
16134222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqDense_SeqAIJ_AB(Mat C)
16144222ddf1SHong Zhang {
16154222ddf1SHong Zhang   PetscFunctionBegin;
16164222ddf1SHong Zhang   C->ops->matmultsymbolic = MatMatMultSymbolic_SeqDense_SeqAIJ;
16174222ddf1SHong Zhang   C->ops->productsymbolic = MatProductSymbolic_AB;
16184222ddf1SHong Zhang   C->ops->productnumeric  = MatProductNumeric_AB;
16194222ddf1SHong Zhang   PetscFunctionReturn(0);
16204222ddf1SHong Zhang }
16214222ddf1SHong Zhang 
16224222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqDense_SeqAIJ(Mat C)
16234222ddf1SHong Zhang {
16244222ddf1SHong Zhang   PetscErrorCode ierr;
16254222ddf1SHong Zhang   Mat_Product    *product = C->product;
16264222ddf1SHong Zhang 
16274222ddf1SHong Zhang   PetscFunctionBegin;
16284222ddf1SHong Zhang   if (product->type == MATPRODUCT_AB) {
16294222ddf1SHong Zhang     ierr = MatProductSetFromOptions_SeqDense_SeqAIJ_AB(C);CHKERRQ(ierr);
1630*544a5e07SHong Zhang   } else SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for SeqDense and SeqAIJ matrices",MatProductTypes[product->type]);
16314222ddf1SHong Zhang   PetscFunctionReturn(0);
16324222ddf1SHong Zhang }
16334222ddf1SHong Zhang /* ------------------------------------------------------- */
16344222ddf1SHong Zhang 
1635b9af6bddSHong Zhang PetscErrorCode  MatTransColoringApplySpToDen_SeqAIJ(MatTransposeColoring coloring,Mat B,Mat Btdense)
1636c8db22f6SHong Zhang {
1637c8db22f6SHong Zhang   PetscErrorCode ierr;
16382f5f1f90SHong Zhang   Mat_SeqAIJ     *b       = (Mat_SeqAIJ*)B->data;
16392f5f1f90SHong Zhang   Mat_SeqDense   *btdense = (Mat_SeqDense*)Btdense->data;
16402f5f1f90SHong Zhang   PetscInt       *bi      = b->i,*bj=b->j;
16412f5f1f90SHong Zhang   PetscInt       m        = Btdense->rmap->n,n=Btdense->cmap->n,j,k,l,col,anz,*btcol,brow,ncolumns;
16422f5f1f90SHong Zhang   MatScalar      *btval,*btval_den,*ba=b->a;
16432f5f1f90SHong Zhang   PetscInt       *columns=coloring->columns,*colorforcol=coloring->colorforcol,ncolors=coloring->ncolors;
1644c8db22f6SHong Zhang 
1645c8db22f6SHong Zhang   PetscFunctionBegin;
16462f5f1f90SHong Zhang   btval_den=btdense->v;
1647580bdb30SBarry Smith   ierr     = PetscArrayzero(btval_den,m*n);CHKERRQ(ierr);
16482f5f1f90SHong Zhang   for (k=0; k<ncolors; k++) {
16492f5f1f90SHong Zhang     ncolumns = coloring->ncolumns[k];
16502f5f1f90SHong Zhang     for (l=0; l<ncolumns; l++) { /* insert a row of B to a column of Btdense */
1651d2853540SHong Zhang       col   = *(columns + colorforcol[k] + l);
16522f5f1f90SHong Zhang       btcol = bj + bi[col];
16532f5f1f90SHong Zhang       btval = ba + bi[col];
16542f5f1f90SHong Zhang       anz   = bi[col+1] - bi[col];
1655d2853540SHong Zhang       for (j=0; j<anz; j++) {
16562f5f1f90SHong Zhang         brow            = btcol[j];
16572f5f1f90SHong Zhang         btval_den[brow] = btval[j];
1658c8db22f6SHong Zhang       }
1659c8db22f6SHong Zhang     }
16602f5f1f90SHong Zhang     btval_den += m;
1661c8db22f6SHong Zhang   }
1662c8db22f6SHong Zhang   PetscFunctionReturn(0);
1663c8db22f6SHong Zhang }
1664c8db22f6SHong Zhang 
1665b9af6bddSHong Zhang PetscErrorCode MatTransColoringApplyDenToSp_SeqAIJ(MatTransposeColoring matcoloring,Mat Cden,Mat Csp)
16668972f759SHong Zhang {
16678972f759SHong Zhang   PetscErrorCode    ierr;
1668b2d2b04fSHong Zhang   Mat_SeqAIJ        *csp = (Mat_SeqAIJ*)Csp->data;
16691683a169SBarry Smith   const PetscScalar *ca_den,*ca_den_ptr;
16701683a169SBarry Smith   PetscScalar       *ca=csp->a;
1671f99a636bSHong Zhang   PetscInt          k,l,m=Cden->rmap->n,ncolors=matcoloring->ncolors;
1672e88777f2SHong Zhang   PetscInt          brows=matcoloring->brows,*den2sp=matcoloring->den2sp;
1673077f23c2SHong Zhang   PetscInt          nrows,*row,*idx;
1674077f23c2SHong Zhang   PetscInt          *rows=matcoloring->rows,*colorforrow=matcoloring->colorforrow;
16758972f759SHong Zhang 
16768972f759SHong Zhang   PetscFunctionBegin;
16771683a169SBarry Smith   ierr   = MatDenseGetArrayRead(Cden,&ca_den);CHKERRQ(ierr);
1678f99a636bSHong Zhang 
1679077f23c2SHong Zhang   if (brows > 0) {
1680077f23c2SHong Zhang     PetscInt *lstart,row_end,row_start;
1681980ae229SHong Zhang     lstart = matcoloring->lstart;
1682580bdb30SBarry Smith     ierr = PetscArrayzero(lstart,ncolors);CHKERRQ(ierr);
1683eeb4fd02SHong Zhang 
1684077f23c2SHong Zhang     row_end = brows;
1685eeb4fd02SHong Zhang     if (row_end > m) row_end = m;
1686077f23c2SHong Zhang     for (row_start=0; row_start<m; row_start+=brows) { /* loop over row blocks of Csp */
1687077f23c2SHong Zhang       ca_den_ptr = ca_den;
1688980ae229SHong Zhang       for (k=0; k<ncolors; k++) { /* loop over colors (columns of Cden) */
1689eeb4fd02SHong Zhang         nrows = matcoloring->nrows[k];
1690eeb4fd02SHong Zhang         row   = rows  + colorforrow[k];
1691eeb4fd02SHong Zhang         idx   = den2sp + colorforrow[k];
1692eeb4fd02SHong Zhang         for (l=lstart[k]; l<nrows; l++) {
1693eeb4fd02SHong Zhang           if (row[l] >= row_end) {
1694eeb4fd02SHong Zhang             lstart[k] = l;
1695eeb4fd02SHong Zhang             break;
1696eeb4fd02SHong Zhang           } else {
1697077f23c2SHong Zhang             ca[idx[l]] = ca_den_ptr[row[l]];
1698eeb4fd02SHong Zhang           }
1699eeb4fd02SHong Zhang         }
1700077f23c2SHong Zhang         ca_den_ptr += m;
1701eeb4fd02SHong Zhang       }
1702077f23c2SHong Zhang       row_end += brows;
1703eeb4fd02SHong Zhang       if (row_end > m) row_end = m;
1704eeb4fd02SHong Zhang     }
1705077f23c2SHong Zhang   } else { /* non-blocked impl: loop over columns of Csp - slow if Csp is large */
1706077f23c2SHong Zhang     ca_den_ptr = ca_den;
1707077f23c2SHong Zhang     for (k=0; k<ncolors; k++) {
1708077f23c2SHong Zhang       nrows = matcoloring->nrows[k];
1709077f23c2SHong Zhang       row   = rows  + colorforrow[k];
1710077f23c2SHong Zhang       idx   = den2sp + colorforrow[k];
1711077f23c2SHong Zhang       for (l=0; l<nrows; l++) {
1712077f23c2SHong Zhang         ca[idx[l]] = ca_den_ptr[row[l]];
1713077f23c2SHong Zhang       }
1714077f23c2SHong Zhang       ca_den_ptr += m;
1715077f23c2SHong Zhang     }
1716f99a636bSHong Zhang   }
1717f99a636bSHong Zhang 
17181683a169SBarry Smith   ierr = MatDenseRestoreArrayRead(Cden,&ca_den);CHKERRQ(ierr);
1719f99a636bSHong Zhang #if defined(PETSC_USE_INFO)
1720077f23c2SHong Zhang   if (matcoloring->brows > 0) {
1721f99a636bSHong Zhang     ierr = PetscInfo1(Csp,"Loop over %D row blocks for den2sp\n",brows);CHKERRQ(ierr);
1722e88777f2SHong Zhang   } else {
1723077f23c2SHong Zhang     ierr = PetscInfo(Csp,"Loop over colors/columns of Cden, inefficient for large sparse matrix product \n");CHKERRQ(ierr);
1724e88777f2SHong Zhang   }
1725f99a636bSHong Zhang #endif
17268972f759SHong Zhang   PetscFunctionReturn(0);
17278972f759SHong Zhang }
17288972f759SHong Zhang 
1729b9af6bddSHong Zhang PetscErrorCode MatTransposeColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatTransposeColoring c)
1730b1683b59SHong Zhang {
1731b1683b59SHong Zhang   PetscErrorCode ierr;
1732e88777f2SHong Zhang   PetscInt       i,n,nrows,Nbs,j,k,m,ncols,col,cm;
17331a83f524SJed Brown   const PetscInt *is,*ci,*cj,*row_idx;
1734b28d80bdSHong Zhang   PetscInt       nis = iscoloring->n,*rowhit,bs = 1;
1735b1683b59SHong Zhang   IS             *isa;
1736b28d80bdSHong Zhang   Mat_SeqAIJ     *csp = (Mat_SeqAIJ*)mat->data;
1737e88777f2SHong Zhang   PetscInt       *colorforrow,*rows,*rows_i,*idxhit,*spidx,*den2sp,*den2sp_i;
1738e88777f2SHong Zhang   PetscInt       *colorforcol,*columns,*columns_i,brows;
1739e88777f2SHong Zhang   PetscBool      flg;
1740b1683b59SHong Zhang 
1741b1683b59SHong Zhang   PetscFunctionBegin;
1742071fcb05SBarry Smith   ierr = ISColoringGetIS(iscoloring,PETSC_USE_POINTER,PETSC_IGNORE,&isa);CHKERRQ(ierr);
1743e99be685SHong Zhang 
17447ecccc15SHong Zhang   /* bs >1 is not being tested yet! */
1745e88777f2SHong Zhang   Nbs       = mat->cmap->N/bs;
1746b1683b59SHong Zhang   c->M      = mat->rmap->N/bs;  /* set total rows, columns and local rows */
1747e88777f2SHong Zhang   c->N      = Nbs;
1748e88777f2SHong Zhang   c->m      = c->M;
1749b1683b59SHong Zhang   c->rstart = 0;
1750e88777f2SHong Zhang   c->brows  = 100;
1751b1683b59SHong Zhang 
1752b1683b59SHong Zhang   c->ncolors = nis;
1753dcca6d9dSJed Brown   ierr = PetscMalloc3(nis,&c->ncolumns,nis,&c->nrows,nis+1,&colorforrow);CHKERRQ(ierr);
1754854ce69bSBarry Smith   ierr = PetscMalloc1(csp->nz+1,&rows);CHKERRQ(ierr);
1755854ce69bSBarry Smith   ierr = PetscMalloc1(csp->nz+1,&den2sp);CHKERRQ(ierr);
1756e88777f2SHong Zhang 
1757e88777f2SHong Zhang   brows = c->brows;
1758c5929fdfSBarry Smith   ierr = PetscOptionsGetInt(NULL,NULL,"-matden2sp_brows",&brows,&flg);CHKERRQ(ierr);
1759e88777f2SHong Zhang   if (flg) c->brows = brows;
1760eeb4fd02SHong Zhang   if (brows > 0) {
1761854ce69bSBarry Smith     ierr = PetscMalloc1(nis+1,&c->lstart);CHKERRQ(ierr);
1762e88777f2SHong Zhang   }
17632205254eSKarl Rupp 
1764d2853540SHong Zhang   colorforrow[0] = 0;
1765d2853540SHong Zhang   rows_i         = rows;
1766f99a636bSHong Zhang   den2sp_i       = den2sp;
1767b1683b59SHong Zhang 
1768854ce69bSBarry Smith   ierr = PetscMalloc1(nis+1,&colorforcol);CHKERRQ(ierr);
1769854ce69bSBarry Smith   ierr = PetscMalloc1(Nbs+1,&columns);CHKERRQ(ierr);
17702205254eSKarl Rupp 
1771d2853540SHong Zhang   colorforcol[0] = 0;
1772d2853540SHong Zhang   columns_i      = columns;
1773d2853540SHong Zhang 
1774077f23c2SHong Zhang   /* get column-wise storage of mat */
1775077f23c2SHong Zhang   ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
1776b1683b59SHong Zhang 
1777b28d80bdSHong Zhang   cm   = c->m;
1778854ce69bSBarry Smith   ierr = PetscMalloc1(cm+1,&rowhit);CHKERRQ(ierr);
1779854ce69bSBarry Smith   ierr = PetscMalloc1(cm+1,&idxhit);CHKERRQ(ierr);
1780f99a636bSHong Zhang   for (i=0; i<nis; i++) { /* loop over color */
1781b1683b59SHong Zhang     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
1782b1683b59SHong Zhang     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
17832205254eSKarl Rupp 
1784b1683b59SHong Zhang     c->ncolumns[i] = n;
1785b1683b59SHong Zhang     if (n) {
1786580bdb30SBarry Smith       ierr = PetscArraycpy(columns_i,is,n);CHKERRQ(ierr);
1787b1683b59SHong Zhang     }
1788d2853540SHong Zhang     colorforcol[i+1] = colorforcol[i] + n;
1789d2853540SHong Zhang     columns_i       += n;
1790b1683b59SHong Zhang 
1791b1683b59SHong Zhang     /* fast, crude version requires O(N*N) work */
1792580bdb30SBarry Smith     ierr = PetscArrayzero(rowhit,cm);CHKERRQ(ierr);
1793e99be685SHong Zhang 
1794b7caf3d6SHong Zhang     for (j=0; j<n; j++) { /* loop over columns*/
1795b1683b59SHong Zhang       col     = is[j];
1796d2853540SHong Zhang       row_idx = cj + ci[col];
1797b1683b59SHong Zhang       m       = ci[col+1] - ci[col];
1798b7caf3d6SHong Zhang       for (k=0; k<m; k++) { /* loop over columns marking them in rowhit */
1799e99be685SHong Zhang         idxhit[*row_idx]   = spidx[ci[col] + k];
1800d2853540SHong Zhang         rowhit[*row_idx++] = col + 1;
1801b1683b59SHong Zhang       }
1802b1683b59SHong Zhang     }
1803b1683b59SHong Zhang     /* count the number of hits */
1804b1683b59SHong Zhang     nrows = 0;
1805e8354b3eSHong Zhang     for (j=0; j<cm; j++) {
1806b1683b59SHong Zhang       if (rowhit[j]) nrows++;
1807b1683b59SHong Zhang     }
1808b1683b59SHong Zhang     c->nrows[i]      = nrows;
1809d2853540SHong Zhang     colorforrow[i+1] = colorforrow[i] + nrows;
1810d2853540SHong Zhang 
1811b1683b59SHong Zhang     nrows = 0;
1812b7caf3d6SHong Zhang     for (j=0; j<cm; j++) { /* loop over rows */
1813b1683b59SHong Zhang       if (rowhit[j]) {
1814d2853540SHong Zhang         rows_i[nrows]   = j;
181512b89a43SHong Zhang         den2sp_i[nrows] = idxhit[j];
1816b1683b59SHong Zhang         nrows++;
1817b1683b59SHong Zhang       }
1818b1683b59SHong Zhang     }
1819e88777f2SHong Zhang     den2sp_i += nrows;
1820077f23c2SHong Zhang 
1821b1683b59SHong Zhang     ierr    = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr);
1822f99a636bSHong Zhang     rows_i += nrows;
1823b1683b59SHong Zhang   }
18240298fd71SBarry Smith   ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr);
1825b28d80bdSHong Zhang   ierr = PetscFree(rowhit);CHKERRQ(ierr);
1826071fcb05SBarry Smith   ierr = ISColoringRestoreIS(iscoloring,PETSC_USE_POINTER,&isa);CHKERRQ(ierr);
1827d2853540SHong Zhang   if (csp->nz != colorforrow[nis]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"csp->nz %d != colorforrow[nis] %d",csp->nz,colorforrow[nis]);
1828b28d80bdSHong Zhang 
1829d2853540SHong Zhang   c->colorforrow = colorforrow;
1830d2853540SHong Zhang   c->rows        = rows;
1831f99a636bSHong Zhang   c->den2sp      = den2sp;
1832d2853540SHong Zhang   c->colorforcol = colorforcol;
1833d2853540SHong Zhang   c->columns     = columns;
18342205254eSKarl Rupp 
1835f94ccd6cSHong Zhang   ierr = PetscFree(idxhit);CHKERRQ(ierr);
1836b1683b59SHong Zhang   PetscFunctionReturn(0);
1837b1683b59SHong Zhang }
1838d013fc79Sandi selinger 
18394222ddf1SHong Zhang /* --------------------------------------------------------------- */
18404222ddf1SHong Zhang static PetscErrorCode MatProductNumeric_AtB_SeqAIJ_SeqAIJ(Mat C)
1841df97dc6dSFande Kong {
18424222ddf1SHong Zhang   PetscErrorCode ierr;
18434222ddf1SHong Zhang   Mat_Product    *product = C->product;
18444222ddf1SHong Zhang   Mat            A=product->A,B=product->B;
18454222ddf1SHong Zhang 
1846df97dc6dSFande Kong   PetscFunctionBegin;
18474222ddf1SHong Zhang   if (C->ops->mattransposemultnumeric) {
18484222ddf1SHong Zhang     /* Alg: "outerproduct" */
18494222ddf1SHong Zhang     ierr = (C->ops->mattransposemultnumeric)(A,B,C);CHKERRQ(ierr);
18504222ddf1SHong Zhang   } else {
18514222ddf1SHong Zhang     /* Alg: "matmatmult" -- C = At*B */
18524222ddf1SHong Zhang     Mat_SeqAIJ          *c = (Mat_SeqAIJ*)C->data;
18534222ddf1SHong Zhang     Mat_MatTransMatMult *atb = c->atb;
18544222ddf1SHong Zhang     Mat                 At = atb->At;
18554222ddf1SHong Zhang 
18564222ddf1SHong Zhang     if (atb->updateAt) { /* At is computed in MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ() */
18574222ddf1SHong Zhang       ierr = MatTranspose_SeqAIJ(A,MAT_REUSE_MATRIX,&At);CHKERRQ(ierr);
18584222ddf1SHong Zhang     }
18594222ddf1SHong Zhang     ierr = MatMatMultNumeric_SeqAIJ_SeqAIJ(At,B,C);CHKERRQ(ierr);
18604222ddf1SHong Zhang     atb->updateAt = PETSC_TRUE;
18614222ddf1SHong Zhang   }
1862df97dc6dSFande Kong   PetscFunctionReturn(0);
1863df97dc6dSFande Kong }
1864df97dc6dSFande Kong 
18654222ddf1SHong Zhang static PetscErrorCode MatProductSymbolic_AtB_SeqAIJ_SeqAIJ(Mat C)
1866d013fc79Sandi selinger {
1867d013fc79Sandi selinger   PetscErrorCode ierr;
18684222ddf1SHong Zhang   Mat_Product    *product = C->product;
18694222ddf1SHong Zhang   Mat            A=product->A,B=product->B;
18704222ddf1SHong Zhang   PetscReal      fill=product->fill;
1871d013fc79Sandi selinger 
1872d013fc79Sandi selinger   PetscFunctionBegin;
18734222ddf1SHong Zhang   ierr = MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr);
18742869b61bSandi selinger 
18754222ddf1SHong Zhang   C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJ_SeqAIJ;
18764222ddf1SHong Zhang   PetscFunctionReturn(0);
18772869b61bSandi selinger }
1878d013fc79Sandi selinger 
18794222ddf1SHong Zhang /* --------------------------------------------------------------- */
18804222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqAIJ_AB(Mat C)
18814222ddf1SHong Zhang {
18824222ddf1SHong Zhang   PetscErrorCode ierr;
18834222ddf1SHong Zhang   Mat_Product    *product = C->product;
18844222ddf1SHong Zhang   PetscInt       alg = 0; /* default algorithm */
18854222ddf1SHong Zhang   PetscBool      flg = PETSC_FALSE;
18864222ddf1SHong Zhang #if !defined(PETSC_HAVE_HYPRE)
18874222ddf1SHong Zhang   const char     *algTypes[7] = {"sorted","scalable","scalable_fast","heap","btheap","llcondensed","rowmerge"};
18884222ddf1SHong Zhang   PetscInt       nalg = 7;
18894222ddf1SHong Zhang #else
18904222ddf1SHong Zhang   const char     *algTypes[8] = {"sorted","scalable","scalable_fast","heap","btheap","llcondensed","rowmerge","hypre"};
18914222ddf1SHong Zhang   PetscInt       nalg = 8;
18924222ddf1SHong Zhang #endif
18934222ddf1SHong Zhang 
18944222ddf1SHong Zhang   PetscFunctionBegin;
18954222ddf1SHong Zhang   /* Set default algorithm */
18964222ddf1SHong Zhang   ierr = PetscStrcmp(C->product->alg,"default",&flg);CHKERRQ(ierr);
18974222ddf1SHong Zhang   if (flg) {
18984222ddf1SHong Zhang     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
1899d013fc79Sandi selinger   }
1900d013fc79Sandi selinger 
19014222ddf1SHong Zhang   /* Get runtime option */
19024222ddf1SHong Zhang   if (product->api_user) {
19034222ddf1SHong Zhang     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");CHKERRQ(ierr);
19044222ddf1SHong Zhang     ierr = PetscOptionsEList("-matmatmult_via","Algorithmic approach","MatMatMult",algTypes,nalg,algTypes[0],&alg,&flg);CHKERRQ(ierr);
19054222ddf1SHong Zhang     ierr = PetscOptionsEnd();CHKERRQ(ierr);
19064222ddf1SHong Zhang   } else {
19074222ddf1SHong Zhang     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr);
19084222ddf1SHong Zhang     ierr = PetscOptionsEList("-matproduct_ab_via","Algorithmic approach","MatProduct_AB",algTypes,nalg,algTypes[0],&alg,&flg);CHKERRQ(ierr);
19094222ddf1SHong Zhang     ierr = PetscOptionsEnd();CHKERRQ(ierr);
1910d013fc79Sandi selinger   }
19114222ddf1SHong Zhang   if (flg) {
19124222ddf1SHong Zhang     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
1913d013fc79Sandi selinger   }
1914d013fc79Sandi selinger 
19154222ddf1SHong Zhang   C->ops->productsymbolic = MatProductSymbolic_AB;
19164222ddf1SHong Zhang   C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
19174222ddf1SHong Zhang   PetscFunctionReturn(0);
19184222ddf1SHong Zhang }
1919d013fc79Sandi selinger 
19204222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqAIJ_AtB(Mat C)
19214222ddf1SHong Zhang {
19224222ddf1SHong Zhang   PetscErrorCode ierr;
19234222ddf1SHong Zhang   Mat_Product    *product = C->product;
19244222ddf1SHong Zhang   PetscInt       alg = 0; /* default algorithm */
19254222ddf1SHong Zhang   PetscBool      flg = PETSC_FALSE;
19264222ddf1SHong Zhang   const char     *algTypes[2] = {"at*b","outerproduct"};
19274222ddf1SHong Zhang   PetscInt       nalg = 2;
1928d013fc79Sandi selinger 
19294222ddf1SHong Zhang   PetscFunctionBegin;
19304222ddf1SHong Zhang   /* Set default algorithm */
19314222ddf1SHong Zhang   ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr);
19324222ddf1SHong Zhang   if (flg) {
19334222ddf1SHong Zhang     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
19344222ddf1SHong Zhang   }
1935d013fc79Sandi selinger 
19364222ddf1SHong Zhang   /* Get runtime option */
19374222ddf1SHong Zhang   if (product->api_user) {
19384222ddf1SHong Zhang     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatTransposeMatMult","Mat");CHKERRQ(ierr);
19394222ddf1SHong Zhang     ierr = PetscOptionsEList("-mattransposematmult_via","Algorithmic approach","MatTransposeMatMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
19404222ddf1SHong Zhang     ierr = PetscOptionsEnd();CHKERRQ(ierr);
19414222ddf1SHong Zhang   } else {
19424222ddf1SHong Zhang     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AtB","Mat");CHKERRQ(ierr);
19434222ddf1SHong Zhang     ierr = PetscOptionsEList("-matproduct_atb_via","Algorithmic approach","MatProduct_AtB",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
19444222ddf1SHong Zhang     ierr = PetscOptionsEnd();CHKERRQ(ierr);
19454222ddf1SHong Zhang   }
19464222ddf1SHong Zhang   if (flg) {
19474222ddf1SHong Zhang     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
19484222ddf1SHong Zhang   }
1949d013fc79Sandi selinger 
19504222ddf1SHong Zhang   C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJ_SeqAIJ;
19514222ddf1SHong Zhang   PetscFunctionReturn(0);
19524222ddf1SHong Zhang }
19534222ddf1SHong Zhang 
19544222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqAIJ_ABt(Mat C)
19554222ddf1SHong Zhang {
19564222ddf1SHong Zhang   PetscErrorCode ierr;
19574222ddf1SHong Zhang   Mat_Product    *product = C->product;
19584222ddf1SHong Zhang   PetscInt       alg = 0; /* default algorithm */
19594222ddf1SHong Zhang   PetscBool      flg = PETSC_FALSE;
19604222ddf1SHong Zhang   const char     *algTypes[2] = {"default","color"};
19614222ddf1SHong Zhang   PetscInt       nalg = 2;
19624222ddf1SHong Zhang 
19634222ddf1SHong Zhang   PetscFunctionBegin;
19644222ddf1SHong Zhang   /* Set default algorithm */
19654222ddf1SHong Zhang   ierr = PetscStrcmp(C->product->alg,"default",&flg);CHKERRQ(ierr);
19664222ddf1SHong Zhang   if (!flg) {
19674222ddf1SHong Zhang     alg = 1;
19684222ddf1SHong Zhang     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
19694222ddf1SHong Zhang   }
19704222ddf1SHong Zhang 
19714222ddf1SHong Zhang   /* Get runtime option */
19724222ddf1SHong Zhang   if (product->api_user) {
19734222ddf1SHong Zhang     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
19744222ddf1SHong Zhang     ierr = PetscOptionsEList("-matmattransmult_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
19754222ddf1SHong Zhang     ierr = PetscOptionsEnd();CHKERRQ(ierr);
19764222ddf1SHong Zhang   } else {
19774222ddf1SHong Zhang     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr);
19784222ddf1SHong Zhang     ierr = PetscOptionsEList("-matproduct_abt_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
19794222ddf1SHong Zhang     ierr = PetscOptionsEnd();CHKERRQ(ierr);
19804222ddf1SHong Zhang   }
19814222ddf1SHong Zhang   if (flg) {
19824222ddf1SHong Zhang     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
19834222ddf1SHong Zhang   }
19844222ddf1SHong Zhang 
19854222ddf1SHong Zhang   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
19864222ddf1SHong Zhang   C->ops->productsymbolic          = MatProductSymbolic_ABt;
19874222ddf1SHong Zhang   PetscFunctionReturn(0);
19884222ddf1SHong Zhang }
19894222ddf1SHong Zhang 
19904222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqAIJ_PtAP(Mat C)
19914222ddf1SHong Zhang {
19924222ddf1SHong Zhang   PetscErrorCode ierr;
19934222ddf1SHong Zhang   Mat_Product    *product = C->product;
19944222ddf1SHong Zhang   PetscBool      flg = PETSC_FALSE;
19954222ddf1SHong Zhang   PetscInt       alg = 0; /* default algorithm -- alg=1 should be default!!! */
19964222ddf1SHong Zhang #if !defined(PETSC_HAVE_HYPRE)
19974222ddf1SHong Zhang   const char      *algTypes[2] = {"scalable","rap"};
19984222ddf1SHong Zhang   PetscInt        nalg = 2;
19994222ddf1SHong Zhang #else
20004222ddf1SHong Zhang   const char      *algTypes[3] = {"scalable","rap","hypre"};
20014222ddf1SHong Zhang   PetscInt        nalg = 3;
20024222ddf1SHong Zhang #endif
20034222ddf1SHong Zhang 
20044222ddf1SHong Zhang   PetscFunctionBegin;
20054222ddf1SHong Zhang   /* Set default algorithm */
20064222ddf1SHong Zhang   ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr);
20074222ddf1SHong Zhang   if (flg) {
20084222ddf1SHong Zhang     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
20094222ddf1SHong Zhang   }
20104222ddf1SHong Zhang 
20114222ddf1SHong Zhang   /* Get runtime option */
20124222ddf1SHong Zhang   if (product->api_user) {
20134222ddf1SHong Zhang     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");CHKERRQ(ierr);
20144222ddf1SHong Zhang     ierr = PetscOptionsEList("-matptap_via","Algorithmic approach","MatPtAP",algTypes,nalg,algTypes[0],&alg,&flg);CHKERRQ(ierr);
20154222ddf1SHong Zhang     ierr = PetscOptionsEnd();CHKERRQ(ierr);
20164222ddf1SHong Zhang   } else {
20174222ddf1SHong Zhang     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
20184222ddf1SHong Zhang     ierr = PetscOptionsEList("-matproduct_ptap_via","Algorithmic approach","MatProduct_PtAP",algTypes,nalg,algTypes[0],&alg,&flg);CHKERRQ(ierr);
20194222ddf1SHong Zhang     ierr = PetscOptionsEnd();CHKERRQ(ierr);
20204222ddf1SHong Zhang   }
20214222ddf1SHong Zhang   if (flg) {
20224222ddf1SHong Zhang     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
20234222ddf1SHong Zhang   }
20244222ddf1SHong Zhang 
20254222ddf1SHong Zhang   C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqAIJ;
20264222ddf1SHong Zhang   PetscFunctionReturn(0);
20274222ddf1SHong Zhang }
20284222ddf1SHong Zhang 
20294222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqAIJ_RARt(Mat C)
20304222ddf1SHong Zhang {
20314222ddf1SHong Zhang   PetscErrorCode ierr;
20324222ddf1SHong Zhang   Mat_Product    *product = C->product;
20334222ddf1SHong Zhang   PetscBool      flg = PETSC_FALSE;
20344222ddf1SHong Zhang   PetscInt       alg = 0; /* default algorithm */
20354222ddf1SHong Zhang   const char     *algTypes[3] = {"r*a*rt","r*art","coloring_rart"};
20364222ddf1SHong Zhang   PetscInt        nalg = 3;
20374222ddf1SHong Zhang 
20384222ddf1SHong Zhang   PetscFunctionBegin;
20394222ddf1SHong Zhang   /* Set default algorithm */
20404222ddf1SHong Zhang   ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr);
20414222ddf1SHong Zhang   if (flg) {
20424222ddf1SHong Zhang     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
20434222ddf1SHong Zhang   }
20444222ddf1SHong Zhang 
20454222ddf1SHong Zhang   /* Get runtime option */
20464222ddf1SHong Zhang   if (product->api_user) {
20474222ddf1SHong Zhang     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatRARt","Mat");CHKERRQ(ierr);
20484222ddf1SHong Zhang     ierr = PetscOptionsEList("-matrart_via","Algorithmic approach","MatRARt",algTypes,nalg,algTypes[0],&alg,&flg);CHKERRQ(ierr);
20494222ddf1SHong Zhang     ierr = PetscOptionsEnd();CHKERRQ(ierr);
20504222ddf1SHong Zhang   } else {
20514222ddf1SHong Zhang     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_RARt","Mat");CHKERRQ(ierr);
20524222ddf1SHong Zhang     ierr = PetscOptionsEList("-matproduct_rart_via","Algorithmic approach","MatProduct_RARt",algTypes,nalg,algTypes[0],&alg,&flg);CHKERRQ(ierr);
20534222ddf1SHong Zhang     ierr = PetscOptionsEnd();CHKERRQ(ierr);
20544222ddf1SHong Zhang   }
20554222ddf1SHong Zhang   if (flg) {
20564222ddf1SHong Zhang     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
20574222ddf1SHong Zhang   }
20584222ddf1SHong Zhang 
20594222ddf1SHong Zhang   C->ops->productsymbolic = MatProductSymbolic_RARt_SeqAIJ_SeqAIJ;
20604222ddf1SHong Zhang   PetscFunctionReturn(0);
20614222ddf1SHong Zhang }
20624222ddf1SHong Zhang 
20634222ddf1SHong Zhang /* ABC = A*B*C = A*(B*C); ABC's algorithm must be chosen from AB's algorithm */
20644222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqAIJ_ABC(Mat C)
20654222ddf1SHong Zhang {
20664222ddf1SHong Zhang   PetscErrorCode ierr;
20674222ddf1SHong Zhang   Mat_Product    *product = C->product;
20684222ddf1SHong Zhang   PetscInt       alg = 0; /* default algorithm */
20694222ddf1SHong Zhang   PetscBool      flg = PETSC_FALSE;
20704222ddf1SHong Zhang   const char     *algTypes[7] = {"sorted","scalable","scalable_fast","heap","btheap","llcondensed","rowmerge"};
20714222ddf1SHong Zhang   PetscInt       nalg = 7;
20724222ddf1SHong Zhang 
20734222ddf1SHong Zhang   PetscFunctionBegin;
20744222ddf1SHong Zhang   /* Set default algorithm */
20754222ddf1SHong Zhang   ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr);
20764222ddf1SHong Zhang   if (flg) {
20774222ddf1SHong Zhang     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
20784222ddf1SHong Zhang   }
20794222ddf1SHong Zhang 
20804222ddf1SHong Zhang   /* Get runtime option */
20814222ddf1SHong Zhang   if (product->api_user) {
20824222ddf1SHong Zhang     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMatMult","Mat");CHKERRQ(ierr);
20834222ddf1SHong Zhang     ierr = PetscOptionsEList("-matmatmatmult_via","Algorithmic approach","MatMatMatMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
20844222ddf1SHong Zhang     ierr = PetscOptionsEnd();CHKERRQ(ierr);
20854222ddf1SHong Zhang   } else {
20864222ddf1SHong Zhang     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABC","Mat");CHKERRQ(ierr);
20874222ddf1SHong Zhang     ierr = PetscOptionsEList("-matproduct_abc_via","Algorithmic approach","MatProduct_ABC",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
20884222ddf1SHong Zhang     ierr = PetscOptionsEnd();CHKERRQ(ierr);
20894222ddf1SHong Zhang   }
20904222ddf1SHong Zhang   if (flg) {
20914222ddf1SHong Zhang     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
20924222ddf1SHong Zhang   }
20934222ddf1SHong Zhang 
20944222ddf1SHong Zhang   C->ops->matmatmultsymbolic = MatMatMatMultSymbolic_SeqAIJ_SeqAIJ_SeqAIJ;
20954222ddf1SHong Zhang   C->ops->productsymbolic    = MatProductSymbolic_ABC;
20964222ddf1SHong Zhang   PetscFunctionReturn(0);
20974222ddf1SHong Zhang }
20984222ddf1SHong Zhang 
20994222ddf1SHong Zhang PetscErrorCode MatProductSetFromOptions_SeqAIJ(Mat C)
21004222ddf1SHong Zhang {
21014222ddf1SHong Zhang   PetscErrorCode ierr;
21024222ddf1SHong Zhang   Mat_Product    *product = C->product;
21034222ddf1SHong Zhang 
21044222ddf1SHong Zhang   PetscFunctionBegin;
21054222ddf1SHong Zhang   switch (product->type) {
21064222ddf1SHong Zhang   case MATPRODUCT_AB:
21074222ddf1SHong Zhang     ierr = MatProductSetFromOptions_SeqAIJ_AB(C);CHKERRQ(ierr);
21084222ddf1SHong Zhang     break;
21094222ddf1SHong Zhang   case MATPRODUCT_AtB:
21104222ddf1SHong Zhang     ierr = MatProductSetFromOptions_SeqAIJ_AtB(C);CHKERRQ(ierr);
21114222ddf1SHong Zhang     break;
21124222ddf1SHong Zhang   case MATPRODUCT_ABt:
21134222ddf1SHong Zhang     ierr = MatProductSetFromOptions_SeqAIJ_ABt(C);CHKERRQ(ierr);
21144222ddf1SHong Zhang     break;
21154222ddf1SHong Zhang   case MATPRODUCT_PtAP:
21164222ddf1SHong Zhang     ierr = MatProductSetFromOptions_SeqAIJ_PtAP(C);CHKERRQ(ierr);
21174222ddf1SHong Zhang     break;
21184222ddf1SHong Zhang   case MATPRODUCT_RARt:
21194222ddf1SHong Zhang     ierr = MatProductSetFromOptions_SeqAIJ_RARt(C);CHKERRQ(ierr);
21204222ddf1SHong Zhang     break;
21214222ddf1SHong Zhang   case MATPRODUCT_ABC:
21224222ddf1SHong Zhang     ierr = MatProductSetFromOptions_SeqAIJ_ABC(C);CHKERRQ(ierr);
21234222ddf1SHong Zhang     break;
2124*544a5e07SHong Zhang   default: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatProduct type %s is not supported for SeqAIJ and SeqAIJ matrices",MatProductTypes[product->type]);
21254222ddf1SHong Zhang   }
2126d013fc79Sandi selinger   PetscFunctionReturn(0);
2127d013fc79Sandi selinger }
2128