xref: /petsc/src/mat/impls/aij/seq/matmatmult.c (revision 1baa6e3354bfe224b33a0c158f482508792a8a6e)
1be1d678aSKris Buschelman 
2d50806bdSBarry Smith /*
32499ec78SHong Zhang   Defines matrix-matrix product routines for pairs of SeqAIJ matrices
4d50806bdSBarry Smith           C = A * B
5d50806bdSBarry Smith */
6d50806bdSBarry Smith 
7c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/
8c6db04a5SJed Brown #include <../src/mat/utils/freespace.h>
9c6db04a5SJed Brown #include <petscbt.h>
10af0996ceSBarry Smith #include <petsc/private/isimpl.h>
1107475bc1SBarry Smith #include <../src/mat/impls/dense/seq/dense.h>
127bab7c10SHong Zhang 
13df97dc6dSFande Kong PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
14df97dc6dSFande Kong {
15df97dc6dSFande Kong   PetscFunctionBegin;
16df97dc6dSFande Kong   if (C->ops->matmultnumeric) {
1708401ef6SPierre Jolivet     PetscCheck(C->ops->matmultnumeric != MatMatMultNumeric_SeqAIJ_SeqAIJ,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Recursive call");
189566063dSJacob Faibussowitsch     PetscCall((*C->ops->matmultnumeric)(A,B,C));
19df97dc6dSFande Kong   } else {
209566063dSJacob Faibussowitsch     PetscCall(MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted(A,B,C));
21df97dc6dSFande Kong   }
22df97dc6dSFande Kong   PetscFunctionReturn(0);
23df97dc6dSFande Kong }
24df97dc6dSFande Kong 
254222ddf1SHong Zhang /* Modified from MatCreateSeqAIJWithArrays() */
26e4e71118SStefano Zampini PETSC_INTERN PetscErrorCode MatSetSeqAIJWithArrays_private(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],MatType mtype,Mat mat)
27df97dc6dSFande Kong {
284222ddf1SHong Zhang   PetscInt       ii;
294222ddf1SHong Zhang   Mat_SeqAIJ     *aij;
30cbc6b225SStefano Zampini   PetscBool      isseqaij, osingle, ofree_a, ofree_ij;
315c66b693SKris Buschelman 
325c66b693SKris Buschelman   PetscFunctionBegin;
33aed4548fSBarry Smith   PetscCheck(m <= 0 || !i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
349566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(mat,m,n,m,n));
354222ddf1SHong Zhang 
36e4e71118SStefano Zampini   if (!mtype) {
379566063dSJacob Faibussowitsch     PetscCall(PetscObjectBaseTypeCompare((PetscObject)mat,MATSEQAIJ,&isseqaij));
389566063dSJacob Faibussowitsch     if (!isseqaij) PetscCall(MatSetType(mat,MATSEQAIJ));
39e4e71118SStefano Zampini   } else {
409566063dSJacob Faibussowitsch     PetscCall(MatSetType(mat,mtype));
41e4e71118SStefano Zampini   }
42cbc6b225SStefano Zampini 
434222ddf1SHong Zhang   aij  = (Mat_SeqAIJ*)(mat)->data;
44cbc6b225SStefano Zampini   osingle = aij->singlemalloc;
45cbc6b225SStefano Zampini   ofree_a = aij->free_a;
46cbc6b225SStefano Zampini   ofree_ij = aij->free_ij;
47cbc6b225SStefano Zampini   /* changes the free flags */
489566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(mat,MAT_SKIP_ALLOCATION,NULL));
49cbc6b225SStefano Zampini 
509566063dSJacob Faibussowitsch   PetscCall(PetscFree(aij->ilen));
519566063dSJacob Faibussowitsch   PetscCall(PetscFree(aij->imax));
529566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m,&aij->imax));
539566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m,&aij->ilen));
54cbc6b225SStefano Zampini   for (ii=0,aij->nonzerorowcnt=0,aij->rmax = 0; ii<m; ii++) {
55cbc6b225SStefano Zampini     const PetscInt rnz = i[ii+1] - i[ii];
56cbc6b225SStefano Zampini     aij->nonzerorowcnt += !!rnz;
57cbc6b225SStefano Zampini     aij->rmax = PetscMax(aij->rmax,rnz);
58cbc6b225SStefano Zampini     aij->ilen[ii] = aij->imax[ii] = i[ii+1] - i[ii];
59cbc6b225SStefano Zampini   }
60cbc6b225SStefano Zampini   aij->maxnz = i[m];
61cbc6b225SStefano Zampini   aij->nz = i[m];
624222ddf1SHong Zhang 
63cbc6b225SStefano Zampini   if (osingle) {
649566063dSJacob Faibussowitsch     PetscCall(PetscFree3(aij->a,aij->j,aij->i));
65cbc6b225SStefano Zampini   } else {
669566063dSJacob Faibussowitsch     if (ofree_a)  PetscCall(PetscFree(aij->a));
679566063dSJacob Faibussowitsch     if (ofree_ij) PetscCall(PetscFree(aij->j));
689566063dSJacob Faibussowitsch     if (ofree_ij) PetscCall(PetscFree(aij->i));
69cbc6b225SStefano Zampini   }
704222ddf1SHong Zhang   aij->i            = i;
714222ddf1SHong Zhang   aij->j            = j;
724222ddf1SHong Zhang   aij->a            = a;
734222ddf1SHong Zhang   aij->nonew        = -1; /* this indicates that inserting a new value in the matrix that generates a new nonzero is an error */
74cbc6b225SStefano Zampini   /* default to not retain ownership */
75cbc6b225SStefano Zampini   aij->singlemalloc = PETSC_FALSE;
764222ddf1SHong Zhang   aij->free_a       = PETSC_FALSE;
774222ddf1SHong Zhang   aij->free_ij      = PETSC_FALSE;
789566063dSJacob Faibussowitsch   PetscCall(MatCheckCompressedRow(mat,aij->nonzerorowcnt,&aij->compressedrow,aij->i,m,0.6));
795c66b693SKris Buschelman   PetscFunctionReturn(0);
805c66b693SKris Buschelman }
811c24bd37SHong Zhang 
824222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat C)
834222ddf1SHong Zhang {
844222ddf1SHong Zhang   Mat_Product         *product = C->product;
854222ddf1SHong Zhang   MatProductAlgorithm alg;
864222ddf1SHong Zhang   PetscBool           flg;
874222ddf1SHong Zhang 
884222ddf1SHong Zhang   PetscFunctionBegin;
894222ddf1SHong Zhang   if (product) {
904222ddf1SHong Zhang     alg = product->alg;
914222ddf1SHong Zhang   } else {
924222ddf1SHong Zhang     alg = "sorted";
934222ddf1SHong Zhang   }
944222ddf1SHong Zhang   /* sorted */
959566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(alg,"sorted",&flg));
964222ddf1SHong Zhang   if (flg) {
979566063dSJacob Faibussowitsch     PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ_Sorted(A,B,fill,C));
984222ddf1SHong Zhang     PetscFunctionReturn(0);
994222ddf1SHong Zhang   }
1004222ddf1SHong Zhang 
1014222ddf1SHong Zhang   /* scalable */
1029566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(alg,"scalable",&flg));
1034222ddf1SHong Zhang   if (flg) {
1049566063dSJacob Faibussowitsch     PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(A,B,fill,C));
1054222ddf1SHong Zhang     PetscFunctionReturn(0);
1064222ddf1SHong Zhang   }
1074222ddf1SHong Zhang 
1084222ddf1SHong Zhang   /* scalable_fast */
1099566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(alg,"scalable_fast",&flg));
1104222ddf1SHong Zhang   if (flg) {
1119566063dSJacob Faibussowitsch     PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_fast(A,B,fill,C));
1124222ddf1SHong Zhang     PetscFunctionReturn(0);
1134222ddf1SHong Zhang   }
1144222ddf1SHong Zhang 
1154222ddf1SHong Zhang   /* heap */
1169566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(alg,"heap",&flg));
1174222ddf1SHong Zhang   if (flg) {
1189566063dSJacob Faibussowitsch     PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ_Heap(A,B,fill,C));
1194222ddf1SHong Zhang     PetscFunctionReturn(0);
1204222ddf1SHong Zhang   }
1214222ddf1SHong Zhang 
1224222ddf1SHong Zhang   /* btheap */
1239566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(alg,"btheap",&flg));
1244222ddf1SHong Zhang   if (flg) {
1259566063dSJacob Faibussowitsch     PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ_BTHeap(A,B,fill,C));
1264222ddf1SHong Zhang     PetscFunctionReturn(0);
1274222ddf1SHong Zhang   }
1284222ddf1SHong Zhang 
1294222ddf1SHong Zhang   /* llcondensed */
1309566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(alg,"llcondensed",&flg));
1314222ddf1SHong Zhang   if (flg) {
1329566063dSJacob Faibussowitsch     PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ_LLCondensed(A,B,fill,C));
1334222ddf1SHong Zhang     PetscFunctionReturn(0);
1344222ddf1SHong Zhang   }
1354222ddf1SHong Zhang 
1364222ddf1SHong Zhang   /* rowmerge */
1379566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(alg,"rowmerge",&flg));
1384222ddf1SHong Zhang   if (flg) {
1399566063dSJacob Faibussowitsch     PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ_RowMerge(A,B,fill,C));
1404222ddf1SHong Zhang     PetscFunctionReturn(0);
1414222ddf1SHong Zhang   }
1424222ddf1SHong Zhang 
1434222ddf1SHong Zhang #if defined(PETSC_HAVE_HYPRE)
1449566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(alg,"hypre",&flg));
1454222ddf1SHong Zhang   if (flg) {
1469566063dSJacob Faibussowitsch     PetscCall(MatMatMultSymbolic_AIJ_AIJ_wHYPRE(A,B,fill,C));
1474222ddf1SHong Zhang     PetscFunctionReturn(0);
1484222ddf1SHong Zhang   }
1494222ddf1SHong Zhang #endif
1504222ddf1SHong Zhang 
1514222ddf1SHong Zhang   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product Algorithm is not supported");
1524222ddf1SHong Zhang }
1534222ddf1SHong Zhang 
1544222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_LLCondensed(Mat A,Mat B,PetscReal fill,Mat C)
155b561aa9dSHong Zhang {
156b561aa9dSHong Zhang   Mat_SeqAIJ         *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
157971236abSHong Zhang   PetscInt           *ai=a->i,*bi=b->i,*ci,*cj;
158c1ba5575SJed Brown   PetscInt           am =A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
159b561aa9dSHong Zhang   PetscReal          afill;
160eca6b86aSHong Zhang   PetscInt           i,j,anzi,brow,bnzj,cnzi,*bj,*aj,*lnk,ndouble=0,Crmax;
161eca6b86aSHong Zhang   PetscTable         ta;
162fb908031SHong Zhang   PetscBT            lnkbt;
1630298fd71SBarry Smith   PetscFreeSpaceList free_space=NULL,current_space=NULL;
164b561aa9dSHong Zhang 
165b561aa9dSHong Zhang   PetscFunctionBegin;
166bd958071SHong Zhang   /* Get ci and cj */
167bd958071SHong Zhang   /*---------------*/
168fb908031SHong Zhang   /* Allocate ci array, arrays for fill computation and */
169fb908031SHong Zhang   /* free space for accumulating nonzero column info */
1709566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(am+2,&ci));
171fb908031SHong Zhang   ci[0] = 0;
172fb908031SHong Zhang 
173fb908031SHong Zhang   /* create and initialize a linked list */
1749566063dSJacob Faibussowitsch   PetscCall(PetscTableCreate(bn,bn,&ta));
175c373ccc6SHong Zhang   MatRowMergeMax_SeqAIJ(b,bm,ta);
1769566063dSJacob Faibussowitsch   PetscCall(PetscTableGetCount(ta,&Crmax));
1779566063dSJacob Faibussowitsch   PetscCall(PetscTableDestroy(&ta));
178eca6b86aSHong Zhang 
1799566063dSJacob Faibussowitsch   PetscCall(PetscLLCondensedCreate(Crmax,bn,&lnk,&lnkbt));
180fb908031SHong Zhang 
181fb908031SHong Zhang   /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
1829566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space));
1832205254eSKarl Rupp 
184fb908031SHong Zhang   current_space = free_space;
185fb908031SHong Zhang 
186fb908031SHong Zhang   /* Determine ci and cj */
187fb908031SHong Zhang   for (i=0; i<am; i++) {
188fb908031SHong Zhang     anzi = ai[i+1] - ai[i];
189fb908031SHong Zhang     aj   = a->j + ai[i];
190fb908031SHong Zhang     for (j=0; j<anzi; j++) {
191fb908031SHong Zhang       brow = aj[j];
192fb908031SHong Zhang       bnzj = bi[brow+1] - bi[brow];
193fb908031SHong Zhang       bj   = b->j + bi[brow];
194fb908031SHong Zhang       /* add non-zero cols of B into the sorted linked list lnk */
1959566063dSJacob Faibussowitsch       PetscCall(PetscLLCondensedAddSorted(bnzj,bj,lnk,lnkbt));
196fb908031SHong Zhang     }
1978c78258cSHong Zhang     /* add possible missing diagonal entry */
1988c78258cSHong Zhang     if (C->force_diagonals) {
1999566063dSJacob Faibussowitsch       PetscCall(PetscLLCondensedAddSorted(1,&i,lnk,lnkbt));
2008c78258cSHong Zhang     }
201fb908031SHong Zhang     cnzi = lnk[0];
202fb908031SHong Zhang 
203fb908031SHong Zhang     /* If free space is not available, make more free space */
204fb908031SHong Zhang     /* Double the amount of total space in the list */
205fb908031SHong Zhang     if (current_space->local_remaining<cnzi) {
2069566063dSJacob Faibussowitsch       PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),&current_space));
207fb908031SHong Zhang       ndouble++;
208fb908031SHong Zhang     }
209fb908031SHong Zhang 
210fb908031SHong Zhang     /* Copy data into free space, then initialize lnk */
2119566063dSJacob Faibussowitsch     PetscCall(PetscLLCondensedClean(bn,cnzi,current_space->array,lnk,lnkbt));
2122205254eSKarl Rupp 
213fb908031SHong Zhang     current_space->array           += cnzi;
214fb908031SHong Zhang     current_space->local_used      += cnzi;
215fb908031SHong Zhang     current_space->local_remaining -= cnzi;
2162205254eSKarl Rupp 
217fb908031SHong Zhang     ci[i+1] = ci[i] + cnzi;
218fb908031SHong Zhang   }
219fb908031SHong Zhang 
220fb908031SHong Zhang   /* Column indices are in the list of free space */
221fb908031SHong Zhang   /* Allocate space for cj, initialize cj, and */
222fb908031SHong Zhang   /* destroy list of free space and other temporary array(s) */
2239566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ci[am]+1,&cj));
2249566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceContiguous(&free_space,cj));
2259566063dSJacob Faibussowitsch   PetscCall(PetscLLCondensedDestroy(lnk,lnkbt));
226b561aa9dSHong Zhang 
22726be0446SHong Zhang   /* put together the new symbolic matrix */
2289566063dSJacob Faibussowitsch   PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C));
2299566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSizesFromMats(C,A,B));
23058c24d83SHong Zhang 
23158c24d83SHong Zhang   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
23258c24d83SHong Zhang   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
2334222ddf1SHong Zhang   c          = (Mat_SeqAIJ*)(C->data);
234aa1aec99SHong Zhang   c->free_a  = PETSC_FALSE;
235e6b907acSBarry Smith   c->free_ij = PETSC_TRUE;
23658c24d83SHong Zhang   c->nonew   = 0;
2374222ddf1SHong Zhang 
2384222ddf1SHong Zhang   /* fast, needs non-scalable O(bn) array 'abdense' */
2394222ddf1SHong Zhang   C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted;
2400b7e3e3dSHong Zhang 
2417212da7cSHong Zhang   /* set MatInfo */
2427212da7cSHong Zhang   afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5;
243f2b054eeSHong Zhang   if (afill < 1.0) afill = 1.0;
2444222ddf1SHong Zhang   C->info.mallocs           = ndouble;
2454222ddf1SHong Zhang   C->info.fill_ratio_given  = fill;
2464222ddf1SHong Zhang   C->info.fill_ratio_needed = afill;
2477212da7cSHong Zhang 
2487212da7cSHong Zhang #if defined(PETSC_USE_INFO)
2497212da7cSHong Zhang   if (ci[am]) {
2509566063dSJacob Faibussowitsch     PetscCall(PetscInfo(C,"Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill));
2519566063dSJacob Faibussowitsch     PetscCall(PetscInfo(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill));
252f2b054eeSHong Zhang   } else {
2539566063dSJacob Faibussowitsch     PetscCall(PetscInfo(C,"Empty matrix product\n"));
254be0fcf8dSHong Zhang   }
255f2b054eeSHong Zhang #endif
25658c24d83SHong Zhang   PetscFunctionReturn(0);
25758c24d83SHong Zhang }
258d50806bdSBarry Smith 
259df97dc6dSFande Kong PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted(Mat A,Mat B,Mat C)
260d50806bdSBarry Smith {
261d13dce4bSSatish Balay   PetscLogDouble    flops=0.0;
262d50806bdSBarry Smith   Mat_SeqAIJ        *a   = (Mat_SeqAIJ*)A->data;
263d50806bdSBarry Smith   Mat_SeqAIJ        *b   = (Mat_SeqAIJ*)B->data;
264d50806bdSBarry Smith   Mat_SeqAIJ        *c   = (Mat_SeqAIJ*)C->data;
26538baddfdSBarry Smith   PetscInt          *ai  =a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j;
266c58ca2e3SHong Zhang   PetscInt          am   =A->rmap->n,cm=C->rmap->n;
267519eb980SHong Zhang   PetscInt          i,j,k,anzi,bnzi,cnzi,brow;
2682e5835c6SStefano Zampini   PetscScalar       *ca,valtmp;
269aa1aec99SHong Zhang   PetscScalar       *ab_dense;
2706718818eSStefano Zampini   PetscContainer    cab_dense;
2712e5835c6SStefano Zampini   const PetscScalar *aa,*ba,*baj;
272d50806bdSBarry Smith 
273d50806bdSBarry Smith   PetscFunctionBegin;
2749566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(A,&aa));
2759566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(B,&ba));
276aa1aec99SHong Zhang   if (!c->a) { /* first call of MatMatMultNumeric_SeqAIJ_SeqAIJ, allocate ca and matmult_abdense */
2779566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(ci[cm]+1,&ca));
278aa1aec99SHong Zhang     c->a      = ca;
279aa1aec99SHong Zhang     c->free_a = PETSC_TRUE;
2806718818eSStefano Zampini   } else ca = c->a;
2816718818eSStefano Zampini 
2826718818eSStefano Zampini   /* TODO this should be done in the symbolic phase */
2836718818eSStefano Zampini   /* However, this function is so heavily used (sometimes in an hidden way through multnumeric function pointers
2846718818eSStefano Zampini      that is hard to eradicate) */
2859566063dSJacob Faibussowitsch   PetscCall(PetscObjectQuery((PetscObject)C,"__PETSc__ab_dense",(PetscObject*)&cab_dense));
2866718818eSStefano Zampini   if (!cab_dense) {
2879566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(B->cmap->N,&ab_dense));
2889566063dSJacob Faibussowitsch     PetscCall(PetscContainerCreate(PETSC_COMM_SELF,&cab_dense));
2899566063dSJacob Faibussowitsch     PetscCall(PetscContainerSetPointer(cab_dense,ab_dense));
2909566063dSJacob Faibussowitsch     PetscCall(PetscContainerSetUserDestroy(cab_dense,PetscContainerUserDestroyDefault));
2919566063dSJacob Faibussowitsch     PetscCall(PetscObjectCompose((PetscObject)C,"__PETSc__ab_dense",(PetscObject)cab_dense));
2929566063dSJacob Faibussowitsch     PetscCall(PetscObjectDereference((PetscObject)cab_dense));
293d90d86d0SMatthew G. Knepley   }
2949566063dSJacob Faibussowitsch   PetscCall(PetscContainerGetPointer(cab_dense,(void**)&ab_dense));
2959566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(ab_dense,B->cmap->N));
296aa1aec99SHong Zhang 
297c124e916SHong Zhang   /* clean old values in C */
2989566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(ca,ci[cm]));
299d50806bdSBarry Smith   /* Traverse A row-wise. */
300d50806bdSBarry Smith   /* Build the ith row in C by summing over nonzero columns in A, */
301d50806bdSBarry Smith   /* the rows of B corresponding to nonzeros of A. */
302d50806bdSBarry Smith   for (i=0; i<am; i++) {
303d50806bdSBarry Smith     anzi = ai[i+1] - ai[i];
304d50806bdSBarry Smith     for (j=0; j<anzi; j++) {
305519eb980SHong Zhang       brow = aj[j];
306d50806bdSBarry Smith       bnzi = bi[brow+1] - bi[brow];
307d50806bdSBarry Smith       bjj  = bj + bi[brow];
308d50806bdSBarry Smith       baj  = ba + bi[brow];
309519eb980SHong Zhang       /* perform dense axpy */
31036ec6d2dSHong Zhang       valtmp = aa[j];
311519eb980SHong Zhang       for (k=0; k<bnzi; k++) {
31236ec6d2dSHong Zhang         ab_dense[bjj[k]] += valtmp*baj[k];
313519eb980SHong Zhang       }
314519eb980SHong Zhang       flops += 2*bnzi;
315519eb980SHong Zhang     }
316c58ca2e3SHong Zhang     aj += anzi; aa += anzi;
317c58ca2e3SHong Zhang 
318c58ca2e3SHong Zhang     cnzi = ci[i+1] - ci[i];
319519eb980SHong Zhang     for (k=0; k<cnzi; k++) {
320519eb980SHong Zhang       ca[k]          += ab_dense[cj[k]];
321519eb980SHong Zhang       ab_dense[cj[k]] = 0.0; /* zero ab_dense */
322519eb980SHong Zhang     }
323519eb980SHong Zhang     flops += cnzi;
324519eb980SHong Zhang     cj    += cnzi; ca += cnzi;
325519eb980SHong Zhang   }
3262e5835c6SStefano Zampini #if defined(PETSC_HAVE_DEVICE)
3272e5835c6SStefano Zampini   if (C->offloadmask != PETSC_OFFLOAD_UNALLOCATED) C->offloadmask = PETSC_OFFLOAD_CPU;
3282e5835c6SStefano Zampini #endif
3299566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
3309566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
3319566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(flops));
3329566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(A,&aa));
3339566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(B,&ba));
334c58ca2e3SHong Zhang   PetscFunctionReturn(0);
335c58ca2e3SHong Zhang }
336c58ca2e3SHong Zhang 
33725023028SHong Zhang PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,Mat C)
338c58ca2e3SHong Zhang {
339c58ca2e3SHong Zhang   PetscLogDouble    flops=0.0;
340c58ca2e3SHong Zhang   Mat_SeqAIJ        *a   = (Mat_SeqAIJ*)A->data;
341c58ca2e3SHong Zhang   Mat_SeqAIJ        *b   = (Mat_SeqAIJ*)B->data;
342c58ca2e3SHong Zhang   Mat_SeqAIJ        *c   = (Mat_SeqAIJ*)C->data;
343c58ca2e3SHong Zhang   PetscInt          *ai  = a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j;
344c58ca2e3SHong Zhang   PetscInt          am   = A->rmap->N,cm=C->rmap->N;
345c58ca2e3SHong Zhang   PetscInt          i,j,k,anzi,bnzi,cnzi,brow;
3462e5835c6SStefano Zampini   PetscScalar       *ca=c->a,valtmp;
3472e5835c6SStefano Zampini   const PetscScalar *aa,*ba,*baj;
348c58ca2e3SHong Zhang   PetscInt          nextb;
349c58ca2e3SHong Zhang 
350c58ca2e3SHong Zhang   PetscFunctionBegin;
3519566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(A,&aa));
3529566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(B,&ba));
353cf742fccSHong Zhang   if (!ca) { /* first call of MatMatMultNumeric_SeqAIJ_SeqAIJ, allocate ca and matmult_abdense */
3549566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(ci[cm]+1,&ca));
355cf742fccSHong Zhang     c->a      = ca;
356cf742fccSHong Zhang     c->free_a = PETSC_TRUE;
357cf742fccSHong Zhang   }
358cf742fccSHong Zhang 
359c58ca2e3SHong Zhang   /* clean old values in C */
3609566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(ca,ci[cm]));
361c58ca2e3SHong Zhang   /* Traverse A row-wise. */
362c58ca2e3SHong Zhang   /* Build the ith row in C by summing over nonzero columns in A, */
363c58ca2e3SHong Zhang   /* the rows of B corresponding to nonzeros of A. */
364519eb980SHong Zhang   for (i=0; i<am; i++) {
365519eb980SHong Zhang     anzi = ai[i+1] - ai[i];
366519eb980SHong Zhang     cnzi = ci[i+1] - ci[i];
367519eb980SHong Zhang     for (j=0; j<anzi; j++) {
368519eb980SHong Zhang       brow = aj[j];
369519eb980SHong Zhang       bnzi = bi[brow+1] - bi[brow];
370519eb980SHong Zhang       bjj  = bj + bi[brow];
371519eb980SHong Zhang       baj  = ba + bi[brow];
372519eb980SHong Zhang       /* perform sparse axpy */
37336ec6d2dSHong Zhang       valtmp = aa[j];
374c124e916SHong Zhang       nextb  = 0;
375c124e916SHong Zhang       for (k=0; nextb<bnzi; k++) {
376c124e916SHong Zhang         if (cj[k] == bjj[nextb]) { /* ccol == bcol */
37736ec6d2dSHong Zhang           ca[k] += valtmp*baj[nextb++];
378c124e916SHong Zhang         }
379d50806bdSBarry Smith       }
380d50806bdSBarry Smith       flops += 2*bnzi;
381d50806bdSBarry Smith     }
382519eb980SHong Zhang     aj += anzi; aa += anzi;
383519eb980SHong Zhang     cj += cnzi; ca += cnzi;
384d50806bdSBarry Smith   }
3852e5835c6SStefano Zampini #if defined(PETSC_HAVE_DEVICE)
3862e5835c6SStefano Zampini   if (C->offloadmask != PETSC_OFFLOAD_UNALLOCATED) C->offloadmask = PETSC_OFFLOAD_CPU;
3872e5835c6SStefano Zampini #endif
3889566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
3899566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
3909566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(flops));
3919566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(A,&aa));
3929566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(B,&ba));
393d50806bdSBarry Smith   PetscFunctionReturn(0);
394d50806bdSBarry Smith }
395bc011b1eSHong Zhang 
3964222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_fast(Mat A,Mat B,PetscReal fill,Mat C)
39725296bd5SBarry Smith {
39825296bd5SBarry Smith   Mat_SeqAIJ         *a  = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
39925296bd5SBarry Smith   PetscInt           *ai = a->i,*bi=b->i,*ci,*cj;
4003c50cad2SHong Zhang   PetscInt           am  = A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
40125296bd5SBarry Smith   MatScalar          *ca;
40225296bd5SBarry Smith   PetscReal          afill;
403eca6b86aSHong Zhang   PetscInt           i,j,anzi,brow,bnzj,cnzi,*bj,*aj,*lnk,ndouble=0,Crmax;
404eca6b86aSHong Zhang   PetscTable         ta;
4050298fd71SBarry Smith   PetscFreeSpaceList free_space=NULL,current_space=NULL;
40625296bd5SBarry Smith 
40725296bd5SBarry Smith   PetscFunctionBegin;
4083c50cad2SHong Zhang   /* Get ci and cj - same as MatMatMultSymbolic_SeqAIJ_SeqAIJ except using PetscLLxxx_fast() */
4093c50cad2SHong Zhang   /*-----------------------------------------------------------------------------------------*/
4103c50cad2SHong Zhang   /* Allocate arrays for fill computation and free space for accumulating nonzero column */
4119566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(am+2,&ci));
4123c50cad2SHong Zhang   ci[0] = 0;
4133c50cad2SHong Zhang 
4143c50cad2SHong Zhang   /* create and initialize a linked list */
4159566063dSJacob Faibussowitsch   PetscCall(PetscTableCreate(bn,bn,&ta));
416c373ccc6SHong Zhang   MatRowMergeMax_SeqAIJ(b,bm,ta);
4179566063dSJacob Faibussowitsch   PetscCall(PetscTableGetCount(ta,&Crmax));
4189566063dSJacob Faibussowitsch   PetscCall(PetscTableDestroy(&ta));
419eca6b86aSHong Zhang 
4209566063dSJacob Faibussowitsch   PetscCall(PetscLLCondensedCreate_fast(Crmax,&lnk));
4213c50cad2SHong Zhang 
4223c50cad2SHong Zhang   /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
4239566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space));
4243c50cad2SHong Zhang   current_space = free_space;
4253c50cad2SHong Zhang 
4263c50cad2SHong Zhang   /* Determine ci and cj */
4273c50cad2SHong Zhang   for (i=0; i<am; i++) {
4283c50cad2SHong Zhang     anzi = ai[i+1] - ai[i];
4293c50cad2SHong Zhang     aj   = a->j + ai[i];
4303c50cad2SHong Zhang     for (j=0; j<anzi; j++) {
4313c50cad2SHong Zhang       brow = aj[j];
4323c50cad2SHong Zhang       bnzj = bi[brow+1] - bi[brow];
4333c50cad2SHong Zhang       bj   = b->j + bi[brow];
4343c50cad2SHong Zhang       /* add non-zero cols of B into the sorted linked list lnk */
4359566063dSJacob Faibussowitsch       PetscCall(PetscLLCondensedAddSorted_fast(bnzj,bj,lnk));
4363c50cad2SHong Zhang     }
4378c78258cSHong Zhang     /* add possible missing diagonal entry */
4388c78258cSHong Zhang     if (C->force_diagonals) {
4399566063dSJacob Faibussowitsch       PetscCall(PetscLLCondensedAddSorted_fast(1,&i,lnk));
4408c78258cSHong Zhang     }
4413c50cad2SHong Zhang     cnzi = lnk[1];
4423c50cad2SHong Zhang 
4433c50cad2SHong Zhang     /* If free space is not available, make more free space */
4443c50cad2SHong Zhang     /* Double the amount of total space in the list */
4453c50cad2SHong Zhang     if (current_space->local_remaining<cnzi) {
4469566063dSJacob Faibussowitsch       PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),&current_space));
4473c50cad2SHong Zhang       ndouble++;
4483c50cad2SHong Zhang     }
4493c50cad2SHong Zhang 
4503c50cad2SHong Zhang     /* Copy data into free space, then initialize lnk */
4519566063dSJacob Faibussowitsch     PetscCall(PetscLLCondensedClean_fast(cnzi,current_space->array,lnk));
4522205254eSKarl Rupp 
4533c50cad2SHong Zhang     current_space->array           += cnzi;
4543c50cad2SHong Zhang     current_space->local_used      += cnzi;
4553c50cad2SHong Zhang     current_space->local_remaining -= cnzi;
4562205254eSKarl Rupp 
4573c50cad2SHong Zhang     ci[i+1] = ci[i] + cnzi;
4583c50cad2SHong Zhang   }
4593c50cad2SHong Zhang 
4603c50cad2SHong Zhang   /* Column indices are in the list of free space */
4613c50cad2SHong Zhang   /* Allocate space for cj, initialize cj, and */
4623c50cad2SHong Zhang   /* destroy list of free space and other temporary array(s) */
4639566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ci[am]+1,&cj));
4649566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceContiguous(&free_space,cj));
4659566063dSJacob Faibussowitsch   PetscCall(PetscLLCondensedDestroy_fast(lnk));
46625296bd5SBarry Smith 
46725296bd5SBarry Smith   /* Allocate space for ca */
4689566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(ci[am]+1,&ca));
46925296bd5SBarry Smith 
47025296bd5SBarry Smith   /* put together the new symbolic matrix */
4719566063dSJacob Faibussowitsch   PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,ca,((PetscObject)A)->type_name,C));
4729566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSizesFromMats(C,A,B));
47325296bd5SBarry Smith 
47425296bd5SBarry Smith   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
47525296bd5SBarry Smith   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
4764222ddf1SHong Zhang   c          = (Mat_SeqAIJ*)(C->data);
47725296bd5SBarry Smith   c->free_a  = PETSC_TRUE;
47825296bd5SBarry Smith   c->free_ij = PETSC_TRUE;
47925296bd5SBarry Smith   c->nonew   = 0;
4802205254eSKarl Rupp 
4814222ddf1SHong Zhang   /* slower, less memory */
4824222ddf1SHong Zhang   C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable;
48325296bd5SBarry Smith 
48425296bd5SBarry Smith   /* set MatInfo */
48525296bd5SBarry Smith   afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5;
48625296bd5SBarry Smith   if (afill < 1.0) afill = 1.0;
4874222ddf1SHong Zhang   C->info.mallocs           = ndouble;
4884222ddf1SHong Zhang   C->info.fill_ratio_given  = fill;
4894222ddf1SHong Zhang   C->info.fill_ratio_needed = afill;
49025296bd5SBarry Smith 
49125296bd5SBarry Smith #if defined(PETSC_USE_INFO)
49225296bd5SBarry Smith   if (ci[am]) {
4939566063dSJacob Faibussowitsch     PetscCall(PetscInfo(C,"Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill));
4949566063dSJacob Faibussowitsch     PetscCall(PetscInfo(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill));
49525296bd5SBarry Smith   } else {
4969566063dSJacob Faibussowitsch     PetscCall(PetscInfo(C,"Empty matrix product\n"));
49725296bd5SBarry Smith   }
49825296bd5SBarry Smith #endif
49925296bd5SBarry Smith   PetscFunctionReturn(0);
50025296bd5SBarry Smith }
50125296bd5SBarry Smith 
5024222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,PetscReal fill,Mat C)
503e9e4536cSHong Zhang {
504e9e4536cSHong Zhang   Mat_SeqAIJ         *a  = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
505bf9555e6SHong Zhang   PetscInt           *ai = a->i,*bi=b->i,*ci,*cj;
50625c41797SHong Zhang   PetscInt           am  = A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
507e9e4536cSHong Zhang   MatScalar          *ca;
508e9e4536cSHong Zhang   PetscReal          afill;
509eca6b86aSHong Zhang   PetscInt           i,j,anzi,brow,bnzj,cnzi,*bj,*aj,*lnk,ndouble=0,Crmax;
510eca6b86aSHong Zhang   PetscTable         ta;
5110298fd71SBarry Smith   PetscFreeSpaceList free_space=NULL,current_space=NULL;
512e9e4536cSHong Zhang 
513e9e4536cSHong Zhang   PetscFunctionBegin;
514bd958071SHong Zhang   /* Get ci and cj - same as MatMatMultSymbolic_SeqAIJ_SeqAIJ except using PetscLLxxx_Scalalbe() */
515bd958071SHong Zhang   /*---------------------------------------------------------------------------------------------*/
516bd958071SHong Zhang   /* Allocate arrays for fill computation and free space for accumulating nonzero column */
5179566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(am+2,&ci));
518bd958071SHong Zhang   ci[0] = 0;
519bd958071SHong Zhang 
520bd958071SHong Zhang   /* create and initialize a linked list */
5219566063dSJacob Faibussowitsch   PetscCall(PetscTableCreate(bn,bn,&ta));
522c373ccc6SHong Zhang   MatRowMergeMax_SeqAIJ(b,bm,ta);
5239566063dSJacob Faibussowitsch   PetscCall(PetscTableGetCount(ta,&Crmax));
5249566063dSJacob Faibussowitsch   PetscCall(PetscTableDestroy(&ta));
5259566063dSJacob Faibussowitsch   PetscCall(PetscLLCondensedCreate_Scalable(Crmax,&lnk));
526bd958071SHong Zhang 
527bd958071SHong Zhang   /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
5289566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space));
529bd958071SHong Zhang   current_space = free_space;
530bd958071SHong Zhang 
531bd958071SHong Zhang   /* Determine ci and cj */
532bd958071SHong Zhang   for (i=0; i<am; i++) {
533bd958071SHong Zhang     anzi = ai[i+1] - ai[i];
534bd958071SHong Zhang     aj   = a->j + ai[i];
535bd958071SHong Zhang     for (j=0; j<anzi; j++) {
536bd958071SHong Zhang       brow = aj[j];
537bd958071SHong Zhang       bnzj = bi[brow+1] - bi[brow];
538bd958071SHong Zhang       bj   = b->j + bi[brow];
539bd958071SHong Zhang       /* add non-zero cols of B into the sorted linked list lnk */
5409566063dSJacob Faibussowitsch       PetscCall(PetscLLCondensedAddSorted_Scalable(bnzj,bj,lnk));
541bd958071SHong Zhang     }
5428c78258cSHong Zhang     /* add possible missing diagonal entry */
5438c78258cSHong Zhang     if (C->force_diagonals) {
5449566063dSJacob Faibussowitsch       PetscCall(PetscLLCondensedAddSorted_Scalable(1,&i,lnk));
5458c78258cSHong Zhang     }
5468c78258cSHong Zhang 
547bd958071SHong Zhang     cnzi = lnk[0];
548bd958071SHong Zhang 
549bd958071SHong Zhang     /* If free space is not available, make more free space */
550bd958071SHong Zhang     /* Double the amount of total space in the list */
551bd958071SHong Zhang     if (current_space->local_remaining<cnzi) {
5529566063dSJacob Faibussowitsch       PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),&current_space));
553bd958071SHong Zhang       ndouble++;
554bd958071SHong Zhang     }
555bd958071SHong Zhang 
556bd958071SHong Zhang     /* Copy data into free space, then initialize lnk */
5579566063dSJacob Faibussowitsch     PetscCall(PetscLLCondensedClean_Scalable(cnzi,current_space->array,lnk));
5582205254eSKarl Rupp 
559bd958071SHong Zhang     current_space->array           += cnzi;
560bd958071SHong Zhang     current_space->local_used      += cnzi;
561bd958071SHong Zhang     current_space->local_remaining -= cnzi;
5622205254eSKarl Rupp 
563bd958071SHong Zhang     ci[i+1] = ci[i] + cnzi;
564bd958071SHong Zhang   }
565bd958071SHong Zhang 
566bd958071SHong Zhang   /* Column indices are in the list of free space */
567bd958071SHong Zhang   /* Allocate space for cj, initialize cj, and */
568bd958071SHong Zhang   /* destroy list of free space and other temporary array(s) */
5699566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ci[am]+1,&cj));
5709566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceContiguous(&free_space,cj));
5719566063dSJacob Faibussowitsch   PetscCall(PetscLLCondensedDestroy_Scalable(lnk));
572e9e4536cSHong Zhang 
573e9e4536cSHong Zhang   /* Allocate space for ca */
574bd958071SHong Zhang   /*-----------------------*/
5759566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(ci[am]+1,&ca));
576e9e4536cSHong Zhang 
577e9e4536cSHong Zhang   /* put together the new symbolic matrix */
5789566063dSJacob Faibussowitsch   PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,ca,((PetscObject)A)->type_name,C));
5799566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSizesFromMats(C,A,B));
580e9e4536cSHong Zhang 
581e9e4536cSHong Zhang   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
582e9e4536cSHong Zhang   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
5834222ddf1SHong Zhang   c          = (Mat_SeqAIJ*)(C->data);
584e9e4536cSHong Zhang   c->free_a  = PETSC_TRUE;
585e9e4536cSHong Zhang   c->free_ij = PETSC_TRUE;
586e9e4536cSHong Zhang   c->nonew   = 0;
5872205254eSKarl Rupp 
5884222ddf1SHong Zhang   /* slower, less memory */
5894222ddf1SHong Zhang   C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable;
590e9e4536cSHong Zhang 
591e9e4536cSHong Zhang   /* set MatInfo */
592e9e4536cSHong Zhang   afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5;
593e9e4536cSHong Zhang   if (afill < 1.0) afill = 1.0;
5944222ddf1SHong Zhang   C->info.mallocs           = ndouble;
5954222ddf1SHong Zhang   C->info.fill_ratio_given  = fill;
5964222ddf1SHong Zhang   C->info.fill_ratio_needed = afill;
597e9e4536cSHong Zhang 
598e9e4536cSHong Zhang #if defined(PETSC_USE_INFO)
599e9e4536cSHong Zhang   if (ci[am]) {
6009566063dSJacob Faibussowitsch     PetscCall(PetscInfo(C,"Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill));
6019566063dSJacob Faibussowitsch     PetscCall(PetscInfo(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill));
602e9e4536cSHong Zhang   } else {
6039566063dSJacob Faibussowitsch     PetscCall(PetscInfo(C,"Empty matrix product\n"));
604e9e4536cSHong Zhang   }
605e9e4536cSHong Zhang #endif
606e9e4536cSHong Zhang   PetscFunctionReturn(0);
607e9e4536cSHong Zhang }
608e9e4536cSHong Zhang 
6094222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Heap(Mat A,Mat B,PetscReal fill,Mat C)
6100ced3a2bSJed Brown {
6110ced3a2bSJed Brown   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
6120ced3a2bSJed Brown   const PetscInt     *ai=a->i,*bi=b->i,*aj=a->j,*bj=b->j;
6130ced3a2bSJed Brown   PetscInt           *ci,*cj,*bb;
6140ced3a2bSJed Brown   PetscInt           am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
6150ced3a2bSJed Brown   PetscReal          afill;
6160ced3a2bSJed Brown   PetscInt           i,j,col,ndouble = 0;
6170298fd71SBarry Smith   PetscFreeSpaceList free_space=NULL,current_space=NULL;
6180ced3a2bSJed Brown   PetscHeap          h;
6190ced3a2bSJed Brown 
6200ced3a2bSJed Brown   PetscFunctionBegin;
621cd093f1dSJed Brown   /* Get ci and cj - by merging sorted rows using a heap */
6220ced3a2bSJed Brown   /*---------------------------------------------------------------------------------------------*/
6230ced3a2bSJed Brown   /* Allocate arrays for fill computation and free space for accumulating nonzero column */
6249566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(am+2,&ci));
6250ced3a2bSJed Brown   ci[0] = 0;
6260ced3a2bSJed Brown 
6270ced3a2bSJed Brown   /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
6289566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space));
6290ced3a2bSJed Brown   current_space = free_space;
6300ced3a2bSJed Brown 
6319566063dSJacob Faibussowitsch   PetscCall(PetscHeapCreate(a->rmax,&h));
6329566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(a->rmax,&bb));
6330ced3a2bSJed Brown 
6340ced3a2bSJed Brown   /* Determine ci and cj */
6350ced3a2bSJed Brown   for (i=0; i<am; i++) {
6360ced3a2bSJed Brown     const PetscInt anzi  = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */
6370ced3a2bSJed Brown     const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */
6380ced3a2bSJed Brown     ci[i+1] = ci[i];
6390ced3a2bSJed Brown     /* Populate the min heap */
6400ced3a2bSJed Brown     for (j=0; j<anzi; j++) {
6410ced3a2bSJed Brown       bb[j] = bi[acol[j]];         /* bb points at the start of the row */
6420ced3a2bSJed Brown       if (bb[j] < bi[acol[j]+1]) { /* Add if row is nonempty */
6439566063dSJacob Faibussowitsch         PetscCall(PetscHeapAdd(h,j,bj[bb[j]++]));
6440ced3a2bSJed Brown       }
6450ced3a2bSJed Brown     }
6460ced3a2bSJed Brown     /* Pick off the min element, adding it to free space */
6479566063dSJacob Faibussowitsch     PetscCall(PetscHeapPop(h,&j,&col));
6480ced3a2bSJed Brown     while (j >= 0) {
6490ced3a2bSJed Brown       if (current_space->local_remaining < 1) { /* double the size, but don't exceed 16 MiB */
6509566063dSJacob Faibussowitsch         PetscCall(PetscFreeSpaceGet(PetscMin(PetscIntMultTruncate(2,current_space->total_array_size),16 << 20),&current_space));
6510ced3a2bSJed Brown         ndouble++;
6520ced3a2bSJed Brown       }
6530ced3a2bSJed Brown       *(current_space->array++) = col;
6540ced3a2bSJed Brown       current_space->local_used++;
6550ced3a2bSJed Brown       current_space->local_remaining--;
6560ced3a2bSJed Brown       ci[i+1]++;
6570ced3a2bSJed Brown 
6580ced3a2bSJed Brown       /* stash if anything else remains in this row of B */
6599566063dSJacob Faibussowitsch       if (bb[j] < bi[acol[j]+1]) PetscCall(PetscHeapStash(h,j,bj[bb[j]++]));
6600ced3a2bSJed Brown       while (1) {               /* pop and stash any other rows of B that also had an entry in this column */
6610ced3a2bSJed Brown         PetscInt j2,col2;
6629566063dSJacob Faibussowitsch         PetscCall(PetscHeapPeek(h,&j2,&col2));
6630ced3a2bSJed Brown         if (col2 != col) break;
6649566063dSJacob Faibussowitsch         PetscCall(PetscHeapPop(h,&j2,&col2));
6659566063dSJacob Faibussowitsch         if (bb[j2] < bi[acol[j2]+1]) PetscCall(PetscHeapStash(h,j2,bj[bb[j2]++]));
6660ced3a2bSJed Brown       }
6670ced3a2bSJed Brown       /* Put any stashed elements back into the min heap */
6689566063dSJacob Faibussowitsch       PetscCall(PetscHeapUnstash(h));
6699566063dSJacob Faibussowitsch       PetscCall(PetscHeapPop(h,&j,&col));
6700ced3a2bSJed Brown     }
6710ced3a2bSJed Brown   }
6729566063dSJacob Faibussowitsch   PetscCall(PetscFree(bb));
6739566063dSJacob Faibussowitsch   PetscCall(PetscHeapDestroy(&h));
6740ced3a2bSJed Brown 
6750ced3a2bSJed Brown   /* Column indices are in the list of free space */
6760ced3a2bSJed Brown   /* Allocate space for cj, initialize cj, and */
6770ced3a2bSJed Brown   /* destroy list of free space and other temporary array(s) */
6789566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ci[am],&cj));
6799566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceContiguous(&free_space,cj));
6800ced3a2bSJed Brown 
6810ced3a2bSJed Brown   /* put together the new symbolic matrix */
6829566063dSJacob Faibussowitsch   PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C));
6839566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSizesFromMats(C,A,B));
6840ced3a2bSJed Brown 
6850ced3a2bSJed Brown   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
6860ced3a2bSJed Brown   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
6874222ddf1SHong Zhang   c          = (Mat_SeqAIJ*)(C->data);
6880ced3a2bSJed Brown   c->free_a  = PETSC_TRUE;
6890ced3a2bSJed Brown   c->free_ij = PETSC_TRUE;
6900ced3a2bSJed Brown   c->nonew   = 0;
69126fbe8dcSKarl Rupp 
6924222ddf1SHong Zhang   C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted;
6930ced3a2bSJed Brown 
6940ced3a2bSJed Brown   /* set MatInfo */
6950ced3a2bSJed Brown   afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5;
6960ced3a2bSJed Brown   if (afill < 1.0) afill = 1.0;
6974222ddf1SHong Zhang   C->info.mallocs           = ndouble;
6984222ddf1SHong Zhang   C->info.fill_ratio_given  = fill;
6994222ddf1SHong Zhang   C->info.fill_ratio_needed = afill;
7000ced3a2bSJed Brown 
7010ced3a2bSJed Brown #if defined(PETSC_USE_INFO)
7020ced3a2bSJed Brown   if (ci[am]) {
7039566063dSJacob Faibussowitsch     PetscCall(PetscInfo(C,"Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill));
7049566063dSJacob Faibussowitsch     PetscCall(PetscInfo(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill));
7050ced3a2bSJed Brown   } else {
7069566063dSJacob Faibussowitsch     PetscCall(PetscInfo(C,"Empty matrix product\n"));
7070ced3a2bSJed Brown   }
7080ced3a2bSJed Brown #endif
7090ced3a2bSJed Brown   PetscFunctionReturn(0);
7100ced3a2bSJed Brown }
711e9e4536cSHong Zhang 
7124222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_BTHeap(Mat A,Mat B,PetscReal fill,Mat C)
7138a07c6f1SJed Brown {
7148a07c6f1SJed Brown   Mat_SeqAIJ         *a  = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
7158a07c6f1SJed Brown   const PetscInt     *ai = a->i,*bi=b->i,*aj=a->j,*bj=b->j;
7168a07c6f1SJed Brown   PetscInt           *ci,*cj,*bb;
7178a07c6f1SJed Brown   PetscInt           am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
7188a07c6f1SJed Brown   PetscReal          afill;
7198a07c6f1SJed Brown   PetscInt           i,j,col,ndouble = 0;
7200298fd71SBarry Smith   PetscFreeSpaceList free_space=NULL,current_space=NULL;
7218a07c6f1SJed Brown   PetscHeap          h;
7228a07c6f1SJed Brown   PetscBT            bt;
7238a07c6f1SJed Brown 
7248a07c6f1SJed Brown   PetscFunctionBegin;
725cd093f1dSJed Brown   /* Get ci and cj - using a heap for the sorted rows, but use BT so that each index is only added once */
7268a07c6f1SJed Brown   /*---------------------------------------------------------------------------------------------*/
7278a07c6f1SJed Brown   /* Allocate arrays for fill computation and free space for accumulating nonzero column */
7289566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(am+2,&ci));
7298a07c6f1SJed Brown   ci[0] = 0;
7308a07c6f1SJed Brown 
7318a07c6f1SJed Brown   /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
7329566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space));
7332205254eSKarl Rupp 
7348a07c6f1SJed Brown   current_space = free_space;
7358a07c6f1SJed Brown 
7369566063dSJacob Faibussowitsch   PetscCall(PetscHeapCreate(a->rmax,&h));
7379566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(a->rmax,&bb));
7389566063dSJacob Faibussowitsch   PetscCall(PetscBTCreate(bn,&bt));
7398a07c6f1SJed Brown 
7408a07c6f1SJed Brown   /* Determine ci and cj */
7418a07c6f1SJed Brown   for (i=0; i<am; i++) {
7428a07c6f1SJed Brown     const PetscInt anzi  = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */
7438a07c6f1SJed Brown     const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */
7448a07c6f1SJed Brown     const PetscInt *fptr = current_space->array; /* Save beginning of the row so we can clear the BT later */
7458a07c6f1SJed Brown     ci[i+1] = ci[i];
7468a07c6f1SJed Brown     /* Populate the min heap */
7478a07c6f1SJed Brown     for (j=0; j<anzi; j++) {
7488a07c6f1SJed Brown       PetscInt brow = acol[j];
7498a07c6f1SJed Brown       for (bb[j] = bi[brow]; bb[j] < bi[brow+1]; bb[j]++) {
7508a07c6f1SJed Brown         PetscInt bcol = bj[bb[j]];
7518a07c6f1SJed Brown         if (!PetscBTLookupSet(bt,bcol)) { /* new entry */
7529566063dSJacob Faibussowitsch           PetscCall(PetscHeapAdd(h,j,bcol));
7538a07c6f1SJed Brown           bb[j]++;
7548a07c6f1SJed Brown           break;
7558a07c6f1SJed Brown         }
7568a07c6f1SJed Brown       }
7578a07c6f1SJed Brown     }
7588a07c6f1SJed Brown     /* Pick off the min element, adding it to free space */
7599566063dSJacob Faibussowitsch     PetscCall(PetscHeapPop(h,&j,&col));
7608a07c6f1SJed Brown     while (j >= 0) {
7618a07c6f1SJed Brown       if (current_space->local_remaining < 1) { /* double the size, but don't exceed 16 MiB */
7620298fd71SBarry Smith         fptr = NULL;                      /* need PetscBTMemzero */
7639566063dSJacob Faibussowitsch         PetscCall(PetscFreeSpaceGet(PetscMin(PetscIntMultTruncate(2,current_space->total_array_size),16 << 20),&current_space));
7648a07c6f1SJed Brown         ndouble++;
7658a07c6f1SJed Brown       }
7668a07c6f1SJed Brown       *(current_space->array++) = col;
7678a07c6f1SJed Brown       current_space->local_used++;
7688a07c6f1SJed Brown       current_space->local_remaining--;
7698a07c6f1SJed Brown       ci[i+1]++;
7708a07c6f1SJed Brown 
7718a07c6f1SJed Brown       /* stash if anything else remains in this row of B */
7728a07c6f1SJed Brown       for (; bb[j] < bi[acol[j]+1]; bb[j]++) {
7738a07c6f1SJed Brown         PetscInt bcol = bj[bb[j]];
7748a07c6f1SJed Brown         if (!PetscBTLookupSet(bt,bcol)) { /* new entry */
7759566063dSJacob Faibussowitsch           PetscCall(PetscHeapAdd(h,j,bcol));
7768a07c6f1SJed Brown           bb[j]++;
7778a07c6f1SJed Brown           break;
7788a07c6f1SJed Brown         }
7798a07c6f1SJed Brown       }
7809566063dSJacob Faibussowitsch       PetscCall(PetscHeapPop(h,&j,&col));
7818a07c6f1SJed Brown     }
7828a07c6f1SJed Brown     if (fptr) {                 /* Clear the bits for this row */
7839566063dSJacob Faibussowitsch       for (; fptr<current_space->array; fptr++) PetscCall(PetscBTClear(bt,*fptr));
7848a07c6f1SJed Brown     } else {                    /* We reallocated so we don't remember (easily) how to clear only the bits we changed */
7859566063dSJacob Faibussowitsch       PetscCall(PetscBTMemzero(bn,bt));
7868a07c6f1SJed Brown     }
7878a07c6f1SJed Brown   }
7889566063dSJacob Faibussowitsch   PetscCall(PetscFree(bb));
7899566063dSJacob Faibussowitsch   PetscCall(PetscHeapDestroy(&h));
7909566063dSJacob Faibussowitsch   PetscCall(PetscBTDestroy(&bt));
7918a07c6f1SJed Brown 
7928a07c6f1SJed Brown   /* Column indices are in the list of free space */
7938a07c6f1SJed Brown   /* Allocate space for cj, initialize cj, and */
7948a07c6f1SJed Brown   /* destroy list of free space and other temporary array(s) */
7959566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ci[am],&cj));
7969566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceContiguous(&free_space,cj));
7978a07c6f1SJed Brown 
7988a07c6f1SJed Brown   /* put together the new symbolic matrix */
7999566063dSJacob Faibussowitsch   PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C));
8009566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSizesFromMats(C,A,B));
8018a07c6f1SJed Brown 
8028a07c6f1SJed Brown   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
8038a07c6f1SJed Brown   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
8044222ddf1SHong Zhang   c          = (Mat_SeqAIJ*)(C->data);
8058a07c6f1SJed Brown   c->free_a  = PETSC_TRUE;
8068a07c6f1SJed Brown   c->free_ij = PETSC_TRUE;
8078a07c6f1SJed Brown   c->nonew   = 0;
80826fbe8dcSKarl Rupp 
8094222ddf1SHong Zhang   C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted;
8108a07c6f1SJed Brown 
8118a07c6f1SJed Brown   /* set MatInfo */
8128a07c6f1SJed Brown   afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5;
8138a07c6f1SJed Brown   if (afill < 1.0) afill = 1.0;
8144222ddf1SHong Zhang   C->info.mallocs           = ndouble;
8154222ddf1SHong Zhang   C->info.fill_ratio_given  = fill;
8164222ddf1SHong Zhang   C->info.fill_ratio_needed = afill;
8178a07c6f1SJed Brown 
8188a07c6f1SJed Brown #if defined(PETSC_USE_INFO)
8198a07c6f1SJed Brown   if (ci[am]) {
8209566063dSJacob Faibussowitsch     PetscCall(PetscInfo(C,"Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill));
8219566063dSJacob Faibussowitsch     PetscCall(PetscInfo(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill));
8228a07c6f1SJed Brown   } else {
8239566063dSJacob Faibussowitsch     PetscCall(PetscInfo(C,"Empty matrix product\n"));
8248a07c6f1SJed Brown   }
8258a07c6f1SJed Brown #endif
8268a07c6f1SJed Brown   PetscFunctionReturn(0);
8278a07c6f1SJed Brown }
8288a07c6f1SJed Brown 
8294222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_RowMerge(Mat A,Mat B,PetscReal fill,Mat C)
830d7ed1a4dSandi selinger {
831d7ed1a4dSandi selinger   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
832d7ed1a4dSandi selinger   const PetscInt     *ai=a->i,*bi=b->i,*aj=a->j,*bj=b->j,*inputi,*inputj,*inputcol,*inputcol_L1;
833d7ed1a4dSandi selinger   PetscInt           *ci,*cj,*outputj,worki_L1[9],worki_L2[9];
834d7ed1a4dSandi selinger   PetscInt           c_maxmem,a_maxrownnz=0,a_rownnz;
835d7ed1a4dSandi selinger   const PetscInt     workcol[8]={0,1,2,3,4,5,6,7};
836d7ed1a4dSandi selinger   const PetscInt     am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
837d7ed1a4dSandi selinger   const PetscInt     *brow_ptr[8],*brow_end[8];
838d7ed1a4dSandi selinger   PetscInt           window[8];
839d7ed1a4dSandi selinger   PetscInt           window_min,old_window_min,ci_nnz,outputi_nnz=0,L1_nrows,L2_nrows;
840d7ed1a4dSandi selinger   PetscInt           i,k,ndouble=0,L1_rowsleft,rowsleft;
841d7ed1a4dSandi selinger   PetscReal          afill;
842f83700f2Sandi selinger   PetscInt           *workj_L1,*workj_L2,*workj_L3;
8437660c4dbSandi selinger   PetscInt           L1_nnz,L2_nnz;
844d7ed1a4dSandi selinger 
845d7ed1a4dSandi selinger   /* Step 1: Get upper bound on memory required for allocation.
846d7ed1a4dSandi selinger              Because of the way virtual memory works,
847d7ed1a4dSandi selinger              only the memory pages that are actually needed will be physically allocated. */
848d7ed1a4dSandi selinger   PetscFunctionBegin;
8499566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(am+1,&ci));
850d7ed1a4dSandi selinger   for (i=0; i<am; i++) {
851d7ed1a4dSandi selinger     const PetscInt anzi  = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */
852d7ed1a4dSandi selinger     const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */
853d7ed1a4dSandi selinger     a_rownnz = 0;
854d7ed1a4dSandi selinger     for (k=0; k<anzi; ++k) {
855d7ed1a4dSandi selinger       a_rownnz += bi[acol[k]+1] - bi[acol[k]];
856d7ed1a4dSandi selinger       if (a_rownnz > bn) {
857d7ed1a4dSandi selinger         a_rownnz = bn;
858d7ed1a4dSandi selinger         break;
859d7ed1a4dSandi selinger       }
860d7ed1a4dSandi selinger     }
861d7ed1a4dSandi selinger     a_maxrownnz = PetscMax(a_maxrownnz, a_rownnz);
862d7ed1a4dSandi selinger   }
863d7ed1a4dSandi selinger   /* temporary work areas for merging rows */
8649566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(a_maxrownnz*8,&workj_L1));
8659566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(a_maxrownnz*8,&workj_L2));
8669566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(a_maxrownnz,&workj_L3));
867d7ed1a4dSandi selinger 
8687660c4dbSandi selinger   /* This should be enough for almost all matrices. If not, memory is reallocated later. */
8697660c4dbSandi selinger   c_maxmem = 8*(ai[am]+bi[bm]);
870d7ed1a4dSandi selinger   /* Step 2: Populate pattern for C */
8719566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(c_maxmem,&cj));
872d7ed1a4dSandi selinger 
873d7ed1a4dSandi selinger   ci_nnz       = 0;
874d7ed1a4dSandi selinger   ci[0]        = 0;
875d7ed1a4dSandi selinger   worki_L1[0]  = 0;
876d7ed1a4dSandi selinger   worki_L2[0]  = 0;
877d7ed1a4dSandi selinger   for (i=0; i<am; i++) {
878d7ed1a4dSandi selinger     const PetscInt anzi  = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */
879d7ed1a4dSandi selinger     const PetscInt *acol = aj + ai[i];      /* column indices of nonzero entries in this row */
880d7ed1a4dSandi selinger     rowsleft             = anzi;
881d7ed1a4dSandi selinger     inputcol_L1          = acol;
882d7ed1a4dSandi selinger     L2_nnz               = 0;
8837660c4dbSandi selinger     L2_nrows             = 1;  /* Number of rows to be merged on Level 3. output of L3 already exists -> initial value 1   */
8847660c4dbSandi selinger     worki_L2[1]          = 0;
88508fe1b3cSKarl Rupp     outputi_nnz          = 0;
886d7ed1a4dSandi selinger 
887d7ed1a4dSandi selinger     /* If the number of indices in C so far + the max number of columns in the next row > c_maxmem  -> allocate more memory */
888d7ed1a4dSandi selinger     while (ci_nnz+a_maxrownnz > c_maxmem) {
889d7ed1a4dSandi selinger       c_maxmem *= 2;
890d7ed1a4dSandi selinger       ndouble++;
8919566063dSJacob Faibussowitsch       PetscCall(PetscRealloc(sizeof(PetscInt)*c_maxmem,&cj));
892d7ed1a4dSandi selinger     }
893d7ed1a4dSandi selinger 
894d7ed1a4dSandi selinger     while (rowsleft) {
895d7ed1a4dSandi selinger       L1_rowsleft = PetscMin(64, rowsleft); /* In the inner loop max 64 rows of B can be merged */
896d7ed1a4dSandi selinger       L1_nrows    = 0;
8977660c4dbSandi selinger       L1_nnz      = 0;
898d7ed1a4dSandi selinger       inputcol    = inputcol_L1;
899d7ed1a4dSandi selinger       inputi      = bi;
900d7ed1a4dSandi selinger       inputj      = bj;
901d7ed1a4dSandi selinger 
902d7ed1a4dSandi selinger       /* The following macro is used to specialize for small rows in A.
903d7ed1a4dSandi selinger          This helps with compiler unrolling, improving performance substantially.
904f83700f2Sandi selinger           Input:  inputj   inputi  inputcol  bn
905d7ed1a4dSandi selinger           Output: outputj  outputi_nnz                       */
906d7ed1a4dSandi selinger        #define MatMatMultSymbolic_RowMergeMacro(ANNZ)                        \
907d7ed1a4dSandi selinger          window_min  = bn;                                                   \
9087660c4dbSandi selinger          outputi_nnz = 0;                                                    \
9097660c4dbSandi selinger          for (k=0; k<ANNZ; ++k) {                                            \
910d7ed1a4dSandi selinger            brow_ptr[k] = inputj + inputi[inputcol[k]];                       \
911d7ed1a4dSandi selinger            brow_end[k] = inputj + inputi[inputcol[k]+1];                     \
912d7ed1a4dSandi selinger            window[k]   = (brow_ptr[k] != brow_end[k]) ? *brow_ptr[k] : bn;   \
913d7ed1a4dSandi selinger            window_min  = PetscMin(window[k], window_min);                    \
914d7ed1a4dSandi selinger          }                                                                   \
915d7ed1a4dSandi selinger          while (window_min < bn) {                                           \
916d7ed1a4dSandi selinger            outputj[outputi_nnz++] = window_min;                              \
917d7ed1a4dSandi selinger            /* advance front and compute new minimum */                       \
918d7ed1a4dSandi selinger            old_window_min = window_min;                                      \
919d7ed1a4dSandi selinger            window_min = bn;                                                  \
920d7ed1a4dSandi selinger            for (k=0; k<ANNZ; ++k) {                                          \
921d7ed1a4dSandi selinger              if (window[k] == old_window_min) {                              \
922d7ed1a4dSandi selinger                brow_ptr[k]++;                                                \
923d7ed1a4dSandi selinger                window[k] = (brow_ptr[k] != brow_end[k]) ? *brow_ptr[k] : bn; \
924d7ed1a4dSandi selinger              }                                                               \
925d7ed1a4dSandi selinger              window_min = PetscMin(window[k], window_min);                   \
926d7ed1a4dSandi selinger            }                                                                 \
927d7ed1a4dSandi selinger          }
928d7ed1a4dSandi selinger 
929d7ed1a4dSandi selinger       /************** L E V E L  1 ***************/
930d7ed1a4dSandi selinger       /* Merge up to 8 rows of B to L1 work array*/
931d7ed1a4dSandi selinger       while (L1_rowsleft) {
9327660c4dbSandi selinger         outputi_nnz = 0;
9337660c4dbSandi selinger         if (anzi > 8)  outputj = workj_L1 + L1_nnz;     /* Level 1 rowmerge*/
9347660c4dbSandi selinger         else           outputj = cj + ci_nnz;           /* Merge directly to C */
9357660c4dbSandi selinger 
936d7ed1a4dSandi selinger         switch (L1_rowsleft) {
937d7ed1a4dSandi selinger         case 1:  brow_ptr[0] = inputj + inputi[inputcol[0]];
938d7ed1a4dSandi selinger                  brow_end[0] = inputj + inputi[inputcol[0]+1];
939d7ed1a4dSandi selinger                  for (; brow_ptr[0] != brow_end[0]; ++brow_ptr[0]) outputj[outputi_nnz++] = *brow_ptr[0]; /* copy row in b over */
940d7ed1a4dSandi selinger                  inputcol    += L1_rowsleft;
941d7ed1a4dSandi selinger                  rowsleft    -= L1_rowsleft;
942d7ed1a4dSandi selinger                  L1_rowsleft  = 0;
943d7ed1a4dSandi selinger                  break;
944d7ed1a4dSandi selinger         case 2:  MatMatMultSymbolic_RowMergeMacro(2);
945d7ed1a4dSandi selinger                  inputcol    += L1_rowsleft;
946d7ed1a4dSandi selinger                  rowsleft    -= L1_rowsleft;
947d7ed1a4dSandi selinger                  L1_rowsleft  = 0;
948d7ed1a4dSandi selinger                  break;
949d7ed1a4dSandi selinger         case 3: MatMatMultSymbolic_RowMergeMacro(3);
950d7ed1a4dSandi selinger                  inputcol    += L1_rowsleft;
951d7ed1a4dSandi selinger                  rowsleft    -= L1_rowsleft;
952d7ed1a4dSandi selinger                  L1_rowsleft  = 0;
953d7ed1a4dSandi selinger                  break;
954d7ed1a4dSandi selinger         case 4:  MatMatMultSymbolic_RowMergeMacro(4);
955d7ed1a4dSandi selinger                  inputcol    += L1_rowsleft;
956d7ed1a4dSandi selinger                  rowsleft    -= L1_rowsleft;
957d7ed1a4dSandi selinger                  L1_rowsleft  = 0;
958d7ed1a4dSandi selinger                  break;
959d7ed1a4dSandi selinger         case 5:  MatMatMultSymbolic_RowMergeMacro(5);
960d7ed1a4dSandi selinger                  inputcol    += L1_rowsleft;
961d7ed1a4dSandi selinger                  rowsleft    -= L1_rowsleft;
962d7ed1a4dSandi selinger                  L1_rowsleft  = 0;
963d7ed1a4dSandi selinger                  break;
964d7ed1a4dSandi selinger         case 6:  MatMatMultSymbolic_RowMergeMacro(6);
965d7ed1a4dSandi selinger                  inputcol    += L1_rowsleft;
966d7ed1a4dSandi selinger                  rowsleft    -= L1_rowsleft;
967d7ed1a4dSandi selinger                  L1_rowsleft  = 0;
968d7ed1a4dSandi selinger                  break;
969d7ed1a4dSandi selinger         case 7:  MatMatMultSymbolic_RowMergeMacro(7);
970d7ed1a4dSandi selinger                  inputcol    += L1_rowsleft;
971d7ed1a4dSandi selinger                  rowsleft    -= L1_rowsleft;
972d7ed1a4dSandi selinger                  L1_rowsleft  = 0;
973d7ed1a4dSandi selinger                  break;
974d7ed1a4dSandi selinger         default: MatMatMultSymbolic_RowMergeMacro(8);
975d7ed1a4dSandi selinger                  inputcol    += 8;
976d7ed1a4dSandi selinger                  rowsleft    -= 8;
977d7ed1a4dSandi selinger                  L1_rowsleft -= 8;
978d7ed1a4dSandi selinger                  break;
979d7ed1a4dSandi selinger         }
980d7ed1a4dSandi selinger         inputcol_L1           = inputcol;
9817660c4dbSandi selinger         L1_nnz               += outputi_nnz;
9827660c4dbSandi selinger         worki_L1[++L1_nrows]  = L1_nnz;
983d7ed1a4dSandi selinger       }
984d7ed1a4dSandi selinger 
985d7ed1a4dSandi selinger       /********************** L E V E L  2 ************************/
986d7ed1a4dSandi selinger       /* Merge from L1 work array to either C or to L2 work array */
987d7ed1a4dSandi selinger       if (anzi > 8) {
988d7ed1a4dSandi selinger         inputi      = worki_L1;
989d7ed1a4dSandi selinger         inputj      = workj_L1;
990d7ed1a4dSandi selinger         inputcol    = workcol;
991d7ed1a4dSandi selinger         outputi_nnz = 0;
992d7ed1a4dSandi selinger 
993d7ed1a4dSandi selinger         if (anzi <= 64) outputj = cj + ci_nnz;        /* Merge from L1 work array to C */
994d7ed1a4dSandi selinger         else            outputj = workj_L2 + L2_nnz;  /* Merge from L1 work array to L2 work array */
995d7ed1a4dSandi selinger 
996d7ed1a4dSandi selinger         switch (L1_nrows) {
997d7ed1a4dSandi selinger         case 1:  brow_ptr[0] = inputj + inputi[inputcol[0]];
998d7ed1a4dSandi selinger                  brow_end[0] = inputj + inputi[inputcol[0]+1];
999d7ed1a4dSandi selinger                  for (; brow_ptr[0] != brow_end[0]; ++brow_ptr[0]) outputj[outputi_nnz++] = *brow_ptr[0]; /* copy row in b over */
1000d7ed1a4dSandi selinger                  break;
1001d7ed1a4dSandi selinger         case 2:  MatMatMultSymbolic_RowMergeMacro(2); break;
1002d7ed1a4dSandi selinger         case 3:  MatMatMultSymbolic_RowMergeMacro(3); break;
1003d7ed1a4dSandi selinger         case 4:  MatMatMultSymbolic_RowMergeMacro(4); break;
1004d7ed1a4dSandi selinger         case 5:  MatMatMultSymbolic_RowMergeMacro(5); break;
1005d7ed1a4dSandi selinger         case 6:  MatMatMultSymbolic_RowMergeMacro(6); break;
1006d7ed1a4dSandi selinger         case 7:  MatMatMultSymbolic_RowMergeMacro(7); break;
1007d7ed1a4dSandi selinger         case 8:  MatMatMultSymbolic_RowMergeMacro(8); break;
1008d7ed1a4dSandi selinger         default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatMatMult logic error: Not merging 1-8 rows from L1 work array!");
1009d7ed1a4dSandi selinger         }
1010d7ed1a4dSandi selinger         L2_nnz               += outputi_nnz;
1011d7ed1a4dSandi selinger         worki_L2[++L2_nrows]  = L2_nnz;
1012d7ed1a4dSandi selinger 
10137660c4dbSandi selinger         /************************ L E V E L  3 **********************/
10147660c4dbSandi selinger         /* Merge from L2 work array to either C or to L2 work array */
1015d7ed1a4dSandi selinger         if (anzi > 64 && (L2_nrows == 8 || rowsleft == 0)) {
1016d7ed1a4dSandi selinger           inputi      = worki_L2;
1017d7ed1a4dSandi selinger           inputj      = workj_L2;
1018d7ed1a4dSandi selinger           inputcol    = workcol;
1019d7ed1a4dSandi selinger           outputi_nnz = 0;
1020f83700f2Sandi selinger           if (rowsleft) outputj = workj_L3;
1021d7ed1a4dSandi selinger           else          outputj = cj + ci_nnz;
1022d7ed1a4dSandi selinger           switch (L2_nrows) {
1023d7ed1a4dSandi selinger           case 1:  brow_ptr[0] = inputj + inputi[inputcol[0]];
1024d7ed1a4dSandi selinger                    brow_end[0] = inputj + inputi[inputcol[0]+1];
1025d7ed1a4dSandi selinger                    for (; brow_ptr[0] != brow_end[0]; ++brow_ptr[0]) outputj[outputi_nnz++] = *brow_ptr[0]; /* copy row in b over */
1026d7ed1a4dSandi selinger                    break;
1027d7ed1a4dSandi selinger           case 2:  MatMatMultSymbolic_RowMergeMacro(2); break;
1028d7ed1a4dSandi selinger           case 3:  MatMatMultSymbolic_RowMergeMacro(3); break;
1029d7ed1a4dSandi selinger           case 4:  MatMatMultSymbolic_RowMergeMacro(4); break;
1030d7ed1a4dSandi selinger           case 5:  MatMatMultSymbolic_RowMergeMacro(5); break;
1031d7ed1a4dSandi selinger           case 6:  MatMatMultSymbolic_RowMergeMacro(6); break;
1032d7ed1a4dSandi selinger           case 7:  MatMatMultSymbolic_RowMergeMacro(7); break;
1033d7ed1a4dSandi selinger           case 8:  MatMatMultSymbolic_RowMergeMacro(8); break;
1034d7ed1a4dSandi selinger           default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatMatMult logic error: Not merging 1-8 rows from L2 work array!");
1035d7ed1a4dSandi selinger           }
1036d7ed1a4dSandi selinger           L2_nrows    = 1;
10377660c4dbSandi selinger           L2_nnz      = outputi_nnz;
10387660c4dbSandi selinger           worki_L2[1] = outputi_nnz;
10397660c4dbSandi selinger           /* Copy to workj_L2 */
1040d7ed1a4dSandi selinger           if (rowsleft) {
10417660c4dbSandi selinger             for (k=0; k<outputi_nnz; ++k)  workj_L2[k] = outputj[k];
1042d7ed1a4dSandi selinger           }
1043d7ed1a4dSandi selinger         }
1044d7ed1a4dSandi selinger       }
1045d7ed1a4dSandi selinger     }  /* while (rowsleft) */
1046d7ed1a4dSandi selinger #undef MatMatMultSymbolic_RowMergeMacro
1047d7ed1a4dSandi selinger 
1048d7ed1a4dSandi selinger     /* terminate current row */
1049d7ed1a4dSandi selinger     ci_nnz += outputi_nnz;
1050d7ed1a4dSandi selinger     ci[i+1] = ci_nnz;
1051d7ed1a4dSandi selinger   }
1052d7ed1a4dSandi selinger 
1053d7ed1a4dSandi selinger   /* Step 3: Create the new symbolic matrix */
10549566063dSJacob Faibussowitsch   PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C));
10559566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSizesFromMats(C,A,B));
1056d7ed1a4dSandi selinger 
1057d7ed1a4dSandi selinger   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
1058d7ed1a4dSandi selinger   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
10594222ddf1SHong Zhang   c          = (Mat_SeqAIJ*)(C->data);
1060d7ed1a4dSandi selinger   c->free_a  = PETSC_TRUE;
1061d7ed1a4dSandi selinger   c->free_ij = PETSC_TRUE;
1062d7ed1a4dSandi selinger   c->nonew   = 0;
1063d7ed1a4dSandi selinger 
10644222ddf1SHong Zhang   C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted;
1065d7ed1a4dSandi selinger 
1066d7ed1a4dSandi selinger   /* set MatInfo */
1067d7ed1a4dSandi selinger   afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5;
1068d7ed1a4dSandi selinger   if (afill < 1.0) afill = 1.0;
10694222ddf1SHong Zhang   C->info.mallocs           = ndouble;
10704222ddf1SHong Zhang   C->info.fill_ratio_given  = fill;
10714222ddf1SHong Zhang   C->info.fill_ratio_needed = afill;
1072d7ed1a4dSandi selinger 
1073d7ed1a4dSandi selinger #if defined(PETSC_USE_INFO)
1074d7ed1a4dSandi selinger   if (ci[am]) {
10759566063dSJacob Faibussowitsch     PetscCall(PetscInfo(C,"Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill));
10769566063dSJacob Faibussowitsch     PetscCall(PetscInfo(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill));
1077d7ed1a4dSandi selinger   } else {
10789566063dSJacob Faibussowitsch     PetscCall(PetscInfo(C,"Empty matrix product\n"));
1079d7ed1a4dSandi selinger   }
1080d7ed1a4dSandi selinger #endif
1081d7ed1a4dSandi selinger 
1082d7ed1a4dSandi selinger   /* Step 4: Free temporary work areas */
10839566063dSJacob Faibussowitsch   PetscCall(PetscFree(workj_L1));
10849566063dSJacob Faibussowitsch   PetscCall(PetscFree(workj_L2));
10859566063dSJacob Faibussowitsch   PetscCall(PetscFree(workj_L3));
1086d7ed1a4dSandi selinger   PetscFunctionReturn(0);
1087d7ed1a4dSandi selinger }
1088d7ed1a4dSandi selinger 
1089cd093f1dSJed Brown /* concatenate unique entries and then sort */
10904222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Sorted(Mat A,Mat B,PetscReal fill,Mat C)
1091cd093f1dSJed Brown {
1092cd093f1dSJed Brown   Mat_SeqAIJ     *a  = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
1093cd093f1dSJed Brown   const PetscInt *ai = a->i,*bi=b->i,*aj=a->j,*bj=b->j;
10948c78258cSHong Zhang   PetscInt       *ci,*cj,bcol;
1095cd093f1dSJed Brown   PetscInt       am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
1096cd093f1dSJed Brown   PetscReal      afill;
1097cd093f1dSJed Brown   PetscInt       i,j,ndouble = 0;
1098cd093f1dSJed Brown   PetscSegBuffer seg,segrow;
1099cd093f1dSJed Brown   char           *seen;
1100cd093f1dSJed Brown 
1101cd093f1dSJed Brown   PetscFunctionBegin;
11029566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(am+1,&ci));
1103cd093f1dSJed Brown   ci[0] = 0;
1104cd093f1dSJed Brown 
1105cd093f1dSJed Brown   /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
11069566063dSJacob Faibussowitsch   PetscCall(PetscSegBufferCreate(sizeof(PetscInt),(PetscInt)(fill*(ai[am]+bi[bm])),&seg));
11079566063dSJacob Faibussowitsch   PetscCall(PetscSegBufferCreate(sizeof(PetscInt),100,&segrow));
11089566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(bn,&seen));
1109cd093f1dSJed Brown 
1110cd093f1dSJed Brown   /* Determine ci and cj */
1111cd093f1dSJed Brown   for (i=0; i<am; i++) {
1112cd093f1dSJed Brown     const PetscInt anzi  = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */
1113cd093f1dSJed Brown     const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */
1114cd093f1dSJed Brown     PetscInt packlen = 0,*PETSC_RESTRICT crow;
11158c78258cSHong Zhang 
1116cd093f1dSJed Brown     /* Pack segrow */
1117cd093f1dSJed Brown     for (j=0; j<anzi; j++) {
1118cd093f1dSJed Brown       PetscInt brow = acol[j],bjstart = bi[brow],bjend = bi[brow+1],k;
1119cd093f1dSJed Brown       for (k=bjstart; k<bjend; k++) {
11208c78258cSHong Zhang         bcol = bj[k];
1121cd093f1dSJed Brown         if (!seen[bcol]) { /* new entry */
1122cd093f1dSJed Brown           PetscInt *PETSC_RESTRICT slot;
11239566063dSJacob Faibussowitsch           PetscCall(PetscSegBufferGetInts(segrow,1,&slot));
1124cd093f1dSJed Brown           *slot = bcol;
1125cd093f1dSJed Brown           seen[bcol] = 1;
1126cd093f1dSJed Brown           packlen++;
1127cd093f1dSJed Brown         }
1128cd093f1dSJed Brown       }
1129cd093f1dSJed Brown     }
11308c78258cSHong Zhang 
11318c78258cSHong Zhang     /* Check i-th diagonal entry */
11328c78258cSHong Zhang     if (C->force_diagonals && !seen[i]) {
11338c78258cSHong Zhang       PetscInt *PETSC_RESTRICT slot;
11349566063dSJacob Faibussowitsch       PetscCall(PetscSegBufferGetInts(segrow,1,&slot));
11358c78258cSHong Zhang       *slot   = i;
11368c78258cSHong Zhang       seen[i] = 1;
11378c78258cSHong Zhang       packlen++;
11388c78258cSHong Zhang     }
11398c78258cSHong Zhang 
11409566063dSJacob Faibussowitsch     PetscCall(PetscSegBufferGetInts(seg,packlen,&crow));
11419566063dSJacob Faibussowitsch     PetscCall(PetscSegBufferExtractTo(segrow,crow));
11429566063dSJacob Faibussowitsch     PetscCall(PetscSortInt(packlen,crow));
1143cd093f1dSJed Brown     ci[i+1] = ci[i] + packlen;
1144cd093f1dSJed Brown     for (j=0; j<packlen; j++) seen[crow[j]] = 0;
1145cd093f1dSJed Brown   }
11469566063dSJacob Faibussowitsch   PetscCall(PetscSegBufferDestroy(&segrow));
11479566063dSJacob Faibussowitsch   PetscCall(PetscFree(seen));
1148cd093f1dSJed Brown 
1149cd093f1dSJed Brown   /* Column indices are in the segmented buffer */
11509566063dSJacob Faibussowitsch   PetscCall(PetscSegBufferExtractAlloc(seg,&cj));
11519566063dSJacob Faibussowitsch   PetscCall(PetscSegBufferDestroy(&seg));
1152cd093f1dSJed Brown 
1153cd093f1dSJed Brown   /* put together the new symbolic matrix */
11549566063dSJacob Faibussowitsch   PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C));
11559566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSizesFromMats(C,A,B));
1156cd093f1dSJed Brown 
1157cd093f1dSJed Brown   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
1158cd093f1dSJed Brown   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
11594222ddf1SHong Zhang   c          = (Mat_SeqAIJ*)(C->data);
1160cd093f1dSJed Brown   c->free_a  = PETSC_TRUE;
1161cd093f1dSJed Brown   c->free_ij = PETSC_TRUE;
1162cd093f1dSJed Brown   c->nonew   = 0;
1163cd093f1dSJed Brown 
11644222ddf1SHong Zhang   C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted;
1165cd093f1dSJed Brown 
1166cd093f1dSJed Brown   /* set MatInfo */
11672a09556fSStefano Zampini   afill = (PetscReal)ci[am]/PetscMax(ai[am]+bi[bm],1) + 1.e-5;
1168cd093f1dSJed Brown   if (afill < 1.0) afill = 1.0;
11694222ddf1SHong Zhang   C->info.mallocs           = ndouble;
11704222ddf1SHong Zhang   C->info.fill_ratio_given  = fill;
11714222ddf1SHong Zhang   C->info.fill_ratio_needed = afill;
1172cd093f1dSJed Brown 
1173cd093f1dSJed Brown #if defined(PETSC_USE_INFO)
1174cd093f1dSJed Brown   if (ci[am]) {
11759566063dSJacob Faibussowitsch     PetscCall(PetscInfo(C,"Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill));
11769566063dSJacob Faibussowitsch     PetscCall(PetscInfo(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill));
1177cd093f1dSJed Brown   } else {
11789566063dSJacob Faibussowitsch     PetscCall(PetscInfo(C,"Empty matrix product\n"));
1179cd093f1dSJed Brown   }
1180cd093f1dSJed Brown #endif
1181cd093f1dSJed Brown   PetscFunctionReturn(0);
1182cd093f1dSJed Brown }
1183cd093f1dSJed Brown 
11846718818eSStefano Zampini PetscErrorCode MatDestroy_SeqAIJ_MatMatMultTrans(void *data)
11852128a86cSHong Zhang {
11866718818eSStefano Zampini   Mat_MatMatTransMult *abt=(Mat_MatMatTransMult *)data;
11872128a86cSHong Zhang 
11882128a86cSHong Zhang   PetscFunctionBegin;
11899566063dSJacob Faibussowitsch   PetscCall(MatTransposeColoringDestroy(&abt->matcoloring));
11909566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&abt->Bt_den));
11919566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&abt->ABt_den));
11929566063dSJacob Faibussowitsch   PetscCall(PetscFree(abt));
11932128a86cSHong Zhang   PetscFunctionReturn(0);
11942128a86cSHong Zhang }
11952128a86cSHong Zhang 
11964222ddf1SHong Zhang PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat C)
1197bc011b1eSHong Zhang {
119881d82fe4SHong Zhang   Mat                 Bt;
119981d82fe4SHong Zhang   PetscInt            *bti,*btj;
120040192850SHong Zhang   Mat_MatMatTransMult *abt;
12014222ddf1SHong Zhang   Mat_Product         *product = C->product;
12026718818eSStefano Zampini   char                *alg;
1203d2853540SHong Zhang 
120481d82fe4SHong Zhang   PetscFunctionBegin;
120528b400f6SJacob Faibussowitsch   PetscCheck(product,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing product struct");
120628b400f6SJacob Faibussowitsch   PetscCheck(!product->data,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Extra product struct not empty");
12076718818eSStefano Zampini 
120881d82fe4SHong Zhang   /* create symbolic Bt */
12099566063dSJacob Faibussowitsch   PetscCall(MatGetSymbolicTranspose_SeqAIJ(B,&bti,&btj));
12109566063dSJacob Faibussowitsch   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,B->cmap->n,B->rmap->n,bti,btj,NULL,&Bt));
12119566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSizes(Bt,PetscAbs(A->cmap->bs),PetscAbs(B->cmap->bs)));
12129566063dSJacob Faibussowitsch   PetscCall(MatSetType(Bt,((PetscObject)A)->type_name));
121381d82fe4SHong Zhang 
121481d82fe4SHong Zhang   /* get symbolic C=A*Bt */
12159566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(product->alg,&alg));
12169566063dSJacob Faibussowitsch   PetscCall(MatProductSetAlgorithm(C,"sorted")); /* set algorithm for C = A*Bt */
12179566063dSJacob Faibussowitsch   PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,Bt,fill,C));
12189566063dSJacob Faibussowitsch   PetscCall(MatProductSetAlgorithm(C,alg)); /* resume original algorithm for ABt product */
12199566063dSJacob Faibussowitsch   PetscCall(PetscFree(alg));
122081d82fe4SHong Zhang 
1221a5b23f4aSJose E. Roman   /* create a supporting struct for reuse intermediate dense matrices with matcoloring */
12229566063dSJacob Faibussowitsch   PetscCall(PetscNew(&abt));
12232128a86cSHong Zhang 
12246718818eSStefano Zampini   product->data    = abt;
12256718818eSStefano Zampini   product->destroy = MatDestroy_SeqAIJ_MatMatMultTrans;
12266718818eSStefano Zampini 
12274222ddf1SHong Zhang   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
12282128a86cSHong Zhang 
12294222ddf1SHong Zhang   abt->usecoloring = PETSC_FALSE;
12309566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(product->alg,"color",&abt->usecoloring));
123140192850SHong Zhang   if (abt->usecoloring) {
1232b9af6bddSHong Zhang     /* Create MatTransposeColoring from symbolic C=A*B^T */
1233b9af6bddSHong Zhang     MatTransposeColoring matcoloring;
1234335efc43SPeter Brune     MatColoring          coloring;
12352128a86cSHong Zhang     ISColoring           iscoloring;
12362128a86cSHong Zhang     Mat                  Bt_dense,C_dense;
1237e8354b3eSHong Zhang 
12384222ddf1SHong Zhang     /* inode causes memory problem */
12399566063dSJacob Faibussowitsch     PetscCall(MatSetOption(C,MAT_USE_INODES,PETSC_FALSE));
12404222ddf1SHong Zhang 
12419566063dSJacob Faibussowitsch     PetscCall(MatColoringCreate(C,&coloring));
12429566063dSJacob Faibussowitsch     PetscCall(MatColoringSetDistance(coloring,2));
12439566063dSJacob Faibussowitsch     PetscCall(MatColoringSetType(coloring,MATCOLORINGSL));
12449566063dSJacob Faibussowitsch     PetscCall(MatColoringSetFromOptions(coloring));
12459566063dSJacob Faibussowitsch     PetscCall(MatColoringApply(coloring,&iscoloring));
12469566063dSJacob Faibussowitsch     PetscCall(MatColoringDestroy(&coloring));
12479566063dSJacob Faibussowitsch     PetscCall(MatTransposeColoringCreate(C,iscoloring,&matcoloring));
12482205254eSKarl Rupp 
124940192850SHong Zhang     abt->matcoloring = matcoloring;
12502205254eSKarl Rupp 
12519566063dSJacob Faibussowitsch     PetscCall(ISColoringDestroy(&iscoloring));
12522128a86cSHong Zhang 
12532128a86cSHong Zhang     /* Create Bt_dense and C_dense = A*Bt_dense */
12549566063dSJacob Faibussowitsch     PetscCall(MatCreate(PETSC_COMM_SELF,&Bt_dense));
12559566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(Bt_dense,A->cmap->n,matcoloring->ncolors,A->cmap->n,matcoloring->ncolors));
12569566063dSJacob Faibussowitsch     PetscCall(MatSetType(Bt_dense,MATSEQDENSE));
12579566063dSJacob Faibussowitsch     PetscCall(MatSeqDenseSetPreallocation(Bt_dense,NULL));
12582205254eSKarl Rupp 
12592128a86cSHong Zhang     Bt_dense->assembled = PETSC_TRUE;
126040192850SHong Zhang     abt->Bt_den         = Bt_dense;
12612128a86cSHong Zhang 
12629566063dSJacob Faibussowitsch     PetscCall(MatCreate(PETSC_COMM_SELF,&C_dense));
12639566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(C_dense,A->rmap->n,matcoloring->ncolors,A->rmap->n,matcoloring->ncolors));
12649566063dSJacob Faibussowitsch     PetscCall(MatSetType(C_dense,MATSEQDENSE));
12659566063dSJacob Faibussowitsch     PetscCall(MatSeqDenseSetPreallocation(C_dense,NULL));
12662205254eSKarl Rupp 
12672128a86cSHong Zhang     Bt_dense->assembled = PETSC_TRUE;
126840192850SHong Zhang     abt->ABt_den  = C_dense;
1269f94ccd6cSHong Zhang 
1270f94ccd6cSHong Zhang #if defined(PETSC_USE_INFO)
12711ce71dffSSatish Balay     {
12724222ddf1SHong Zhang       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
12739566063dSJacob Faibussowitsch       PetscCall(PetscInfo(C,"Use coloring of C=A*B^T; B^T: %" PetscInt_FMT " %" PetscInt_FMT ", Bt_dense: %" PetscInt_FMT ",%" PetscInt_FMT "; Cnz %" PetscInt_FMT " / (cm*ncolors %" PetscInt_FMT ") = %g\n",B->cmap->n,B->rmap->n,Bt_dense->rmap->n,Bt_dense->cmap->n,c->nz,A->rmap->n*matcoloring->ncolors,(double)(((PetscReal)(c->nz))/((PetscReal)(A->rmap->n*matcoloring->ncolors)))));
12741ce71dffSSatish Balay     }
1275f94ccd6cSHong Zhang #endif
12762128a86cSHong Zhang   }
1277e99be685SHong Zhang   /* clean up */
12789566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&Bt));
12799566063dSJacob Faibussowitsch   PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(B,&bti,&btj));
12805df89d91SHong Zhang   PetscFunctionReturn(0);
12815df89d91SHong Zhang }
12825df89d91SHong Zhang 
12836fc122caSHong Zhang PetscErrorCode MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
12845df89d91SHong Zhang {
12855df89d91SHong Zhang   Mat_SeqAIJ          *a   =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data;
1286e2cac8caSJed Brown   PetscInt            *ai  =a->i,*aj=a->j,*bi=b->i,*bj=b->j,anzi,bnzj,nexta,nextb,*acol,*bcol,brow;
128781d82fe4SHong Zhang   PetscInt            cm   =C->rmap->n,*ci=c->i,*cj=c->j,i,j,cnzi,*ccol;
12885df89d91SHong Zhang   PetscLogDouble      flops=0.0;
1289aa1aec99SHong Zhang   MatScalar           *aa  =a->a,*aval,*ba=b->a,*bval,*ca,*cval;
12906718818eSStefano Zampini   Mat_MatMatTransMult *abt;
12916718818eSStefano Zampini   Mat_Product         *product = C->product;
12925df89d91SHong Zhang 
12935df89d91SHong Zhang   PetscFunctionBegin;
129428b400f6SJacob Faibussowitsch   PetscCheck(product,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing product struct");
12956718818eSStefano Zampini   abt = (Mat_MatMatTransMult *)product->data;
129628b400f6SJacob Faibussowitsch   PetscCheck(abt,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing product struct");
129758ed3ceaSHong Zhang   /* clear old values in C */
129858ed3ceaSHong Zhang   if (!c->a) {
12999566063dSJacob Faibussowitsch     PetscCall(PetscCalloc1(ci[cm]+1,&ca));
130058ed3ceaSHong Zhang     c->a      = ca;
130158ed3ceaSHong Zhang     c->free_a = PETSC_TRUE;
130258ed3ceaSHong Zhang   } else {
130358ed3ceaSHong Zhang     ca =  c->a;
13049566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(ca,ci[cm]+1));
130558ed3ceaSHong Zhang   }
130658ed3ceaSHong Zhang 
130740192850SHong Zhang   if (abt->usecoloring) {
130840192850SHong Zhang     MatTransposeColoring matcoloring = abt->matcoloring;
130940192850SHong Zhang     Mat                  Bt_dense,C_dense = abt->ABt_den;
1310c8db22f6SHong Zhang 
1311b9af6bddSHong Zhang     /* Get Bt_dense by Apply MatTransposeColoring to B */
131240192850SHong Zhang     Bt_dense = abt->Bt_den;
13139566063dSJacob Faibussowitsch     PetscCall(MatTransColoringApplySpToDen(matcoloring,B,Bt_dense));
1314c8db22f6SHong Zhang 
1315c8db22f6SHong Zhang     /* C_dense = A*Bt_dense */
13169566063dSJacob Faibussowitsch     PetscCall(MatMatMultNumeric_SeqAIJ_SeqDense(A,Bt_dense,C_dense));
1317c8db22f6SHong Zhang 
13182128a86cSHong Zhang     /* Recover C from C_dense */
13199566063dSJacob Faibussowitsch     PetscCall(MatTransColoringApplyDenToSp(matcoloring,C_dense,C));
1320c8db22f6SHong Zhang     PetscFunctionReturn(0);
1321c8db22f6SHong Zhang   }
1322c8db22f6SHong Zhang 
13231fa1209cSHong Zhang   for (i=0; i<cm; i++) {
132481d82fe4SHong Zhang     anzi = ai[i+1] - ai[i];
132581d82fe4SHong Zhang     acol = aj + ai[i];
13266973769bSHong Zhang     aval = aa + ai[i];
13271fa1209cSHong Zhang     cnzi = ci[i+1] - ci[i];
13281fa1209cSHong Zhang     ccol = cj + ci[i];
13296973769bSHong Zhang     cval = ca + ci[i];
13301fa1209cSHong Zhang     for (j=0; j<cnzi; j++) {
133181d82fe4SHong Zhang       brow = ccol[j];
133281d82fe4SHong Zhang       bnzj = bi[brow+1] - bi[brow];
133381d82fe4SHong Zhang       bcol = bj + bi[brow];
13346973769bSHong Zhang       bval = ba + bi[brow];
13356973769bSHong Zhang 
133681d82fe4SHong Zhang       /* perform sparse inner-product c(i,j)=A[i,:]*B[j,:]^T */
133781d82fe4SHong Zhang       nexta = 0; nextb = 0;
133881d82fe4SHong Zhang       while (nexta<anzi && nextb<bnzj) {
13397b6d5e96SMark Adams         while (nexta < anzi && acol[nexta] < bcol[nextb]) nexta++;
134081d82fe4SHong Zhang         if (nexta == anzi) break;
13417b6d5e96SMark Adams         while (nextb < bnzj && acol[nexta] > bcol[nextb]) nextb++;
134281d82fe4SHong Zhang         if (nextb == bnzj) break;
134381d82fe4SHong Zhang         if (acol[nexta] == bcol[nextb]) {
13446973769bSHong Zhang           cval[j] += aval[nexta]*bval[nextb];
134581d82fe4SHong Zhang           nexta++; nextb++;
134681d82fe4SHong Zhang           flops += 2;
13471fa1209cSHong Zhang         }
13481fa1209cSHong Zhang       }
134981d82fe4SHong Zhang     }
135081d82fe4SHong Zhang   }
13519566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
13529566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
13539566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(flops));
13545df89d91SHong Zhang   PetscFunctionReturn(0);
13555df89d91SHong Zhang }
13565df89d91SHong Zhang 
13576718818eSStefano Zampini PetscErrorCode MatDestroy_SeqAIJ_MatTransMatMult(void *data)
13586d373c3eSHong Zhang {
13596718818eSStefano Zampini   Mat_MatTransMatMult *atb = (Mat_MatTransMatMult*)data;
13606d373c3eSHong Zhang 
13616d373c3eSHong Zhang   PetscFunctionBegin;
13629566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&atb->At));
1363*1baa6e33SBarry Smith   if (atb->destroy) PetscCall((*atb->destroy)(atb->data));
13649566063dSJacob Faibussowitsch   PetscCall(PetscFree(atb));
13656d373c3eSHong Zhang   PetscFunctionReturn(0);
13666d373c3eSHong Zhang }
13676d373c3eSHong Zhang 
13684222ddf1SHong Zhang PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat C)
13695df89d91SHong Zhang {
1370089a957eSStefano Zampini   Mat            At = NULL;
137138baddfdSBarry Smith   PetscInt       *ati,*atj;
13724222ddf1SHong Zhang   Mat_Product    *product = C->product;
1373089a957eSStefano Zampini   PetscBool      flg,def,square;
1374bc011b1eSHong Zhang 
1375bc011b1eSHong Zhang   PetscFunctionBegin;
1376089a957eSStefano Zampini   MatCheckProduct(C,4);
1377089a957eSStefano Zampini   square = (PetscBool)(A == B && A->symmetric && A->symmetric_set);
13784222ddf1SHong Zhang   /* outerproduct */
13799566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(product->alg,"outerproduct",&flg));
13804222ddf1SHong Zhang   if (flg) {
1381bc011b1eSHong Zhang     /* create symbolic At */
1382089a957eSStefano Zampini     if (!square) {
13839566063dSJacob Faibussowitsch       PetscCall(MatGetSymbolicTranspose_SeqAIJ(A,&ati,&atj));
13849566063dSJacob Faibussowitsch       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,A->cmap->n,A->rmap->n,ati,atj,NULL,&At));
13859566063dSJacob Faibussowitsch       PetscCall(MatSetBlockSizes(At,PetscAbs(A->cmap->bs),PetscAbs(B->cmap->bs)));
13869566063dSJacob Faibussowitsch       PetscCall(MatSetType(At,((PetscObject)A)->type_name));
1387089a957eSStefano Zampini     }
1388bc011b1eSHong Zhang     /* get symbolic C=At*B */
13899566063dSJacob Faibussowitsch     PetscCall(MatProductSetAlgorithm(C,"sorted"));
13909566063dSJacob Faibussowitsch     PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ(square ? A : At,B,fill,C));
1391bc011b1eSHong Zhang 
1392bc011b1eSHong Zhang     /* clean up */
1393089a957eSStefano Zampini     if (!square) {
13949566063dSJacob Faibussowitsch       PetscCall(MatDestroy(&At));
13959566063dSJacob Faibussowitsch       PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(A,&ati,&atj));
1396089a957eSStefano Zampini     }
13976d373c3eSHong Zhang 
13984222ddf1SHong Zhang     C->ops->mattransposemultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ; /* outerproduct */
13999566063dSJacob Faibussowitsch     PetscCall(MatProductSetAlgorithm(C,"outerproduct"));
14004222ddf1SHong Zhang     PetscFunctionReturn(0);
14014222ddf1SHong Zhang   }
14024222ddf1SHong Zhang 
14034222ddf1SHong Zhang   /* matmatmult */
14049566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(product->alg,"default",&def));
14059566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(product->alg,"at*b",&flg));
1406089a957eSStefano Zampini   if (flg || def) {
14074222ddf1SHong Zhang     Mat_MatTransMatMult *atb;
14084222ddf1SHong Zhang 
140928b400f6SJacob Faibussowitsch     PetscCheck(!product->data,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Extra product struct not empty");
14109566063dSJacob Faibussowitsch     PetscCall(PetscNew(&atb));
1411089a957eSStefano Zampini     if (!square) {
14129566063dSJacob Faibussowitsch       PetscCall(MatTranspose_SeqAIJ(A,MAT_INITIAL_MATRIX,&At));
1413089a957eSStefano Zampini     }
14149566063dSJacob Faibussowitsch     PetscCall(MatProductSetAlgorithm(C,"sorted"));
14159566063dSJacob Faibussowitsch     PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ(square ? A : At,B,fill,C));
14169566063dSJacob Faibussowitsch     PetscCall(MatProductSetAlgorithm(C,"at*b"));
14176718818eSStefano Zampini     product->data    = atb;
14186718818eSStefano Zampini     product->destroy = MatDestroy_SeqAIJ_MatTransMatMult;
14194222ddf1SHong Zhang     atb->At          = At;
14204222ddf1SHong Zhang     atb->updateAt    = PETSC_FALSE; /* because At is computed here */
14214222ddf1SHong Zhang 
14224222ddf1SHong Zhang     C->ops->mattransposemultnumeric = NULL; /* see MatProductNumeric_AtB_SeqAIJ_SeqAIJ */
14234222ddf1SHong Zhang     PetscFunctionReturn(0);
14244222ddf1SHong Zhang   }
14254222ddf1SHong Zhang 
14264222ddf1SHong Zhang   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product Algorithm is not supported");
1427bc011b1eSHong Zhang }
1428bc011b1eSHong Zhang 
142975648e8dSHong Zhang PetscErrorCode MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
1430bc011b1eSHong Zhang {
14310fbc74f4SHong Zhang   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data;
1432d0f46423SBarry Smith   PetscInt       am=A->rmap->n,anzi,*ai=a->i,*aj=a->j,*bi=b->i,*bj,bnzi,nextb;
1433d0f46423SBarry Smith   PetscInt       cm=C->rmap->n,*ci=c->i,*cj=c->j,crow,*cjj,i,j,k;
1434d13dce4bSSatish Balay   PetscLogDouble flops=0.0;
1435aa1aec99SHong Zhang   MatScalar      *aa=a->a,*ba,*ca,*caj;
1436bc011b1eSHong Zhang 
1437bc011b1eSHong Zhang   PetscFunctionBegin;
1438aa1aec99SHong Zhang   if (!c->a) {
14399566063dSJacob Faibussowitsch     PetscCall(PetscCalloc1(ci[cm]+1,&ca));
14402205254eSKarl Rupp 
1441aa1aec99SHong Zhang     c->a      = ca;
1442aa1aec99SHong Zhang     c->free_a = PETSC_TRUE;
1443aa1aec99SHong Zhang   } else {
1444aa1aec99SHong Zhang     ca   = c->a;
14459566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(ca,ci[cm]));
1446aa1aec99SHong Zhang   }
1447bc011b1eSHong Zhang 
1448bc011b1eSHong Zhang   /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */
1449bc011b1eSHong Zhang   for (i=0; i<am; i++) {
1450bc011b1eSHong Zhang     bj   = b->j + bi[i];
1451bc011b1eSHong Zhang     ba   = b->a + bi[i];
1452bc011b1eSHong Zhang     bnzi = bi[i+1] - bi[i];
1453bc011b1eSHong Zhang     anzi = ai[i+1] - ai[i];
1454bc011b1eSHong Zhang     for (j=0; j<anzi; j++) {
1455bc011b1eSHong Zhang       nextb = 0;
14560fbc74f4SHong Zhang       crow  = *aj++;
1457bc011b1eSHong Zhang       cjj   = cj + ci[crow];
1458bc011b1eSHong Zhang       caj   = ca + ci[crow];
1459bc011b1eSHong Zhang       /* perform sparse axpy operation.  Note cjj includes bj. */
1460bc011b1eSHong Zhang       for (k=0; nextb<bnzi; k++) {
14610fbc74f4SHong Zhang         if (cjj[k] == *(bj+nextb)) { /* ccol == bcol */
14620fbc74f4SHong Zhang           caj[k] += (*aa)*(*(ba+nextb));
1463bc011b1eSHong Zhang           nextb++;
1464bc011b1eSHong Zhang         }
1465bc011b1eSHong Zhang       }
1466bc011b1eSHong Zhang       flops += 2*bnzi;
14670fbc74f4SHong Zhang       aa++;
1468bc011b1eSHong Zhang     }
1469bc011b1eSHong Zhang   }
1470bc011b1eSHong Zhang 
1471bc011b1eSHong Zhang   /* Assemble the final matrix and clean up */
14729566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
14739566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
14749566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(flops));
1475bc011b1eSHong Zhang   PetscFunctionReturn(0);
1476bc011b1eSHong Zhang }
14779123193aSHong Zhang 
14784222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqDense(Mat A,Mat B,PetscReal fill,Mat C)
14799123193aSHong Zhang {
14809123193aSHong Zhang   PetscFunctionBegin;
14819566063dSJacob Faibussowitsch   PetscCall(MatMatMultSymbolic_SeqDense_SeqDense(A,B,0.0,C));
14824222ddf1SHong Zhang   C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqDense;
14839123193aSHong Zhang   PetscFunctionReturn(0);
14849123193aSHong Zhang }
14859123193aSHong Zhang 
148693aa15f2SStefano Zampini PETSC_INTERN PetscErrorCode MatMatMultNumericAdd_SeqAIJ_SeqDense(Mat A,Mat B,Mat C,const PetscBool add)
14879123193aSHong Zhang {
1488f32d5d43SBarry Smith   Mat_SeqAIJ        *a=(Mat_SeqAIJ*)A->data;
14891ca9667aSStefano Zampini   PetscScalar       *c,r1,r2,r3,r4,*c1,*c2,*c3,*c4;
1490a4af7ca8SStefano Zampini   const PetscScalar *aa,*b,*b1,*b2,*b3,*b4,*av;
1491daf57211SHong Zhang   const PetscInt    *aj;
149275f6d85dSStefano Zampini   PetscInt          cm=C->rmap->n,cn=B->cmap->n,bm,am=A->rmap->n;
149375f6d85dSStefano Zampini   PetscInt          clda;
149475f6d85dSStefano Zampini   PetscInt          am4,bm4,col,i,j,n;
14959123193aSHong Zhang 
14969123193aSHong Zhang   PetscFunctionBegin;
1497f32d5d43SBarry Smith   if (!cm || !cn) PetscFunctionReturn(0);
14989566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(A,&av));
149993aa15f2SStefano Zampini   if (add) {
15009566063dSJacob Faibussowitsch     PetscCall(MatDenseGetArray(C,&c));
150193aa15f2SStefano Zampini   } else {
15029566063dSJacob Faibussowitsch     PetscCall(MatDenseGetArrayWrite(C,&c));
150393aa15f2SStefano Zampini   }
15049566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(B,&b));
15059566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(B,&bm));
15069566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(C,&clda));
150775f6d85dSStefano Zampini   am4 = 4*clda;
150875f6d85dSStefano Zampini   bm4 = 4*bm;
1509f32d5d43SBarry Smith   b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm;
15101ca9667aSStefano Zampini   c1 = c; c2 = c1 + clda; c3 = c2 + clda; c4 = c3 + clda;
15111ca9667aSStefano Zampini   for (col=0; col<(cn/4)*4; col += 4) {  /* over columns of C */
15121ca9667aSStefano Zampini     for (i=0; i<am; i++) {        /* over rows of A in those columns */
1513f32d5d43SBarry Smith       r1 = r2 = r3 = r4 = 0.0;
1514f32d5d43SBarry Smith       n  = a->i[i+1] - a->i[i];
1515f32d5d43SBarry Smith       aj = a->j + a->i[i];
1516a4af7ca8SStefano Zampini       aa = av + a->i[i];
1517f32d5d43SBarry Smith       for (j=0; j<n; j++) {
15181ca9667aSStefano Zampini         const PetscScalar aatmp = aa[j];
15191ca9667aSStefano Zampini         const PetscInt    ajtmp = aj[j];
1520730858b9SHong Zhang         r1 += aatmp*b1[ajtmp];
1521730858b9SHong Zhang         r2 += aatmp*b2[ajtmp];
1522730858b9SHong Zhang         r3 += aatmp*b3[ajtmp];
1523730858b9SHong Zhang         r4 += aatmp*b4[ajtmp];
15249123193aSHong Zhang       }
152593aa15f2SStefano Zampini       if (add) {
152687753ddeSHong Zhang         c1[i] += r1;
152787753ddeSHong Zhang         c2[i] += r2;
152887753ddeSHong Zhang         c3[i] += r3;
152987753ddeSHong Zhang         c4[i] += r4;
153093aa15f2SStefano Zampini       } else {
153193aa15f2SStefano Zampini         c1[i] = r1;
153293aa15f2SStefano Zampini         c2[i] = r2;
153393aa15f2SStefano Zampini         c3[i] = r3;
153493aa15f2SStefano Zampini         c4[i] = r4;
153593aa15f2SStefano Zampini       }
1536f32d5d43SBarry Smith     }
1537730858b9SHong Zhang     b1 += bm4; b2 += bm4; b3 += bm4; b4 += bm4;
1538730858b9SHong Zhang     c1 += am4; c2 += am4; c3 += am4; c4 += am4;
1539f32d5d43SBarry Smith   }
154093aa15f2SStefano Zampini   /* process remaining columns */
154193aa15f2SStefano Zampini   if (col != cn) {
154293aa15f2SStefano Zampini     PetscInt rc = cn-col;
154393aa15f2SStefano Zampini 
154493aa15f2SStefano Zampini     if (rc == 1) {
154593aa15f2SStefano Zampini       for (i=0; i<am; i++) {
1546f32d5d43SBarry Smith         r1 = 0.0;
1547f32d5d43SBarry Smith         n  = a->i[i+1] - a->i[i];
1548f32d5d43SBarry Smith         aj = a->j + a->i[i];
1549a4af7ca8SStefano Zampini         aa = av + a->i[i];
155093aa15f2SStefano Zampini         for (j=0; j<n; j++) r1 += aa[j]*b1[aj[j]];
155193aa15f2SStefano Zampini         if (add) c1[i] += r1;
155293aa15f2SStefano Zampini         else c1[i] = r1;
155393aa15f2SStefano Zampini       }
155493aa15f2SStefano Zampini     } else if (rc == 2) {
155593aa15f2SStefano Zampini       for (i=0; i<am; i++) {
155693aa15f2SStefano Zampini         r1 = r2 = 0.0;
155793aa15f2SStefano Zampini         n  = a->i[i+1] - a->i[i];
155893aa15f2SStefano Zampini         aj = a->j + a->i[i];
155993aa15f2SStefano Zampini         aa = av + a->i[i];
1560f32d5d43SBarry Smith         for (j=0; j<n; j++) {
156193aa15f2SStefano Zampini           const PetscScalar aatmp = aa[j];
156293aa15f2SStefano Zampini           const PetscInt    ajtmp = aj[j];
156393aa15f2SStefano Zampini           r1 += aatmp*b1[ajtmp];
156493aa15f2SStefano Zampini           r2 += aatmp*b2[ajtmp];
1565f32d5d43SBarry Smith         }
156693aa15f2SStefano Zampini         if (add) {
156787753ddeSHong Zhang           c1[i] += r1;
156893aa15f2SStefano Zampini           c2[i] += r2;
156993aa15f2SStefano Zampini         } else {
157093aa15f2SStefano Zampini           c1[i] = r1;
157193aa15f2SStefano Zampini           c2[i] = r2;
1572f32d5d43SBarry Smith         }
157393aa15f2SStefano Zampini       }
157493aa15f2SStefano Zampini     } else {
157593aa15f2SStefano Zampini       for (i=0; i<am; i++) {
157693aa15f2SStefano Zampini         r1 = r2 = r3 = 0.0;
157793aa15f2SStefano Zampini         n  = a->i[i+1] - a->i[i];
157893aa15f2SStefano Zampini         aj = a->j + a->i[i];
157993aa15f2SStefano Zampini         aa = av + a->i[i];
158093aa15f2SStefano Zampini         for (j=0; j<n; j++) {
158193aa15f2SStefano Zampini           const PetscScalar aatmp = aa[j];
158293aa15f2SStefano Zampini           const PetscInt    ajtmp = aj[j];
158393aa15f2SStefano Zampini           r1 += aatmp*b1[ajtmp];
158493aa15f2SStefano Zampini           r2 += aatmp*b2[ajtmp];
158593aa15f2SStefano Zampini           r3 += aatmp*b3[ajtmp];
158693aa15f2SStefano Zampini         }
158793aa15f2SStefano Zampini         if (add) {
158893aa15f2SStefano Zampini           c1[i] += r1;
158993aa15f2SStefano Zampini           c2[i] += r2;
159093aa15f2SStefano Zampini           c3[i] += r3;
159193aa15f2SStefano Zampini         } else {
159293aa15f2SStefano Zampini           c1[i] = r1;
159393aa15f2SStefano Zampini           c2[i] = r2;
159493aa15f2SStefano Zampini           c3[i] = r3;
159593aa15f2SStefano Zampini         }
159693aa15f2SStefano Zampini       }
159793aa15f2SStefano Zampini     }
1598f32d5d43SBarry Smith   }
15999566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(cn*(2.0*a->nz)));
160093aa15f2SStefano Zampini   if (add) {
16019566063dSJacob Faibussowitsch     PetscCall(MatDenseRestoreArray(C,&c));
160293aa15f2SStefano Zampini   } else {
16039566063dSJacob Faibussowitsch     PetscCall(MatDenseRestoreArrayWrite(C,&c));
160493aa15f2SStefano Zampini   }
16059566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(B,&b));
16069566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(A,&av));
1607f32d5d43SBarry Smith   PetscFunctionReturn(0);
1608f32d5d43SBarry Smith }
1609f32d5d43SBarry Smith 
161087753ddeSHong Zhang PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqDense(Mat A,Mat B,Mat C)
1611f32d5d43SBarry Smith {
1612f32d5d43SBarry Smith   PetscFunctionBegin;
161308401ef6SPierre Jolivet   PetscCheck(B->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in A %" PetscInt_FMT " not equal rows in B %" PetscInt_FMT,A->cmap->n,B->rmap->n);
161408401ef6SPierre Jolivet   PetscCheck(A->rmap->n == C->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows in C %" PetscInt_FMT " not equal rows in A %" PetscInt_FMT,C->rmap->n,A->rmap->n);
161508401ef6SPierre Jolivet   PetscCheck(B->cmap->n == C->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in B %" PetscInt_FMT " not equal columns in C %" PetscInt_FMT,B->cmap->n,C->cmap->n);
16164324174eSBarry Smith 
16179566063dSJacob Faibussowitsch   PetscCall(MatMatMultNumericAdd_SeqAIJ_SeqDense(A,B,C,PETSC_FALSE));
16189123193aSHong Zhang   PetscFunctionReturn(0);
16199123193aSHong Zhang }
1620b1683b59SHong Zhang 
16214222ddf1SHong Zhang /* ------------------------------------------------------- */
16224222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense_AB(Mat C)
16234222ddf1SHong Zhang {
16244222ddf1SHong Zhang   PetscFunctionBegin;
16254222ddf1SHong Zhang   C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqDense;
16264222ddf1SHong Zhang   C->ops->productsymbolic = MatProductSymbolic_AB;
16274222ddf1SHong Zhang   PetscFunctionReturn(0);
16284222ddf1SHong Zhang }
16294222ddf1SHong Zhang 
16306718818eSStefano Zampini PETSC_INTERN PetscErrorCode MatTMatTMultSymbolic_SeqAIJ_SeqDense(Mat,Mat,PetscReal,Mat);
16316718818eSStefano Zampini 
16324222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense_AtB(Mat C)
16334222ddf1SHong Zhang {
16344222ddf1SHong Zhang   PetscFunctionBegin;
16356718818eSStefano Zampini   C->ops->transposematmultsymbolic = MatTMatTMultSymbolic_SeqAIJ_SeqDense;
16364222ddf1SHong Zhang   C->ops->productsymbolic          = MatProductSymbolic_AtB;
16376718818eSStefano Zampini   PetscFunctionReturn(0);
16386718818eSStefano Zampini }
16396718818eSStefano Zampini 
16406718818eSStefano Zampini static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense_ABt(Mat C)
16416718818eSStefano Zampini {
16426718818eSStefano Zampini   PetscFunctionBegin;
16436718818eSStefano Zampini   C->ops->mattransposemultsymbolic = MatTMatTMultSymbolic_SeqAIJ_SeqDense;
16446718818eSStefano Zampini   C->ops->productsymbolic          = MatProductSymbolic_ABt;
16454222ddf1SHong Zhang   PetscFunctionReturn(0);
16464222ddf1SHong Zhang }
16474222ddf1SHong Zhang 
16484222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense(Mat C)
16494222ddf1SHong Zhang {
16504222ddf1SHong Zhang   Mat_Product    *product = C->product;
16514222ddf1SHong Zhang 
16524222ddf1SHong Zhang   PetscFunctionBegin;
16534222ddf1SHong Zhang   switch (product->type) {
16544222ddf1SHong Zhang   case MATPRODUCT_AB:
16559566063dSJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_SeqAIJ_SeqDense_AB(C));
16564222ddf1SHong Zhang     break;
16574222ddf1SHong Zhang   case MATPRODUCT_AtB:
16589566063dSJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_SeqAIJ_SeqDense_AtB(C));
16594222ddf1SHong Zhang     break;
16606718818eSStefano Zampini   case MATPRODUCT_ABt:
16619566063dSJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_SeqAIJ_SeqDense_ABt(C));
16624222ddf1SHong Zhang     break;
16634222ddf1SHong Zhang   default:
16646718818eSStefano Zampini     break;
16654222ddf1SHong Zhang   }
16664222ddf1SHong Zhang   PetscFunctionReturn(0);
16674222ddf1SHong Zhang }
16684222ddf1SHong Zhang /* ------------------------------------------------------- */
16694222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqXBAIJ_SeqDense_AB(Mat C)
16704222ddf1SHong Zhang {
16714222ddf1SHong Zhang   Mat_Product    *product = C->product;
16724222ddf1SHong Zhang   Mat            A = product->A;
16734222ddf1SHong Zhang   PetscBool      baij;
16744222ddf1SHong Zhang 
16754222ddf1SHong Zhang   PetscFunctionBegin;
16769566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&baij));
16774222ddf1SHong Zhang   if (!baij) { /* A is seqsbaij */
16784222ddf1SHong Zhang     PetscBool sbaij;
16799566063dSJacob Faibussowitsch     PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&sbaij));
168028b400f6SJacob Faibussowitsch     PetscCheck(sbaij,PetscObjectComm((PetscObject)C),PETSC_ERR_ARG_WRONGSTATE,"Mat must be either seqbaij or seqsbaij format");
16814222ddf1SHong Zhang 
16824222ddf1SHong Zhang     C->ops->matmultsymbolic = MatMatMultSymbolic_SeqSBAIJ_SeqDense;
16834222ddf1SHong Zhang   } else { /* A is seqbaij */
16844222ddf1SHong Zhang     C->ops->matmultsymbolic = MatMatMultSymbolic_SeqBAIJ_SeqDense;
16854222ddf1SHong Zhang   }
16864222ddf1SHong Zhang 
16874222ddf1SHong Zhang   C->ops->productsymbolic = MatProductSymbolic_AB;
16884222ddf1SHong Zhang   PetscFunctionReturn(0);
16894222ddf1SHong Zhang }
16904222ddf1SHong Zhang 
16914222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqXBAIJ_SeqDense(Mat C)
16924222ddf1SHong Zhang {
16934222ddf1SHong Zhang   Mat_Product    *product = C->product;
16944222ddf1SHong Zhang 
16954222ddf1SHong Zhang   PetscFunctionBegin;
16966718818eSStefano Zampini   MatCheckProduct(C,1);
169728b400f6SJacob Faibussowitsch   PetscCheck(product->A,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing A");
16986718818eSStefano Zampini   if (product->type == MATPRODUCT_AB || (product->type == MATPRODUCT_AtB && product->A->symmetric)) {
16999566063dSJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_SeqXBAIJ_SeqDense_AB(C));
17006718818eSStefano Zampini   }
17014222ddf1SHong Zhang   PetscFunctionReturn(0);
17024222ddf1SHong Zhang }
17036718818eSStefano Zampini 
17044222ddf1SHong Zhang /* ------------------------------------------------------- */
17054222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqDense_SeqAIJ_AB(Mat C)
17064222ddf1SHong Zhang {
17074222ddf1SHong Zhang   PetscFunctionBegin;
17084222ddf1SHong Zhang   C->ops->matmultsymbolic = MatMatMultSymbolic_SeqDense_SeqAIJ;
17094222ddf1SHong Zhang   C->ops->productsymbolic = MatProductSymbolic_AB;
17104222ddf1SHong Zhang   PetscFunctionReturn(0);
17114222ddf1SHong Zhang }
17124222ddf1SHong Zhang 
17134222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqDense_SeqAIJ(Mat C)
17144222ddf1SHong Zhang {
17154222ddf1SHong Zhang   Mat_Product    *product = C->product;
17164222ddf1SHong Zhang 
17174222ddf1SHong Zhang   PetscFunctionBegin;
17184222ddf1SHong Zhang   if (product->type == MATPRODUCT_AB) {
17199566063dSJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_SeqDense_SeqAIJ_AB(C));
17206718818eSStefano Zampini   }
17214222ddf1SHong Zhang   PetscFunctionReturn(0);
17224222ddf1SHong Zhang }
17234222ddf1SHong Zhang /* ------------------------------------------------------- */
17244222ddf1SHong Zhang 
1725b9af6bddSHong Zhang PetscErrorCode  MatTransColoringApplySpToDen_SeqAIJ(MatTransposeColoring coloring,Mat B,Mat Btdense)
1726c8db22f6SHong Zhang {
17272f5f1f90SHong Zhang   Mat_SeqAIJ     *b       = (Mat_SeqAIJ*)B->data;
17282f5f1f90SHong Zhang   Mat_SeqDense   *btdense = (Mat_SeqDense*)Btdense->data;
17292f5f1f90SHong Zhang   PetscInt       *bi      = b->i,*bj=b->j;
17302f5f1f90SHong Zhang   PetscInt       m        = Btdense->rmap->n,n=Btdense->cmap->n,j,k,l,col,anz,*btcol,brow,ncolumns;
17312f5f1f90SHong Zhang   MatScalar      *btval,*btval_den,*ba=b->a;
17322f5f1f90SHong Zhang   PetscInt       *columns=coloring->columns,*colorforcol=coloring->colorforcol,ncolors=coloring->ncolors;
1733c8db22f6SHong Zhang 
1734c8db22f6SHong Zhang   PetscFunctionBegin;
17352f5f1f90SHong Zhang   btval_den=btdense->v;
17369566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(btval_den,m*n));
17372f5f1f90SHong Zhang   for (k=0; k<ncolors; k++) {
17382f5f1f90SHong Zhang     ncolumns = coloring->ncolumns[k];
17392f5f1f90SHong Zhang     for (l=0; l<ncolumns; l++) { /* insert a row of B to a column of Btdense */
1740d2853540SHong Zhang       col   = *(columns + colorforcol[k] + l);
17412f5f1f90SHong Zhang       btcol = bj + bi[col];
17422f5f1f90SHong Zhang       btval = ba + bi[col];
17432f5f1f90SHong Zhang       anz   = bi[col+1] - bi[col];
1744d2853540SHong Zhang       for (j=0; j<anz; j++) {
17452f5f1f90SHong Zhang         brow            = btcol[j];
17462f5f1f90SHong Zhang         btval_den[brow] = btval[j];
1747c8db22f6SHong Zhang       }
1748c8db22f6SHong Zhang     }
17492f5f1f90SHong Zhang     btval_den += m;
1750c8db22f6SHong Zhang   }
1751c8db22f6SHong Zhang   PetscFunctionReturn(0);
1752c8db22f6SHong Zhang }
1753c8db22f6SHong Zhang 
1754b9af6bddSHong Zhang PetscErrorCode MatTransColoringApplyDenToSp_SeqAIJ(MatTransposeColoring matcoloring,Mat Cden,Mat Csp)
17558972f759SHong Zhang {
1756b2d2b04fSHong Zhang   Mat_SeqAIJ        *csp = (Mat_SeqAIJ*)Csp->data;
17571683a169SBarry Smith   const PetscScalar *ca_den,*ca_den_ptr;
17581683a169SBarry Smith   PetscScalar       *ca=csp->a;
1759f99a636bSHong Zhang   PetscInt          k,l,m=Cden->rmap->n,ncolors=matcoloring->ncolors;
1760e88777f2SHong Zhang   PetscInt          brows=matcoloring->brows,*den2sp=matcoloring->den2sp;
1761077f23c2SHong Zhang   PetscInt          nrows,*row,*idx;
1762077f23c2SHong Zhang   PetscInt          *rows=matcoloring->rows,*colorforrow=matcoloring->colorforrow;
17638972f759SHong Zhang 
17648972f759SHong Zhang   PetscFunctionBegin;
17659566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(Cden,&ca_den));
1766f99a636bSHong Zhang 
1767077f23c2SHong Zhang   if (brows > 0) {
1768077f23c2SHong Zhang     PetscInt *lstart,row_end,row_start;
1769980ae229SHong Zhang     lstart = matcoloring->lstart;
17709566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(lstart,ncolors));
1771eeb4fd02SHong Zhang 
1772077f23c2SHong Zhang     row_end = brows;
1773eeb4fd02SHong Zhang     if (row_end > m) row_end = m;
1774077f23c2SHong Zhang     for (row_start=0; row_start<m; row_start+=brows) { /* loop over row blocks of Csp */
1775077f23c2SHong Zhang       ca_den_ptr = ca_den;
1776980ae229SHong Zhang       for (k=0; k<ncolors; k++) { /* loop over colors (columns of Cden) */
1777eeb4fd02SHong Zhang         nrows = matcoloring->nrows[k];
1778eeb4fd02SHong Zhang         row   = rows  + colorforrow[k];
1779eeb4fd02SHong Zhang         idx   = den2sp + colorforrow[k];
1780eeb4fd02SHong Zhang         for (l=lstart[k]; l<nrows; l++) {
1781eeb4fd02SHong Zhang           if (row[l] >= row_end) {
1782eeb4fd02SHong Zhang             lstart[k] = l;
1783eeb4fd02SHong Zhang             break;
1784eeb4fd02SHong Zhang           } else {
1785077f23c2SHong Zhang             ca[idx[l]] = ca_den_ptr[row[l]];
1786eeb4fd02SHong Zhang           }
1787eeb4fd02SHong Zhang         }
1788077f23c2SHong Zhang         ca_den_ptr += m;
1789eeb4fd02SHong Zhang       }
1790077f23c2SHong Zhang       row_end += brows;
1791eeb4fd02SHong Zhang       if (row_end > m) row_end = m;
1792eeb4fd02SHong Zhang     }
1793077f23c2SHong Zhang   } else { /* non-blocked impl: loop over columns of Csp - slow if Csp is large */
1794077f23c2SHong Zhang     ca_den_ptr = ca_den;
1795077f23c2SHong Zhang     for (k=0; k<ncolors; k++) {
1796077f23c2SHong Zhang       nrows = matcoloring->nrows[k];
1797077f23c2SHong Zhang       row   = rows  + colorforrow[k];
1798077f23c2SHong Zhang       idx   = den2sp + colorforrow[k];
1799077f23c2SHong Zhang       for (l=0; l<nrows; l++) {
1800077f23c2SHong Zhang         ca[idx[l]] = ca_den_ptr[row[l]];
1801077f23c2SHong Zhang       }
1802077f23c2SHong Zhang       ca_den_ptr += m;
1803077f23c2SHong Zhang     }
1804f99a636bSHong Zhang   }
1805f99a636bSHong Zhang 
18069566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(Cden,&ca_den));
1807f99a636bSHong Zhang #if defined(PETSC_USE_INFO)
1808077f23c2SHong Zhang   if (matcoloring->brows > 0) {
18099566063dSJacob Faibussowitsch     PetscCall(PetscInfo(Csp,"Loop over %" PetscInt_FMT " row blocks for den2sp\n",brows));
1810e88777f2SHong Zhang   } else {
18119566063dSJacob Faibussowitsch     PetscCall(PetscInfo(Csp,"Loop over colors/columns of Cden, inefficient for large sparse matrix product \n"));
1812e88777f2SHong Zhang   }
1813f99a636bSHong Zhang #endif
18148972f759SHong Zhang   PetscFunctionReturn(0);
18158972f759SHong Zhang }
18168972f759SHong Zhang 
1817b9af6bddSHong Zhang PetscErrorCode MatTransposeColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatTransposeColoring c)
1818b1683b59SHong Zhang {
1819e88777f2SHong Zhang   PetscInt       i,n,nrows,Nbs,j,k,m,ncols,col,cm;
18201a83f524SJed Brown   const PetscInt *is,*ci,*cj,*row_idx;
1821b28d80bdSHong Zhang   PetscInt       nis = iscoloring->n,*rowhit,bs = 1;
1822b1683b59SHong Zhang   IS             *isa;
1823b28d80bdSHong Zhang   Mat_SeqAIJ     *csp = (Mat_SeqAIJ*)mat->data;
1824e88777f2SHong Zhang   PetscInt       *colorforrow,*rows,*rows_i,*idxhit,*spidx,*den2sp,*den2sp_i;
1825e88777f2SHong Zhang   PetscInt       *colorforcol,*columns,*columns_i,brows;
1826e88777f2SHong Zhang   PetscBool      flg;
1827b1683b59SHong Zhang 
1828b1683b59SHong Zhang   PetscFunctionBegin;
18299566063dSJacob Faibussowitsch   PetscCall(ISColoringGetIS(iscoloring,PETSC_USE_POINTER,PETSC_IGNORE,&isa));
1830e99be685SHong Zhang 
18317ecccc15SHong Zhang   /* bs >1 is not being tested yet! */
1832e88777f2SHong Zhang   Nbs       = mat->cmap->N/bs;
1833b1683b59SHong Zhang   c->M      = mat->rmap->N/bs;  /* set total rows, columns and local rows */
1834e88777f2SHong Zhang   c->N      = Nbs;
1835e88777f2SHong Zhang   c->m      = c->M;
1836b1683b59SHong Zhang   c->rstart = 0;
1837e88777f2SHong Zhang   c->brows  = 100;
1838b1683b59SHong Zhang 
1839b1683b59SHong Zhang   c->ncolors = nis;
18409566063dSJacob Faibussowitsch   PetscCall(PetscMalloc3(nis,&c->ncolumns,nis,&c->nrows,nis+1,&colorforrow));
18419566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(csp->nz+1,&rows));
18429566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(csp->nz+1,&den2sp));
1843e88777f2SHong Zhang 
1844e88777f2SHong Zhang   brows = c->brows;
18459566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetInt(NULL,NULL,"-matden2sp_brows",&brows,&flg));
1846e88777f2SHong Zhang   if (flg) c->brows = brows;
1847eeb4fd02SHong Zhang   if (brows > 0) {
18489566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nis+1,&c->lstart));
1849e88777f2SHong Zhang   }
18502205254eSKarl Rupp 
1851d2853540SHong Zhang   colorforrow[0] = 0;
1852d2853540SHong Zhang   rows_i         = rows;
1853f99a636bSHong Zhang   den2sp_i       = den2sp;
1854b1683b59SHong Zhang 
18559566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nis+1,&colorforcol));
18569566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(Nbs+1,&columns));
18572205254eSKarl Rupp 
1858d2853540SHong Zhang   colorforcol[0] = 0;
1859d2853540SHong Zhang   columns_i      = columns;
1860d2853540SHong Zhang 
1861077f23c2SHong Zhang   /* get column-wise storage of mat */
18629566063dSJacob Faibussowitsch   PetscCall(MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL));
1863b1683b59SHong Zhang 
1864b28d80bdSHong Zhang   cm   = c->m;
18659566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(cm+1,&rowhit));
18669566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(cm+1,&idxhit));
1867f99a636bSHong Zhang   for (i=0; i<nis; i++) { /* loop over color */
18689566063dSJacob Faibussowitsch     PetscCall(ISGetLocalSize(isa[i],&n));
18699566063dSJacob Faibussowitsch     PetscCall(ISGetIndices(isa[i],&is));
18702205254eSKarl Rupp 
1871b1683b59SHong Zhang     c->ncolumns[i] = n;
1872*1baa6e33SBarry Smith     if (n) PetscCall(PetscArraycpy(columns_i,is,n));
1873d2853540SHong Zhang     colorforcol[i+1] = colorforcol[i] + n;
1874d2853540SHong Zhang     columns_i       += n;
1875b1683b59SHong Zhang 
1876b1683b59SHong Zhang     /* fast, crude version requires O(N*N) work */
18779566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(rowhit,cm));
1878e99be685SHong Zhang 
1879b7caf3d6SHong Zhang     for (j=0; j<n; j++) { /* loop over columns*/
1880b1683b59SHong Zhang       col     = is[j];
1881d2853540SHong Zhang       row_idx = cj + ci[col];
1882b1683b59SHong Zhang       m       = ci[col+1] - ci[col];
1883b7caf3d6SHong Zhang       for (k=0; k<m; k++) { /* loop over columns marking them in rowhit */
1884e99be685SHong Zhang         idxhit[*row_idx]   = spidx[ci[col] + k];
1885d2853540SHong Zhang         rowhit[*row_idx++] = col + 1;
1886b1683b59SHong Zhang       }
1887b1683b59SHong Zhang     }
1888b1683b59SHong Zhang     /* count the number of hits */
1889b1683b59SHong Zhang     nrows = 0;
1890e8354b3eSHong Zhang     for (j=0; j<cm; j++) {
1891b1683b59SHong Zhang       if (rowhit[j]) nrows++;
1892b1683b59SHong Zhang     }
1893b1683b59SHong Zhang     c->nrows[i]      = nrows;
1894d2853540SHong Zhang     colorforrow[i+1] = colorforrow[i] + nrows;
1895d2853540SHong Zhang 
1896b1683b59SHong Zhang     nrows = 0;
1897b7caf3d6SHong Zhang     for (j=0; j<cm; j++) { /* loop over rows */
1898b1683b59SHong Zhang       if (rowhit[j]) {
1899d2853540SHong Zhang         rows_i[nrows]   = j;
190012b89a43SHong Zhang         den2sp_i[nrows] = idxhit[j];
1901b1683b59SHong Zhang         nrows++;
1902b1683b59SHong Zhang       }
1903b1683b59SHong Zhang     }
1904e88777f2SHong Zhang     den2sp_i += nrows;
1905077f23c2SHong Zhang 
19069566063dSJacob Faibussowitsch     PetscCall(ISRestoreIndices(isa[i],&is));
1907f99a636bSHong Zhang     rows_i += nrows;
1908b1683b59SHong Zhang   }
19099566063dSJacob Faibussowitsch   PetscCall(MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL));
19109566063dSJacob Faibussowitsch   PetscCall(PetscFree(rowhit));
19119566063dSJacob Faibussowitsch   PetscCall(ISColoringRestoreIS(iscoloring,PETSC_USE_POINTER,&isa));
19122c71b3e2SJacob Faibussowitsch   PetscCheck(csp->nz == colorforrow[nis],PETSC_COMM_SELF,PETSC_ERR_PLIB,"csp->nz %" PetscInt_FMT " != colorforrow[nis] %" PetscInt_FMT,csp->nz,colorforrow[nis]);
1913b28d80bdSHong Zhang 
1914d2853540SHong Zhang   c->colorforrow = colorforrow;
1915d2853540SHong Zhang   c->rows        = rows;
1916f99a636bSHong Zhang   c->den2sp      = den2sp;
1917d2853540SHong Zhang   c->colorforcol = colorforcol;
1918d2853540SHong Zhang   c->columns     = columns;
19192205254eSKarl Rupp 
19209566063dSJacob Faibussowitsch   PetscCall(PetscFree(idxhit));
1921b1683b59SHong Zhang   PetscFunctionReturn(0);
1922b1683b59SHong Zhang }
1923d013fc79Sandi selinger 
19244222ddf1SHong Zhang /* --------------------------------------------------------------- */
19254222ddf1SHong Zhang static PetscErrorCode MatProductNumeric_AtB_SeqAIJ_SeqAIJ(Mat C)
1926df97dc6dSFande Kong {
19274222ddf1SHong Zhang   Mat_Product    *product = C->product;
19284222ddf1SHong Zhang   Mat            A=product->A,B=product->B;
19294222ddf1SHong Zhang 
1930df97dc6dSFande Kong   PetscFunctionBegin;
19314222ddf1SHong Zhang   if (C->ops->mattransposemultnumeric) {
19324222ddf1SHong Zhang     /* Alg: "outerproduct" */
19339566063dSJacob Faibussowitsch     PetscCall((*C->ops->mattransposemultnumeric)(A,B,C));
19344222ddf1SHong Zhang   } else {
19354222ddf1SHong Zhang     /* Alg: "matmatmult" -- C = At*B */
19366718818eSStefano Zampini     Mat_MatTransMatMult *atb = (Mat_MatTransMatMult *)product->data;
19376718818eSStefano Zampini     Mat                 At;
19384222ddf1SHong Zhang 
193928b400f6SJacob Faibussowitsch     PetscCheck(atb,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing product struct");
19406718818eSStefano Zampini     At = atb->At;
1941089a957eSStefano Zampini     if (atb->updateAt && At) { /* At is computed in MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ() */
19429566063dSJacob Faibussowitsch       PetscCall(MatTranspose_SeqAIJ(A,MAT_REUSE_MATRIX,&At));
19434222ddf1SHong Zhang     }
19449566063dSJacob Faibussowitsch     PetscCall(MatMatMultNumeric_SeqAIJ_SeqAIJ(At ? At : A,B,C));
19454222ddf1SHong Zhang     atb->updateAt = PETSC_TRUE;
19464222ddf1SHong Zhang   }
1947df97dc6dSFande Kong   PetscFunctionReturn(0);
1948df97dc6dSFande Kong }
1949df97dc6dSFande Kong 
19504222ddf1SHong Zhang static PetscErrorCode MatProductSymbolic_AtB_SeqAIJ_SeqAIJ(Mat C)
1951d013fc79Sandi selinger {
19524222ddf1SHong Zhang   Mat_Product    *product = C->product;
19534222ddf1SHong Zhang   Mat            A=product->A,B=product->B;
19544222ddf1SHong Zhang   PetscReal      fill=product->fill;
1955d013fc79Sandi selinger 
1956d013fc79Sandi selinger   PetscFunctionBegin;
19579566063dSJacob Faibussowitsch   PetscCall(MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C));
19582869b61bSandi selinger 
19594222ddf1SHong Zhang   C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJ_SeqAIJ;
19604222ddf1SHong Zhang   PetscFunctionReturn(0);
19612869b61bSandi selinger }
1962d013fc79Sandi selinger 
19634222ddf1SHong Zhang /* --------------------------------------------------------------- */
19644222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqAIJ_AB(Mat C)
19654222ddf1SHong Zhang {
19664222ddf1SHong Zhang   Mat_Product    *product = C->product;
19674222ddf1SHong Zhang   PetscInt       alg = 0; /* default algorithm */
19684222ddf1SHong Zhang   PetscBool      flg = PETSC_FALSE;
19694222ddf1SHong Zhang #if !defined(PETSC_HAVE_HYPRE)
19704222ddf1SHong Zhang   const char     *algTypes[7] = {"sorted","scalable","scalable_fast","heap","btheap","llcondensed","rowmerge"};
19714222ddf1SHong Zhang   PetscInt       nalg = 7;
19724222ddf1SHong Zhang #else
19734222ddf1SHong Zhang   const char     *algTypes[8] = {"sorted","scalable","scalable_fast","heap","btheap","llcondensed","rowmerge","hypre"};
19744222ddf1SHong Zhang   PetscInt       nalg = 8;
19754222ddf1SHong Zhang #endif
19764222ddf1SHong Zhang 
19774222ddf1SHong Zhang   PetscFunctionBegin;
19784222ddf1SHong Zhang   /* Set default algorithm */
19799566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(C->product->alg,"default",&flg));
19804222ddf1SHong Zhang   if (flg) {
19819566063dSJacob Faibussowitsch     PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]));
1982d013fc79Sandi selinger   }
1983d013fc79Sandi selinger 
19844222ddf1SHong Zhang   /* Get runtime option */
19854222ddf1SHong Zhang   if (product->api_user) {
1986d0609cedSBarry Smith     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");
19879566063dSJacob Faibussowitsch     PetscCall(PetscOptionsEList("-matmatmult_via","Algorithmic approach","MatMatMult",algTypes,nalg,algTypes[0],&alg,&flg));
1988d0609cedSBarry Smith     PetscOptionsEnd();
19894222ddf1SHong Zhang   } else {
1990d0609cedSBarry Smith     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");
19919566063dSJacob Faibussowitsch     PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_AB",algTypes,nalg,algTypes[0],&alg,&flg));
1992d0609cedSBarry Smith     PetscOptionsEnd();
1993d013fc79Sandi selinger   }
19944222ddf1SHong Zhang   if (flg) {
19959566063dSJacob Faibussowitsch     PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]));
1996d013fc79Sandi selinger   }
1997d013fc79Sandi selinger 
19984222ddf1SHong Zhang   C->ops->productsymbolic = MatProductSymbolic_AB;
19994222ddf1SHong Zhang   C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
20004222ddf1SHong Zhang   PetscFunctionReturn(0);
20014222ddf1SHong Zhang }
2002d013fc79Sandi selinger 
20034222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqAIJ_AtB(Mat C)
20044222ddf1SHong Zhang {
20054222ddf1SHong Zhang   Mat_Product    *product = C->product;
20064222ddf1SHong Zhang   PetscInt       alg = 0; /* default algorithm */
20074222ddf1SHong Zhang   PetscBool      flg = PETSC_FALSE;
2008089a957eSStefano Zampini   const char     *algTypes[3] = {"default","at*b","outerproduct"};
2009089a957eSStefano Zampini   PetscInt       nalg = 3;
2010d013fc79Sandi selinger 
20114222ddf1SHong Zhang   PetscFunctionBegin;
20124222ddf1SHong Zhang   /* Get runtime option */
20134222ddf1SHong Zhang   if (product->api_user) {
2014d0609cedSBarry Smith     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatTransposeMatMult","Mat");
20159566063dSJacob Faibussowitsch     PetscCall(PetscOptionsEList("-mattransposematmult_via","Algorithmic approach","MatTransposeMatMult",algTypes,nalg,algTypes[alg],&alg,&flg));
2016d0609cedSBarry Smith     PetscOptionsEnd();
20174222ddf1SHong Zhang   } else {
2018d0609cedSBarry Smith     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AtB","Mat");
20199566063dSJacob Faibussowitsch     PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_AtB",algTypes,nalg,algTypes[alg],&alg,&flg));
2020d0609cedSBarry Smith     PetscOptionsEnd();
20214222ddf1SHong Zhang   }
20224222ddf1SHong Zhang   if (flg) {
20239566063dSJacob Faibussowitsch     PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]));
20244222ddf1SHong Zhang   }
2025d013fc79Sandi selinger 
20264222ddf1SHong Zhang   C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJ_SeqAIJ;
20274222ddf1SHong Zhang   PetscFunctionReturn(0);
20284222ddf1SHong Zhang }
20294222ddf1SHong Zhang 
20304222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqAIJ_ABt(Mat C)
20314222ddf1SHong Zhang {
20324222ddf1SHong Zhang   Mat_Product    *product = C->product;
20334222ddf1SHong Zhang   PetscInt       alg = 0; /* default algorithm */
20344222ddf1SHong Zhang   PetscBool      flg = PETSC_FALSE;
20354222ddf1SHong Zhang   const char     *algTypes[2] = {"default","color"};
20364222ddf1SHong Zhang   PetscInt       nalg = 2;
20374222ddf1SHong Zhang 
20384222ddf1SHong Zhang   PetscFunctionBegin;
20394222ddf1SHong Zhang   /* Set default algorithm */
20409566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(C->product->alg,"default",&flg));
20414222ddf1SHong Zhang   if (!flg) {
20424222ddf1SHong Zhang     alg = 1;
20439566063dSJacob Faibussowitsch     PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]));
20444222ddf1SHong Zhang   }
20454222ddf1SHong Zhang 
20464222ddf1SHong Zhang   /* Get runtime option */
20474222ddf1SHong Zhang   if (product->api_user) {
2048d0609cedSBarry Smith     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");
20499566063dSJacob Faibussowitsch     PetscCall(PetscOptionsEList("-matmattransmult_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg));
2050d0609cedSBarry Smith     PetscOptionsEnd();
20514222ddf1SHong Zhang   } else {
2052d0609cedSBarry Smith     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");
20539566063dSJacob Faibussowitsch     PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg));
2054d0609cedSBarry Smith     PetscOptionsEnd();
20554222ddf1SHong Zhang   }
20564222ddf1SHong Zhang   if (flg) {
20579566063dSJacob Faibussowitsch     PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]));
20584222ddf1SHong Zhang   }
20594222ddf1SHong Zhang 
20604222ddf1SHong Zhang   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
20614222ddf1SHong Zhang   C->ops->productsymbolic          = MatProductSymbolic_ABt;
20624222ddf1SHong Zhang   PetscFunctionReturn(0);
20634222ddf1SHong Zhang }
20644222ddf1SHong Zhang 
20654222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqAIJ_PtAP(Mat C)
20664222ddf1SHong Zhang {
20674222ddf1SHong Zhang   Mat_Product    *product = C->product;
20684222ddf1SHong Zhang   PetscBool      flg = PETSC_FALSE;
20694222ddf1SHong Zhang   PetscInt       alg = 0; /* default algorithm -- alg=1 should be default!!! */
20704222ddf1SHong Zhang #if !defined(PETSC_HAVE_HYPRE)
20714222ddf1SHong Zhang   const char     *algTypes[2] = {"scalable","rap"};
20724222ddf1SHong Zhang   PetscInt       nalg = 2;
20734222ddf1SHong Zhang #else
20744222ddf1SHong Zhang   const char     *algTypes[3] = {"scalable","rap","hypre"};
20754222ddf1SHong Zhang   PetscInt       nalg = 3;
20764222ddf1SHong Zhang #endif
20774222ddf1SHong Zhang 
20784222ddf1SHong Zhang   PetscFunctionBegin;
20794222ddf1SHong Zhang   /* Set default algorithm */
20809566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(product->alg,"default",&flg));
20814222ddf1SHong Zhang   if (flg) {
20829566063dSJacob Faibussowitsch     PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]));
20834222ddf1SHong Zhang   }
20844222ddf1SHong Zhang 
20854222ddf1SHong Zhang   /* Get runtime option */
20864222ddf1SHong Zhang   if (product->api_user) {
2087d0609cedSBarry Smith     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");
20889566063dSJacob Faibussowitsch     PetscCall(PetscOptionsEList("-matptap_via","Algorithmic approach","MatPtAP",algTypes,nalg,algTypes[0],&alg,&flg));
2089d0609cedSBarry Smith     PetscOptionsEnd();
20904222ddf1SHong Zhang   } else {
2091d0609cedSBarry Smith     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");
20929566063dSJacob Faibussowitsch     PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_PtAP",algTypes,nalg,algTypes[0],&alg,&flg));
2093d0609cedSBarry Smith     PetscOptionsEnd();
20944222ddf1SHong Zhang   }
20954222ddf1SHong Zhang   if (flg) {
20969566063dSJacob Faibussowitsch     PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]));
20974222ddf1SHong Zhang   }
20984222ddf1SHong Zhang 
20994222ddf1SHong Zhang   C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqAIJ;
21004222ddf1SHong Zhang   PetscFunctionReturn(0);
21014222ddf1SHong Zhang }
21024222ddf1SHong Zhang 
21034222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqAIJ_RARt(Mat C)
21044222ddf1SHong Zhang {
21054222ddf1SHong Zhang   Mat_Product    *product = C->product;
21064222ddf1SHong Zhang   PetscBool      flg = PETSC_FALSE;
21074222ddf1SHong Zhang   PetscInt       alg = 0; /* default algorithm */
21084222ddf1SHong Zhang   const char     *algTypes[3] = {"r*a*rt","r*art","coloring_rart"};
21094222ddf1SHong Zhang   PetscInt        nalg = 3;
21104222ddf1SHong Zhang 
21114222ddf1SHong Zhang   PetscFunctionBegin;
21124222ddf1SHong Zhang   /* Set default algorithm */
21139566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(product->alg,"default",&flg));
21144222ddf1SHong Zhang   if (flg) {
21159566063dSJacob Faibussowitsch     PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]));
21164222ddf1SHong Zhang   }
21174222ddf1SHong Zhang 
21184222ddf1SHong Zhang   /* Get runtime option */
21194222ddf1SHong Zhang   if (product->api_user) {
2120d0609cedSBarry Smith     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatRARt","Mat");
21219566063dSJacob Faibussowitsch     PetscCall(PetscOptionsEList("-matrart_via","Algorithmic approach","MatRARt",algTypes,nalg,algTypes[0],&alg,&flg));
2122d0609cedSBarry Smith     PetscOptionsEnd();
21234222ddf1SHong Zhang   } else {
2124d0609cedSBarry Smith     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_RARt","Mat");
21259566063dSJacob Faibussowitsch     PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_RARt",algTypes,nalg,algTypes[0],&alg,&flg));
2126d0609cedSBarry Smith     PetscOptionsEnd();
21274222ddf1SHong Zhang   }
21284222ddf1SHong Zhang   if (flg) {
21299566063dSJacob Faibussowitsch     PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]));
21304222ddf1SHong Zhang   }
21314222ddf1SHong Zhang 
21324222ddf1SHong Zhang   C->ops->productsymbolic = MatProductSymbolic_RARt_SeqAIJ_SeqAIJ;
21334222ddf1SHong Zhang   PetscFunctionReturn(0);
21344222ddf1SHong Zhang }
21354222ddf1SHong Zhang 
21364222ddf1SHong Zhang /* ABC = A*B*C = A*(B*C); ABC's algorithm must be chosen from AB's algorithm */
21374222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqAIJ_ABC(Mat C)
21384222ddf1SHong Zhang {
21394222ddf1SHong Zhang   Mat_Product    *product = C->product;
21404222ddf1SHong Zhang   PetscInt       alg = 0; /* default algorithm */
21414222ddf1SHong Zhang   PetscBool      flg = PETSC_FALSE;
21424222ddf1SHong Zhang   const char     *algTypes[7] = {"sorted","scalable","scalable_fast","heap","btheap","llcondensed","rowmerge"};
21434222ddf1SHong Zhang   PetscInt       nalg = 7;
21444222ddf1SHong Zhang 
21454222ddf1SHong Zhang   PetscFunctionBegin;
21464222ddf1SHong Zhang   /* Set default algorithm */
21479566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(product->alg,"default",&flg));
21484222ddf1SHong Zhang   if (flg) {
21499566063dSJacob Faibussowitsch     PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]));
21504222ddf1SHong Zhang   }
21514222ddf1SHong Zhang 
21524222ddf1SHong Zhang   /* Get runtime option */
21534222ddf1SHong Zhang   if (product->api_user) {
2154d0609cedSBarry Smith     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMatMult","Mat");
21559566063dSJacob Faibussowitsch     PetscCall(PetscOptionsEList("-matmatmatmult_via","Algorithmic approach","MatMatMatMult",algTypes,nalg,algTypes[alg],&alg,&flg));
2156d0609cedSBarry Smith     PetscOptionsEnd();
21574222ddf1SHong Zhang   } else {
2158d0609cedSBarry Smith     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABC","Mat");
21599566063dSJacob Faibussowitsch     PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_ABC",algTypes,nalg,algTypes[alg],&alg,&flg));
2160d0609cedSBarry Smith     PetscOptionsEnd();
21614222ddf1SHong Zhang   }
21624222ddf1SHong Zhang   if (flg) {
21639566063dSJacob Faibussowitsch     PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]));
21644222ddf1SHong Zhang   }
21654222ddf1SHong Zhang 
21664222ddf1SHong Zhang   C->ops->matmatmultsymbolic = MatMatMatMultSymbolic_SeqAIJ_SeqAIJ_SeqAIJ;
21674222ddf1SHong Zhang   C->ops->productsymbolic    = MatProductSymbolic_ABC;
21684222ddf1SHong Zhang   PetscFunctionReturn(0);
21694222ddf1SHong Zhang }
21704222ddf1SHong Zhang 
21714222ddf1SHong Zhang PetscErrorCode MatProductSetFromOptions_SeqAIJ(Mat C)
21724222ddf1SHong Zhang {
21734222ddf1SHong Zhang   Mat_Product    *product = C->product;
21744222ddf1SHong Zhang 
21754222ddf1SHong Zhang   PetscFunctionBegin;
21764222ddf1SHong Zhang   switch (product->type) {
21774222ddf1SHong Zhang   case MATPRODUCT_AB:
21789566063dSJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_SeqAIJ_AB(C));
21794222ddf1SHong Zhang     break;
21804222ddf1SHong Zhang   case MATPRODUCT_AtB:
21819566063dSJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_SeqAIJ_AtB(C));
21824222ddf1SHong Zhang     break;
21834222ddf1SHong Zhang   case MATPRODUCT_ABt:
21849566063dSJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_SeqAIJ_ABt(C));
21854222ddf1SHong Zhang     break;
21864222ddf1SHong Zhang   case MATPRODUCT_PtAP:
21879566063dSJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_SeqAIJ_PtAP(C));
21884222ddf1SHong Zhang     break;
21894222ddf1SHong Zhang   case MATPRODUCT_RARt:
21909566063dSJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_SeqAIJ_RARt(C));
21914222ddf1SHong Zhang     break;
21924222ddf1SHong Zhang   case MATPRODUCT_ABC:
21939566063dSJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_SeqAIJ_ABC(C));
21944222ddf1SHong Zhang     break;
21956718818eSStefano Zampini   default:
21966718818eSStefano Zampini     break;
21974222ddf1SHong Zhang   }
2198d013fc79Sandi selinger   PetscFunctionReturn(0);
2199d013fc79Sandi selinger }
2200