1be1d678aSKris Buschelman 2d50806bdSBarry Smith /* 32499ec78SHong Zhang Defines matrix-matrix product routines for pairs of SeqAIJ matrices 4d50806bdSBarry Smith C = A * B 5d50806bdSBarry Smith */ 6d50806bdSBarry Smith 7c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 8c6db04a5SJed Brown #include <../src/mat/utils/freespace.h> 9c6db04a5SJed Brown #include <petscbt.h> 10af0996ceSBarry Smith #include <petsc/private/isimpl.h> 1107475bc1SBarry Smith #include <../src/mat/impls/dense/seq/dense.h> 127bab7c10SHong Zhang 13df97dc6dSFande Kong PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 14df97dc6dSFande Kong { 15df97dc6dSFande Kong PetscErrorCode ierr; 16df97dc6dSFande Kong 17df97dc6dSFande Kong PetscFunctionBegin; 18df97dc6dSFande Kong if (C->ops->matmultnumeric) { 196718818eSStefano Zampini if (C->ops->matmultnumeric == MatMatMultNumeric_SeqAIJ_SeqAIJ) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Recursive call"); 20df97dc6dSFande Kong ierr = (*C->ops->matmultnumeric)(A,B,C);CHKERRQ(ierr); 21df97dc6dSFande Kong } else { 22df97dc6dSFande Kong ierr = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted(A,B,C);CHKERRQ(ierr); 23df97dc6dSFande Kong } 24df97dc6dSFande Kong PetscFunctionReturn(0); 25df97dc6dSFande Kong } 26df97dc6dSFande Kong 274222ddf1SHong Zhang /* Modified from MatCreateSeqAIJWithArrays() */ 28e4e71118SStefano Zampini PETSC_INTERN PetscErrorCode MatSetSeqAIJWithArrays_private(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],MatType mtype,Mat mat) 29df97dc6dSFande Kong { 30df97dc6dSFande Kong PetscErrorCode ierr; 314222ddf1SHong Zhang PetscInt ii; 324222ddf1SHong Zhang Mat_SeqAIJ *aij; 33e4e71118SStefano Zampini PetscBool isseqaij; 345c66b693SKris Buschelman 355c66b693SKris Buschelman PetscFunctionBegin; 364222ddf1SHong Zhang if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 374222ddf1SHong Zhang ierr = MatSetSizes(mat,m,n,m,n);CHKERRQ(ierr); 384222ddf1SHong Zhang 39e4e71118SStefano Zampini if (!mtype) { 40e4e71118SStefano Zampini ierr = PetscObjectBaseTypeCompare((PetscObject)mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 41e4e71118SStefano Zampini if (!isseqaij) { ierr = MatSetType(mat,MATSEQAIJ);CHKERRQ(ierr); } 42e4e71118SStefano Zampini } else { 43e4e71118SStefano Zampini ierr = MatSetType(mat,mtype);CHKERRQ(ierr); 44e4e71118SStefano Zampini } 45f4259b30SLisandro Dalcin ierr = MatSeqAIJSetPreallocation_SeqAIJ(mat,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 464222ddf1SHong Zhang aij = (Mat_SeqAIJ*)(mat)->data; 474222ddf1SHong Zhang ierr = PetscMalloc1(m,&aij->imax);CHKERRQ(ierr); 484222ddf1SHong Zhang ierr = PetscMalloc1(m,&aij->ilen);CHKERRQ(ierr); 494222ddf1SHong Zhang 504222ddf1SHong Zhang aij->i = i; 514222ddf1SHong Zhang aij->j = j; 524222ddf1SHong Zhang aij->a = a; 534222ddf1SHong Zhang aij->singlemalloc = PETSC_FALSE; 544222ddf1SHong Zhang aij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 554222ddf1SHong Zhang aij->free_a = PETSC_FALSE; 564222ddf1SHong Zhang aij->free_ij = PETSC_FALSE; 574222ddf1SHong Zhang 584222ddf1SHong Zhang for (ii=0; ii<m; ii++) { 594222ddf1SHong Zhang aij->ilen[ii] = aij->imax[ii] = i[ii+1] - i[ii]; 6025023028SHong Zhang } 614222ddf1SHong Zhang 625c66b693SKris Buschelman PetscFunctionReturn(0); 635c66b693SKris Buschelman } 641c24bd37SHong Zhang 654222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat C) 664222ddf1SHong Zhang { 674222ddf1SHong Zhang PetscErrorCode ierr; 684222ddf1SHong Zhang Mat_Product *product = C->product; 694222ddf1SHong Zhang MatProductAlgorithm alg; 704222ddf1SHong Zhang PetscBool flg; 714222ddf1SHong Zhang 724222ddf1SHong Zhang PetscFunctionBegin; 734222ddf1SHong Zhang if (product) { 744222ddf1SHong Zhang alg = product->alg; 754222ddf1SHong Zhang } else { 764222ddf1SHong Zhang alg = "sorted"; 774222ddf1SHong Zhang } 784222ddf1SHong Zhang /* sorted */ 794222ddf1SHong Zhang ierr = PetscStrcmp(alg,"sorted",&flg);CHKERRQ(ierr); 804222ddf1SHong Zhang if (flg) { 814222ddf1SHong Zhang ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_Sorted(A,B,fill,C);CHKERRQ(ierr); 824222ddf1SHong Zhang PetscFunctionReturn(0); 834222ddf1SHong Zhang } 844222ddf1SHong Zhang 854222ddf1SHong Zhang /* scalable */ 864222ddf1SHong Zhang ierr = PetscStrcmp(alg,"scalable",&flg);CHKERRQ(ierr); 874222ddf1SHong Zhang if (flg) { 884222ddf1SHong Zhang ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(A,B,fill,C);CHKERRQ(ierr); 894222ddf1SHong Zhang PetscFunctionReturn(0); 904222ddf1SHong Zhang } 914222ddf1SHong Zhang 924222ddf1SHong Zhang /* scalable_fast */ 934222ddf1SHong Zhang ierr = PetscStrcmp(alg,"scalable_fast",&flg);CHKERRQ(ierr); 944222ddf1SHong Zhang if (flg) { 954222ddf1SHong Zhang ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_fast(A,B,fill,C);CHKERRQ(ierr); 964222ddf1SHong Zhang PetscFunctionReturn(0); 974222ddf1SHong Zhang } 984222ddf1SHong Zhang 994222ddf1SHong Zhang /* heap */ 1004222ddf1SHong Zhang ierr = PetscStrcmp(alg,"heap",&flg);CHKERRQ(ierr); 1014222ddf1SHong Zhang if (flg) { 1024222ddf1SHong Zhang ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_Heap(A,B,fill,C);CHKERRQ(ierr); 1034222ddf1SHong Zhang PetscFunctionReturn(0); 1044222ddf1SHong Zhang } 1054222ddf1SHong Zhang 1064222ddf1SHong Zhang /* btheap */ 1074222ddf1SHong Zhang ierr = PetscStrcmp(alg,"btheap",&flg);CHKERRQ(ierr); 1084222ddf1SHong Zhang if (flg) { 1094222ddf1SHong Zhang ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_BTHeap(A,B,fill,C);CHKERRQ(ierr); 1104222ddf1SHong Zhang PetscFunctionReturn(0); 1114222ddf1SHong Zhang } 1124222ddf1SHong Zhang 1134222ddf1SHong Zhang /* llcondensed */ 1144222ddf1SHong Zhang ierr = PetscStrcmp(alg,"llcondensed",&flg);CHKERRQ(ierr); 1154222ddf1SHong Zhang if (flg) { 1164222ddf1SHong Zhang ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_LLCondensed(A,B,fill,C);CHKERRQ(ierr); 1174222ddf1SHong Zhang PetscFunctionReturn(0); 1184222ddf1SHong Zhang } 1194222ddf1SHong Zhang 1204222ddf1SHong Zhang /* rowmerge */ 1214222ddf1SHong Zhang ierr = PetscStrcmp(alg,"rowmerge",&flg);CHKERRQ(ierr); 1224222ddf1SHong Zhang if (flg) { 1234222ddf1SHong Zhang ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_RowMerge(A,B,fill,C);CHKERRQ(ierr); 1244222ddf1SHong Zhang PetscFunctionReturn(0); 1254222ddf1SHong Zhang } 1264222ddf1SHong Zhang 1274222ddf1SHong Zhang #if defined(PETSC_HAVE_HYPRE) 1284222ddf1SHong Zhang ierr = PetscStrcmp(alg,"hypre",&flg);CHKERRQ(ierr); 1294222ddf1SHong Zhang if (flg) { 1304222ddf1SHong Zhang ierr = MatMatMultSymbolic_AIJ_AIJ_wHYPRE(A,B,fill,C);CHKERRQ(ierr); 1314222ddf1SHong Zhang PetscFunctionReturn(0); 1324222ddf1SHong Zhang } 1334222ddf1SHong Zhang #endif 1344222ddf1SHong Zhang 1354222ddf1SHong Zhang SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product Algorithm is not supported"); 1364222ddf1SHong Zhang } 1374222ddf1SHong Zhang 1384222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_LLCondensed(Mat A,Mat B,PetscReal fill,Mat C) 139b561aa9dSHong Zhang { 140b561aa9dSHong Zhang PetscErrorCode ierr; 141b561aa9dSHong Zhang Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 142971236abSHong Zhang PetscInt *ai=a->i,*bi=b->i,*ci,*cj; 143c1ba5575SJed Brown PetscInt am =A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 144b561aa9dSHong Zhang PetscReal afill; 145eca6b86aSHong Zhang PetscInt i,j,anzi,brow,bnzj,cnzi,*bj,*aj,*lnk,ndouble=0,Crmax; 146eca6b86aSHong Zhang PetscTable ta; 147fb908031SHong Zhang PetscBT lnkbt; 1480298fd71SBarry Smith PetscFreeSpaceList free_space=NULL,current_space=NULL; 149b561aa9dSHong Zhang 150b561aa9dSHong Zhang PetscFunctionBegin; 151bd958071SHong Zhang /* Get ci and cj */ 152bd958071SHong Zhang /*---------------*/ 153fb908031SHong Zhang /* Allocate ci array, arrays for fill computation and */ 154fb908031SHong Zhang /* free space for accumulating nonzero column info */ 155854ce69bSBarry Smith ierr = PetscMalloc1(am+2,&ci);CHKERRQ(ierr); 156fb908031SHong Zhang ci[0] = 0; 157fb908031SHong Zhang 158fb908031SHong Zhang /* create and initialize a linked list */ 159c373ccc6SHong Zhang ierr = PetscTableCreate(bn,bn,&ta);CHKERRQ(ierr); 160c373ccc6SHong Zhang MatRowMergeMax_SeqAIJ(b,bm,ta); 161eca6b86aSHong Zhang ierr = PetscTableGetCount(ta,&Crmax);CHKERRQ(ierr); 162eca6b86aSHong Zhang ierr = PetscTableDestroy(&ta);CHKERRQ(ierr); 163eca6b86aSHong Zhang 164eca6b86aSHong Zhang ierr = PetscLLCondensedCreate(Crmax,bn,&lnk,&lnkbt);CHKERRQ(ierr); 165fb908031SHong Zhang 166fb908031SHong Zhang /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 167f91af8c7SBarry Smith ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space);CHKERRQ(ierr); 1682205254eSKarl Rupp 169fb908031SHong Zhang current_space = free_space; 170fb908031SHong Zhang 171fb908031SHong Zhang /* Determine ci and cj */ 172fb908031SHong Zhang for (i=0; i<am; i++) { 173fb908031SHong Zhang anzi = ai[i+1] - ai[i]; 174fb908031SHong Zhang aj = a->j + ai[i]; 175fb908031SHong Zhang for (j=0; j<anzi; j++) { 176fb908031SHong Zhang brow = aj[j]; 177fb908031SHong Zhang bnzj = bi[brow+1] - bi[brow]; 178fb908031SHong Zhang bj = b->j + bi[brow]; 179fb908031SHong Zhang /* add non-zero cols of B into the sorted linked list lnk */ 180fb908031SHong Zhang ierr = PetscLLCondensedAddSorted(bnzj,bj,lnk,lnkbt);CHKERRQ(ierr); 181fb908031SHong Zhang } 182*8c78258cSHong Zhang /* add possible missing diagonal entry */ 183*8c78258cSHong Zhang if (C->force_diagonals) { 184*8c78258cSHong Zhang ierr = PetscLLCondensedAddSorted(1,&i,lnk,lnkbt);CHKERRQ(ierr); 185*8c78258cSHong Zhang } 186fb908031SHong Zhang cnzi = lnk[0]; 187fb908031SHong Zhang 188fb908031SHong Zhang /* If free space is not available, make more free space */ 189fb908031SHong Zhang /* Double the amount of total space in the list */ 190fb908031SHong Zhang if (current_space->local_remaining<cnzi) { 191f91af8c7SBarry Smith ierr = PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 192fb908031SHong Zhang ndouble++; 193fb908031SHong Zhang } 194fb908031SHong Zhang 195fb908031SHong Zhang /* Copy data into free space, then initialize lnk */ 196fb908031SHong Zhang ierr = PetscLLCondensedClean(bn,cnzi,current_space->array,lnk,lnkbt);CHKERRQ(ierr); 1972205254eSKarl Rupp 198fb908031SHong Zhang current_space->array += cnzi; 199fb908031SHong Zhang current_space->local_used += cnzi; 200fb908031SHong Zhang current_space->local_remaining -= cnzi; 2012205254eSKarl Rupp 202fb908031SHong Zhang ci[i+1] = ci[i] + cnzi; 203fb908031SHong Zhang } 204fb908031SHong Zhang 205fb908031SHong Zhang /* Column indices are in the list of free space */ 206fb908031SHong Zhang /* Allocate space for cj, initialize cj, and */ 207fb908031SHong Zhang /* destroy list of free space and other temporary array(s) */ 208854ce69bSBarry Smith ierr = PetscMalloc1(ci[am]+1,&cj);CHKERRQ(ierr); 209fb908031SHong Zhang ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 210fb908031SHong Zhang ierr = PetscLLCondensedDestroy(lnk,lnkbt);CHKERRQ(ierr); 211b561aa9dSHong Zhang 21226be0446SHong Zhang /* put together the new symbolic matrix */ 213e4e71118SStefano Zampini ierr = MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C);CHKERRQ(ierr); 2144222ddf1SHong Zhang ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr); 21558c24d83SHong Zhang 21658c24d83SHong Zhang /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 21758c24d83SHong Zhang /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 2184222ddf1SHong Zhang c = (Mat_SeqAIJ*)(C->data); 219aa1aec99SHong Zhang c->free_a = PETSC_FALSE; 220e6b907acSBarry Smith c->free_ij = PETSC_TRUE; 22158c24d83SHong Zhang c->nonew = 0; 2224222ddf1SHong Zhang 2234222ddf1SHong Zhang /* fast, needs non-scalable O(bn) array 'abdense' */ 2244222ddf1SHong Zhang C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted; 2250b7e3e3dSHong Zhang 2267212da7cSHong Zhang /* set MatInfo */ 2277212da7cSHong Zhang afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 228f2b054eeSHong Zhang if (afill < 1.0) afill = 1.0; 2297212da7cSHong Zhang c->maxnz = ci[am]; 2307212da7cSHong Zhang c->nz = ci[am]; 2314222ddf1SHong Zhang C->info.mallocs = ndouble; 2324222ddf1SHong Zhang C->info.fill_ratio_given = fill; 2334222ddf1SHong Zhang C->info.fill_ratio_needed = afill; 2347212da7cSHong Zhang 2357212da7cSHong Zhang #if defined(PETSC_USE_INFO) 2367212da7cSHong Zhang if (ci[am]) { 2374222ddf1SHong Zhang ierr = PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);CHKERRQ(ierr); 2384222ddf1SHong Zhang ierr = PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);CHKERRQ(ierr); 239f2b054eeSHong Zhang } else { 2404222ddf1SHong Zhang ierr = PetscInfo(C,"Empty matrix product\n");CHKERRQ(ierr); 241be0fcf8dSHong Zhang } 242f2b054eeSHong Zhang #endif 24358c24d83SHong Zhang PetscFunctionReturn(0); 24458c24d83SHong Zhang } 245d50806bdSBarry Smith 246df97dc6dSFande Kong PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted(Mat A,Mat B,Mat C) 247d50806bdSBarry Smith { 248dfbe8321SBarry Smith PetscErrorCode ierr; 249d13dce4bSSatish Balay PetscLogDouble flops=0.0; 250d50806bdSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 251d50806bdSBarry Smith Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 252d50806bdSBarry Smith Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 25338baddfdSBarry Smith PetscInt *ai =a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 254c58ca2e3SHong Zhang PetscInt am =A->rmap->n,cm=C->rmap->n; 255519eb980SHong Zhang PetscInt i,j,k,anzi,bnzi,cnzi,brow; 256aa1aec99SHong Zhang PetscScalar *aa=a->a,*ba=b->a,*baj,*ca,valtmp; 257aa1aec99SHong Zhang PetscScalar *ab_dense; 2586718818eSStefano Zampini PetscContainer cab_dense; 259d50806bdSBarry Smith 260d50806bdSBarry Smith PetscFunctionBegin; 261aa1aec99SHong Zhang if (!c->a) { /* first call of MatMatMultNumeric_SeqAIJ_SeqAIJ, allocate ca and matmult_abdense */ 262854ce69bSBarry Smith ierr = PetscMalloc1(ci[cm]+1,&ca);CHKERRQ(ierr); 263aa1aec99SHong Zhang c->a = ca; 264aa1aec99SHong Zhang c->free_a = PETSC_TRUE; 2656718818eSStefano Zampini } else ca = c->a; 2666718818eSStefano Zampini 2676718818eSStefano Zampini /* TODO this should be done in the symbolic phase */ 2686718818eSStefano Zampini /* However, this function is so heavily used (sometimes in an hidden way through multnumeric function pointers 2696718818eSStefano Zampini that is hard to eradicate) */ 2706718818eSStefano Zampini ierr = PetscObjectQuery((PetscObject)C,"__PETSc__ab_dense",(PetscObject*)&cab_dense);CHKERRQ(ierr); 2716718818eSStefano Zampini if (!cab_dense) { 2726718818eSStefano Zampini ierr = PetscMalloc1(B->cmap->N,&ab_dense);CHKERRQ(ierr); 2736718818eSStefano Zampini ierr = PetscContainerCreate(PETSC_COMM_SELF,&cab_dense);CHKERRQ(ierr); 2746718818eSStefano Zampini ierr = PetscContainerSetPointer(cab_dense,ab_dense);CHKERRQ(ierr); 2756718818eSStefano Zampini ierr = PetscContainerSetUserDestroy(cab_dense,PetscContainerUserDestroyDefault);CHKERRQ(ierr); 2766718818eSStefano Zampini ierr = PetscObjectCompose((PetscObject)C,"__PETSc__ab_dense",(PetscObject)cab_dense);CHKERRQ(ierr); 2776718818eSStefano Zampini ierr = PetscObjectDereference((PetscObject)cab_dense);CHKERRQ(ierr); 278d90d86d0SMatthew G. Knepley } 2796718818eSStefano Zampini ierr = PetscContainerGetPointer(cab_dense,(void**)&ab_dense);CHKERRQ(ierr); 2806718818eSStefano Zampini ierr = PetscArrayzero(ab_dense,B->cmap->N);CHKERRQ(ierr); 281aa1aec99SHong Zhang 282c124e916SHong Zhang /* clean old values in C */ 283580bdb30SBarry Smith ierr = PetscArrayzero(ca,ci[cm]);CHKERRQ(ierr); 284d50806bdSBarry Smith /* Traverse A row-wise. */ 285d50806bdSBarry Smith /* Build the ith row in C by summing over nonzero columns in A, */ 286d50806bdSBarry Smith /* the rows of B corresponding to nonzeros of A. */ 287d50806bdSBarry Smith for (i=0; i<am; i++) { 288d50806bdSBarry Smith anzi = ai[i+1] - ai[i]; 289d50806bdSBarry Smith for (j=0; j<anzi; j++) { 290519eb980SHong Zhang brow = aj[j]; 291d50806bdSBarry Smith bnzi = bi[brow+1] - bi[brow]; 292d50806bdSBarry Smith bjj = bj + bi[brow]; 293d50806bdSBarry Smith baj = ba + bi[brow]; 294519eb980SHong Zhang /* perform dense axpy */ 29536ec6d2dSHong Zhang valtmp = aa[j]; 296519eb980SHong Zhang for (k=0; k<bnzi; k++) { 29736ec6d2dSHong Zhang ab_dense[bjj[k]] += valtmp*baj[k]; 298519eb980SHong Zhang } 299519eb980SHong Zhang flops += 2*bnzi; 300519eb980SHong Zhang } 301c58ca2e3SHong Zhang aj += anzi; aa += anzi; 302c58ca2e3SHong Zhang 303c58ca2e3SHong Zhang cnzi = ci[i+1] - ci[i]; 304519eb980SHong Zhang for (k=0; k<cnzi; k++) { 305519eb980SHong Zhang ca[k] += ab_dense[cj[k]]; 306519eb980SHong Zhang ab_dense[cj[k]] = 0.0; /* zero ab_dense */ 307519eb980SHong Zhang } 308519eb980SHong Zhang flops += cnzi; 309519eb980SHong Zhang cj += cnzi; ca += cnzi; 310519eb980SHong Zhang } 311c58ca2e3SHong Zhang ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 312c58ca2e3SHong Zhang ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 313c58ca2e3SHong Zhang ierr = PetscLogFlops(flops);CHKERRQ(ierr); 314c58ca2e3SHong Zhang PetscFunctionReturn(0); 315c58ca2e3SHong Zhang } 316c58ca2e3SHong Zhang 31725023028SHong Zhang PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,Mat C) 318c58ca2e3SHong Zhang { 319c58ca2e3SHong Zhang PetscErrorCode ierr; 320c58ca2e3SHong Zhang PetscLogDouble flops=0.0; 321c58ca2e3SHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 322c58ca2e3SHong Zhang Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 323c58ca2e3SHong Zhang Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 324c58ca2e3SHong Zhang PetscInt *ai = a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 325c58ca2e3SHong Zhang PetscInt am = A->rmap->N,cm=C->rmap->N; 326c58ca2e3SHong Zhang PetscInt i,j,k,anzi,bnzi,cnzi,brow; 32736ec6d2dSHong Zhang PetscScalar *aa=a->a,*ba=b->a,*baj,*ca=c->a,valtmp; 328c58ca2e3SHong Zhang PetscInt nextb; 329c58ca2e3SHong Zhang 330c58ca2e3SHong Zhang PetscFunctionBegin; 331cf742fccSHong Zhang if (!ca) { /* first call of MatMatMultNumeric_SeqAIJ_SeqAIJ, allocate ca and matmult_abdense */ 332cf742fccSHong Zhang ierr = PetscMalloc1(ci[cm]+1,&ca);CHKERRQ(ierr); 333cf742fccSHong Zhang c->a = ca; 334cf742fccSHong Zhang c->free_a = PETSC_TRUE; 335cf742fccSHong Zhang } 336cf742fccSHong Zhang 337c58ca2e3SHong Zhang /* clean old values in C */ 338580bdb30SBarry Smith ierr = PetscArrayzero(ca,ci[cm]);CHKERRQ(ierr); 339c58ca2e3SHong Zhang /* Traverse A row-wise. */ 340c58ca2e3SHong Zhang /* Build the ith row in C by summing over nonzero columns in A, */ 341c58ca2e3SHong Zhang /* the rows of B corresponding to nonzeros of A. */ 342519eb980SHong Zhang for (i=0; i<am; i++) { 343519eb980SHong Zhang anzi = ai[i+1] - ai[i]; 344519eb980SHong Zhang cnzi = ci[i+1] - ci[i]; 345519eb980SHong Zhang for (j=0; j<anzi; j++) { 346519eb980SHong Zhang brow = aj[j]; 347519eb980SHong Zhang bnzi = bi[brow+1] - bi[brow]; 348519eb980SHong Zhang bjj = bj + bi[brow]; 349519eb980SHong Zhang baj = ba + bi[brow]; 350519eb980SHong Zhang /* perform sparse axpy */ 35136ec6d2dSHong Zhang valtmp = aa[j]; 352c124e916SHong Zhang nextb = 0; 353c124e916SHong Zhang for (k=0; nextb<bnzi; k++) { 354c124e916SHong Zhang if (cj[k] == bjj[nextb]) { /* ccol == bcol */ 35536ec6d2dSHong Zhang ca[k] += valtmp*baj[nextb++]; 356c124e916SHong Zhang } 357d50806bdSBarry Smith } 358d50806bdSBarry Smith flops += 2*bnzi; 359d50806bdSBarry Smith } 360519eb980SHong Zhang aj += anzi; aa += anzi; 361519eb980SHong Zhang cj += cnzi; ca += cnzi; 362d50806bdSBarry Smith } 363716bacf3SKris Buschelman ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 364716bacf3SKris Buschelman ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 365d50806bdSBarry Smith ierr = PetscLogFlops(flops);CHKERRQ(ierr); 366d50806bdSBarry Smith PetscFunctionReturn(0); 367d50806bdSBarry Smith } 368bc011b1eSHong Zhang 3694222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_fast(Mat A,Mat B,PetscReal fill,Mat C) 37025296bd5SBarry Smith { 37125296bd5SBarry Smith PetscErrorCode ierr; 37225296bd5SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 37325296bd5SBarry Smith PetscInt *ai = a->i,*bi=b->i,*ci,*cj; 3743c50cad2SHong Zhang PetscInt am = A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 37525296bd5SBarry Smith MatScalar *ca; 37625296bd5SBarry Smith PetscReal afill; 377eca6b86aSHong Zhang PetscInt i,j,anzi,brow,bnzj,cnzi,*bj,*aj,*lnk,ndouble=0,Crmax; 378eca6b86aSHong Zhang PetscTable ta; 3790298fd71SBarry Smith PetscFreeSpaceList free_space=NULL,current_space=NULL; 38025296bd5SBarry Smith 38125296bd5SBarry Smith PetscFunctionBegin; 3823c50cad2SHong Zhang /* Get ci and cj - same as MatMatMultSymbolic_SeqAIJ_SeqAIJ except using PetscLLxxx_fast() */ 3833c50cad2SHong Zhang /*-----------------------------------------------------------------------------------------*/ 3843c50cad2SHong Zhang /* Allocate arrays for fill computation and free space for accumulating nonzero column */ 385854ce69bSBarry Smith ierr = PetscMalloc1(am+2,&ci);CHKERRQ(ierr); 3863c50cad2SHong Zhang ci[0] = 0; 3873c50cad2SHong Zhang 3883c50cad2SHong Zhang /* create and initialize a linked list */ 389c373ccc6SHong Zhang ierr = PetscTableCreate(bn,bn,&ta);CHKERRQ(ierr); 390c373ccc6SHong Zhang MatRowMergeMax_SeqAIJ(b,bm,ta); 391eca6b86aSHong Zhang ierr = PetscTableGetCount(ta,&Crmax);CHKERRQ(ierr); 392eca6b86aSHong Zhang ierr = PetscTableDestroy(&ta);CHKERRQ(ierr); 393eca6b86aSHong Zhang 394eca6b86aSHong Zhang ierr = PetscLLCondensedCreate_fast(Crmax,&lnk);CHKERRQ(ierr); 3953c50cad2SHong Zhang 3963c50cad2SHong Zhang /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 397f91af8c7SBarry Smith ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space);CHKERRQ(ierr); 3983c50cad2SHong Zhang current_space = free_space; 3993c50cad2SHong Zhang 4003c50cad2SHong Zhang /* Determine ci and cj */ 4013c50cad2SHong Zhang for (i=0; i<am; i++) { 4023c50cad2SHong Zhang anzi = ai[i+1] - ai[i]; 4033c50cad2SHong Zhang aj = a->j + ai[i]; 4043c50cad2SHong Zhang for (j=0; j<anzi; j++) { 4053c50cad2SHong Zhang brow = aj[j]; 4063c50cad2SHong Zhang bnzj = bi[brow+1] - bi[brow]; 4073c50cad2SHong Zhang bj = b->j + bi[brow]; 4083c50cad2SHong Zhang /* add non-zero cols of B into the sorted linked list lnk */ 4093c50cad2SHong Zhang ierr = PetscLLCondensedAddSorted_fast(bnzj,bj,lnk);CHKERRQ(ierr); 4103c50cad2SHong Zhang } 411*8c78258cSHong Zhang /* add possible missing diagonal entry */ 412*8c78258cSHong Zhang if (C->force_diagonals) { 413*8c78258cSHong Zhang ierr = PetscLLCondensedAddSorted_fast(1,&i,lnk);CHKERRQ(ierr); 414*8c78258cSHong Zhang } 4153c50cad2SHong Zhang cnzi = lnk[1]; 4163c50cad2SHong Zhang 4173c50cad2SHong Zhang /* If free space is not available, make more free space */ 4183c50cad2SHong Zhang /* Double the amount of total space in the list */ 4193c50cad2SHong Zhang if (current_space->local_remaining<cnzi) { 420f91af8c7SBarry Smith ierr = PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4213c50cad2SHong Zhang ndouble++; 4223c50cad2SHong Zhang } 4233c50cad2SHong Zhang 4243c50cad2SHong Zhang /* Copy data into free space, then initialize lnk */ 4253c50cad2SHong Zhang ierr = PetscLLCondensedClean_fast(cnzi,current_space->array,lnk);CHKERRQ(ierr); 4262205254eSKarl Rupp 4273c50cad2SHong Zhang current_space->array += cnzi; 4283c50cad2SHong Zhang current_space->local_used += cnzi; 4293c50cad2SHong Zhang current_space->local_remaining -= cnzi; 4302205254eSKarl Rupp 4313c50cad2SHong Zhang ci[i+1] = ci[i] + cnzi; 4323c50cad2SHong Zhang } 4333c50cad2SHong Zhang 4343c50cad2SHong Zhang /* Column indices are in the list of free space */ 4353c50cad2SHong Zhang /* Allocate space for cj, initialize cj, and */ 4363c50cad2SHong Zhang /* destroy list of free space and other temporary array(s) */ 437854ce69bSBarry Smith ierr = PetscMalloc1(ci[am]+1,&cj);CHKERRQ(ierr); 4383c50cad2SHong Zhang ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 4393c50cad2SHong Zhang ierr = PetscLLCondensedDestroy_fast(lnk);CHKERRQ(ierr); 44025296bd5SBarry Smith 44125296bd5SBarry Smith /* Allocate space for ca */ 442580bdb30SBarry Smith ierr = PetscCalloc1(ci[am]+1,&ca);CHKERRQ(ierr); 44325296bd5SBarry Smith 44425296bd5SBarry Smith /* put together the new symbolic matrix */ 445e4e71118SStefano Zampini ierr = MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,ca,((PetscObject)A)->type_name,C);CHKERRQ(ierr); 4464222ddf1SHong Zhang ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr); 44725296bd5SBarry Smith 44825296bd5SBarry Smith /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 44925296bd5SBarry Smith /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 4504222ddf1SHong Zhang c = (Mat_SeqAIJ*)(C->data); 45125296bd5SBarry Smith c->free_a = PETSC_TRUE; 45225296bd5SBarry Smith c->free_ij = PETSC_TRUE; 45325296bd5SBarry Smith c->nonew = 0; 4542205254eSKarl Rupp 4554222ddf1SHong Zhang /* slower, less memory */ 4564222ddf1SHong Zhang C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable; 45725296bd5SBarry Smith 45825296bd5SBarry Smith /* set MatInfo */ 45925296bd5SBarry Smith afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 46025296bd5SBarry Smith if (afill < 1.0) afill = 1.0; 46125296bd5SBarry Smith c->maxnz = ci[am]; 46225296bd5SBarry Smith c->nz = ci[am]; 4634222ddf1SHong Zhang C->info.mallocs = ndouble; 4644222ddf1SHong Zhang C->info.fill_ratio_given = fill; 4654222ddf1SHong Zhang C->info.fill_ratio_needed = afill; 46625296bd5SBarry Smith 46725296bd5SBarry Smith #if defined(PETSC_USE_INFO) 46825296bd5SBarry Smith if (ci[am]) { 4694222ddf1SHong Zhang ierr = PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);CHKERRQ(ierr); 4704222ddf1SHong Zhang ierr = PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);CHKERRQ(ierr); 47125296bd5SBarry Smith } else { 4724222ddf1SHong Zhang ierr = PetscInfo(C,"Empty matrix product\n");CHKERRQ(ierr); 47325296bd5SBarry Smith } 47425296bd5SBarry Smith #endif 47525296bd5SBarry Smith PetscFunctionReturn(0); 47625296bd5SBarry Smith } 47725296bd5SBarry Smith 4784222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,PetscReal fill,Mat C) 479e9e4536cSHong Zhang { 480e9e4536cSHong Zhang PetscErrorCode ierr; 481e9e4536cSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 482bf9555e6SHong Zhang PetscInt *ai = a->i,*bi=b->i,*ci,*cj; 48325c41797SHong Zhang PetscInt am = A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 484e9e4536cSHong Zhang MatScalar *ca; 485e9e4536cSHong Zhang PetscReal afill; 486eca6b86aSHong Zhang PetscInt i,j,anzi,brow,bnzj,cnzi,*bj,*aj,*lnk,ndouble=0,Crmax; 487eca6b86aSHong Zhang PetscTable ta; 4880298fd71SBarry Smith PetscFreeSpaceList free_space=NULL,current_space=NULL; 489e9e4536cSHong Zhang 490e9e4536cSHong Zhang PetscFunctionBegin; 491bd958071SHong Zhang /* Get ci and cj - same as MatMatMultSymbolic_SeqAIJ_SeqAIJ except using PetscLLxxx_Scalalbe() */ 492bd958071SHong Zhang /*---------------------------------------------------------------------------------------------*/ 493bd958071SHong Zhang /* Allocate arrays for fill computation and free space for accumulating nonzero column */ 494854ce69bSBarry Smith ierr = PetscMalloc1(am+2,&ci);CHKERRQ(ierr); 495bd958071SHong Zhang ci[0] = 0; 496bd958071SHong Zhang 497bd958071SHong Zhang /* create and initialize a linked list */ 498c373ccc6SHong Zhang ierr = PetscTableCreate(bn,bn,&ta);CHKERRQ(ierr); 499c373ccc6SHong Zhang MatRowMergeMax_SeqAIJ(b,bm,ta); 500eca6b86aSHong Zhang ierr = PetscTableGetCount(ta,&Crmax);CHKERRQ(ierr); 501eca6b86aSHong Zhang ierr = PetscTableDestroy(&ta);CHKERRQ(ierr); 502eca6b86aSHong Zhang ierr = PetscLLCondensedCreate_Scalable(Crmax,&lnk);CHKERRQ(ierr); 503bd958071SHong Zhang 504bd958071SHong Zhang /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 505f91af8c7SBarry Smith ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space);CHKERRQ(ierr); 506bd958071SHong Zhang current_space = free_space; 507bd958071SHong Zhang 508bd958071SHong Zhang /* Determine ci and cj */ 509bd958071SHong Zhang for (i=0; i<am; i++) { 510bd958071SHong Zhang anzi = ai[i+1] - ai[i]; 511bd958071SHong Zhang aj = a->j + ai[i]; 512bd958071SHong Zhang for (j=0; j<anzi; j++) { 513bd958071SHong Zhang brow = aj[j]; 514bd958071SHong Zhang bnzj = bi[brow+1] - bi[brow]; 515bd958071SHong Zhang bj = b->j + bi[brow]; 516bd958071SHong Zhang /* add non-zero cols of B into the sorted linked list lnk */ 517bd958071SHong Zhang ierr = PetscLLCondensedAddSorted_Scalable(bnzj,bj,lnk);CHKERRQ(ierr); 518bd958071SHong Zhang } 519*8c78258cSHong Zhang /* add possible missing diagonal entry */ 520*8c78258cSHong Zhang if (C->force_diagonals) { 521*8c78258cSHong Zhang ierr = PetscLLCondensedAddSorted_Scalable(1,&i,lnk);CHKERRQ(ierr); 522*8c78258cSHong Zhang } 523*8c78258cSHong Zhang 524bd958071SHong Zhang cnzi = lnk[0]; 525bd958071SHong Zhang 526bd958071SHong Zhang /* If free space is not available, make more free space */ 527bd958071SHong Zhang /* Double the amount of total space in the list */ 528bd958071SHong Zhang if (current_space->local_remaining<cnzi) { 529f91af8c7SBarry Smith ierr = PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 530bd958071SHong Zhang ndouble++; 531bd958071SHong Zhang } 532bd958071SHong Zhang 533bd958071SHong Zhang /* Copy data into free space, then initialize lnk */ 534bd958071SHong Zhang ierr = PetscLLCondensedClean_Scalable(cnzi,current_space->array,lnk);CHKERRQ(ierr); 5352205254eSKarl Rupp 536bd958071SHong Zhang current_space->array += cnzi; 537bd958071SHong Zhang current_space->local_used += cnzi; 538bd958071SHong Zhang current_space->local_remaining -= cnzi; 5392205254eSKarl Rupp 540bd958071SHong Zhang ci[i+1] = ci[i] + cnzi; 541bd958071SHong Zhang } 542bd958071SHong Zhang 543bd958071SHong Zhang /* Column indices are in the list of free space */ 544bd958071SHong Zhang /* Allocate space for cj, initialize cj, and */ 545bd958071SHong Zhang /* destroy list of free space and other temporary array(s) */ 546854ce69bSBarry Smith ierr = PetscMalloc1(ci[am]+1,&cj);CHKERRQ(ierr); 547bd958071SHong Zhang ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 548bd958071SHong Zhang ierr = PetscLLCondensedDestroy_Scalable(lnk);CHKERRQ(ierr); 549e9e4536cSHong Zhang 550e9e4536cSHong Zhang /* Allocate space for ca */ 551bd958071SHong Zhang /*-----------------------*/ 552580bdb30SBarry Smith ierr = PetscCalloc1(ci[am]+1,&ca);CHKERRQ(ierr); 553e9e4536cSHong Zhang 554e9e4536cSHong Zhang /* put together the new symbolic matrix */ 555e4e71118SStefano Zampini ierr = MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,ca,((PetscObject)A)->type_name,C);CHKERRQ(ierr); 5564222ddf1SHong Zhang ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr); 557e9e4536cSHong Zhang 558e9e4536cSHong Zhang /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 559e9e4536cSHong Zhang /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 5604222ddf1SHong Zhang c = (Mat_SeqAIJ*)(C->data); 561e9e4536cSHong Zhang c->free_a = PETSC_TRUE; 562e9e4536cSHong Zhang c->free_ij = PETSC_TRUE; 563e9e4536cSHong Zhang c->nonew = 0; 5642205254eSKarl Rupp 5654222ddf1SHong Zhang /* slower, less memory */ 5664222ddf1SHong Zhang C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable; 567e9e4536cSHong Zhang 568e9e4536cSHong Zhang /* set MatInfo */ 569e9e4536cSHong Zhang afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 570e9e4536cSHong Zhang if (afill < 1.0) afill = 1.0; 571e9e4536cSHong Zhang c->maxnz = ci[am]; 572e9e4536cSHong Zhang c->nz = ci[am]; 5734222ddf1SHong Zhang C->info.mallocs = ndouble; 5744222ddf1SHong Zhang C->info.fill_ratio_given = fill; 5754222ddf1SHong Zhang C->info.fill_ratio_needed = afill; 576e9e4536cSHong Zhang 577e9e4536cSHong Zhang #if defined(PETSC_USE_INFO) 578e9e4536cSHong Zhang if (ci[am]) { 5794222ddf1SHong Zhang ierr = PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);CHKERRQ(ierr); 5804222ddf1SHong Zhang ierr = PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);CHKERRQ(ierr); 581e9e4536cSHong Zhang } else { 5824222ddf1SHong Zhang ierr = PetscInfo(C,"Empty matrix product\n");CHKERRQ(ierr); 583e9e4536cSHong Zhang } 584e9e4536cSHong Zhang #endif 585e9e4536cSHong Zhang PetscFunctionReturn(0); 586e9e4536cSHong Zhang } 587e9e4536cSHong Zhang 5884222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Heap(Mat A,Mat B,PetscReal fill,Mat C) 5890ced3a2bSJed Brown { 5900ced3a2bSJed Brown PetscErrorCode ierr; 5910ced3a2bSJed Brown Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 5920ced3a2bSJed Brown const PetscInt *ai=a->i,*bi=b->i,*aj=a->j,*bj=b->j; 5930ced3a2bSJed Brown PetscInt *ci,*cj,*bb; 5940ced3a2bSJed Brown PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 5950ced3a2bSJed Brown PetscReal afill; 5960ced3a2bSJed Brown PetscInt i,j,col,ndouble = 0; 5970298fd71SBarry Smith PetscFreeSpaceList free_space=NULL,current_space=NULL; 5980ced3a2bSJed Brown PetscHeap h; 5990ced3a2bSJed Brown 6000ced3a2bSJed Brown PetscFunctionBegin; 601cd093f1dSJed Brown /* Get ci and cj - by merging sorted rows using a heap */ 6020ced3a2bSJed Brown /*---------------------------------------------------------------------------------------------*/ 6030ced3a2bSJed Brown /* Allocate arrays for fill computation and free space for accumulating nonzero column */ 604854ce69bSBarry Smith ierr = PetscMalloc1(am+2,&ci);CHKERRQ(ierr); 6050ced3a2bSJed Brown ci[0] = 0; 6060ced3a2bSJed Brown 6070ced3a2bSJed Brown /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 608f91af8c7SBarry Smith ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space);CHKERRQ(ierr); 6090ced3a2bSJed Brown current_space = free_space; 6100ced3a2bSJed Brown 6110ced3a2bSJed Brown ierr = PetscHeapCreate(a->rmax,&h);CHKERRQ(ierr); 612785e854fSJed Brown ierr = PetscMalloc1(a->rmax,&bb);CHKERRQ(ierr); 6130ced3a2bSJed Brown 6140ced3a2bSJed Brown /* Determine ci and cj */ 6150ced3a2bSJed Brown for (i=0; i<am; i++) { 6160ced3a2bSJed Brown const PetscInt anzi = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */ 6170ced3a2bSJed Brown const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */ 6180ced3a2bSJed Brown ci[i+1] = ci[i]; 6190ced3a2bSJed Brown /* Populate the min heap */ 6200ced3a2bSJed Brown for (j=0; j<anzi; j++) { 6210ced3a2bSJed Brown bb[j] = bi[acol[j]]; /* bb points at the start of the row */ 6220ced3a2bSJed Brown if (bb[j] < bi[acol[j]+1]) { /* Add if row is nonempty */ 6230ced3a2bSJed Brown ierr = PetscHeapAdd(h,j,bj[bb[j]++]);CHKERRQ(ierr); 6240ced3a2bSJed Brown } 6250ced3a2bSJed Brown } 6260ced3a2bSJed Brown /* Pick off the min element, adding it to free space */ 6270ced3a2bSJed Brown ierr = PetscHeapPop(h,&j,&col);CHKERRQ(ierr); 6280ced3a2bSJed Brown while (j >= 0) { 6290ced3a2bSJed Brown if (current_space->local_remaining < 1) { /* double the size, but don't exceed 16 MiB */ 630f91af8c7SBarry Smith ierr = PetscFreeSpaceGet(PetscMin(PetscIntMultTruncate(2,current_space->total_array_size),16 << 20),¤t_space);CHKERRQ(ierr); 6310ced3a2bSJed Brown ndouble++; 6320ced3a2bSJed Brown } 6330ced3a2bSJed Brown *(current_space->array++) = col; 6340ced3a2bSJed Brown current_space->local_used++; 6350ced3a2bSJed Brown current_space->local_remaining--; 6360ced3a2bSJed Brown ci[i+1]++; 6370ced3a2bSJed Brown 6380ced3a2bSJed Brown /* stash if anything else remains in this row of B */ 6390ced3a2bSJed Brown if (bb[j] < bi[acol[j]+1]) {ierr = PetscHeapStash(h,j,bj[bb[j]++]);CHKERRQ(ierr);} 6400ced3a2bSJed Brown while (1) { /* pop and stash any other rows of B that also had an entry in this column */ 6410ced3a2bSJed Brown PetscInt j2,col2; 6420ced3a2bSJed Brown ierr = PetscHeapPeek(h,&j2,&col2);CHKERRQ(ierr); 6430ced3a2bSJed Brown if (col2 != col) break; 6440ced3a2bSJed Brown ierr = PetscHeapPop(h,&j2,&col2);CHKERRQ(ierr); 6450ced3a2bSJed Brown if (bb[j2] < bi[acol[j2]+1]) {ierr = PetscHeapStash(h,j2,bj[bb[j2]++]);CHKERRQ(ierr);} 6460ced3a2bSJed Brown } 6470ced3a2bSJed Brown /* Put any stashed elements back into the min heap */ 6480ced3a2bSJed Brown ierr = PetscHeapUnstash(h);CHKERRQ(ierr); 6490ced3a2bSJed Brown ierr = PetscHeapPop(h,&j,&col);CHKERRQ(ierr); 6500ced3a2bSJed Brown } 6510ced3a2bSJed Brown } 6520ced3a2bSJed Brown ierr = PetscFree(bb);CHKERRQ(ierr); 6530ced3a2bSJed Brown ierr = PetscHeapDestroy(&h);CHKERRQ(ierr); 6540ced3a2bSJed Brown 6550ced3a2bSJed Brown /* Column indices are in the list of free space */ 6560ced3a2bSJed Brown /* Allocate space for cj, initialize cj, and */ 6570ced3a2bSJed Brown /* destroy list of free space and other temporary array(s) */ 658785e854fSJed Brown ierr = PetscMalloc1(ci[am],&cj);CHKERRQ(ierr); 6590ced3a2bSJed Brown ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 6600ced3a2bSJed Brown 6610ced3a2bSJed Brown /* put together the new symbolic matrix */ 662e4e71118SStefano Zampini ierr = MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C);CHKERRQ(ierr); 6634222ddf1SHong Zhang ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr); 6640ced3a2bSJed Brown 6650ced3a2bSJed Brown /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 6660ced3a2bSJed Brown /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 6674222ddf1SHong Zhang c = (Mat_SeqAIJ*)(C->data); 6680ced3a2bSJed Brown c->free_a = PETSC_TRUE; 6690ced3a2bSJed Brown c->free_ij = PETSC_TRUE; 6700ced3a2bSJed Brown c->nonew = 0; 67126fbe8dcSKarl Rupp 6724222ddf1SHong Zhang C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted; 6730ced3a2bSJed Brown 6740ced3a2bSJed Brown /* set MatInfo */ 6750ced3a2bSJed Brown afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 6760ced3a2bSJed Brown if (afill < 1.0) afill = 1.0; 6770ced3a2bSJed Brown c->maxnz = ci[am]; 6780ced3a2bSJed Brown c->nz = ci[am]; 6794222ddf1SHong Zhang C->info.mallocs = ndouble; 6804222ddf1SHong Zhang C->info.fill_ratio_given = fill; 6814222ddf1SHong Zhang C->info.fill_ratio_needed = afill; 6820ced3a2bSJed Brown 6830ced3a2bSJed Brown #if defined(PETSC_USE_INFO) 6840ced3a2bSJed Brown if (ci[am]) { 6854222ddf1SHong Zhang ierr = PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);CHKERRQ(ierr); 6864222ddf1SHong Zhang ierr = PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);CHKERRQ(ierr); 6870ced3a2bSJed Brown } else { 6884222ddf1SHong Zhang ierr = PetscInfo(C,"Empty matrix product\n");CHKERRQ(ierr); 6890ced3a2bSJed Brown } 6900ced3a2bSJed Brown #endif 6910ced3a2bSJed Brown PetscFunctionReturn(0); 6920ced3a2bSJed Brown } 693e9e4536cSHong Zhang 6944222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_BTHeap(Mat A,Mat B,PetscReal fill,Mat C) 6958a07c6f1SJed Brown { 6968a07c6f1SJed Brown PetscErrorCode ierr; 6978a07c6f1SJed Brown Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 6988a07c6f1SJed Brown const PetscInt *ai = a->i,*bi=b->i,*aj=a->j,*bj=b->j; 6998a07c6f1SJed Brown PetscInt *ci,*cj,*bb; 7008a07c6f1SJed Brown PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 7018a07c6f1SJed Brown PetscReal afill; 7028a07c6f1SJed Brown PetscInt i,j,col,ndouble = 0; 7030298fd71SBarry Smith PetscFreeSpaceList free_space=NULL,current_space=NULL; 7048a07c6f1SJed Brown PetscHeap h; 7058a07c6f1SJed Brown PetscBT bt; 7068a07c6f1SJed Brown 7078a07c6f1SJed Brown PetscFunctionBegin; 708cd093f1dSJed Brown /* Get ci and cj - using a heap for the sorted rows, but use BT so that each index is only added once */ 7098a07c6f1SJed Brown /*---------------------------------------------------------------------------------------------*/ 7108a07c6f1SJed Brown /* Allocate arrays for fill computation and free space for accumulating nonzero column */ 711854ce69bSBarry Smith ierr = PetscMalloc1(am+2,&ci);CHKERRQ(ierr); 7128a07c6f1SJed Brown ci[0] = 0; 7138a07c6f1SJed Brown 7148a07c6f1SJed Brown /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 715f91af8c7SBarry Smith ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space);CHKERRQ(ierr); 7162205254eSKarl Rupp 7178a07c6f1SJed Brown current_space = free_space; 7188a07c6f1SJed Brown 7198a07c6f1SJed Brown ierr = PetscHeapCreate(a->rmax,&h);CHKERRQ(ierr); 720785e854fSJed Brown ierr = PetscMalloc1(a->rmax,&bb);CHKERRQ(ierr); 7218a07c6f1SJed Brown ierr = PetscBTCreate(bn,&bt);CHKERRQ(ierr); 7228a07c6f1SJed Brown 7238a07c6f1SJed Brown /* Determine ci and cj */ 7248a07c6f1SJed Brown for (i=0; i<am; i++) { 7258a07c6f1SJed Brown const PetscInt anzi = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */ 7268a07c6f1SJed Brown const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */ 7278a07c6f1SJed Brown const PetscInt *fptr = current_space->array; /* Save beginning of the row so we can clear the BT later */ 7288a07c6f1SJed Brown ci[i+1] = ci[i]; 7298a07c6f1SJed Brown /* Populate the min heap */ 7308a07c6f1SJed Brown for (j=0; j<anzi; j++) { 7318a07c6f1SJed Brown PetscInt brow = acol[j]; 7328a07c6f1SJed Brown for (bb[j] = bi[brow]; bb[j] < bi[brow+1]; bb[j]++) { 7338a07c6f1SJed Brown PetscInt bcol = bj[bb[j]]; 7348a07c6f1SJed Brown if (!PetscBTLookupSet(bt,bcol)) { /* new entry */ 7358a07c6f1SJed Brown ierr = PetscHeapAdd(h,j,bcol);CHKERRQ(ierr); 7368a07c6f1SJed Brown bb[j]++; 7378a07c6f1SJed Brown break; 7388a07c6f1SJed Brown } 7398a07c6f1SJed Brown } 7408a07c6f1SJed Brown } 7418a07c6f1SJed Brown /* Pick off the min element, adding it to free space */ 7428a07c6f1SJed Brown ierr = PetscHeapPop(h,&j,&col);CHKERRQ(ierr); 7438a07c6f1SJed Brown while (j >= 0) { 7448a07c6f1SJed Brown if (current_space->local_remaining < 1) { /* double the size, but don't exceed 16 MiB */ 7450298fd71SBarry Smith fptr = NULL; /* need PetscBTMemzero */ 746f91af8c7SBarry Smith ierr = PetscFreeSpaceGet(PetscMin(PetscIntMultTruncate(2,current_space->total_array_size),16 << 20),¤t_space);CHKERRQ(ierr); 7478a07c6f1SJed Brown ndouble++; 7488a07c6f1SJed Brown } 7498a07c6f1SJed Brown *(current_space->array++) = col; 7508a07c6f1SJed Brown current_space->local_used++; 7518a07c6f1SJed Brown current_space->local_remaining--; 7528a07c6f1SJed Brown ci[i+1]++; 7538a07c6f1SJed Brown 7548a07c6f1SJed Brown /* stash if anything else remains in this row of B */ 7558a07c6f1SJed Brown for (; bb[j] < bi[acol[j]+1]; bb[j]++) { 7568a07c6f1SJed Brown PetscInt bcol = bj[bb[j]]; 7578a07c6f1SJed Brown if (!PetscBTLookupSet(bt,bcol)) { /* new entry */ 7588a07c6f1SJed Brown ierr = PetscHeapAdd(h,j,bcol);CHKERRQ(ierr); 7598a07c6f1SJed Brown bb[j]++; 7608a07c6f1SJed Brown break; 7618a07c6f1SJed Brown } 7628a07c6f1SJed Brown } 7638a07c6f1SJed Brown ierr = PetscHeapPop(h,&j,&col);CHKERRQ(ierr); 7648a07c6f1SJed Brown } 7658a07c6f1SJed Brown if (fptr) { /* Clear the bits for this row */ 7668a07c6f1SJed Brown for (; fptr<current_space->array; fptr++) {ierr = PetscBTClear(bt,*fptr);CHKERRQ(ierr);} 7678a07c6f1SJed Brown } else { /* We reallocated so we don't remember (easily) how to clear only the bits we changed */ 7688a07c6f1SJed Brown ierr = PetscBTMemzero(bn,bt);CHKERRQ(ierr); 7698a07c6f1SJed Brown } 7708a07c6f1SJed Brown } 7718a07c6f1SJed Brown ierr = PetscFree(bb);CHKERRQ(ierr); 7728a07c6f1SJed Brown ierr = PetscHeapDestroy(&h);CHKERRQ(ierr); 7738a07c6f1SJed Brown ierr = PetscBTDestroy(&bt);CHKERRQ(ierr); 7748a07c6f1SJed Brown 7758a07c6f1SJed Brown /* Column indices are in the list of free space */ 7768a07c6f1SJed Brown /* Allocate space for cj, initialize cj, and */ 7778a07c6f1SJed Brown /* destroy list of free space and other temporary array(s) */ 778785e854fSJed Brown ierr = PetscMalloc1(ci[am],&cj);CHKERRQ(ierr); 7798a07c6f1SJed Brown ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 7808a07c6f1SJed Brown 7818a07c6f1SJed Brown /* put together the new symbolic matrix */ 782e4e71118SStefano Zampini ierr = MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C);CHKERRQ(ierr); 7834222ddf1SHong Zhang ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr); 7848a07c6f1SJed Brown 7858a07c6f1SJed Brown /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 7868a07c6f1SJed Brown /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 7874222ddf1SHong Zhang c = (Mat_SeqAIJ*)(C->data); 7888a07c6f1SJed Brown c->free_a = PETSC_TRUE; 7898a07c6f1SJed Brown c->free_ij = PETSC_TRUE; 7908a07c6f1SJed Brown c->nonew = 0; 79126fbe8dcSKarl Rupp 7924222ddf1SHong Zhang C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted; 7938a07c6f1SJed Brown 7948a07c6f1SJed Brown /* set MatInfo */ 7958a07c6f1SJed Brown afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 7968a07c6f1SJed Brown if (afill < 1.0) afill = 1.0; 7978a07c6f1SJed Brown c->maxnz = ci[am]; 7988a07c6f1SJed Brown c->nz = ci[am]; 7994222ddf1SHong Zhang C->info.mallocs = ndouble; 8004222ddf1SHong Zhang C->info.fill_ratio_given = fill; 8014222ddf1SHong Zhang C->info.fill_ratio_needed = afill; 8028a07c6f1SJed Brown 8038a07c6f1SJed Brown #if defined(PETSC_USE_INFO) 8048a07c6f1SJed Brown if (ci[am]) { 8054222ddf1SHong Zhang ierr = PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);CHKERRQ(ierr); 8064222ddf1SHong Zhang ierr = PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);CHKERRQ(ierr); 8078a07c6f1SJed Brown } else { 8084222ddf1SHong Zhang ierr = PetscInfo(C,"Empty matrix product\n");CHKERRQ(ierr); 8098a07c6f1SJed Brown } 8108a07c6f1SJed Brown #endif 8118a07c6f1SJed Brown PetscFunctionReturn(0); 8128a07c6f1SJed Brown } 8138a07c6f1SJed Brown 814d7ed1a4dSandi selinger 8154222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_RowMerge(Mat A,Mat B,PetscReal fill,Mat C) 816d7ed1a4dSandi selinger { 817d7ed1a4dSandi selinger PetscErrorCode ierr; 818d7ed1a4dSandi selinger Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 819d7ed1a4dSandi selinger const PetscInt *ai=a->i,*bi=b->i,*aj=a->j,*bj=b->j,*inputi,*inputj,*inputcol,*inputcol_L1; 820d7ed1a4dSandi selinger PetscInt *ci,*cj,*outputj,worki_L1[9],worki_L2[9]; 821d7ed1a4dSandi selinger PetscInt c_maxmem,a_maxrownnz=0,a_rownnz; 822d7ed1a4dSandi selinger const PetscInt workcol[8]={0,1,2,3,4,5,6,7}; 823d7ed1a4dSandi selinger const PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 824d7ed1a4dSandi selinger const PetscInt *brow_ptr[8],*brow_end[8]; 825d7ed1a4dSandi selinger PetscInt window[8]; 826d7ed1a4dSandi selinger PetscInt window_min,old_window_min,ci_nnz,outputi_nnz=0,L1_nrows,L2_nrows; 827d7ed1a4dSandi selinger PetscInt i,k,ndouble=0,L1_rowsleft,rowsleft; 828d7ed1a4dSandi selinger PetscReal afill; 829f83700f2Sandi selinger PetscInt *workj_L1,*workj_L2,*workj_L3; 8307660c4dbSandi selinger PetscInt L1_nnz,L2_nnz; 831d7ed1a4dSandi selinger 832d7ed1a4dSandi selinger /* Step 1: Get upper bound on memory required for allocation. 833d7ed1a4dSandi selinger Because of the way virtual memory works, 834d7ed1a4dSandi selinger only the memory pages that are actually needed will be physically allocated. */ 835d7ed1a4dSandi selinger PetscFunctionBegin; 836d7ed1a4dSandi selinger ierr = PetscMalloc1(am+1,&ci);CHKERRQ(ierr); 837d7ed1a4dSandi selinger for (i=0; i<am; i++) { 838d7ed1a4dSandi selinger const PetscInt anzi = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */ 839d7ed1a4dSandi selinger const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */ 840d7ed1a4dSandi selinger a_rownnz = 0; 841d7ed1a4dSandi selinger for (k=0; k<anzi; ++k) { 842d7ed1a4dSandi selinger a_rownnz += bi[acol[k]+1] - bi[acol[k]]; 843d7ed1a4dSandi selinger if (a_rownnz > bn) { 844d7ed1a4dSandi selinger a_rownnz = bn; 845d7ed1a4dSandi selinger break; 846d7ed1a4dSandi selinger } 847d7ed1a4dSandi selinger } 848d7ed1a4dSandi selinger a_maxrownnz = PetscMax(a_maxrownnz, a_rownnz); 849d7ed1a4dSandi selinger } 850d7ed1a4dSandi selinger /* temporary work areas for merging rows */ 851d7ed1a4dSandi selinger ierr = PetscMalloc1(a_maxrownnz*8,&workj_L1);CHKERRQ(ierr); 852f83700f2Sandi selinger ierr = PetscMalloc1(a_maxrownnz*8,&workj_L2);CHKERRQ(ierr); 853f83700f2Sandi selinger ierr = PetscMalloc1(a_maxrownnz,&workj_L3);CHKERRQ(ierr); 854d7ed1a4dSandi selinger 8557660c4dbSandi selinger /* This should be enough for almost all matrices. If not, memory is reallocated later. */ 8567660c4dbSandi selinger c_maxmem = 8*(ai[am]+bi[bm]); 857d7ed1a4dSandi selinger /* Step 2: Populate pattern for C */ 858d7ed1a4dSandi selinger ierr = PetscMalloc1(c_maxmem,&cj);CHKERRQ(ierr); 859d7ed1a4dSandi selinger 860d7ed1a4dSandi selinger ci_nnz = 0; 861d7ed1a4dSandi selinger ci[0] = 0; 862d7ed1a4dSandi selinger worki_L1[0] = 0; 863d7ed1a4dSandi selinger worki_L2[0] = 0; 864d7ed1a4dSandi selinger for (i=0; i<am; i++) { 865d7ed1a4dSandi selinger const PetscInt anzi = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */ 866d7ed1a4dSandi selinger const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */ 867d7ed1a4dSandi selinger rowsleft = anzi; 868d7ed1a4dSandi selinger inputcol_L1 = acol; 869d7ed1a4dSandi selinger L2_nnz = 0; 8707660c4dbSandi selinger L2_nrows = 1; /* Number of rows to be merged on Level 3. output of L3 already exists -> initial value 1 */ 8717660c4dbSandi selinger worki_L2[1] = 0; 87208fe1b3cSKarl Rupp outputi_nnz = 0; 873d7ed1a4dSandi selinger 874d7ed1a4dSandi selinger /* If the number of indices in C so far + the max number of columns in the next row > c_maxmem -> allocate more memory */ 875d7ed1a4dSandi selinger while (ci_nnz+a_maxrownnz > c_maxmem) { 876d7ed1a4dSandi selinger c_maxmem *= 2; 877d7ed1a4dSandi selinger ndouble++; 878d7ed1a4dSandi selinger ierr = PetscRealloc(sizeof(PetscInt)*c_maxmem,&cj);CHKERRQ(ierr); 879d7ed1a4dSandi selinger } 880d7ed1a4dSandi selinger 881d7ed1a4dSandi selinger while (rowsleft) { 882d7ed1a4dSandi selinger L1_rowsleft = PetscMin(64, rowsleft); /* In the inner loop max 64 rows of B can be merged */ 883d7ed1a4dSandi selinger L1_nrows = 0; 8847660c4dbSandi selinger L1_nnz = 0; 885d7ed1a4dSandi selinger inputcol = inputcol_L1; 886d7ed1a4dSandi selinger inputi = bi; 887d7ed1a4dSandi selinger inputj = bj; 888d7ed1a4dSandi selinger 889d7ed1a4dSandi selinger /* The following macro is used to specialize for small rows in A. 890d7ed1a4dSandi selinger This helps with compiler unrolling, improving performance substantially. 891f83700f2Sandi selinger Input: inputj inputi inputcol bn 892d7ed1a4dSandi selinger Output: outputj outputi_nnz */ 893d7ed1a4dSandi selinger #define MatMatMultSymbolic_RowMergeMacro(ANNZ) \ 894d7ed1a4dSandi selinger window_min = bn; \ 8957660c4dbSandi selinger outputi_nnz = 0; \ 8967660c4dbSandi selinger for (k=0; k<ANNZ; ++k) { \ 897d7ed1a4dSandi selinger brow_ptr[k] = inputj + inputi[inputcol[k]]; \ 898d7ed1a4dSandi selinger brow_end[k] = inputj + inputi[inputcol[k]+1]; \ 899d7ed1a4dSandi selinger window[k] = (brow_ptr[k] != brow_end[k]) ? *brow_ptr[k] : bn; \ 900d7ed1a4dSandi selinger window_min = PetscMin(window[k], window_min); \ 901d7ed1a4dSandi selinger } \ 902d7ed1a4dSandi selinger while (window_min < bn) { \ 903d7ed1a4dSandi selinger outputj[outputi_nnz++] = window_min; \ 904d7ed1a4dSandi selinger /* advance front and compute new minimum */ \ 905d7ed1a4dSandi selinger old_window_min = window_min; \ 906d7ed1a4dSandi selinger window_min = bn; \ 907d7ed1a4dSandi selinger for (k=0; k<ANNZ; ++k) { \ 908d7ed1a4dSandi selinger if (window[k] == old_window_min) { \ 909d7ed1a4dSandi selinger brow_ptr[k]++; \ 910d7ed1a4dSandi selinger window[k] = (brow_ptr[k] != brow_end[k]) ? *brow_ptr[k] : bn; \ 911d7ed1a4dSandi selinger } \ 912d7ed1a4dSandi selinger window_min = PetscMin(window[k], window_min); \ 913d7ed1a4dSandi selinger } \ 914d7ed1a4dSandi selinger } 915d7ed1a4dSandi selinger 916d7ed1a4dSandi selinger /************** L E V E L 1 ***************/ 917d7ed1a4dSandi selinger /* Merge up to 8 rows of B to L1 work array*/ 918d7ed1a4dSandi selinger while (L1_rowsleft) { 9197660c4dbSandi selinger outputi_nnz = 0; 9207660c4dbSandi selinger if (anzi > 8) outputj = workj_L1 + L1_nnz; /* Level 1 rowmerge*/ 9217660c4dbSandi selinger else outputj = cj + ci_nnz; /* Merge directly to C */ 9227660c4dbSandi selinger 923d7ed1a4dSandi selinger switch (L1_rowsleft) { 924d7ed1a4dSandi selinger case 1: brow_ptr[0] = inputj + inputi[inputcol[0]]; 925d7ed1a4dSandi selinger brow_end[0] = inputj + inputi[inputcol[0]+1]; 926d7ed1a4dSandi selinger for (; brow_ptr[0] != brow_end[0]; ++brow_ptr[0]) outputj[outputi_nnz++] = *brow_ptr[0]; /* copy row in b over */ 927d7ed1a4dSandi selinger inputcol += L1_rowsleft; 928d7ed1a4dSandi selinger rowsleft -= L1_rowsleft; 929d7ed1a4dSandi selinger L1_rowsleft = 0; 930d7ed1a4dSandi selinger break; 931d7ed1a4dSandi selinger case 2: MatMatMultSymbolic_RowMergeMacro(2); 932d7ed1a4dSandi selinger inputcol += L1_rowsleft; 933d7ed1a4dSandi selinger rowsleft -= L1_rowsleft; 934d7ed1a4dSandi selinger L1_rowsleft = 0; 935d7ed1a4dSandi selinger break; 936d7ed1a4dSandi selinger case 3: MatMatMultSymbolic_RowMergeMacro(3); 937d7ed1a4dSandi selinger inputcol += L1_rowsleft; 938d7ed1a4dSandi selinger rowsleft -= L1_rowsleft; 939d7ed1a4dSandi selinger L1_rowsleft = 0; 940d7ed1a4dSandi selinger break; 941d7ed1a4dSandi selinger case 4: MatMatMultSymbolic_RowMergeMacro(4); 942d7ed1a4dSandi selinger inputcol += L1_rowsleft; 943d7ed1a4dSandi selinger rowsleft -= L1_rowsleft; 944d7ed1a4dSandi selinger L1_rowsleft = 0; 945d7ed1a4dSandi selinger break; 946d7ed1a4dSandi selinger case 5: MatMatMultSymbolic_RowMergeMacro(5); 947d7ed1a4dSandi selinger inputcol += L1_rowsleft; 948d7ed1a4dSandi selinger rowsleft -= L1_rowsleft; 949d7ed1a4dSandi selinger L1_rowsleft = 0; 950d7ed1a4dSandi selinger break; 951d7ed1a4dSandi selinger case 6: MatMatMultSymbolic_RowMergeMacro(6); 952d7ed1a4dSandi selinger inputcol += L1_rowsleft; 953d7ed1a4dSandi selinger rowsleft -= L1_rowsleft; 954d7ed1a4dSandi selinger L1_rowsleft = 0; 955d7ed1a4dSandi selinger break; 956d7ed1a4dSandi selinger case 7: MatMatMultSymbolic_RowMergeMacro(7); 957d7ed1a4dSandi selinger inputcol += L1_rowsleft; 958d7ed1a4dSandi selinger rowsleft -= L1_rowsleft; 959d7ed1a4dSandi selinger L1_rowsleft = 0; 960d7ed1a4dSandi selinger break; 961d7ed1a4dSandi selinger default: MatMatMultSymbolic_RowMergeMacro(8); 962d7ed1a4dSandi selinger inputcol += 8; 963d7ed1a4dSandi selinger rowsleft -= 8; 964d7ed1a4dSandi selinger L1_rowsleft -= 8; 965d7ed1a4dSandi selinger break; 966d7ed1a4dSandi selinger } 967d7ed1a4dSandi selinger inputcol_L1 = inputcol; 9687660c4dbSandi selinger L1_nnz += outputi_nnz; 9697660c4dbSandi selinger worki_L1[++L1_nrows] = L1_nnz; 970d7ed1a4dSandi selinger } 971d7ed1a4dSandi selinger 972d7ed1a4dSandi selinger /********************** L E V E L 2 ************************/ 973d7ed1a4dSandi selinger /* Merge from L1 work array to either C or to L2 work array */ 974d7ed1a4dSandi selinger if (anzi > 8) { 975d7ed1a4dSandi selinger inputi = worki_L1; 976d7ed1a4dSandi selinger inputj = workj_L1; 977d7ed1a4dSandi selinger inputcol = workcol; 978d7ed1a4dSandi selinger outputi_nnz = 0; 979d7ed1a4dSandi selinger 980d7ed1a4dSandi selinger if (anzi <= 64) outputj = cj + ci_nnz; /* Merge from L1 work array to C */ 981d7ed1a4dSandi selinger else outputj = workj_L2 + L2_nnz; /* Merge from L1 work array to L2 work array */ 982d7ed1a4dSandi selinger 983d7ed1a4dSandi selinger switch (L1_nrows) { 984d7ed1a4dSandi selinger case 1: brow_ptr[0] = inputj + inputi[inputcol[0]]; 985d7ed1a4dSandi selinger brow_end[0] = inputj + inputi[inputcol[0]+1]; 986d7ed1a4dSandi selinger for (; brow_ptr[0] != brow_end[0]; ++brow_ptr[0]) outputj[outputi_nnz++] = *brow_ptr[0]; /* copy row in b over */ 987d7ed1a4dSandi selinger break; 988d7ed1a4dSandi selinger case 2: MatMatMultSymbolic_RowMergeMacro(2); break; 989d7ed1a4dSandi selinger case 3: MatMatMultSymbolic_RowMergeMacro(3); break; 990d7ed1a4dSandi selinger case 4: MatMatMultSymbolic_RowMergeMacro(4); break; 991d7ed1a4dSandi selinger case 5: MatMatMultSymbolic_RowMergeMacro(5); break; 992d7ed1a4dSandi selinger case 6: MatMatMultSymbolic_RowMergeMacro(6); break; 993d7ed1a4dSandi selinger case 7: MatMatMultSymbolic_RowMergeMacro(7); break; 994d7ed1a4dSandi selinger case 8: MatMatMultSymbolic_RowMergeMacro(8); break; 995d7ed1a4dSandi selinger default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatMatMult logic error: Not merging 1-8 rows from L1 work array!"); 996d7ed1a4dSandi selinger } 997d7ed1a4dSandi selinger L2_nnz += outputi_nnz; 998d7ed1a4dSandi selinger worki_L2[++L2_nrows] = L2_nnz; 999d7ed1a4dSandi selinger 10007660c4dbSandi selinger /************************ L E V E L 3 **********************/ 10017660c4dbSandi selinger /* Merge from L2 work array to either C or to L2 work array */ 1002d7ed1a4dSandi selinger if (anzi > 64 && (L2_nrows == 8 || rowsleft == 0)) { 1003d7ed1a4dSandi selinger inputi = worki_L2; 1004d7ed1a4dSandi selinger inputj = workj_L2; 1005d7ed1a4dSandi selinger inputcol = workcol; 1006d7ed1a4dSandi selinger outputi_nnz = 0; 1007f83700f2Sandi selinger if (rowsleft) outputj = workj_L3; 1008d7ed1a4dSandi selinger else outputj = cj + ci_nnz; 1009d7ed1a4dSandi selinger switch (L2_nrows) { 1010d7ed1a4dSandi selinger case 1: brow_ptr[0] = inputj + inputi[inputcol[0]]; 1011d7ed1a4dSandi selinger brow_end[0] = inputj + inputi[inputcol[0]+1]; 1012d7ed1a4dSandi selinger for (; brow_ptr[0] != brow_end[0]; ++brow_ptr[0]) outputj[outputi_nnz++] = *brow_ptr[0]; /* copy row in b over */ 1013d7ed1a4dSandi selinger break; 1014d7ed1a4dSandi selinger case 2: MatMatMultSymbolic_RowMergeMacro(2); break; 1015d7ed1a4dSandi selinger case 3: MatMatMultSymbolic_RowMergeMacro(3); break; 1016d7ed1a4dSandi selinger case 4: MatMatMultSymbolic_RowMergeMacro(4); break; 1017d7ed1a4dSandi selinger case 5: MatMatMultSymbolic_RowMergeMacro(5); break; 1018d7ed1a4dSandi selinger case 6: MatMatMultSymbolic_RowMergeMacro(6); break; 1019d7ed1a4dSandi selinger case 7: MatMatMultSymbolic_RowMergeMacro(7); break; 1020d7ed1a4dSandi selinger case 8: MatMatMultSymbolic_RowMergeMacro(8); break; 1021d7ed1a4dSandi selinger default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatMatMult logic error: Not merging 1-8 rows from L2 work array!"); 1022d7ed1a4dSandi selinger } 1023d7ed1a4dSandi selinger L2_nrows = 1; 10247660c4dbSandi selinger L2_nnz = outputi_nnz; 10257660c4dbSandi selinger worki_L2[1] = outputi_nnz; 10267660c4dbSandi selinger /* Copy to workj_L2 */ 1027d7ed1a4dSandi selinger if (rowsleft) { 10287660c4dbSandi selinger for (k=0; k<outputi_nnz; ++k) workj_L2[k] = outputj[k]; 1029d7ed1a4dSandi selinger } 1030d7ed1a4dSandi selinger } 1031d7ed1a4dSandi selinger } 1032d7ed1a4dSandi selinger } /* while (rowsleft) */ 1033d7ed1a4dSandi selinger #undef MatMatMultSymbolic_RowMergeMacro 1034d7ed1a4dSandi selinger 1035d7ed1a4dSandi selinger /* terminate current row */ 1036d7ed1a4dSandi selinger ci_nnz += outputi_nnz; 1037d7ed1a4dSandi selinger ci[i+1] = ci_nnz; 1038d7ed1a4dSandi selinger } 1039d7ed1a4dSandi selinger 1040d7ed1a4dSandi selinger /* Step 3: Create the new symbolic matrix */ 1041e4e71118SStefano Zampini ierr = MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C);CHKERRQ(ierr); 10424222ddf1SHong Zhang ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr); 1043d7ed1a4dSandi selinger 1044d7ed1a4dSandi selinger /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 1045d7ed1a4dSandi selinger /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 10464222ddf1SHong Zhang c = (Mat_SeqAIJ*)(C->data); 1047d7ed1a4dSandi selinger c->free_a = PETSC_TRUE; 1048d7ed1a4dSandi selinger c->free_ij = PETSC_TRUE; 1049d7ed1a4dSandi selinger c->nonew = 0; 1050d7ed1a4dSandi selinger 10514222ddf1SHong Zhang C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted; 1052d7ed1a4dSandi selinger 1053d7ed1a4dSandi selinger /* set MatInfo */ 1054d7ed1a4dSandi selinger afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 1055d7ed1a4dSandi selinger if (afill < 1.0) afill = 1.0; 1056d7ed1a4dSandi selinger c->maxnz = ci[am]; 1057d7ed1a4dSandi selinger c->nz = ci[am]; 10584222ddf1SHong Zhang C->info.mallocs = ndouble; 10594222ddf1SHong Zhang C->info.fill_ratio_given = fill; 10604222ddf1SHong Zhang C->info.fill_ratio_needed = afill; 1061d7ed1a4dSandi selinger 1062d7ed1a4dSandi selinger #if defined(PETSC_USE_INFO) 1063d7ed1a4dSandi selinger if (ci[am]) { 10644222ddf1SHong Zhang ierr = PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);CHKERRQ(ierr); 10654222ddf1SHong Zhang ierr = PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);CHKERRQ(ierr); 1066d7ed1a4dSandi selinger } else { 10674222ddf1SHong Zhang ierr = PetscInfo(C,"Empty matrix product\n");CHKERRQ(ierr); 1068d7ed1a4dSandi selinger } 1069d7ed1a4dSandi selinger #endif 1070d7ed1a4dSandi selinger 1071d7ed1a4dSandi selinger /* Step 4: Free temporary work areas */ 1072d7ed1a4dSandi selinger ierr = PetscFree(workj_L1);CHKERRQ(ierr); 1073d7ed1a4dSandi selinger ierr = PetscFree(workj_L2);CHKERRQ(ierr); 1074f83700f2Sandi selinger ierr = PetscFree(workj_L3);CHKERRQ(ierr); 1075d7ed1a4dSandi selinger PetscFunctionReturn(0); 1076d7ed1a4dSandi selinger } 1077d7ed1a4dSandi selinger 1078cd093f1dSJed Brown /* concatenate unique entries and then sort */ 10794222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Sorted(Mat A,Mat B,PetscReal fill,Mat C) 1080cd093f1dSJed Brown { 1081cd093f1dSJed Brown PetscErrorCode ierr; 1082cd093f1dSJed Brown Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 1083cd093f1dSJed Brown const PetscInt *ai = a->i,*bi=b->i,*aj=a->j,*bj=b->j; 1084*8c78258cSHong Zhang PetscInt *ci,*cj,bcol; 1085cd093f1dSJed Brown PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 1086cd093f1dSJed Brown PetscReal afill; 1087cd093f1dSJed Brown PetscInt i,j,ndouble = 0; 1088cd093f1dSJed Brown PetscSegBuffer seg,segrow; 1089cd093f1dSJed Brown char *seen; 1090cd093f1dSJed Brown 1091cd093f1dSJed Brown PetscFunctionBegin; 1092854ce69bSBarry Smith ierr = PetscMalloc1(am+1,&ci);CHKERRQ(ierr); 1093cd093f1dSJed Brown ci[0] = 0; 1094cd093f1dSJed Brown 1095cd093f1dSJed Brown /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 1096cd093f1dSJed Brown ierr = PetscSegBufferCreate(sizeof(PetscInt),(PetscInt)(fill*(ai[am]+bi[bm])),&seg);CHKERRQ(ierr); 1097cd093f1dSJed Brown ierr = PetscSegBufferCreate(sizeof(PetscInt),100,&segrow);CHKERRQ(ierr); 1098580bdb30SBarry Smith ierr = PetscCalloc1(bn,&seen);CHKERRQ(ierr); 1099cd093f1dSJed Brown 1100cd093f1dSJed Brown /* Determine ci and cj */ 1101cd093f1dSJed Brown for (i=0; i<am; i++) { 1102cd093f1dSJed Brown const PetscInt anzi = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */ 1103cd093f1dSJed Brown const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */ 1104cd093f1dSJed Brown PetscInt packlen = 0,*PETSC_RESTRICT crow; 1105*8c78258cSHong Zhang 1106cd093f1dSJed Brown /* Pack segrow */ 1107cd093f1dSJed Brown for (j=0; j<anzi; j++) { 1108cd093f1dSJed Brown PetscInt brow = acol[j],bjstart = bi[brow],bjend = bi[brow+1],k; 1109cd093f1dSJed Brown for (k=bjstart; k<bjend; k++) { 1110*8c78258cSHong Zhang bcol = bj[k]; 1111cd093f1dSJed Brown if (!seen[bcol]) { /* new entry */ 1112cd093f1dSJed Brown PetscInt *PETSC_RESTRICT slot; 1113cd093f1dSJed Brown ierr = PetscSegBufferGetInts(segrow,1,&slot);CHKERRQ(ierr); 1114cd093f1dSJed Brown *slot = bcol; 1115cd093f1dSJed Brown seen[bcol] = 1; 1116cd093f1dSJed Brown packlen++; 1117cd093f1dSJed Brown } 1118cd093f1dSJed Brown } 1119cd093f1dSJed Brown } 1120*8c78258cSHong Zhang 1121*8c78258cSHong Zhang /* Check i-th diagonal entry */ 1122*8c78258cSHong Zhang if (C->force_diagonals && !seen[i]) { 1123*8c78258cSHong Zhang PetscInt *PETSC_RESTRICT slot; 1124*8c78258cSHong Zhang ierr = PetscSegBufferGetInts(segrow,1,&slot);CHKERRQ(ierr); 1125*8c78258cSHong Zhang *slot = i; 1126*8c78258cSHong Zhang seen[i] = 1; 1127*8c78258cSHong Zhang packlen++; 1128*8c78258cSHong Zhang } 1129*8c78258cSHong Zhang 1130cd093f1dSJed Brown ierr = PetscSegBufferGetInts(seg,packlen,&crow);CHKERRQ(ierr); 1131cd093f1dSJed Brown ierr = PetscSegBufferExtractTo(segrow,crow);CHKERRQ(ierr); 1132cd093f1dSJed Brown ierr = PetscSortInt(packlen,crow);CHKERRQ(ierr); 1133cd093f1dSJed Brown ci[i+1] = ci[i] + packlen; 1134cd093f1dSJed Brown for (j=0; j<packlen; j++) seen[crow[j]] = 0; 1135cd093f1dSJed Brown } 1136cd093f1dSJed Brown ierr = PetscSegBufferDestroy(&segrow);CHKERRQ(ierr); 1137cd093f1dSJed Brown ierr = PetscFree(seen);CHKERRQ(ierr); 1138cd093f1dSJed Brown 1139cd093f1dSJed Brown /* Column indices are in the segmented buffer */ 1140cd093f1dSJed Brown ierr = PetscSegBufferExtractAlloc(seg,&cj);CHKERRQ(ierr); 1141cd093f1dSJed Brown ierr = PetscSegBufferDestroy(&seg);CHKERRQ(ierr); 1142cd093f1dSJed Brown 1143cd093f1dSJed Brown /* put together the new symbolic matrix */ 1144e4e71118SStefano Zampini ierr = MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C);CHKERRQ(ierr); 11454222ddf1SHong Zhang ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr); 1146cd093f1dSJed Brown 1147cd093f1dSJed Brown /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 1148cd093f1dSJed Brown /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 11494222ddf1SHong Zhang c = (Mat_SeqAIJ*)(C->data); 1150cd093f1dSJed Brown c->free_a = PETSC_TRUE; 1151cd093f1dSJed Brown c->free_ij = PETSC_TRUE; 1152cd093f1dSJed Brown c->nonew = 0; 1153cd093f1dSJed Brown 11544222ddf1SHong Zhang C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted; 1155cd093f1dSJed Brown 1156cd093f1dSJed Brown /* set MatInfo */ 11572a09556fSStefano Zampini afill = (PetscReal)ci[am]/PetscMax(ai[am]+bi[bm],1) + 1.e-5; 1158cd093f1dSJed Brown if (afill < 1.0) afill = 1.0; 1159cd093f1dSJed Brown c->maxnz = ci[am]; 1160cd093f1dSJed Brown c->nz = ci[am]; 11614222ddf1SHong Zhang C->info.mallocs = ndouble; 11624222ddf1SHong Zhang C->info.fill_ratio_given = fill; 11634222ddf1SHong Zhang C->info.fill_ratio_needed = afill; 1164cd093f1dSJed Brown 1165cd093f1dSJed Brown #if defined(PETSC_USE_INFO) 1166cd093f1dSJed Brown if (ci[am]) { 11674222ddf1SHong Zhang ierr = PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);CHKERRQ(ierr); 11684222ddf1SHong Zhang ierr = PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);CHKERRQ(ierr); 1169cd093f1dSJed Brown } else { 11704222ddf1SHong Zhang ierr = PetscInfo(C,"Empty matrix product\n");CHKERRQ(ierr); 1171cd093f1dSJed Brown } 1172cd093f1dSJed Brown #endif 1173cd093f1dSJed Brown PetscFunctionReturn(0); 1174cd093f1dSJed Brown } 1175cd093f1dSJed Brown 11766718818eSStefano Zampini PetscErrorCode MatDestroy_SeqAIJ_MatMatMultTrans(void *data) 11772128a86cSHong Zhang { 11782128a86cSHong Zhang PetscErrorCode ierr; 11796718818eSStefano Zampini Mat_MatMatTransMult *abt=(Mat_MatMatTransMult *)data; 11802128a86cSHong Zhang 11812128a86cSHong Zhang PetscFunctionBegin; 118240192850SHong Zhang ierr = MatTransposeColoringDestroy(&abt->matcoloring);CHKERRQ(ierr); 118340192850SHong Zhang ierr = MatDestroy(&abt->Bt_den);CHKERRQ(ierr); 118440192850SHong Zhang ierr = MatDestroy(&abt->ABt_den);CHKERRQ(ierr); 118540192850SHong Zhang ierr = PetscFree(abt);CHKERRQ(ierr); 11862128a86cSHong Zhang PetscFunctionReturn(0); 11872128a86cSHong Zhang } 11882128a86cSHong Zhang 11894222ddf1SHong Zhang PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat C) 1190bc011b1eSHong Zhang { 11915df89d91SHong Zhang PetscErrorCode ierr; 119281d82fe4SHong Zhang Mat Bt; 119381d82fe4SHong Zhang PetscInt *bti,*btj; 119440192850SHong Zhang Mat_MatMatTransMult *abt; 11954222ddf1SHong Zhang Mat_Product *product = C->product; 11966718818eSStefano Zampini char *alg; 1197d2853540SHong Zhang 119881d82fe4SHong Zhang PetscFunctionBegin; 11996718818eSStefano Zampini if (!product) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing product struct"); 12006718818eSStefano Zampini if (product->data) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Extra product struct not empty"); 12016718818eSStefano Zampini 120281d82fe4SHong Zhang /* create symbolic Bt */ 120381d82fe4SHong Zhang ierr = MatGetSymbolicTranspose_SeqAIJ(B,&bti,&btj);CHKERRQ(ierr); 12040298fd71SBarry Smith ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,B->cmap->n,B->rmap->n,bti,btj,NULL,&Bt);CHKERRQ(ierr); 120533d57670SJed Brown ierr = MatSetBlockSizes(Bt,PetscAbs(A->cmap->bs),PetscAbs(B->cmap->bs));CHKERRQ(ierr); 120604b858e0SBarry Smith ierr = MatSetType(Bt,((PetscObject)A)->type_name);CHKERRQ(ierr); 120781d82fe4SHong Zhang 120881d82fe4SHong Zhang /* get symbolic C=A*Bt */ 12096718818eSStefano Zampini ierr = PetscStrallocpy(product->alg,&alg);CHKERRQ(ierr); 12104222ddf1SHong Zhang ierr = MatProductSetAlgorithm(C,"sorted");CHKERRQ(ierr); /* set algorithm for C = A*Bt */ 121181d82fe4SHong Zhang ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,Bt,fill,C);CHKERRQ(ierr); 12124222ddf1SHong Zhang ierr = MatProductSetAlgorithm(C,alg);CHKERRQ(ierr); /* resume original algorithm for ABt product */ 12136718818eSStefano Zampini ierr = PetscFree(alg);CHKERRQ(ierr); 121481d82fe4SHong Zhang 12152128a86cSHong Zhang /* create a supporting struct for reuse intermidiate dense matrices with matcoloring */ 1216b00a9115SJed Brown ierr = PetscNew(&abt);CHKERRQ(ierr); 12172128a86cSHong Zhang 12186718818eSStefano Zampini product->data = abt; 12196718818eSStefano Zampini product->destroy = MatDestroy_SeqAIJ_MatMatMultTrans; 12206718818eSStefano Zampini 12214222ddf1SHong Zhang C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ; 12222128a86cSHong Zhang 12234222ddf1SHong Zhang abt->usecoloring = PETSC_FALSE; 12244222ddf1SHong Zhang ierr = PetscStrcmp(product->alg,"color",&abt->usecoloring);CHKERRQ(ierr); 122540192850SHong Zhang if (abt->usecoloring) { 1226b9af6bddSHong Zhang /* Create MatTransposeColoring from symbolic C=A*B^T */ 1227b9af6bddSHong Zhang MatTransposeColoring matcoloring; 1228335efc43SPeter Brune MatColoring coloring; 12292128a86cSHong Zhang ISColoring iscoloring; 12302128a86cSHong Zhang Mat Bt_dense,C_dense; 1231e8354b3eSHong Zhang 12324222ddf1SHong Zhang /* inode causes memory problem */ 12334222ddf1SHong Zhang ierr = MatSetOption(C,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 12344222ddf1SHong Zhang 12354222ddf1SHong Zhang ierr = MatColoringCreate(C,&coloring);CHKERRQ(ierr); 1236335efc43SPeter Brune ierr = MatColoringSetDistance(coloring,2);CHKERRQ(ierr); 1237335efc43SPeter Brune ierr = MatColoringSetType(coloring,MATCOLORINGSL);CHKERRQ(ierr); 1238335efc43SPeter Brune ierr = MatColoringSetFromOptions(coloring);CHKERRQ(ierr); 1239335efc43SPeter Brune ierr = MatColoringApply(coloring,&iscoloring);CHKERRQ(ierr); 1240335efc43SPeter Brune ierr = MatColoringDestroy(&coloring);CHKERRQ(ierr); 12414222ddf1SHong Zhang ierr = MatTransposeColoringCreate(C,iscoloring,&matcoloring);CHKERRQ(ierr); 12422205254eSKarl Rupp 124340192850SHong Zhang abt->matcoloring = matcoloring; 12442205254eSKarl Rupp 12452128a86cSHong Zhang ierr = ISColoringDestroy(&iscoloring);CHKERRQ(ierr); 12462128a86cSHong Zhang 12472128a86cSHong Zhang /* Create Bt_dense and C_dense = A*Bt_dense */ 12482128a86cSHong Zhang ierr = MatCreate(PETSC_COMM_SELF,&Bt_dense);CHKERRQ(ierr); 12492128a86cSHong Zhang ierr = MatSetSizes(Bt_dense,A->cmap->n,matcoloring->ncolors,A->cmap->n,matcoloring->ncolors);CHKERRQ(ierr); 12502128a86cSHong Zhang ierr = MatSetType(Bt_dense,MATSEQDENSE);CHKERRQ(ierr); 12510298fd71SBarry Smith ierr = MatSeqDenseSetPreallocation(Bt_dense,NULL);CHKERRQ(ierr); 12522205254eSKarl Rupp 12532128a86cSHong Zhang Bt_dense->assembled = PETSC_TRUE; 125440192850SHong Zhang abt->Bt_den = Bt_dense; 12552128a86cSHong Zhang 12562128a86cSHong Zhang ierr = MatCreate(PETSC_COMM_SELF,&C_dense);CHKERRQ(ierr); 12572128a86cSHong Zhang ierr = MatSetSizes(C_dense,A->rmap->n,matcoloring->ncolors,A->rmap->n,matcoloring->ncolors);CHKERRQ(ierr); 12582128a86cSHong Zhang ierr = MatSetType(C_dense,MATSEQDENSE);CHKERRQ(ierr); 12590298fd71SBarry Smith ierr = MatSeqDenseSetPreallocation(C_dense,NULL);CHKERRQ(ierr); 12602205254eSKarl Rupp 12612128a86cSHong Zhang Bt_dense->assembled = PETSC_TRUE; 126240192850SHong Zhang abt->ABt_den = C_dense; 1263f94ccd6cSHong Zhang 1264f94ccd6cSHong Zhang #if defined(PETSC_USE_INFO) 12651ce71dffSSatish Balay { 12664222ddf1SHong Zhang Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 12674222ddf1SHong Zhang ierr = PetscInfo7(C,"Use coloring of C=A*B^T; B^T: %D %D, Bt_dense: %D,%D; Cnz %D / (cm*ncolors %D) = %g\n",B->cmap->n,B->rmap->n,Bt_dense->rmap->n,Bt_dense->cmap->n,c->nz,A->rmap->n*matcoloring->ncolors,(PetscReal)(c->nz)/(A->rmap->n*matcoloring->ncolors));CHKERRQ(ierr); 12681ce71dffSSatish Balay } 1269f94ccd6cSHong Zhang #endif 12702128a86cSHong Zhang } 1271e99be685SHong Zhang /* clean up */ 1272e99be685SHong Zhang ierr = MatDestroy(&Bt);CHKERRQ(ierr); 1273e99be685SHong Zhang ierr = MatRestoreSymbolicTranspose_SeqAIJ(B,&bti,&btj);CHKERRQ(ierr); 12745df89d91SHong Zhang PetscFunctionReturn(0); 12755df89d91SHong Zhang } 12765df89d91SHong Zhang 12776fc122caSHong Zhang PetscErrorCode MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 12785df89d91SHong Zhang { 12795df89d91SHong Zhang PetscErrorCode ierr; 12805df89d91SHong Zhang Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data; 1281e2cac8caSJed Brown PetscInt *ai =a->i,*aj=a->j,*bi=b->i,*bj=b->j,anzi,bnzj,nexta,nextb,*acol,*bcol,brow; 128281d82fe4SHong Zhang PetscInt cm =C->rmap->n,*ci=c->i,*cj=c->j,i,j,cnzi,*ccol; 12835df89d91SHong Zhang PetscLogDouble flops=0.0; 1284aa1aec99SHong Zhang MatScalar *aa =a->a,*aval,*ba=b->a,*bval,*ca,*cval; 12856718818eSStefano Zampini Mat_MatMatTransMult *abt; 12866718818eSStefano Zampini Mat_Product *product = C->product; 12875df89d91SHong Zhang 12885df89d91SHong Zhang PetscFunctionBegin; 12896718818eSStefano Zampini if (!product) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing product struct"); 12906718818eSStefano Zampini abt = (Mat_MatMatTransMult *)product->data; 12916718818eSStefano Zampini if (!abt) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing product struct"); 129258ed3ceaSHong Zhang /* clear old values in C */ 129358ed3ceaSHong Zhang if (!c->a) { 1294580bdb30SBarry Smith ierr = PetscCalloc1(ci[cm]+1,&ca);CHKERRQ(ierr); 129558ed3ceaSHong Zhang c->a = ca; 129658ed3ceaSHong Zhang c->free_a = PETSC_TRUE; 129758ed3ceaSHong Zhang } else { 129858ed3ceaSHong Zhang ca = c->a; 1299580bdb30SBarry Smith ierr = PetscArrayzero(ca,ci[cm]+1);CHKERRQ(ierr); 130058ed3ceaSHong Zhang } 130158ed3ceaSHong Zhang 130240192850SHong Zhang if (abt->usecoloring) { 130340192850SHong Zhang MatTransposeColoring matcoloring = abt->matcoloring; 130440192850SHong Zhang Mat Bt_dense,C_dense = abt->ABt_den; 1305c8db22f6SHong Zhang 1306b9af6bddSHong Zhang /* Get Bt_dense by Apply MatTransposeColoring to B */ 130740192850SHong Zhang Bt_dense = abt->Bt_den; 1308b9af6bddSHong Zhang ierr = MatTransColoringApplySpToDen(matcoloring,B,Bt_dense);CHKERRQ(ierr); 1309c8db22f6SHong Zhang 1310c8db22f6SHong Zhang /* C_dense = A*Bt_dense */ 13112128a86cSHong Zhang ierr = MatMatMultNumeric_SeqAIJ_SeqDense(A,Bt_dense,C_dense);CHKERRQ(ierr); 1312c8db22f6SHong Zhang 13132128a86cSHong Zhang /* Recover C from C_dense */ 1314b9af6bddSHong Zhang ierr = MatTransColoringApplyDenToSp(matcoloring,C_dense,C);CHKERRQ(ierr); 1315c8db22f6SHong Zhang PetscFunctionReturn(0); 1316c8db22f6SHong Zhang } 1317c8db22f6SHong Zhang 13181fa1209cSHong Zhang for (i=0; i<cm; i++) { 131981d82fe4SHong Zhang anzi = ai[i+1] - ai[i]; 132081d82fe4SHong Zhang acol = aj + ai[i]; 13216973769bSHong Zhang aval = aa + ai[i]; 13221fa1209cSHong Zhang cnzi = ci[i+1] - ci[i]; 13231fa1209cSHong Zhang ccol = cj + ci[i]; 13246973769bSHong Zhang cval = ca + ci[i]; 13251fa1209cSHong Zhang for (j=0; j<cnzi; j++) { 132681d82fe4SHong Zhang brow = ccol[j]; 132781d82fe4SHong Zhang bnzj = bi[brow+1] - bi[brow]; 132881d82fe4SHong Zhang bcol = bj + bi[brow]; 13296973769bSHong Zhang bval = ba + bi[brow]; 13306973769bSHong Zhang 133181d82fe4SHong Zhang /* perform sparse inner-product c(i,j)=A[i,:]*B[j,:]^T */ 133281d82fe4SHong Zhang nexta = 0; nextb = 0; 133381d82fe4SHong Zhang while (nexta<anzi && nextb<bnzj) { 13347b6d5e96SMark Adams while (nexta < anzi && acol[nexta] < bcol[nextb]) nexta++; 133581d82fe4SHong Zhang if (nexta == anzi) break; 13367b6d5e96SMark Adams while (nextb < bnzj && acol[nexta] > bcol[nextb]) nextb++; 133781d82fe4SHong Zhang if (nextb == bnzj) break; 133881d82fe4SHong Zhang if (acol[nexta] == bcol[nextb]) { 13396973769bSHong Zhang cval[j] += aval[nexta]*bval[nextb]; 134081d82fe4SHong Zhang nexta++; nextb++; 134181d82fe4SHong Zhang flops += 2; 13421fa1209cSHong Zhang } 13431fa1209cSHong Zhang } 134481d82fe4SHong Zhang } 134581d82fe4SHong Zhang } 134681d82fe4SHong Zhang ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 134781d82fe4SHong Zhang ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 134881d82fe4SHong Zhang ierr = PetscLogFlops(flops);CHKERRQ(ierr); 13495df89d91SHong Zhang PetscFunctionReturn(0); 13505df89d91SHong Zhang } 13515df89d91SHong Zhang 13526718818eSStefano Zampini PetscErrorCode MatDestroy_SeqAIJ_MatTransMatMult(void *data) 13536d373c3eSHong Zhang { 13546d373c3eSHong Zhang PetscErrorCode ierr; 13556718818eSStefano Zampini Mat_MatTransMatMult *atb = (Mat_MatTransMatMult*)data; 13566d373c3eSHong Zhang 13576d373c3eSHong Zhang PetscFunctionBegin; 13586d373c3eSHong Zhang ierr = MatDestroy(&atb->At);CHKERRQ(ierr); 13596718818eSStefano Zampini if (atb->destroy) { 13606718818eSStefano Zampini ierr = (*atb->destroy)(atb->data);CHKERRQ(ierr); 13616473ade5SStefano Zampini } 13626d373c3eSHong Zhang ierr = PetscFree(atb);CHKERRQ(ierr); 13636d373c3eSHong Zhang PetscFunctionReturn(0); 13646d373c3eSHong Zhang } 13656d373c3eSHong Zhang 13664222ddf1SHong Zhang PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat C) 13675df89d91SHong Zhang { 1368bc011b1eSHong Zhang PetscErrorCode ierr; 1369089a957eSStefano Zampini Mat At = NULL; 137038baddfdSBarry Smith PetscInt *ati,*atj; 13714222ddf1SHong Zhang Mat_Product *product = C->product; 1372089a957eSStefano Zampini PetscBool flg,def,square; 1373bc011b1eSHong Zhang 1374bc011b1eSHong Zhang PetscFunctionBegin; 1375089a957eSStefano Zampini MatCheckProduct(C,4); 1376089a957eSStefano Zampini square = (PetscBool)(A == B && A->symmetric && A->symmetric_set); 13774222ddf1SHong Zhang /* outerproduct */ 1378089a957eSStefano Zampini ierr = PetscStrcmp(product->alg,"outerproduct",&flg);CHKERRQ(ierr); 13794222ddf1SHong Zhang if (flg) { 1380bc011b1eSHong Zhang /* create symbolic At */ 1381089a957eSStefano Zampini if (!square) { 1382bc011b1eSHong Zhang ierr = MatGetSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr); 13830298fd71SBarry Smith ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,A->cmap->n,A->rmap->n,ati,atj,NULL,&At);CHKERRQ(ierr); 138433d57670SJed Brown ierr = MatSetBlockSizes(At,PetscAbs(A->cmap->bs),PetscAbs(B->cmap->bs));CHKERRQ(ierr); 138504b858e0SBarry Smith ierr = MatSetType(At,((PetscObject)A)->type_name);CHKERRQ(ierr); 1386089a957eSStefano Zampini } 1387bc011b1eSHong Zhang /* get symbolic C=At*B */ 13887a3c3d58SStefano Zampini ierr = MatProductSetAlgorithm(C,"sorted");CHKERRQ(ierr); 1389089a957eSStefano Zampini ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(square ? A : At,B,fill,C);CHKERRQ(ierr); 1390bc011b1eSHong Zhang 1391bc011b1eSHong Zhang /* clean up */ 1392089a957eSStefano Zampini if (!square) { 13936bf464f9SBarry Smith ierr = MatDestroy(&At);CHKERRQ(ierr); 1394bc011b1eSHong Zhang ierr = MatRestoreSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr); 1395089a957eSStefano Zampini } 13966d373c3eSHong Zhang 13974222ddf1SHong Zhang C->ops->mattransposemultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ; /* outerproduct */ 13987a3c3d58SStefano Zampini ierr = MatProductSetAlgorithm(C,"outerproduct");CHKERRQ(ierr); 13994222ddf1SHong Zhang PetscFunctionReturn(0); 14004222ddf1SHong Zhang } 14014222ddf1SHong Zhang 14024222ddf1SHong Zhang /* matmatmult */ 1403089a957eSStefano Zampini ierr = PetscStrcmp(product->alg,"default",&def);CHKERRQ(ierr); 1404089a957eSStefano Zampini ierr = PetscStrcmp(product->alg,"at*b",&flg);CHKERRQ(ierr); 1405089a957eSStefano Zampini if (flg || def) { 14064222ddf1SHong Zhang Mat_MatTransMatMult *atb; 14074222ddf1SHong Zhang 14086718818eSStefano Zampini if (product->data) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Extra product struct not empty"); 14094222ddf1SHong Zhang ierr = PetscNew(&atb);CHKERRQ(ierr); 1410089a957eSStefano Zampini if (!square) { 14114222ddf1SHong Zhang ierr = MatTranspose_SeqAIJ(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 1412089a957eSStefano Zampini } 14137a3c3d58SStefano Zampini ierr = MatProductSetAlgorithm(C,"sorted");CHKERRQ(ierr); 1414089a957eSStefano Zampini ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(square ? A : At,B,fill,C);CHKERRQ(ierr); 14156718818eSStefano Zampini ierr = MatProductSetAlgorithm(C,"at*b");CHKERRQ(ierr); 14166718818eSStefano Zampini product->data = atb; 14176718818eSStefano Zampini product->destroy = MatDestroy_SeqAIJ_MatTransMatMult; 14184222ddf1SHong Zhang atb->At = At; 14194222ddf1SHong Zhang atb->updateAt = PETSC_FALSE; /* because At is computed here */ 14204222ddf1SHong Zhang 14214222ddf1SHong Zhang C->ops->mattransposemultnumeric = NULL; /* see MatProductNumeric_AtB_SeqAIJ_SeqAIJ */ 14224222ddf1SHong Zhang PetscFunctionReturn(0); 14234222ddf1SHong Zhang } 14244222ddf1SHong Zhang 14254222ddf1SHong Zhang SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product Algorithm is not supported"); 1426bc011b1eSHong Zhang } 1427bc011b1eSHong Zhang 142875648e8dSHong Zhang PetscErrorCode MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 1429bc011b1eSHong Zhang { 1430bc011b1eSHong Zhang PetscErrorCode ierr; 14310fbc74f4SHong Zhang Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data; 1432d0f46423SBarry Smith PetscInt am=A->rmap->n,anzi,*ai=a->i,*aj=a->j,*bi=b->i,*bj,bnzi,nextb; 1433d0f46423SBarry Smith PetscInt cm=C->rmap->n,*ci=c->i,*cj=c->j,crow,*cjj,i,j,k; 1434d13dce4bSSatish Balay PetscLogDouble flops=0.0; 1435aa1aec99SHong Zhang MatScalar *aa=a->a,*ba,*ca,*caj; 1436bc011b1eSHong Zhang 1437bc011b1eSHong Zhang PetscFunctionBegin; 1438aa1aec99SHong Zhang if (!c->a) { 1439580bdb30SBarry Smith ierr = PetscCalloc1(ci[cm]+1,&ca);CHKERRQ(ierr); 14402205254eSKarl Rupp 1441aa1aec99SHong Zhang c->a = ca; 1442aa1aec99SHong Zhang c->free_a = PETSC_TRUE; 1443aa1aec99SHong Zhang } else { 1444aa1aec99SHong Zhang ca = c->a; 1445580bdb30SBarry Smith ierr = PetscArrayzero(ca,ci[cm]);CHKERRQ(ierr); 1446aa1aec99SHong Zhang } 1447bc011b1eSHong Zhang 1448bc011b1eSHong Zhang /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */ 1449bc011b1eSHong Zhang for (i=0; i<am; i++) { 1450bc011b1eSHong Zhang bj = b->j + bi[i]; 1451bc011b1eSHong Zhang ba = b->a + bi[i]; 1452bc011b1eSHong Zhang bnzi = bi[i+1] - bi[i]; 1453bc011b1eSHong Zhang anzi = ai[i+1] - ai[i]; 1454bc011b1eSHong Zhang for (j=0; j<anzi; j++) { 1455bc011b1eSHong Zhang nextb = 0; 14560fbc74f4SHong Zhang crow = *aj++; 1457bc011b1eSHong Zhang cjj = cj + ci[crow]; 1458bc011b1eSHong Zhang caj = ca + ci[crow]; 1459bc011b1eSHong Zhang /* perform sparse axpy operation. Note cjj includes bj. */ 1460bc011b1eSHong Zhang for (k=0; nextb<bnzi; k++) { 14610fbc74f4SHong Zhang if (cjj[k] == *(bj+nextb)) { /* ccol == bcol */ 14620fbc74f4SHong Zhang caj[k] += (*aa)*(*(ba+nextb)); 1463bc011b1eSHong Zhang nextb++; 1464bc011b1eSHong Zhang } 1465bc011b1eSHong Zhang } 1466bc011b1eSHong Zhang flops += 2*bnzi; 14670fbc74f4SHong Zhang aa++; 1468bc011b1eSHong Zhang } 1469bc011b1eSHong Zhang } 1470bc011b1eSHong Zhang 1471bc011b1eSHong Zhang /* Assemble the final matrix and clean up */ 1472bc011b1eSHong Zhang ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1473bc011b1eSHong Zhang ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1474bc011b1eSHong Zhang ierr = PetscLogFlops(flops);CHKERRQ(ierr); 1475bc011b1eSHong Zhang PetscFunctionReturn(0); 1476bc011b1eSHong Zhang } 14779123193aSHong Zhang 14784222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqDense(Mat A,Mat B,PetscReal fill,Mat C) 14799123193aSHong Zhang { 14809123193aSHong Zhang PetscErrorCode ierr; 14819123193aSHong Zhang 14829123193aSHong Zhang PetscFunctionBegin; 14835a586d82SBarry Smith ierr = MatMatMultSymbolic_SeqDense_SeqDense(A,B,0.0,C);CHKERRQ(ierr); 14844222ddf1SHong Zhang C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqDense; 14859123193aSHong Zhang PetscFunctionReturn(0); 14869123193aSHong Zhang } 14879123193aSHong Zhang 148893aa15f2SStefano Zampini PETSC_INTERN PetscErrorCode MatMatMultNumericAdd_SeqAIJ_SeqDense(Mat A,Mat B,Mat C,const PetscBool add) 14899123193aSHong Zhang { 1490f32d5d43SBarry Smith Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1491612438f1SStefano Zampini Mat_SeqDense *bd=(Mat_SeqDense*)B->data; 14921ca9667aSStefano Zampini Mat_SeqDense *cd=(Mat_SeqDense*)C->data; 14939123193aSHong Zhang PetscErrorCode ierr; 14941ca9667aSStefano Zampini PetscScalar *c,r1,r2,r3,r4,*c1,*c2,*c3,*c4; 1495a4af7ca8SStefano Zampini const PetscScalar *aa,*b,*b1,*b2,*b3,*b4,*av; 1496daf57211SHong Zhang const PetscInt *aj; 1497612438f1SStefano Zampini PetscInt cm=C->rmap->n,cn=B->cmap->n,bm=bd->lda,am=A->rmap->n; 14981ca9667aSStefano Zampini PetscInt clda=cd->lda; 14991ca9667aSStefano Zampini PetscInt am4=4*clda,bm4=4*bm,col,i,j,n; 15009123193aSHong Zhang 15019123193aSHong Zhang PetscFunctionBegin; 1502f32d5d43SBarry Smith if (!cm || !cn) PetscFunctionReturn(0); 1503a4af7ca8SStefano Zampini ierr = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr); 150493aa15f2SStefano Zampini if (add) { 15058c778c55SBarry Smith ierr = MatDenseGetArray(C,&c);CHKERRQ(ierr); 150693aa15f2SStefano Zampini } else { 150793aa15f2SStefano Zampini ierr = MatDenseGetArrayWrite(C,&c);CHKERRQ(ierr); 150893aa15f2SStefano Zampini } 1509a4af7ca8SStefano Zampini ierr = MatDenseGetArrayRead(B,&b);CHKERRQ(ierr); 1510f32d5d43SBarry Smith b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm; 15111ca9667aSStefano Zampini c1 = c; c2 = c1 + clda; c3 = c2 + clda; c4 = c3 + clda; 15121ca9667aSStefano Zampini for (col=0; col<(cn/4)*4; col += 4) { /* over columns of C */ 15131ca9667aSStefano Zampini for (i=0; i<am; i++) { /* over rows of A in those columns */ 1514f32d5d43SBarry Smith r1 = r2 = r3 = r4 = 0.0; 1515f32d5d43SBarry Smith n = a->i[i+1] - a->i[i]; 1516f32d5d43SBarry Smith aj = a->j + a->i[i]; 1517a4af7ca8SStefano Zampini aa = av + a->i[i]; 1518f32d5d43SBarry Smith for (j=0; j<n; j++) { 15191ca9667aSStefano Zampini const PetscScalar aatmp = aa[j]; 15201ca9667aSStefano Zampini const PetscInt ajtmp = aj[j]; 1521730858b9SHong Zhang r1 += aatmp*b1[ajtmp]; 1522730858b9SHong Zhang r2 += aatmp*b2[ajtmp]; 1523730858b9SHong Zhang r3 += aatmp*b3[ajtmp]; 1524730858b9SHong Zhang r4 += aatmp*b4[ajtmp]; 15259123193aSHong Zhang } 152693aa15f2SStefano Zampini if (add) { 152787753ddeSHong Zhang c1[i] += r1; 152887753ddeSHong Zhang c2[i] += r2; 152987753ddeSHong Zhang c3[i] += r3; 153087753ddeSHong Zhang c4[i] += r4; 153193aa15f2SStefano Zampini } else { 153293aa15f2SStefano Zampini c1[i] = r1; 153393aa15f2SStefano Zampini c2[i] = r2; 153493aa15f2SStefano Zampini c3[i] = r3; 153593aa15f2SStefano Zampini c4[i] = r4; 153693aa15f2SStefano Zampini } 1537f32d5d43SBarry Smith } 1538730858b9SHong Zhang b1 += bm4; b2 += bm4; b3 += bm4; b4 += bm4; 1539730858b9SHong Zhang c1 += am4; c2 += am4; c3 += am4; c4 += am4; 1540f32d5d43SBarry Smith } 154193aa15f2SStefano Zampini /* process remaining columns */ 154293aa15f2SStefano Zampini if (col != cn) { 154393aa15f2SStefano Zampini PetscInt rc = cn-col; 154493aa15f2SStefano Zampini 154593aa15f2SStefano Zampini if (rc == 1) { 154693aa15f2SStefano Zampini for (i=0; i<am; i++) { 1547f32d5d43SBarry Smith r1 = 0.0; 1548f32d5d43SBarry Smith n = a->i[i+1] - a->i[i]; 1549f32d5d43SBarry Smith aj = a->j + a->i[i]; 1550a4af7ca8SStefano Zampini aa = av + a->i[i]; 155193aa15f2SStefano Zampini for (j=0; j<n; j++) r1 += aa[j]*b1[aj[j]]; 155293aa15f2SStefano Zampini if (add) c1[i] += r1; 155393aa15f2SStefano Zampini else c1[i] = r1; 155493aa15f2SStefano Zampini } 155593aa15f2SStefano Zampini } else if (rc == 2) { 155693aa15f2SStefano Zampini for (i=0; i<am; i++) { 155793aa15f2SStefano Zampini r1 = r2 = 0.0; 155893aa15f2SStefano Zampini n = a->i[i+1] - a->i[i]; 155993aa15f2SStefano Zampini aj = a->j + a->i[i]; 156093aa15f2SStefano Zampini aa = av + a->i[i]; 1561f32d5d43SBarry Smith for (j=0; j<n; j++) { 156293aa15f2SStefano Zampini const PetscScalar aatmp = aa[j]; 156393aa15f2SStefano Zampini const PetscInt ajtmp = aj[j]; 156493aa15f2SStefano Zampini r1 += aatmp*b1[ajtmp]; 156593aa15f2SStefano Zampini r2 += aatmp*b2[ajtmp]; 1566f32d5d43SBarry Smith } 156793aa15f2SStefano Zampini if (add) { 156887753ddeSHong Zhang c1[i] += r1; 156993aa15f2SStefano Zampini c2[i] += r2; 157093aa15f2SStefano Zampini } else { 157193aa15f2SStefano Zampini c1[i] = r1; 157293aa15f2SStefano Zampini c2[i] = r2; 1573f32d5d43SBarry Smith } 157493aa15f2SStefano Zampini } 157593aa15f2SStefano Zampini } else { 157693aa15f2SStefano Zampini for (i=0; i<am; i++) { 157793aa15f2SStefano Zampini r1 = r2 = r3 = 0.0; 157893aa15f2SStefano Zampini n = a->i[i+1] - a->i[i]; 157993aa15f2SStefano Zampini aj = a->j + a->i[i]; 158093aa15f2SStefano Zampini aa = av + a->i[i]; 158193aa15f2SStefano Zampini for (j=0; j<n; j++) { 158293aa15f2SStefano Zampini const PetscScalar aatmp = aa[j]; 158393aa15f2SStefano Zampini const PetscInt ajtmp = aj[j]; 158493aa15f2SStefano Zampini r1 += aatmp*b1[ajtmp]; 158593aa15f2SStefano Zampini r2 += aatmp*b2[ajtmp]; 158693aa15f2SStefano Zampini r3 += aatmp*b3[ajtmp]; 158793aa15f2SStefano Zampini } 158893aa15f2SStefano Zampini if (add) { 158993aa15f2SStefano Zampini c1[i] += r1; 159093aa15f2SStefano Zampini c2[i] += r2; 159193aa15f2SStefano Zampini c3[i] += r3; 159293aa15f2SStefano Zampini } else { 159393aa15f2SStefano Zampini c1[i] = r1; 159493aa15f2SStefano Zampini c2[i] = r2; 159593aa15f2SStefano Zampini c3[i] = r3; 159693aa15f2SStefano Zampini } 159793aa15f2SStefano Zampini } 159893aa15f2SStefano Zampini } 1599f32d5d43SBarry Smith } 1600dc0b31edSSatish Balay ierr = PetscLogFlops(cn*(2.0*a->nz));CHKERRQ(ierr); 160193aa15f2SStefano Zampini if (add) { 16028c778c55SBarry Smith ierr = MatDenseRestoreArray(C,&c);CHKERRQ(ierr); 160393aa15f2SStefano Zampini } else { 160493aa15f2SStefano Zampini ierr = MatDenseRestoreArrayWrite(C,&c);CHKERRQ(ierr); 160593aa15f2SStefano Zampini } 1606a4af7ca8SStefano Zampini ierr = MatDenseRestoreArrayRead(B,&b);CHKERRQ(ierr); 1607a4af7ca8SStefano Zampini ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr); 1608f32d5d43SBarry Smith PetscFunctionReturn(0); 1609f32d5d43SBarry Smith } 1610f32d5d43SBarry Smith 161187753ddeSHong Zhang PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqDense(Mat A,Mat B,Mat C) 1612f32d5d43SBarry Smith { 1613f32d5d43SBarry Smith PetscErrorCode ierr; 1614f32d5d43SBarry Smith 1615f32d5d43SBarry Smith PetscFunctionBegin; 161687753ddeSHong Zhang if (B->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in A %D not equal rows in B %D\n",A->cmap->n,B->rmap->n); 161787753ddeSHong Zhang if (A->rmap->n != C->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows in C %D not equal rows in A %D\n",C->rmap->n,A->rmap->n); 161887753ddeSHong Zhang if (B->cmap->n != C->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in B %D not equal columns in C %D\n",B->cmap->n,C->cmap->n); 16194324174eSBarry Smith 162093aa15f2SStefano Zampini ierr = MatMatMultNumericAdd_SeqAIJ_SeqDense(A,B,C,PETSC_FALSE);CHKERRQ(ierr); 16219123193aSHong Zhang PetscFunctionReturn(0); 16229123193aSHong Zhang } 1623b1683b59SHong Zhang 16244222ddf1SHong Zhang /* ------------------------------------------------------- */ 16254222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense_AB(Mat C) 16264222ddf1SHong Zhang { 16274222ddf1SHong Zhang PetscFunctionBegin; 16284222ddf1SHong Zhang C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqDense; 16294222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_AB; 16304222ddf1SHong Zhang PetscFunctionReturn(0); 16314222ddf1SHong Zhang } 16324222ddf1SHong Zhang 16336718818eSStefano Zampini PETSC_INTERN PetscErrorCode MatTMatTMultSymbolic_SeqAIJ_SeqDense(Mat,Mat,PetscReal,Mat); 16346718818eSStefano Zampini 16354222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense_AtB(Mat C) 16364222ddf1SHong Zhang { 16374222ddf1SHong Zhang PetscFunctionBegin; 16386718818eSStefano Zampini C->ops->transposematmultsymbolic = MatTMatTMultSymbolic_SeqAIJ_SeqDense; 16394222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_AtB; 16406718818eSStefano Zampini PetscFunctionReturn(0); 16416718818eSStefano Zampini } 16426718818eSStefano Zampini 16436718818eSStefano Zampini static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense_ABt(Mat C) 16446718818eSStefano Zampini { 16456718818eSStefano Zampini PetscFunctionBegin; 16466718818eSStefano Zampini C->ops->mattransposemultsymbolic = MatTMatTMultSymbolic_SeqAIJ_SeqDense; 16476718818eSStefano Zampini C->ops->productsymbolic = MatProductSymbolic_ABt; 16484222ddf1SHong Zhang PetscFunctionReturn(0); 16494222ddf1SHong Zhang } 16504222ddf1SHong Zhang 16514222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense(Mat C) 16524222ddf1SHong Zhang { 16534222ddf1SHong Zhang PetscErrorCode ierr; 16544222ddf1SHong Zhang Mat_Product *product = C->product; 16554222ddf1SHong Zhang 16564222ddf1SHong Zhang PetscFunctionBegin; 16574222ddf1SHong Zhang switch (product->type) { 16584222ddf1SHong Zhang case MATPRODUCT_AB: 16594222ddf1SHong Zhang ierr = MatProductSetFromOptions_SeqAIJ_SeqDense_AB(C);CHKERRQ(ierr); 16604222ddf1SHong Zhang break; 16614222ddf1SHong Zhang case MATPRODUCT_AtB: 16624222ddf1SHong Zhang ierr = MatProductSetFromOptions_SeqAIJ_SeqDense_AtB(C);CHKERRQ(ierr); 16634222ddf1SHong Zhang break; 16646718818eSStefano Zampini case MATPRODUCT_ABt: 16656718818eSStefano Zampini ierr = MatProductSetFromOptions_SeqAIJ_SeqDense_ABt(C);CHKERRQ(ierr); 16664222ddf1SHong Zhang break; 16674222ddf1SHong Zhang default: 16686718818eSStefano Zampini break; 16694222ddf1SHong Zhang } 16704222ddf1SHong Zhang PetscFunctionReturn(0); 16714222ddf1SHong Zhang } 16724222ddf1SHong Zhang /* ------------------------------------------------------- */ 16734222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqXBAIJ_SeqDense_AB(Mat C) 16744222ddf1SHong Zhang { 16754222ddf1SHong Zhang PetscErrorCode ierr; 16764222ddf1SHong Zhang Mat_Product *product = C->product; 16774222ddf1SHong Zhang Mat A = product->A; 16784222ddf1SHong Zhang PetscBool baij; 16794222ddf1SHong Zhang 16804222ddf1SHong Zhang PetscFunctionBegin; 16814222ddf1SHong Zhang ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&baij);CHKERRQ(ierr); 16824222ddf1SHong Zhang if (!baij) { /* A is seqsbaij */ 16834222ddf1SHong Zhang PetscBool sbaij; 16844222ddf1SHong Zhang ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&sbaij);CHKERRQ(ierr); 16854222ddf1SHong Zhang if (!sbaij) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_ARG_WRONGSTATE,"Mat must be either seqbaij or seqsbaij format"); 16864222ddf1SHong Zhang 16874222ddf1SHong Zhang C->ops->matmultsymbolic = MatMatMultSymbolic_SeqSBAIJ_SeqDense; 16884222ddf1SHong Zhang } else { /* A is seqbaij */ 16894222ddf1SHong Zhang C->ops->matmultsymbolic = MatMatMultSymbolic_SeqBAIJ_SeqDense; 16904222ddf1SHong Zhang } 16914222ddf1SHong Zhang 16924222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_AB; 16934222ddf1SHong Zhang PetscFunctionReturn(0); 16944222ddf1SHong Zhang } 16954222ddf1SHong Zhang 16964222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqXBAIJ_SeqDense(Mat C) 16974222ddf1SHong Zhang { 16984222ddf1SHong Zhang PetscErrorCode ierr; 16994222ddf1SHong Zhang Mat_Product *product = C->product; 17004222ddf1SHong Zhang 17014222ddf1SHong Zhang PetscFunctionBegin; 17026718818eSStefano Zampini MatCheckProduct(C,1); 17036718818eSStefano Zampini if (!product->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing A"); 17046718818eSStefano Zampini if (product->type == MATPRODUCT_AB || (product->type == MATPRODUCT_AtB && product->A->symmetric)) { 17054222ddf1SHong Zhang ierr = MatProductSetFromOptions_SeqXBAIJ_SeqDense_AB(C);CHKERRQ(ierr); 17066718818eSStefano Zampini } 17074222ddf1SHong Zhang PetscFunctionReturn(0); 17084222ddf1SHong Zhang } 17096718818eSStefano Zampini 17104222ddf1SHong Zhang /* ------------------------------------------------------- */ 17114222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqDense_SeqAIJ_AB(Mat C) 17124222ddf1SHong Zhang { 17134222ddf1SHong Zhang PetscFunctionBegin; 17144222ddf1SHong Zhang C->ops->matmultsymbolic = MatMatMultSymbolic_SeqDense_SeqAIJ; 17154222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_AB; 17164222ddf1SHong Zhang PetscFunctionReturn(0); 17174222ddf1SHong Zhang } 17184222ddf1SHong Zhang 17194222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqDense_SeqAIJ(Mat C) 17204222ddf1SHong Zhang { 17214222ddf1SHong Zhang PetscErrorCode ierr; 17224222ddf1SHong Zhang Mat_Product *product = C->product; 17234222ddf1SHong Zhang 17244222ddf1SHong Zhang PetscFunctionBegin; 17254222ddf1SHong Zhang if (product->type == MATPRODUCT_AB) { 17264222ddf1SHong Zhang ierr = MatProductSetFromOptions_SeqDense_SeqAIJ_AB(C);CHKERRQ(ierr); 17276718818eSStefano Zampini } 17284222ddf1SHong Zhang PetscFunctionReturn(0); 17294222ddf1SHong Zhang } 17304222ddf1SHong Zhang /* ------------------------------------------------------- */ 17314222ddf1SHong Zhang 1732b9af6bddSHong Zhang PetscErrorCode MatTransColoringApplySpToDen_SeqAIJ(MatTransposeColoring coloring,Mat B,Mat Btdense) 1733c8db22f6SHong Zhang { 1734c8db22f6SHong Zhang PetscErrorCode ierr; 17352f5f1f90SHong Zhang Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 17362f5f1f90SHong Zhang Mat_SeqDense *btdense = (Mat_SeqDense*)Btdense->data; 17372f5f1f90SHong Zhang PetscInt *bi = b->i,*bj=b->j; 17382f5f1f90SHong Zhang PetscInt m = Btdense->rmap->n,n=Btdense->cmap->n,j,k,l,col,anz,*btcol,brow,ncolumns; 17392f5f1f90SHong Zhang MatScalar *btval,*btval_den,*ba=b->a; 17402f5f1f90SHong Zhang PetscInt *columns=coloring->columns,*colorforcol=coloring->colorforcol,ncolors=coloring->ncolors; 1741c8db22f6SHong Zhang 1742c8db22f6SHong Zhang PetscFunctionBegin; 17432f5f1f90SHong Zhang btval_den=btdense->v; 1744580bdb30SBarry Smith ierr = PetscArrayzero(btval_den,m*n);CHKERRQ(ierr); 17452f5f1f90SHong Zhang for (k=0; k<ncolors; k++) { 17462f5f1f90SHong Zhang ncolumns = coloring->ncolumns[k]; 17472f5f1f90SHong Zhang for (l=0; l<ncolumns; l++) { /* insert a row of B to a column of Btdense */ 1748d2853540SHong Zhang col = *(columns + colorforcol[k] + l); 17492f5f1f90SHong Zhang btcol = bj + bi[col]; 17502f5f1f90SHong Zhang btval = ba + bi[col]; 17512f5f1f90SHong Zhang anz = bi[col+1] - bi[col]; 1752d2853540SHong Zhang for (j=0; j<anz; j++) { 17532f5f1f90SHong Zhang brow = btcol[j]; 17542f5f1f90SHong Zhang btval_den[brow] = btval[j]; 1755c8db22f6SHong Zhang } 1756c8db22f6SHong Zhang } 17572f5f1f90SHong Zhang btval_den += m; 1758c8db22f6SHong Zhang } 1759c8db22f6SHong Zhang PetscFunctionReturn(0); 1760c8db22f6SHong Zhang } 1761c8db22f6SHong Zhang 1762b9af6bddSHong Zhang PetscErrorCode MatTransColoringApplyDenToSp_SeqAIJ(MatTransposeColoring matcoloring,Mat Cden,Mat Csp) 17638972f759SHong Zhang { 17648972f759SHong Zhang PetscErrorCode ierr; 1765b2d2b04fSHong Zhang Mat_SeqAIJ *csp = (Mat_SeqAIJ*)Csp->data; 17661683a169SBarry Smith const PetscScalar *ca_den,*ca_den_ptr; 17671683a169SBarry Smith PetscScalar *ca=csp->a; 1768f99a636bSHong Zhang PetscInt k,l,m=Cden->rmap->n,ncolors=matcoloring->ncolors; 1769e88777f2SHong Zhang PetscInt brows=matcoloring->brows,*den2sp=matcoloring->den2sp; 1770077f23c2SHong Zhang PetscInt nrows,*row,*idx; 1771077f23c2SHong Zhang PetscInt *rows=matcoloring->rows,*colorforrow=matcoloring->colorforrow; 17728972f759SHong Zhang 17738972f759SHong Zhang PetscFunctionBegin; 17741683a169SBarry Smith ierr = MatDenseGetArrayRead(Cden,&ca_den);CHKERRQ(ierr); 1775f99a636bSHong Zhang 1776077f23c2SHong Zhang if (brows > 0) { 1777077f23c2SHong Zhang PetscInt *lstart,row_end,row_start; 1778980ae229SHong Zhang lstart = matcoloring->lstart; 1779580bdb30SBarry Smith ierr = PetscArrayzero(lstart,ncolors);CHKERRQ(ierr); 1780eeb4fd02SHong Zhang 1781077f23c2SHong Zhang row_end = brows; 1782eeb4fd02SHong Zhang if (row_end > m) row_end = m; 1783077f23c2SHong Zhang for (row_start=0; row_start<m; row_start+=brows) { /* loop over row blocks of Csp */ 1784077f23c2SHong Zhang ca_den_ptr = ca_den; 1785980ae229SHong Zhang for (k=0; k<ncolors; k++) { /* loop over colors (columns of Cden) */ 1786eeb4fd02SHong Zhang nrows = matcoloring->nrows[k]; 1787eeb4fd02SHong Zhang row = rows + colorforrow[k]; 1788eeb4fd02SHong Zhang idx = den2sp + colorforrow[k]; 1789eeb4fd02SHong Zhang for (l=lstart[k]; l<nrows; l++) { 1790eeb4fd02SHong Zhang if (row[l] >= row_end) { 1791eeb4fd02SHong Zhang lstart[k] = l; 1792eeb4fd02SHong Zhang break; 1793eeb4fd02SHong Zhang } else { 1794077f23c2SHong Zhang ca[idx[l]] = ca_den_ptr[row[l]]; 1795eeb4fd02SHong Zhang } 1796eeb4fd02SHong Zhang } 1797077f23c2SHong Zhang ca_den_ptr += m; 1798eeb4fd02SHong Zhang } 1799077f23c2SHong Zhang row_end += brows; 1800eeb4fd02SHong Zhang if (row_end > m) row_end = m; 1801eeb4fd02SHong Zhang } 1802077f23c2SHong Zhang } else { /* non-blocked impl: loop over columns of Csp - slow if Csp is large */ 1803077f23c2SHong Zhang ca_den_ptr = ca_den; 1804077f23c2SHong Zhang for (k=0; k<ncolors; k++) { 1805077f23c2SHong Zhang nrows = matcoloring->nrows[k]; 1806077f23c2SHong Zhang row = rows + colorforrow[k]; 1807077f23c2SHong Zhang idx = den2sp + colorforrow[k]; 1808077f23c2SHong Zhang for (l=0; l<nrows; l++) { 1809077f23c2SHong Zhang ca[idx[l]] = ca_den_ptr[row[l]]; 1810077f23c2SHong Zhang } 1811077f23c2SHong Zhang ca_den_ptr += m; 1812077f23c2SHong Zhang } 1813f99a636bSHong Zhang } 1814f99a636bSHong Zhang 18151683a169SBarry Smith ierr = MatDenseRestoreArrayRead(Cden,&ca_den);CHKERRQ(ierr); 1816f99a636bSHong Zhang #if defined(PETSC_USE_INFO) 1817077f23c2SHong Zhang if (matcoloring->brows > 0) { 1818f99a636bSHong Zhang ierr = PetscInfo1(Csp,"Loop over %D row blocks for den2sp\n",brows);CHKERRQ(ierr); 1819e88777f2SHong Zhang } else { 1820077f23c2SHong Zhang ierr = PetscInfo(Csp,"Loop over colors/columns of Cden, inefficient for large sparse matrix product \n");CHKERRQ(ierr); 1821e88777f2SHong Zhang } 1822f99a636bSHong Zhang #endif 18238972f759SHong Zhang PetscFunctionReturn(0); 18248972f759SHong Zhang } 18258972f759SHong Zhang 1826b9af6bddSHong Zhang PetscErrorCode MatTransposeColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatTransposeColoring c) 1827b1683b59SHong Zhang { 1828b1683b59SHong Zhang PetscErrorCode ierr; 1829e88777f2SHong Zhang PetscInt i,n,nrows,Nbs,j,k,m,ncols,col,cm; 18301a83f524SJed Brown const PetscInt *is,*ci,*cj,*row_idx; 1831b28d80bdSHong Zhang PetscInt nis = iscoloring->n,*rowhit,bs = 1; 1832b1683b59SHong Zhang IS *isa; 1833b28d80bdSHong Zhang Mat_SeqAIJ *csp = (Mat_SeqAIJ*)mat->data; 1834e88777f2SHong Zhang PetscInt *colorforrow,*rows,*rows_i,*idxhit,*spidx,*den2sp,*den2sp_i; 1835e88777f2SHong Zhang PetscInt *colorforcol,*columns,*columns_i,brows; 1836e88777f2SHong Zhang PetscBool flg; 1837b1683b59SHong Zhang 1838b1683b59SHong Zhang PetscFunctionBegin; 1839071fcb05SBarry Smith ierr = ISColoringGetIS(iscoloring,PETSC_USE_POINTER,PETSC_IGNORE,&isa);CHKERRQ(ierr); 1840e99be685SHong Zhang 18417ecccc15SHong Zhang /* bs >1 is not being tested yet! */ 1842e88777f2SHong Zhang Nbs = mat->cmap->N/bs; 1843b1683b59SHong Zhang c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 1844e88777f2SHong Zhang c->N = Nbs; 1845e88777f2SHong Zhang c->m = c->M; 1846b1683b59SHong Zhang c->rstart = 0; 1847e88777f2SHong Zhang c->brows = 100; 1848b1683b59SHong Zhang 1849b1683b59SHong Zhang c->ncolors = nis; 1850dcca6d9dSJed Brown ierr = PetscMalloc3(nis,&c->ncolumns,nis,&c->nrows,nis+1,&colorforrow);CHKERRQ(ierr); 1851854ce69bSBarry Smith ierr = PetscMalloc1(csp->nz+1,&rows);CHKERRQ(ierr); 1852854ce69bSBarry Smith ierr = PetscMalloc1(csp->nz+1,&den2sp);CHKERRQ(ierr); 1853e88777f2SHong Zhang 1854e88777f2SHong Zhang brows = c->brows; 1855c5929fdfSBarry Smith ierr = PetscOptionsGetInt(NULL,NULL,"-matden2sp_brows",&brows,&flg);CHKERRQ(ierr); 1856e88777f2SHong Zhang if (flg) c->brows = brows; 1857eeb4fd02SHong Zhang if (brows > 0) { 1858854ce69bSBarry Smith ierr = PetscMalloc1(nis+1,&c->lstart);CHKERRQ(ierr); 1859e88777f2SHong Zhang } 18602205254eSKarl Rupp 1861d2853540SHong Zhang colorforrow[0] = 0; 1862d2853540SHong Zhang rows_i = rows; 1863f99a636bSHong Zhang den2sp_i = den2sp; 1864b1683b59SHong Zhang 1865854ce69bSBarry Smith ierr = PetscMalloc1(nis+1,&colorforcol);CHKERRQ(ierr); 1866854ce69bSBarry Smith ierr = PetscMalloc1(Nbs+1,&columns);CHKERRQ(ierr); 18672205254eSKarl Rupp 1868d2853540SHong Zhang colorforcol[0] = 0; 1869d2853540SHong Zhang columns_i = columns; 1870d2853540SHong Zhang 1871077f23c2SHong Zhang /* get column-wise storage of mat */ 1872077f23c2SHong Zhang ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 1873b1683b59SHong Zhang 1874b28d80bdSHong Zhang cm = c->m; 1875854ce69bSBarry Smith ierr = PetscMalloc1(cm+1,&rowhit);CHKERRQ(ierr); 1876854ce69bSBarry Smith ierr = PetscMalloc1(cm+1,&idxhit);CHKERRQ(ierr); 1877f99a636bSHong Zhang for (i=0; i<nis; i++) { /* loop over color */ 1878b1683b59SHong Zhang ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 1879b1683b59SHong Zhang ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 18802205254eSKarl Rupp 1881b1683b59SHong Zhang c->ncolumns[i] = n; 1882b1683b59SHong Zhang if (n) { 1883580bdb30SBarry Smith ierr = PetscArraycpy(columns_i,is,n);CHKERRQ(ierr); 1884b1683b59SHong Zhang } 1885d2853540SHong Zhang colorforcol[i+1] = colorforcol[i] + n; 1886d2853540SHong Zhang columns_i += n; 1887b1683b59SHong Zhang 1888b1683b59SHong Zhang /* fast, crude version requires O(N*N) work */ 1889580bdb30SBarry Smith ierr = PetscArrayzero(rowhit,cm);CHKERRQ(ierr); 1890e99be685SHong Zhang 1891b7caf3d6SHong Zhang for (j=0; j<n; j++) { /* loop over columns*/ 1892b1683b59SHong Zhang col = is[j]; 1893d2853540SHong Zhang row_idx = cj + ci[col]; 1894b1683b59SHong Zhang m = ci[col+1] - ci[col]; 1895b7caf3d6SHong Zhang for (k=0; k<m; k++) { /* loop over columns marking them in rowhit */ 1896e99be685SHong Zhang idxhit[*row_idx] = spidx[ci[col] + k]; 1897d2853540SHong Zhang rowhit[*row_idx++] = col + 1; 1898b1683b59SHong Zhang } 1899b1683b59SHong Zhang } 1900b1683b59SHong Zhang /* count the number of hits */ 1901b1683b59SHong Zhang nrows = 0; 1902e8354b3eSHong Zhang for (j=0; j<cm; j++) { 1903b1683b59SHong Zhang if (rowhit[j]) nrows++; 1904b1683b59SHong Zhang } 1905b1683b59SHong Zhang c->nrows[i] = nrows; 1906d2853540SHong Zhang colorforrow[i+1] = colorforrow[i] + nrows; 1907d2853540SHong Zhang 1908b1683b59SHong Zhang nrows = 0; 1909b7caf3d6SHong Zhang for (j=0; j<cm; j++) { /* loop over rows */ 1910b1683b59SHong Zhang if (rowhit[j]) { 1911d2853540SHong Zhang rows_i[nrows] = j; 191212b89a43SHong Zhang den2sp_i[nrows] = idxhit[j]; 1913b1683b59SHong Zhang nrows++; 1914b1683b59SHong Zhang } 1915b1683b59SHong Zhang } 1916e88777f2SHong Zhang den2sp_i += nrows; 1917077f23c2SHong Zhang 1918b1683b59SHong Zhang ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 1919f99a636bSHong Zhang rows_i += nrows; 1920b1683b59SHong Zhang } 19210298fd71SBarry Smith ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 1922b28d80bdSHong Zhang ierr = PetscFree(rowhit);CHKERRQ(ierr); 1923071fcb05SBarry Smith ierr = ISColoringRestoreIS(iscoloring,PETSC_USE_POINTER,&isa);CHKERRQ(ierr); 1924d2853540SHong Zhang if (csp->nz != colorforrow[nis]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"csp->nz %d != colorforrow[nis] %d",csp->nz,colorforrow[nis]); 1925b28d80bdSHong Zhang 1926d2853540SHong Zhang c->colorforrow = colorforrow; 1927d2853540SHong Zhang c->rows = rows; 1928f99a636bSHong Zhang c->den2sp = den2sp; 1929d2853540SHong Zhang c->colorforcol = colorforcol; 1930d2853540SHong Zhang c->columns = columns; 19312205254eSKarl Rupp 1932f94ccd6cSHong Zhang ierr = PetscFree(idxhit);CHKERRQ(ierr); 1933b1683b59SHong Zhang PetscFunctionReturn(0); 1934b1683b59SHong Zhang } 1935d013fc79Sandi selinger 19364222ddf1SHong Zhang /* --------------------------------------------------------------- */ 19374222ddf1SHong Zhang static PetscErrorCode MatProductNumeric_AtB_SeqAIJ_SeqAIJ(Mat C) 1938df97dc6dSFande Kong { 19394222ddf1SHong Zhang PetscErrorCode ierr; 19404222ddf1SHong Zhang Mat_Product *product = C->product; 19414222ddf1SHong Zhang Mat A=product->A,B=product->B; 19424222ddf1SHong Zhang 1943df97dc6dSFande Kong PetscFunctionBegin; 19444222ddf1SHong Zhang if (C->ops->mattransposemultnumeric) { 19454222ddf1SHong Zhang /* Alg: "outerproduct" */ 19466718818eSStefano Zampini ierr = (*C->ops->mattransposemultnumeric)(A,B,C);CHKERRQ(ierr); 19474222ddf1SHong Zhang } else { 19484222ddf1SHong Zhang /* Alg: "matmatmult" -- C = At*B */ 19496718818eSStefano Zampini Mat_MatTransMatMult *atb = (Mat_MatTransMatMult *)product->data; 19506718818eSStefano Zampini Mat At; 19514222ddf1SHong Zhang 19526718818eSStefano Zampini if (!atb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing product struct"); 19536718818eSStefano Zampini At = atb->At; 1954089a957eSStefano Zampini if (atb->updateAt && At) { /* At is computed in MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ() */ 19554222ddf1SHong Zhang ierr = MatTranspose_SeqAIJ(A,MAT_REUSE_MATRIX,&At);CHKERRQ(ierr); 19564222ddf1SHong Zhang } 1957089a957eSStefano Zampini ierr = MatMatMultNumeric_SeqAIJ_SeqAIJ(At ? At : A,B,C);CHKERRQ(ierr); 19584222ddf1SHong Zhang atb->updateAt = PETSC_TRUE; 19594222ddf1SHong Zhang } 1960df97dc6dSFande Kong PetscFunctionReturn(0); 1961df97dc6dSFande Kong } 1962df97dc6dSFande Kong 19634222ddf1SHong Zhang static PetscErrorCode MatProductSymbolic_AtB_SeqAIJ_SeqAIJ(Mat C) 1964d013fc79Sandi selinger { 1965d013fc79Sandi selinger PetscErrorCode ierr; 19664222ddf1SHong Zhang Mat_Product *product = C->product; 19674222ddf1SHong Zhang Mat A=product->A,B=product->B; 19684222ddf1SHong Zhang PetscReal fill=product->fill; 1969d013fc79Sandi selinger 1970d013fc79Sandi selinger PetscFunctionBegin; 19714222ddf1SHong Zhang ierr = MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 19722869b61bSandi selinger 19734222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJ_SeqAIJ; 19744222ddf1SHong Zhang PetscFunctionReturn(0); 19752869b61bSandi selinger } 1976d013fc79Sandi selinger 19774222ddf1SHong Zhang /* --------------------------------------------------------------- */ 19784222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqAIJ_AB(Mat C) 19794222ddf1SHong Zhang { 19804222ddf1SHong Zhang PetscErrorCode ierr; 19814222ddf1SHong Zhang Mat_Product *product = C->product; 19824222ddf1SHong Zhang PetscInt alg = 0; /* default algorithm */ 19834222ddf1SHong Zhang PetscBool flg = PETSC_FALSE; 19844222ddf1SHong Zhang #if !defined(PETSC_HAVE_HYPRE) 19854222ddf1SHong Zhang const char *algTypes[7] = {"sorted","scalable","scalable_fast","heap","btheap","llcondensed","rowmerge"}; 19864222ddf1SHong Zhang PetscInt nalg = 7; 19874222ddf1SHong Zhang #else 19884222ddf1SHong Zhang const char *algTypes[8] = {"sorted","scalable","scalable_fast","heap","btheap","llcondensed","rowmerge","hypre"}; 19894222ddf1SHong Zhang PetscInt nalg = 8; 19904222ddf1SHong Zhang #endif 19914222ddf1SHong Zhang 19924222ddf1SHong Zhang PetscFunctionBegin; 19934222ddf1SHong Zhang /* Set default algorithm */ 19944222ddf1SHong Zhang ierr = PetscStrcmp(C->product->alg,"default",&flg);CHKERRQ(ierr); 19954222ddf1SHong Zhang if (flg) { 19964222ddf1SHong Zhang ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 1997d013fc79Sandi selinger } 1998d013fc79Sandi selinger 19994222ddf1SHong Zhang /* Get runtime option */ 20004222ddf1SHong Zhang if (product->api_user) { 20014222ddf1SHong Zhang ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");CHKERRQ(ierr); 20024222ddf1SHong Zhang ierr = PetscOptionsEList("-matmatmult_via","Algorithmic approach","MatMatMult",algTypes,nalg,algTypes[0],&alg,&flg);CHKERRQ(ierr); 20034222ddf1SHong Zhang ierr = PetscOptionsEnd();CHKERRQ(ierr); 20044222ddf1SHong Zhang } else { 20054222ddf1SHong Zhang ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr); 20064222ddf1SHong Zhang ierr = PetscOptionsEList("-matproduct_ab_via","Algorithmic approach","MatProduct_AB",algTypes,nalg,algTypes[0],&alg,&flg);CHKERRQ(ierr); 20074222ddf1SHong Zhang ierr = PetscOptionsEnd();CHKERRQ(ierr); 2008d013fc79Sandi selinger } 20094222ddf1SHong Zhang if (flg) { 20104222ddf1SHong Zhang ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 2011d013fc79Sandi selinger } 2012d013fc79Sandi selinger 20134222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_AB; 20144222ddf1SHong Zhang C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqAIJ; 20154222ddf1SHong Zhang PetscFunctionReturn(0); 20164222ddf1SHong Zhang } 2017d013fc79Sandi selinger 20184222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqAIJ_AtB(Mat C) 20194222ddf1SHong Zhang { 20204222ddf1SHong Zhang PetscErrorCode ierr; 20214222ddf1SHong Zhang Mat_Product *product = C->product; 20224222ddf1SHong Zhang PetscInt alg = 0; /* default algorithm */ 20234222ddf1SHong Zhang PetscBool flg = PETSC_FALSE; 2024089a957eSStefano Zampini const char *algTypes[3] = {"default","at*b","outerproduct"}; 2025089a957eSStefano Zampini PetscInt nalg = 3; 2026d013fc79Sandi selinger 20274222ddf1SHong Zhang PetscFunctionBegin; 20284222ddf1SHong Zhang /* Get runtime option */ 20294222ddf1SHong Zhang if (product->api_user) { 20304222ddf1SHong Zhang ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatTransposeMatMult","Mat");CHKERRQ(ierr); 20314222ddf1SHong Zhang ierr = PetscOptionsEList("-mattransposematmult_via","Algorithmic approach","MatTransposeMatMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 20324222ddf1SHong Zhang ierr = PetscOptionsEnd();CHKERRQ(ierr); 20334222ddf1SHong Zhang } else { 20344222ddf1SHong Zhang ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AtB","Mat");CHKERRQ(ierr); 20354222ddf1SHong Zhang ierr = PetscOptionsEList("-matproduct_atb_via","Algorithmic approach","MatProduct_AtB",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 20364222ddf1SHong Zhang ierr = PetscOptionsEnd();CHKERRQ(ierr); 20374222ddf1SHong Zhang } 20384222ddf1SHong Zhang if (flg) { 20394222ddf1SHong Zhang ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 20404222ddf1SHong Zhang } 2041d013fc79Sandi selinger 20424222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJ_SeqAIJ; 20434222ddf1SHong Zhang PetscFunctionReturn(0); 20444222ddf1SHong Zhang } 20454222ddf1SHong Zhang 20464222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqAIJ_ABt(Mat C) 20474222ddf1SHong Zhang { 20484222ddf1SHong Zhang PetscErrorCode ierr; 20494222ddf1SHong Zhang Mat_Product *product = C->product; 20504222ddf1SHong Zhang PetscInt alg = 0; /* default algorithm */ 20514222ddf1SHong Zhang PetscBool flg = PETSC_FALSE; 20524222ddf1SHong Zhang const char *algTypes[2] = {"default","color"}; 20534222ddf1SHong Zhang PetscInt nalg = 2; 20544222ddf1SHong Zhang 20554222ddf1SHong Zhang PetscFunctionBegin; 20564222ddf1SHong Zhang /* Set default algorithm */ 20574222ddf1SHong Zhang ierr = PetscStrcmp(C->product->alg,"default",&flg);CHKERRQ(ierr); 20584222ddf1SHong Zhang if (!flg) { 20594222ddf1SHong Zhang alg = 1; 20604222ddf1SHong Zhang ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 20614222ddf1SHong Zhang } 20624222ddf1SHong Zhang 20634222ddf1SHong Zhang /* Get runtime option */ 20644222ddf1SHong Zhang if (product->api_user) { 20654222ddf1SHong Zhang ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr); 20664222ddf1SHong Zhang ierr = PetscOptionsEList("-matmattransmult_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 20674222ddf1SHong Zhang ierr = PetscOptionsEnd();CHKERRQ(ierr); 20684222ddf1SHong Zhang } else { 20694222ddf1SHong Zhang ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr); 20704222ddf1SHong Zhang ierr = PetscOptionsEList("-matproduct_abt_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 20714222ddf1SHong Zhang ierr = PetscOptionsEnd();CHKERRQ(ierr); 20724222ddf1SHong Zhang } 20734222ddf1SHong Zhang if (flg) { 20744222ddf1SHong Zhang ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 20754222ddf1SHong Zhang } 20764222ddf1SHong Zhang 20774222ddf1SHong Zhang C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ; 20784222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_ABt; 20794222ddf1SHong Zhang PetscFunctionReturn(0); 20804222ddf1SHong Zhang } 20814222ddf1SHong Zhang 20824222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqAIJ_PtAP(Mat C) 20834222ddf1SHong Zhang { 20844222ddf1SHong Zhang PetscErrorCode ierr; 20854222ddf1SHong Zhang Mat_Product *product = C->product; 20864222ddf1SHong Zhang PetscBool flg = PETSC_FALSE; 20874222ddf1SHong Zhang PetscInt alg = 0; /* default algorithm -- alg=1 should be default!!! */ 20884222ddf1SHong Zhang #if !defined(PETSC_HAVE_HYPRE) 20894222ddf1SHong Zhang const char *algTypes[2] = {"scalable","rap"}; 20904222ddf1SHong Zhang PetscInt nalg = 2; 20914222ddf1SHong Zhang #else 20924222ddf1SHong Zhang const char *algTypes[3] = {"scalable","rap","hypre"}; 20934222ddf1SHong Zhang PetscInt nalg = 3; 20944222ddf1SHong Zhang #endif 20954222ddf1SHong Zhang 20964222ddf1SHong Zhang PetscFunctionBegin; 20974222ddf1SHong Zhang /* Set default algorithm */ 20984222ddf1SHong Zhang ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr); 20994222ddf1SHong Zhang if (flg) { 21004222ddf1SHong Zhang ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 21014222ddf1SHong Zhang } 21024222ddf1SHong Zhang 21034222ddf1SHong Zhang /* Get runtime option */ 21044222ddf1SHong Zhang if (product->api_user) { 21054222ddf1SHong Zhang ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");CHKERRQ(ierr); 21064222ddf1SHong Zhang ierr = PetscOptionsEList("-matptap_via","Algorithmic approach","MatPtAP",algTypes,nalg,algTypes[0],&alg,&flg);CHKERRQ(ierr); 21074222ddf1SHong Zhang ierr = PetscOptionsEnd();CHKERRQ(ierr); 21084222ddf1SHong Zhang } else { 21094222ddf1SHong Zhang ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr); 21104222ddf1SHong Zhang ierr = PetscOptionsEList("-matproduct_ptap_via","Algorithmic approach","MatProduct_PtAP",algTypes,nalg,algTypes[0],&alg,&flg);CHKERRQ(ierr); 21114222ddf1SHong Zhang ierr = PetscOptionsEnd();CHKERRQ(ierr); 21124222ddf1SHong Zhang } 21134222ddf1SHong Zhang if (flg) { 21144222ddf1SHong Zhang ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 21154222ddf1SHong Zhang } 21164222ddf1SHong Zhang 21174222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqAIJ; 21184222ddf1SHong Zhang PetscFunctionReturn(0); 21194222ddf1SHong Zhang } 21204222ddf1SHong Zhang 21214222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqAIJ_RARt(Mat C) 21224222ddf1SHong Zhang { 21234222ddf1SHong Zhang PetscErrorCode ierr; 21244222ddf1SHong Zhang Mat_Product *product = C->product; 21254222ddf1SHong Zhang PetscBool flg = PETSC_FALSE; 21264222ddf1SHong Zhang PetscInt alg = 0; /* default algorithm */ 21274222ddf1SHong Zhang const char *algTypes[3] = {"r*a*rt","r*art","coloring_rart"}; 21284222ddf1SHong Zhang PetscInt nalg = 3; 21294222ddf1SHong Zhang 21304222ddf1SHong Zhang PetscFunctionBegin; 21314222ddf1SHong Zhang /* Set default algorithm */ 21324222ddf1SHong Zhang ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr); 21334222ddf1SHong Zhang if (flg) { 21344222ddf1SHong Zhang ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 21354222ddf1SHong Zhang } 21364222ddf1SHong Zhang 21374222ddf1SHong Zhang /* Get runtime option */ 21384222ddf1SHong Zhang if (product->api_user) { 21394222ddf1SHong Zhang ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatRARt","Mat");CHKERRQ(ierr); 21404222ddf1SHong Zhang ierr = PetscOptionsEList("-matrart_via","Algorithmic approach","MatRARt",algTypes,nalg,algTypes[0],&alg,&flg);CHKERRQ(ierr); 21414222ddf1SHong Zhang ierr = PetscOptionsEnd();CHKERRQ(ierr); 21424222ddf1SHong Zhang } else { 21434222ddf1SHong Zhang ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_RARt","Mat");CHKERRQ(ierr); 21444222ddf1SHong Zhang ierr = PetscOptionsEList("-matproduct_rart_via","Algorithmic approach","MatProduct_RARt",algTypes,nalg,algTypes[0],&alg,&flg);CHKERRQ(ierr); 21454222ddf1SHong Zhang ierr = PetscOptionsEnd();CHKERRQ(ierr); 21464222ddf1SHong Zhang } 21474222ddf1SHong Zhang if (flg) { 21484222ddf1SHong Zhang ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 21494222ddf1SHong Zhang } 21504222ddf1SHong Zhang 21514222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_RARt_SeqAIJ_SeqAIJ; 21524222ddf1SHong Zhang PetscFunctionReturn(0); 21534222ddf1SHong Zhang } 21544222ddf1SHong Zhang 21554222ddf1SHong Zhang /* ABC = A*B*C = A*(B*C); ABC's algorithm must be chosen from AB's algorithm */ 21564222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqAIJ_ABC(Mat C) 21574222ddf1SHong Zhang { 21584222ddf1SHong Zhang PetscErrorCode ierr; 21594222ddf1SHong Zhang Mat_Product *product = C->product; 21604222ddf1SHong Zhang PetscInt alg = 0; /* default algorithm */ 21614222ddf1SHong Zhang PetscBool flg = PETSC_FALSE; 21624222ddf1SHong Zhang const char *algTypes[7] = {"sorted","scalable","scalable_fast","heap","btheap","llcondensed","rowmerge"}; 21634222ddf1SHong Zhang PetscInt nalg = 7; 21644222ddf1SHong Zhang 21654222ddf1SHong Zhang PetscFunctionBegin; 21664222ddf1SHong Zhang /* Set default algorithm */ 21674222ddf1SHong Zhang ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr); 21684222ddf1SHong Zhang if (flg) { 21694222ddf1SHong Zhang ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 21704222ddf1SHong Zhang } 21714222ddf1SHong Zhang 21724222ddf1SHong Zhang /* Get runtime option */ 21734222ddf1SHong Zhang if (product->api_user) { 21744222ddf1SHong Zhang ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMatMult","Mat");CHKERRQ(ierr); 21754222ddf1SHong Zhang ierr = PetscOptionsEList("-matmatmatmult_via","Algorithmic approach","MatMatMatMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 21764222ddf1SHong Zhang ierr = PetscOptionsEnd();CHKERRQ(ierr); 21774222ddf1SHong Zhang } else { 21784222ddf1SHong Zhang ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABC","Mat");CHKERRQ(ierr); 21794222ddf1SHong Zhang ierr = PetscOptionsEList("-matproduct_abc_via","Algorithmic approach","MatProduct_ABC",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 21804222ddf1SHong Zhang ierr = PetscOptionsEnd();CHKERRQ(ierr); 21814222ddf1SHong Zhang } 21824222ddf1SHong Zhang if (flg) { 21834222ddf1SHong Zhang ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 21844222ddf1SHong Zhang } 21854222ddf1SHong Zhang 21864222ddf1SHong Zhang C->ops->matmatmultsymbolic = MatMatMatMultSymbolic_SeqAIJ_SeqAIJ_SeqAIJ; 21874222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_ABC; 21884222ddf1SHong Zhang PetscFunctionReturn(0); 21894222ddf1SHong Zhang } 21904222ddf1SHong Zhang 21914222ddf1SHong Zhang PetscErrorCode MatProductSetFromOptions_SeqAIJ(Mat C) 21924222ddf1SHong Zhang { 21934222ddf1SHong Zhang PetscErrorCode ierr; 21944222ddf1SHong Zhang Mat_Product *product = C->product; 21954222ddf1SHong Zhang 21964222ddf1SHong Zhang PetscFunctionBegin; 21974222ddf1SHong Zhang switch (product->type) { 21984222ddf1SHong Zhang case MATPRODUCT_AB: 21994222ddf1SHong Zhang ierr = MatProductSetFromOptions_SeqAIJ_AB(C);CHKERRQ(ierr); 22004222ddf1SHong Zhang break; 22014222ddf1SHong Zhang case MATPRODUCT_AtB: 22024222ddf1SHong Zhang ierr = MatProductSetFromOptions_SeqAIJ_AtB(C);CHKERRQ(ierr); 22034222ddf1SHong Zhang break; 22044222ddf1SHong Zhang case MATPRODUCT_ABt: 22054222ddf1SHong Zhang ierr = MatProductSetFromOptions_SeqAIJ_ABt(C);CHKERRQ(ierr); 22064222ddf1SHong Zhang break; 22074222ddf1SHong Zhang case MATPRODUCT_PtAP: 22084222ddf1SHong Zhang ierr = MatProductSetFromOptions_SeqAIJ_PtAP(C);CHKERRQ(ierr); 22094222ddf1SHong Zhang break; 22104222ddf1SHong Zhang case MATPRODUCT_RARt: 22114222ddf1SHong Zhang ierr = MatProductSetFromOptions_SeqAIJ_RARt(C);CHKERRQ(ierr); 22124222ddf1SHong Zhang break; 22134222ddf1SHong Zhang case MATPRODUCT_ABC: 22144222ddf1SHong Zhang ierr = MatProductSetFromOptions_SeqAIJ_ABC(C);CHKERRQ(ierr); 22154222ddf1SHong Zhang break; 22166718818eSStefano Zampini default: 22176718818eSStefano Zampini break; 22184222ddf1SHong Zhang } 2219d013fc79Sandi selinger PetscFunctionReturn(0); 2220d013fc79Sandi selinger } 2221