1be1d678aSKris Buschelman 2eb9c0419SKris Buschelman /* 342c57489SHong Zhang Defines projective product routines where A is a SeqAIJ matrix 4eb9c0419SKris Buschelman C = P^T * A * P 5eb9c0419SKris Buschelman */ 6eb9c0419SKris Buschelman 7c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 8c6db04a5SJed Brown #include <../src/mat/utils/freespace.h> 9c6db04a5SJed Brown #include <petscbt.h> 108563dfccSBarry Smith #include <petsctime.h> 11eb9c0419SKris Buschelman 126f231fbdSstefano_zampini #if defined(PETSC_HAVE_HYPRE) 134222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatPtAPSymbolic_AIJ_AIJ_wHYPRE(Mat,Mat,PetscReal,Mat); 146f231fbdSstefano_zampini #endif 156f231fbdSstefano_zampini 164222ddf1SHong Zhang PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqAIJ(Mat C) 177ba1a0bfSKris Buschelman { 184222ddf1SHong Zhang Mat_Product *product = C->product; 194222ddf1SHong Zhang Mat A=product->A,P=product->B; 204222ddf1SHong Zhang MatProductAlgorithm alg=product->alg; 214222ddf1SHong Zhang PetscReal fill=product->fill; 224222ddf1SHong Zhang PetscBool flg; 238fa4b5a6SHong Zhang Mat Pt; 247ba1a0bfSKris Buschelman 257ba1a0bfSKris Buschelman PetscFunctionBegin; 264222ddf1SHong Zhang /* "scalable" */ 27*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscStrcmp(alg,"scalable",&flg)); 284222ddf1SHong Zhang if (flg) { 29*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatPtAPSymbolic_SeqAIJ_SeqAIJ_SparseAxpy(A,P,fill,C)); 304222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_PtAP; 314222ddf1SHong Zhang PetscFunctionReturn(0); 324222ddf1SHong Zhang } 334222ddf1SHong Zhang 344222ddf1SHong Zhang /* "rap" */ 35*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscStrcmp(alg,"rap",&flg)); 366718818eSStefano Zampini if (flg) { 376718818eSStefano Zampini Mat_MatTransMatMult *atb; 386718818eSStefano Zampini 39*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscNew(&atb)); 40*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatTranspose_SeqAIJ(P,MAT_INITIAL_MATRIX,&Pt)); 41*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatMatMatMultSymbolic_SeqAIJ_SeqAIJ_SeqAIJ(Pt,A,P,fill,C)); 428fa4b5a6SHong Zhang 438fa4b5a6SHong Zhang atb->At = Pt; 446718818eSStefano Zampini atb->data = C->product->data; 456718818eSStefano Zampini atb->destroy = C->product->destroy; 466718818eSStefano Zampini C->product->data = atb; 476718818eSStefano Zampini C->product->destroy = MatDestroy_SeqAIJ_MatTransMatMult; 484222ddf1SHong Zhang C->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ; 494222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_PtAP; 508fa4b5a6SHong Zhang PetscFunctionReturn(0); 514222ddf1SHong Zhang } 524222ddf1SHong Zhang 534222ddf1SHong Zhang /* hypre */ 546f231fbdSstefano_zampini #if defined(PETSC_HAVE_HYPRE) 55*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscStrcmp(alg,"hypre",&flg)); 564222ddf1SHong Zhang if (flg) { 57*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatPtAPSymbolic_AIJ_AIJ_wHYPRE(A,P,fill,C)); 584222ddf1SHong Zhang PetscFunctionReturn(0); 594222ddf1SHong Zhang } 606f231fbdSstefano_zampini #endif 614222ddf1SHong Zhang 624222ddf1SHong Zhang SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatProductType is not supported"); 637ba1a0bfSKris Buschelman } 647ba1a0bfSKris Buschelman 654222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ_SparseAxpy(Mat A,Mat P,PetscReal fill,Mat C) 668d0a38e4SHong Zhang { 670298fd71SBarry Smith PetscFreeSpaceList free_space=NULL,current_space=NULL; 68d20bfe6fSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*p = (Mat_SeqAIJ*)P->data,*c; 6953565b12SHong Zhang PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 7053565b12SHong Zhang PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*ptaj,nspacedouble=0; 717d69fa63SHong Zhang PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N; 7253565b12SHong Zhang PetscInt i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk; 73d20bfe6fSHong Zhang MatScalar *ca; 7453565b12SHong Zhang PetscBT lnkbt; 757d69fa63SHong Zhang PetscReal afill; 76eb9c0419SKris Buschelman 77eb9c0419SKris Buschelman PetscFunctionBegin; 7853565b12SHong Zhang /* Get ij structure of P^T */ 79*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj)); 80d20bfe6fSHong Zhang ptJ = ptj; 81eb9c0419SKris Buschelman 82d20bfe6fSHong Zhang /* Allocate ci array, arrays for fill computation and */ 83d20bfe6fSHong Zhang /* free space for accumulating nonzero column info */ 84*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(pn+1,&ci)); 85d20bfe6fSHong Zhang ci[0] = 0; 86eb9c0419SKris Buschelman 87*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscCalloc1(2*an+1,&ptadenserow)); 88d20bfe6fSHong Zhang ptasparserow = ptadenserow + an; 8955a3bba9SHong Zhang 9055a3bba9SHong Zhang /* create and initialize a linked list */ 9155a3bba9SHong Zhang nlnk = pn+1; 92*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLLCreate(pn,pn,nlnk,lnk,lnkbt)); 93eb9c0419SKris Buschelman 947d69fa63SHong Zhang /* Set initial free space to be fill*(nnz(A)+ nnz(P)) */ 95*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],pi[pm])),&free_space)); 96d20bfe6fSHong Zhang current_space = free_space; 97d20bfe6fSHong Zhang 98d20bfe6fSHong Zhang /* Determine symbolic info for each row of C: */ 99d20bfe6fSHong Zhang for (i=0; i<pn; i++) { 100d20bfe6fSHong Zhang ptnzi = pti[i+1] - pti[i]; 101d20bfe6fSHong Zhang ptanzi = 0; 102d20bfe6fSHong Zhang /* Determine symbolic row of PtA: */ 103d20bfe6fSHong Zhang for (j=0; j<ptnzi; j++) { 104d20bfe6fSHong Zhang arow = *ptJ++; 105d20bfe6fSHong Zhang anzj = ai[arow+1] - ai[arow]; 106d20bfe6fSHong Zhang ajj = aj + ai[arow]; 107d20bfe6fSHong Zhang for (k=0; k<anzj; k++) { 108d20bfe6fSHong Zhang if (!ptadenserow[ajj[k]]) { 109d20bfe6fSHong Zhang ptadenserow[ajj[k]] = -1; 110d20bfe6fSHong Zhang ptasparserow[ptanzi++] = ajj[k]; 111d20bfe6fSHong Zhang } 112d20bfe6fSHong Zhang } 113d20bfe6fSHong Zhang } 114d20bfe6fSHong Zhang /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 115d20bfe6fSHong Zhang ptaj = ptasparserow; 116d20bfe6fSHong Zhang cnzi = 0; 117d20bfe6fSHong Zhang for (j=0; j<ptanzi; j++) { 118d20bfe6fSHong Zhang prow = *ptaj++; 119d20bfe6fSHong Zhang pnzj = pi[prow+1] - pi[prow]; 120d20bfe6fSHong Zhang pjj = pj + pi[prow]; 12155a3bba9SHong Zhang /* add non-zero cols of P into the sorted linked list lnk */ 122*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLLAddSorted(pnzj,pjj,pn,nlnk,lnk,lnkbt)); 12355a3bba9SHong Zhang cnzi += nlnk; 124d20bfe6fSHong Zhang } 125d20bfe6fSHong Zhang 126d20bfe6fSHong Zhang /* If free space is not available, make more free space */ 127d20bfe6fSHong Zhang /* Double the amount of total space in the list */ 128d20bfe6fSHong Zhang if (current_space->local_remaining<cnzi) { 129*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),¤t_space)); 130f2b054eeSHong Zhang nspacedouble++; 131d20bfe6fSHong Zhang } 132d20bfe6fSHong Zhang 133d20bfe6fSHong Zhang /* Copy data into free space, and zero out denserows */ 134*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt)); 1352205254eSKarl Rupp 136d20bfe6fSHong Zhang current_space->array += cnzi; 137d20bfe6fSHong Zhang current_space->local_used += cnzi; 138d20bfe6fSHong Zhang current_space->local_remaining -= cnzi; 139d20bfe6fSHong Zhang 1402205254eSKarl Rupp for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0; 1412205254eSKarl Rupp 142d20bfe6fSHong Zhang /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 143d20bfe6fSHong Zhang /* For now, we will recompute what is needed. */ 144d20bfe6fSHong Zhang ci[i+1] = ci[i] + cnzi; 145d20bfe6fSHong Zhang } 146d20bfe6fSHong Zhang /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 147d20bfe6fSHong Zhang /* Allocate space for cj, initialize cj, and */ 148d20bfe6fSHong Zhang /* destroy list of free space and other temporary array(s) */ 149*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(ci[pn]+1,&cj)); 150*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFreeSpaceContiguous(&free_space,cj)); 151*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(ptadenserow)); 152*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLLDestroy(lnk,lnkbt)); 153d20bfe6fSHong Zhang 154*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscCalloc1(ci[pn]+1,&ca)); 15553565b12SHong Zhang 15653565b12SHong Zhang /* put together the new matrix */ 157*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),pn,pn,ci,cj,ca,((PetscObject)A)->type_name,C)); 158*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetBlockSizes(C,PetscAbs(P->cmap->bs),PetscAbs(P->cmap->bs))); 159ae32dd66SHong Zhang 16053565b12SHong Zhang /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 16153565b12SHong Zhang /* Since these are PETSc arrays, change flags to free them as necessary. */ 1624222ddf1SHong Zhang c = (Mat_SeqAIJ*)((C)->data); 16353565b12SHong Zhang c->free_a = PETSC_TRUE; 16453565b12SHong Zhang c->free_ij = PETSC_TRUE; 16553565b12SHong Zhang c->nonew = 0; 1666718818eSStefano Zampini 1674222ddf1SHong Zhang C->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ_SparseAxpy; 16853565b12SHong Zhang 1697d69fa63SHong Zhang /* set MatInfo */ 1707d69fa63SHong Zhang afill = (PetscReal)ci[pn]/(ai[am]+pi[pm] + 1.e-5); 1717d69fa63SHong Zhang if (afill < 1.0) afill = 1.0; 1724222ddf1SHong Zhang C->info.mallocs = nspacedouble; 1734222ddf1SHong Zhang C->info.fill_ratio_given = fill; 1744222ddf1SHong Zhang C->info.fill_ratio_needed = afill; 1757d69fa63SHong Zhang 17653565b12SHong Zhang /* Clean up. */ 177*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj)); 17853565b12SHong Zhang #if defined(PETSC_USE_INFO) 17953565b12SHong Zhang if (ci[pn] != 0) { 180*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscInfo(C,"Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n",nspacedouble,(double)fill,(double)afill)); 181*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscInfo(C,"Use MatPtAP(A,P,MatReuse,%g,&C) for best performance.\n",(double)afill)); 18253565b12SHong Zhang } else { 183*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscInfo(C,"Empty matrix product\n")); 18453565b12SHong Zhang } 185f79958ebSHong Zhang #endif 18653565b12SHong Zhang PetscFunctionReturn(0); 18753565b12SHong Zhang } 18853565b12SHong Zhang 189ae32dd66SHong Zhang PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ_SparseAxpy(Mat A,Mat P,Mat C) 19053565b12SHong Zhang { 19153565b12SHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ*) A->data; 19253565b12SHong Zhang Mat_SeqAIJ *p = (Mat_SeqAIJ*) P->data; 19353565b12SHong Zhang Mat_SeqAIJ *c = (Mat_SeqAIJ*) C->data; 19453565b12SHong Zhang PetscInt *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 19553565b12SHong Zhang PetscInt *ci=c->i,*cj=c->j,*cjj; 19653565b12SHong Zhang PetscInt am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N; 19753565b12SHong Zhang PetscInt i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 19865e4b4d4SStefano Zampini MatScalar *aa,*apa,*pa,*pA,*paj,*ca,*caj; 19953565b12SHong Zhang 20053565b12SHong Zhang PetscFunctionBegin; 201ae32dd66SHong Zhang /* Allocate temporary array for storage of one row of A*P (cn: non-scalable) */ 202*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscCalloc2(cn,&apa,cn,&apjdense)); 203*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(cn,&apj)); 20465e4b4d4SStefano Zampini /* trigger CPU copies if needed and flag CPU mask for C */ 20565e4b4d4SStefano Zampini #if defined(PETSC_HAVE_DEVICE) 20665e4b4d4SStefano Zampini { 20765e4b4d4SStefano Zampini const PetscScalar *dummy; 208*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSeqAIJGetArrayRead(A,&dummy)); 209*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSeqAIJRestoreArrayRead(A,&dummy)); 210*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSeqAIJGetArrayRead(P,&dummy)); 211*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSeqAIJRestoreArrayRead(P,&dummy)); 21265e4b4d4SStefano Zampini if (C->offloadmask != PETSC_OFFLOAD_UNALLOCATED) C->offloadmask = PETSC_OFFLOAD_CPU; 21365e4b4d4SStefano Zampini } 21465e4b4d4SStefano Zampini #endif 21565e4b4d4SStefano Zampini aa = a->a; 21665e4b4d4SStefano Zampini pa = p->a; 21765e4b4d4SStefano Zampini pA = p->a; 21865e4b4d4SStefano Zampini ca = c->a; 21953565b12SHong Zhang 22053565b12SHong Zhang /* Clear old values in C */ 221*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscArrayzero(ca,ci[cm])); 22253565b12SHong Zhang 22353565b12SHong Zhang for (i=0; i<am; i++) { 22453565b12SHong Zhang /* Form sparse row of A*P */ 22553565b12SHong Zhang anzi = ai[i+1] - ai[i]; 22653565b12SHong Zhang apnzj = 0; 22753565b12SHong Zhang for (j=0; j<anzi; j++) { 22853565b12SHong Zhang prow = *aj++; 22953565b12SHong Zhang pnzj = pi[prow+1] - pi[prow]; 23053565b12SHong Zhang pjj = pj + pi[prow]; 23153565b12SHong Zhang paj = pa + pi[prow]; 23253565b12SHong Zhang for (k=0; k<pnzj; k++) { 23353565b12SHong Zhang if (!apjdense[pjj[k]]) { 23453565b12SHong Zhang apjdense[pjj[k]] = -1; 23553565b12SHong Zhang apj[apnzj++] = pjj[k]; 23653565b12SHong Zhang } 23753565b12SHong Zhang apa[pjj[k]] += (*aa)*paj[k]; 23853565b12SHong Zhang } 239*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLogFlops(2.0*pnzj)); 24053565b12SHong Zhang aa++; 24153565b12SHong Zhang } 24253565b12SHong Zhang 24353565b12SHong Zhang /* Sort the j index array for quick sparse axpy. */ 24453565b12SHong Zhang /* Note: a array does not need sorting as it is in dense storage locations. */ 245*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSortInt(apnzj,apj)); 24653565b12SHong Zhang 24753565b12SHong Zhang /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 24853565b12SHong Zhang pnzi = pi[i+1] - pi[i]; 24953565b12SHong Zhang for (j=0; j<pnzi; j++) { 25053565b12SHong Zhang nextap = 0; 25153565b12SHong Zhang crow = *pJ++; 25253565b12SHong Zhang cjj = cj + ci[crow]; 25353565b12SHong Zhang caj = ca + ci[crow]; 25453565b12SHong Zhang /* Perform sparse axpy operation. Note cjj includes apj. */ 25553565b12SHong Zhang for (k=0; nextap<apnzj; k++) { 2566bdcaf15SBarry Smith PetscAssert(k < ci[crow+1] - ci[crow],PETSC_COMM_SELF,PETSC_ERR_PLIB,"k too large k %" PetscInt_FMT ", crow %" PetscInt_FMT,k,crow); 25753565b12SHong Zhang if (cjj[k]==apj[nextap]) { 25853565b12SHong Zhang caj[k] += (*pA)*apa[apj[nextap++]]; 25953565b12SHong Zhang } 26053565b12SHong Zhang } 261*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLogFlops(2.0*apnzj)); 26253565b12SHong Zhang pA++; 26353565b12SHong Zhang } 26453565b12SHong Zhang 26553565b12SHong Zhang /* Zero the current row info for A*P */ 26653565b12SHong Zhang for (j=0; j<apnzj; j++) { 26753565b12SHong Zhang apa[apj[j]] = 0.; 26853565b12SHong Zhang apjdense[apj[j]] = 0; 26953565b12SHong Zhang } 27053565b12SHong Zhang } 27153565b12SHong Zhang 27253565b12SHong Zhang /* Assemble the final matrix and clean up */ 273*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY)); 274*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY)); 275ae32dd66SHong Zhang 276*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree2(apa,apjdense)); 277*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(apj)); 27853565b12SHong Zhang PetscFunctionReturn(0); 27953565b12SHong Zhang } 28053565b12SHong Zhang 281dfbe8321SBarry Smith PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) 282dfbe8321SBarry Smith { 2836718818eSStefano Zampini Mat_MatTransMatMult *atb; 284eb9c0419SKris Buschelman 285eb9c0419SKris Buschelman PetscFunctionBegin; 2866718818eSStefano Zampini MatCheckProduct(C,3); 2876718818eSStefano Zampini atb = (Mat_MatTransMatMult*)C->product->data; 2882c71b3e2SJacob Faibussowitsch PetscCheckFalse(!atb,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing data structure"); 289*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatTranspose_SeqAIJ(P,MAT_REUSE_MATRIX,&atb->At)); 2902c71b3e2SJacob Faibussowitsch PetscCheckFalse(!C->ops->matmultnumeric,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing numeric operation"); 2916718818eSStefano Zampini /* when using rap, MatMatMatMultSymbolic used a different data */ 2926718818eSStefano Zampini if (atb->data) C->product->data = atb->data; 293*5f80ce2aSJacob Faibussowitsch CHKERRQ((*C->ops->matmatmultnumeric)(atb->At,A,P,C)); 2946718818eSStefano Zampini C->product->data = atb; 29553565b12SHong Zhang PetscFunctionReturn(0); 29653565b12SHong Zhang } 297