1d50806bdSBarry Smith /*$Id: matmatmult.c,v 1.15 2001/09/07 20:04:44 buschelm Exp $*/ 2d50806bdSBarry Smith /* 32c9ce0e5SKris Buschelman Defines matrix-matrix product routines for pairs of SeqAIJ matrices 4d50806bdSBarry Smith C = A * B 594e3eecaSKris Buschelman C = P^T * A * P 694e3eecaSKris Buschelman C = P * A * P^T 7d50806bdSBarry Smith */ 8d50806bdSBarry Smith 9d50806bdSBarry Smith #include "src/mat/impls/aij/seq/aij.h" 10*70f19b1fSKris Buschelman #include "src/mat/utils/freespace.h" 11d50806bdSBarry Smith 122216b3a4SKris Buschelman static int logkey_matmatmult = 0; 132216b3a4SKris Buschelman static int logkey_matmatmult_symbolic = 0; 142216b3a4SKris Buschelman static int logkey_matmatmult_numeric = 0; 152216b3a4SKris Buschelman 162216b3a4SKris Buschelman static int logkey_matapplyptap = 0; 172216b3a4SKris Buschelman static int logkey_matapplyptap_symbolic = 0; 182216b3a4SKris Buschelman static int logkey_matapplyptap_numeric = 0; 192216b3a4SKris Buschelman 2094e3eecaSKris Buschelman static int logkey_matapplypapt = 0; 2194e3eecaSKris Buschelman static int logkey_matapplypapt_symbolic = 0; 2294e3eecaSKris Buschelman static int logkey_matapplypapt_numeric = 0; 2394e3eecaSKris Buschelman 24d50806bdSBarry Smith /* 2594e3eecaSKris Buschelman MatMatMult_Symbolic_SeqAIJ_SeqAIJ - Forms the symbolic product of two SeqAIJ matrices 26d50806bdSBarry Smith C = A * B; 27d50806bdSBarry Smith 2894e3eecaSKris Buschelman Note: C is assumed to be uncreated. 29d50806bdSBarry Smith If this is not the case, Destroy C before calling this routine. 30d50806bdSBarry Smith */ 31d50806bdSBarry Smith #undef __FUNCT__ 3294e3eecaSKris Buschelman #define __FUNCT__ "MatMatMult_Symbolic_SeqAIJ_SeqAIJ" 3394e3eecaSKris Buschelman int MatMatMult_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat *C) 34d50806bdSBarry Smith { 35d50806bdSBarry Smith int ierr; 36d50806bdSBarry Smith FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 37d50806bdSBarry Smith Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 38d50806bdSBarry Smith int aishift=a->indexshift,bishift=b->indexshift; 39d50806bdSBarry Smith int *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj; 4094e3eecaSKris Buschelman int *ci,*cj,*denserow,*sparserow; 41d50806bdSBarry Smith int an=A->N,am=A->M,bn=B->N,bm=B->M; 42d50806bdSBarry Smith int i,j,k,anzi,brow,bnzj,cnzi; 43d50806bdSBarry Smith MatScalar *ca; 44d50806bdSBarry Smith 45d50806bdSBarry Smith PetscFunctionBegin; 46d50806bdSBarry Smith /* some error checking which could be moved into interface layer */ 47d50806bdSBarry Smith if (aishift || bishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported."); 48d50806bdSBarry Smith if (an!=bm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",an,bm); 49d50806bdSBarry Smith 5094e3eecaSKris Buschelman /* Set up timers */ 51d50806bdSBarry Smith if (!logkey_matmatmult_symbolic) { 52d50806bdSBarry Smith ierr = PetscLogEventRegister(&logkey_matmatmult_symbolic,"MatMatMult_Symbolic",MAT_COOKIE);CHKERRQ(ierr); 53d50806bdSBarry Smith } 54d50806bdSBarry Smith ierr = PetscLogEventBegin(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr); 55d50806bdSBarry Smith 56d50806bdSBarry Smith /* Set up */ 57d50806bdSBarry Smith /* Allocate ci array, arrays for fill computation and */ 58d50806bdSBarry Smith /* free space for accumulating nonzero column info */ 59d50806bdSBarry Smith ierr = PetscMalloc(((am+1)+1)*sizeof(int),&ci);CHKERRQ(ierr); 60d50806bdSBarry Smith ci[0] = 0; 61d50806bdSBarry Smith 6294e3eecaSKris Buschelman ierr = PetscMalloc((2*bn+1)*sizeof(int),&denserow);CHKERRQ(ierr); 6394e3eecaSKris Buschelman ierr = PetscMemzero(denserow,(2*bn+1)*sizeof(int));CHKERRQ(ierr); 6494e3eecaSKris Buschelman sparserow = denserow + bn; 65d50806bdSBarry Smith 66d50806bdSBarry Smith /* Initial FreeSpace size is nnz(B)=bi[bm] */ 67d50806bdSBarry Smith ierr = GetMoreSpace(bi[bm],&free_space);CHKERRQ(ierr); 68d50806bdSBarry Smith current_space = free_space; 69d50806bdSBarry Smith 7094e3eecaSKris Buschelman /* Determine symbolic info for each row of the product: */ 71d50806bdSBarry Smith for (i=0;i<am;i++) { 72d50806bdSBarry Smith anzi = ai[i+1] - ai[i]; 73d50806bdSBarry Smith cnzi = 0; 74d50806bdSBarry Smith for (j=0;j<anzi;j++) { 75d50806bdSBarry Smith brow = *aj++; 76d50806bdSBarry Smith bnzj = bi[brow+1] - bi[brow]; 77d50806bdSBarry Smith bjj = bj + bi[brow]; 78d50806bdSBarry Smith for (k=0;k<bnzj;k++) { 79d50806bdSBarry Smith /* If column is not marked, mark it in compressed and uncompressed locations. */ 80d50806bdSBarry Smith /* For simplicity, leave uncompressed row unsorted until finished with row, */ 81d50806bdSBarry Smith /* and increment nonzero count for this row. */ 8294e3eecaSKris Buschelman if (!denserow[bjj[k]]) { 8394e3eecaSKris Buschelman denserow[bjj[k]] = -1; 8494e3eecaSKris Buschelman sparserow[cnzi++] = bjj[k]; 85d50806bdSBarry Smith } 86d50806bdSBarry Smith } 87d50806bdSBarry Smith } 88d50806bdSBarry Smith 8994e3eecaSKris Buschelman /* sort sparserow */ 9094e3eecaSKris Buschelman ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 91d50806bdSBarry Smith 92d50806bdSBarry Smith /* If free space is not available, make more free space */ 93d50806bdSBarry Smith /* Double the amount of total space in the list */ 94d50806bdSBarry Smith if (current_space->local_remaining<cnzi) { 95d50806bdSBarry Smith ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 96d50806bdSBarry Smith } 97d50806bdSBarry Smith 9894e3eecaSKris Buschelman /* Copy data into free space, and zero out denserow */ 9994e3eecaSKris Buschelman ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 100d50806bdSBarry Smith current_space->array += cnzi; 101d50806bdSBarry Smith current_space->local_used += cnzi; 102d50806bdSBarry Smith current_space->local_remaining -= cnzi; 103d50806bdSBarry Smith for (j=0;j<cnzi;j++) { 10494e3eecaSKris Buschelman denserow[sparserow[j]] = 0; 105d50806bdSBarry Smith } 106d50806bdSBarry Smith ci[i+1] = ci[i] + cnzi; 107d50806bdSBarry Smith } 108d50806bdSBarry Smith 10994e3eecaSKris Buschelman /* Column indices are in the list of free space */ 110d50806bdSBarry Smith /* Allocate space for cj, initialize cj, and */ 111d50806bdSBarry Smith /* destroy list of free space and other temporary array(s) */ 112d50806bdSBarry Smith ierr = PetscMalloc((ci[am]+1)*sizeof(int),&cj);CHKERRQ(ierr); 113*70f19b1fSKris Buschelman ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 11494e3eecaSKris Buschelman ierr = PetscFree(denserow);CHKERRQ(ierr); 115d50806bdSBarry Smith 116d50806bdSBarry Smith /* Allocate space for ca */ 117d50806bdSBarry Smith ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 118d50806bdSBarry Smith ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 119d50806bdSBarry Smith 120d50806bdSBarry Smith /* put together the new matrix */ 121d50806bdSBarry Smith ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr); 122d50806bdSBarry Smith 123d50806bdSBarry Smith /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 124d50806bdSBarry Smith /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 125d50806bdSBarry Smith c = (Mat_SeqAIJ *)((*C)->data); 126d50806bdSBarry Smith c->freedata = PETSC_TRUE; 127d50806bdSBarry Smith c->nonew = 0; 128d50806bdSBarry Smith 129d50806bdSBarry Smith ierr = PetscLogEventEnd(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr); 130d50806bdSBarry Smith PetscFunctionReturn(0); 131d50806bdSBarry Smith } 132d50806bdSBarry Smith 133d50806bdSBarry Smith /* 13494e3eecaSKris Buschelman MatMatMult_Numeric_SeqAIJ_SeqAIJ - Forms the numeric product of two SeqAIJ matrices 135d50806bdSBarry Smith C=A*B; 13694e3eecaSKris Buschelman Note: C must have been created by calling MatMatMult_Symbolic_SeqAIJ_SeqAIJ. 137d50806bdSBarry Smith */ 138d50806bdSBarry Smith #undef __FUNCT__ 13994e3eecaSKris Buschelman #define __FUNCT__ "MatMatMult_Numeric_SeqAIJ_SeqAIJ" 14094e3eecaSKris Buschelman int MatMatMult_Numeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 141d50806bdSBarry Smith { 14294e3eecaSKris Buschelman int ierr,flops=0; 143d50806bdSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 144d50806bdSBarry Smith Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 145d50806bdSBarry Smith Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 146d50806bdSBarry Smith int aishift=a->indexshift,bishift=b->indexshift,cishift=c->indexshift; 147d50806bdSBarry Smith int *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 148d50806bdSBarry Smith int an=A->N,am=A->M,bn=B->N,bm=B->M,cn=C->N,cm=C->M; 14994e3eecaSKris Buschelman int i,j,k,anzi,bnzi,cnzi,brow; 150d50806bdSBarry Smith MatScalar *aa=a->a,*ba=b->a,*baj,*ca=c->a,*temp; 151d50806bdSBarry Smith 152d50806bdSBarry Smith PetscFunctionBegin; 153d50806bdSBarry Smith 154d50806bdSBarry Smith /* This error checking should be unnecessary if the symbolic was performed */ 155d50806bdSBarry Smith if (aishift || bishift || cishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported."); 156d50806bdSBarry Smith if (am!=cm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",am,cm); 157d50806bdSBarry Smith if (an!=bm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",an,bm); 158d50806bdSBarry Smith if (bn!=cn) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",bn,cn); 159d50806bdSBarry Smith 16094e3eecaSKris Buschelman /* Set up timers */ 161d50806bdSBarry Smith if (!logkey_matmatmult_numeric) { 162d50806bdSBarry Smith ierr = PetscLogEventRegister(&logkey_matmatmult_numeric,"MatMatMult_Numeric",MAT_COOKIE);CHKERRQ(ierr); 163d50806bdSBarry Smith } 164d50806bdSBarry Smith ierr = PetscLogEventBegin(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr); 16594e3eecaSKris Buschelman 166d50806bdSBarry Smith /* Allocate temp accumulation space to avoid searching for nonzero columns in C */ 167d50806bdSBarry Smith ierr = PetscMalloc((cn+1)*sizeof(MatScalar),&temp);CHKERRQ(ierr); 168d50806bdSBarry Smith ierr = PetscMemzero(temp,cn*sizeof(MatScalar));CHKERRQ(ierr); 169d50806bdSBarry Smith /* Traverse A row-wise. */ 170d50806bdSBarry Smith /* Build the ith row in C by summing over nonzero columns in A, */ 171d50806bdSBarry Smith /* the rows of B corresponding to nonzeros of A. */ 172d50806bdSBarry Smith for (i=0;i<am;i++) { 173d50806bdSBarry Smith anzi = ai[i+1] - ai[i]; 174d50806bdSBarry Smith for (j=0;j<anzi;j++) { 175d50806bdSBarry Smith brow = *aj++; 176d50806bdSBarry Smith bnzi = bi[brow+1] - bi[brow]; 177d50806bdSBarry Smith bjj = bj + bi[brow]; 178d50806bdSBarry Smith baj = ba + bi[brow]; 179d50806bdSBarry Smith for (k=0;k<bnzi;k++) { 180d50806bdSBarry Smith temp[bjj[k]] += (*aa)*baj[k]; 181d50806bdSBarry Smith } 182d50806bdSBarry Smith flops += 2*bnzi; 183d50806bdSBarry Smith aa++; 184d50806bdSBarry Smith } 185d50806bdSBarry Smith /* Store row back into C, and re-zero temp */ 186d50806bdSBarry Smith cnzi = ci[i+1] - ci[i]; 187d50806bdSBarry Smith for (j=0;j<cnzi;j++) { 188d50806bdSBarry Smith ca[j] = temp[cj[j]]; 189d50806bdSBarry Smith temp[cj[j]] = 0.0; 190d50806bdSBarry Smith } 191d50806bdSBarry Smith ca += cnzi; 192d50806bdSBarry Smith cj += cnzi; 193d50806bdSBarry Smith } 194716bacf3SKris Buschelman ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 195716bacf3SKris Buschelman ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 196716bacf3SKris Buschelman 197d50806bdSBarry Smith /* Free temp */ 198d50806bdSBarry Smith ierr = PetscFree(temp);CHKERRQ(ierr); 199d50806bdSBarry Smith ierr = PetscLogFlops(flops);CHKERRQ(ierr); 200d50806bdSBarry Smith ierr = PetscLogEventEnd(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr); 201d50806bdSBarry Smith PetscFunctionReturn(0); 202d50806bdSBarry Smith } 203d50806bdSBarry Smith 204d50806bdSBarry Smith #undef __FUNCT__ 205d50806bdSBarry Smith #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ" 206d50806bdSBarry Smith int MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat *C) { 207d50806bdSBarry Smith int ierr; 208d50806bdSBarry Smith 209d50806bdSBarry Smith PetscFunctionBegin; 2102216b3a4SKris Buschelman if (!logkey_matmatmult) { 2112216b3a4SKris Buschelman ierr = PetscLogEventRegister(&logkey_matmatmult,"MatMatMult",MAT_COOKIE);CHKERRQ(ierr); 2122216b3a4SKris Buschelman } 2132216b3a4SKris Buschelman ierr = PetscLogEventBegin(logkey_matmatmult,A,B,0,0);CHKERRQ(ierr); 21494e3eecaSKris Buschelman ierr = MatMatMult_Symbolic_SeqAIJ_SeqAIJ(A,B,C);CHKERRQ(ierr); 21594e3eecaSKris Buschelman ierr = MatMatMult_Numeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 2162216b3a4SKris Buschelman ierr = PetscLogEventEnd(logkey_matmatmult,A,B,0,0);CHKERRQ(ierr); 217d50806bdSBarry Smith PetscFunctionReturn(0); 218d50806bdSBarry Smith } 21994e3eecaSKris Buschelman /* 220*70f19b1fSKris Buschelman MatApplyPtAP_Symbolic_SeqAIJ_SeqAIJ - Forms the symbolic product of two SeqAIJ matrices 22194e3eecaSKris Buschelman C = P^T * A * P; 22294e3eecaSKris Buschelman 22394e3eecaSKris Buschelman Note: C is assumed to be uncreated. 22494e3eecaSKris Buschelman If this is not the case, Destroy C before calling this routine. 22594e3eecaSKris Buschelman */ 22694e3eecaSKris Buschelman #undef __FUNCT__ 227*70f19b1fSKris Buschelman #define __FUNCT__ "MatApplyPtAP_Symbolic_SeqAIJ_SeqAIJ" 228*70f19b1fSKris Buschelman int MatApplyPtAP_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C) { 229d50806bdSBarry Smith int ierr; 230d50806bdSBarry Smith FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 231d50806bdSBarry Smith Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 232d50806bdSBarry Smith int aishift=a->indexshift,pishift=p->indexshift; 23394e3eecaSKris Buschelman int *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 23494e3eecaSKris Buschelman int *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 235d50806bdSBarry Smith int an=A->N,am=A->M,pn=P->N,pm=P->M; 236d50806bdSBarry Smith int i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 237d50806bdSBarry Smith MatScalar *ca; 238d50806bdSBarry Smith 239d50806bdSBarry Smith PetscFunctionBegin; 240d50806bdSBarry Smith 241d50806bdSBarry Smith /* some error checking which could be moved into interface layer */ 242d50806bdSBarry Smith if (aishift || pishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported."); 243d50806bdSBarry Smith if (pm!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pm,an); 244d50806bdSBarry Smith if (am!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",am, an); 245d50806bdSBarry Smith 24694e3eecaSKris Buschelman /* Set up timers */ 247d50806bdSBarry Smith if (!logkey_matapplyptap_symbolic) { 248d50806bdSBarry Smith ierr = PetscLogEventRegister(&logkey_matapplyptap_symbolic,"MatApplyPtAP_Symbolic",MAT_COOKIE);CHKERRQ(ierr); 249d50806bdSBarry Smith } 250d50806bdSBarry Smith ierr = PetscLogEventBegin(logkey_matapplyptap_symbolic,A,P,0,0);CHKERRQ(ierr); 251d50806bdSBarry Smith 25294e3eecaSKris Buschelman /* Get ij structure of P^T */ 25394e3eecaSKris Buschelman ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 25494e3eecaSKris Buschelman ptJ=ptj; 255d50806bdSBarry Smith 256d50806bdSBarry Smith /* Allocate ci array, arrays for fill computation and */ 257d50806bdSBarry Smith /* free space for accumulating nonzero column info */ 258d50806bdSBarry Smith ierr = PetscMalloc(((pn+1)*1)*sizeof(int),&ci);CHKERRQ(ierr); 259d50806bdSBarry Smith ci[0] = 0; 260d50806bdSBarry Smith 26194e3eecaSKris Buschelman ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr); 26294e3eecaSKris Buschelman ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr); 26394e3eecaSKris Buschelman ptasparserow = ptadenserow + an; 26494e3eecaSKris Buschelman denserow = ptasparserow + an; 26594e3eecaSKris Buschelman sparserow = denserow + pn; 266d50806bdSBarry Smith 267d50806bdSBarry Smith /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 26894e3eecaSKris Buschelman /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 269716bacf3SKris Buschelman ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 270d50806bdSBarry Smith current_space = free_space; 271d50806bdSBarry Smith 27294e3eecaSKris Buschelman /* Determine symbolic info for each row of C: */ 273d50806bdSBarry Smith for (i=0;i<pn;i++) { 274d50806bdSBarry Smith ptnzi = pti[i+1] - pti[i]; 275d50806bdSBarry Smith ptanzi = 0; 27694e3eecaSKris Buschelman /* Determine symbolic row of PtA: */ 277d50806bdSBarry Smith for (j=0;j<ptnzi;j++) { 27894e3eecaSKris Buschelman arow = *ptJ++; 279d50806bdSBarry Smith anzj = ai[arow+1] - ai[arow]; 280d50806bdSBarry Smith ajj = aj + ai[arow]; 281d50806bdSBarry Smith for (k=0;k<anzj;k++) { 28294e3eecaSKris Buschelman if (!ptadenserow[ajj[k]]) { 28394e3eecaSKris Buschelman ptadenserow[ajj[k]] = -1; 28494e3eecaSKris Buschelman ptasparserow[ptanzi++] = ajj[k]; 285d50806bdSBarry Smith } 286d50806bdSBarry Smith } 287d50806bdSBarry Smith } 28894e3eecaSKris Buschelman /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 28994e3eecaSKris Buschelman ptaj = ptasparserow; 290d50806bdSBarry Smith cnzi = 0; 291d50806bdSBarry Smith for (j=0;j<ptanzi;j++) { 292d50806bdSBarry Smith prow = *ptaj++; 293d50806bdSBarry Smith pnzj = pi[prow+1] - pi[prow]; 294d50806bdSBarry Smith pjj = pj + pi[prow]; 295d50806bdSBarry Smith for (k=0;k<pnzj;k++) { 29694e3eecaSKris Buschelman if (!denserow[pjj[k]]) { 29794e3eecaSKris Buschelman denserow[pjj[k]] = -1; 29894e3eecaSKris Buschelman sparserow[cnzi++] = pjj[k]; 299d50806bdSBarry Smith } 300d50806bdSBarry Smith } 301d50806bdSBarry Smith } 302d50806bdSBarry Smith 30394e3eecaSKris Buschelman /* sort sparserow */ 30494e3eecaSKris Buschelman ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 305d50806bdSBarry Smith 306d50806bdSBarry Smith /* If free space is not available, make more free space */ 307d50806bdSBarry Smith /* Double the amount of total space in the list */ 308d50806bdSBarry Smith if (current_space->local_remaining<cnzi) { 309d50806bdSBarry Smith ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 310d50806bdSBarry Smith } 311d50806bdSBarry Smith 31294e3eecaSKris Buschelman /* Copy data into free space, and zero out denserows */ 31394e3eecaSKris Buschelman ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 314d50806bdSBarry Smith current_space->array += cnzi; 315d50806bdSBarry Smith current_space->local_used += cnzi; 316d50806bdSBarry Smith current_space->local_remaining -= cnzi; 317d50806bdSBarry Smith 318d50806bdSBarry Smith for (j=0;j<ptanzi;j++) { 31994e3eecaSKris Buschelman ptadenserow[ptasparserow[j]] = 0; 320d50806bdSBarry Smith } 321d50806bdSBarry Smith for (j=0;j<cnzi;j++) { 32294e3eecaSKris Buschelman denserow[sparserow[j]] = 0; 323d50806bdSBarry Smith } 324d50806bdSBarry Smith /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 325d50806bdSBarry Smith /* For now, we will recompute what is needed. */ 326d50806bdSBarry Smith ci[i+1] = ci[i] + cnzi; 327d50806bdSBarry Smith } 328d50806bdSBarry Smith /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 329d50806bdSBarry Smith /* Allocate space for cj, initialize cj, and */ 330d50806bdSBarry Smith /* destroy list of free space and other temporary array(s) */ 331d50806bdSBarry Smith ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr); 332*70f19b1fSKris Buschelman ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 33394e3eecaSKris Buschelman ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 334d50806bdSBarry Smith 335d50806bdSBarry Smith /* Allocate space for ca */ 336d50806bdSBarry Smith ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 337d50806bdSBarry Smith ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 338d50806bdSBarry Smith 339d50806bdSBarry Smith /* put together the new matrix */ 340d50806bdSBarry Smith ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 341d50806bdSBarry Smith 342d50806bdSBarry Smith /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 343d50806bdSBarry Smith /* Since these are PETSc arrays, change flags to free them as necessary. */ 344d50806bdSBarry Smith c = (Mat_SeqAIJ *)((*C)->data); 345d50806bdSBarry Smith c->freedata = PETSC_TRUE; 346d50806bdSBarry Smith c->nonew = 0; 347d50806bdSBarry Smith 348d50806bdSBarry Smith /* Clean up. */ 349*70f19b1fSKris Buschelman ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 350d50806bdSBarry Smith 351d50806bdSBarry Smith ierr = PetscLogEventEnd(logkey_matapplyptap_symbolic,A,P,0,0);CHKERRQ(ierr); 352d50806bdSBarry Smith PetscFunctionReturn(0); 353d50806bdSBarry Smith } 354d50806bdSBarry Smith 35594e3eecaSKris Buschelman /* 356*70f19b1fSKris Buschelman MatApplyPtAP_Numeric_SeqAIJ_SeqAIJ - Forms the numeric product of two SeqAIJ matrices 35794e3eecaSKris Buschelman C = P^T * A * P; 35894e3eecaSKris Buschelman Note: C must have been created by calling MatApplyPtAP_Symbolic_SeqAIJ. 35994e3eecaSKris Buschelman */ 360d50806bdSBarry Smith #undef __FUNCT__ 361*70f19b1fSKris Buschelman #define __FUNCT__ "MatApplyPtAP_Numeric_SeqAIJ_SeqAIJ" 362*70f19b1fSKris Buschelman int MatApplyPtAP_Numeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) { 36394e3eecaSKris Buschelman int ierr,flops=0; 364d50806bdSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 365d50806bdSBarry Smith Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 366d50806bdSBarry Smith Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 367d50806bdSBarry Smith int aishift=a->indexshift,pishift=p->indexshift,cishift=c->indexshift; 368716bacf3SKris Buschelman int *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 369716bacf3SKris Buschelman int *ci=c->i,*cj=c->j,*cjj; 370d50806bdSBarry Smith int an=A->N,am=A->M,pn=P->N,pm=P->M,cn=C->N,cm=C->M; 37194e3eecaSKris Buschelman int i,j,k,anzi,pnzi,apnzj,nextap,pnzj,cnzj,prow,crow; 372d50806bdSBarry Smith MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 373d50806bdSBarry Smith 374d50806bdSBarry Smith PetscFunctionBegin; 375d50806bdSBarry Smith 376d50806bdSBarry Smith /* This error checking should be unnecessary if the symbolic was performed */ 377d50806bdSBarry Smith if (aishift || pishift || cishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported."); 378d50806bdSBarry Smith if (pn!=cm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pn,cm); 379d50806bdSBarry Smith if (pm!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pm,an); 380d50806bdSBarry Smith if (am!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",am, an); 381d50806bdSBarry Smith if (pn!=cn) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pn, cn); 382d50806bdSBarry Smith 38394e3eecaSKris Buschelman /* Set up timers */ 384d50806bdSBarry Smith if (!logkey_matapplyptap_numeric) { 385d50806bdSBarry Smith ierr = PetscLogEventRegister(&logkey_matapplyptap_numeric,"MatApplyPtAP_Numeric",MAT_COOKIE);CHKERRQ(ierr); 386d50806bdSBarry Smith } 387d50806bdSBarry Smith ierr = PetscLogEventBegin(logkey_matapplyptap_numeric,A,P,C,0);CHKERRQ(ierr); 388d50806bdSBarry Smith 389716bacf3SKris Buschelman ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(int)),&apa);CHKERRQ(ierr); 390716bacf3SKris Buschelman ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr); 391d50806bdSBarry Smith ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 392d50806bdSBarry Smith 393716bacf3SKris Buschelman apj = (int *)(apa + cn); 394716bacf3SKris Buschelman apjdense = apj + cn; 395716bacf3SKris Buschelman 396d50806bdSBarry Smith for (i=0;i<am;i++) { 397d50806bdSBarry Smith /* Form sparse row of A*P */ 398d50806bdSBarry Smith anzi = ai[i+1] - ai[i]; 399d50806bdSBarry Smith apnzj = 0; 400d50806bdSBarry Smith for (j=0;j<anzi;j++) { 401d50806bdSBarry Smith prow = *aj++; 402d50806bdSBarry Smith pnzj = pi[prow+1] - pi[prow]; 403d50806bdSBarry Smith pjj = pj + pi[prow]; 404d50806bdSBarry Smith paj = pa + pi[prow]; 405d50806bdSBarry Smith for (k=0;k<pnzj;k++) { 406716bacf3SKris Buschelman if (!apjdense[pjj[k]]) { 407716bacf3SKris Buschelman apjdense[pjj[k]] = -1; 408d50806bdSBarry Smith apj[apnzj++] = pjj[k]; 409d50806bdSBarry Smith } 410d50806bdSBarry Smith apa[pjj[k]] += (*aa)*paj[k]; 411d50806bdSBarry Smith } 412d50806bdSBarry Smith flops += 2*pnzj; 413d50806bdSBarry Smith aa++; 414d50806bdSBarry Smith } 415d50806bdSBarry Smith 416d50806bdSBarry Smith /* Sort the j index array for quick sparse axpy. */ 417d50806bdSBarry Smith ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 418d50806bdSBarry Smith 419d50806bdSBarry Smith /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 420d50806bdSBarry Smith pnzi = pi[i+1] - pi[i]; 421d50806bdSBarry Smith for (j=0;j<pnzi;j++) { 42294e3eecaSKris Buschelman nextap = 0; 423d50806bdSBarry Smith crow = *pJ++; 424d50806bdSBarry Smith cnzj = ci[crow+1] - ci[crow]; 425d50806bdSBarry Smith cjj = cj + ci[crow]; 426d50806bdSBarry Smith caj = ca + ci[crow]; 42794e3eecaSKris Buschelman /* Perform sparse axpy operation. Note cjj includes apj. */ 428716bacf3SKris Buschelman for (k=0;nextap<apnzj;k++) { 429d50806bdSBarry Smith if (cjj[k]==apj[nextap]) { 430d50806bdSBarry Smith caj[k] += (*pA)*apa[apj[nextap++]]; 431d50806bdSBarry Smith } 432d50806bdSBarry Smith } 433d50806bdSBarry Smith flops += 2*apnzj; 434d50806bdSBarry Smith pA++; 435d50806bdSBarry Smith } 436d50806bdSBarry Smith 437716bacf3SKris Buschelman /* Zero the current row info for A*P */ 438d50806bdSBarry Smith for (j=0;j<apnzj;j++) { 439d50806bdSBarry Smith apa[apj[j]] = 0.; 440716bacf3SKris Buschelman apjdense[apj[j]] = 0; 441d50806bdSBarry Smith } 442d50806bdSBarry Smith } 4432216b3a4SKris Buschelman 4442216b3a4SKris Buschelman /* Assemble the final matrix and clean up */ 4452216b3a4SKris Buschelman ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4462216b3a4SKris Buschelman ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 447d50806bdSBarry Smith ierr = PetscFree(apa);CHKERRQ(ierr); 448d50806bdSBarry Smith ierr = PetscLogFlops(flops);CHKERRQ(ierr); 449d50806bdSBarry Smith ierr = PetscLogEventEnd(logkey_matapplyptap_numeric,A,P,C,0);CHKERRQ(ierr); 4502216b3a4SKris Buschelman 451d50806bdSBarry Smith PetscFunctionReturn(0); 452d50806bdSBarry Smith } 453d50806bdSBarry Smith 45494e3eecaSKris Buschelman 455d50806bdSBarry Smith #undef __FUNCT__ 456*70f19b1fSKris Buschelman #define __FUNCT__ "MatApplyPtAP_SeqAIJ_SeqAIJ" 457*70f19b1fSKris Buschelman int MatApplyPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C) { 458d50806bdSBarry Smith int ierr; 459d50806bdSBarry Smith 460d50806bdSBarry Smith PetscFunctionBegin; 461716bacf3SKris Buschelman if (!logkey_matapplyptap) { 462716bacf3SKris Buschelman ierr = PetscLogEventRegister(&logkey_matapplyptap,"MatApplyPtAP",MAT_COOKIE);CHKERRQ(ierr); 463716bacf3SKris Buschelman } 4642216b3a4SKris Buschelman ierr = PetscLogEventBegin(logkey_matapplyptap,A,P,0,0);CHKERRQ(ierr); 46594e3eecaSKris Buschelman 466*70f19b1fSKris Buschelman ierr = MatApplyPtAP_Symbolic_SeqAIJ_SeqAIJ(A,P,C);CHKERRQ(ierr); 467*70f19b1fSKris Buschelman ierr = MatApplyPtAP_Numeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr); 46894e3eecaSKris Buschelman 4692216b3a4SKris Buschelman ierr = PetscLogEventEnd(logkey_matapplyptap,A,P,0,0);CHKERRQ(ierr); 470d50806bdSBarry Smith PetscFunctionReturn(0); 471d50806bdSBarry Smith } 47294e3eecaSKris Buschelman 47394e3eecaSKris Buschelman /* 474*70f19b1fSKris Buschelman MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ - Forms the symbolic product of two SeqAIJ matrices 47594e3eecaSKris Buschelman C = P * A * P^T; 47694e3eecaSKris Buschelman 47794e3eecaSKris Buschelman Note: C is assumed to be uncreated. 47894e3eecaSKris Buschelman If this is not the case, Destroy C before calling this routine. 47994e3eecaSKris Buschelman */ 48094e3eecaSKris Buschelman #undef __FUNCT__ 481*70f19b1fSKris Buschelman #define __FUNCT__ "MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ" 482*70f19b1fSKris Buschelman int MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C) { 48394e3eecaSKris Buschelman /* Note: This code is virtually identical to that of MatApplyPtAP_SeqAIJ_Symbolic */ 48494e3eecaSKris Buschelman /* and MatMatMult_SeqAIJ_SeqAIJ_Symbolic. Perhaps they could be merged nicely. */ 48594e3eecaSKris Buschelman int ierr; 48694e3eecaSKris Buschelman FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 48794e3eecaSKris Buschelman Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 48894e3eecaSKris Buschelman int aishift=a->indexshift,pishift=p->indexshift; 48994e3eecaSKris Buschelman int *ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pti,*ptj,*ptjj; 49094e3eecaSKris Buschelman int *ci,*cj,*paj,*padenserow,*pasparserow,*denserow,*sparserow; 49194e3eecaSKris Buschelman int an=A->N,am=A->M,pn=P->N,pm=P->M; 49294e3eecaSKris Buschelman int i,j,k,pnzi,arow,anzj,panzi,ptrow,ptnzj,cnzi; 49394e3eecaSKris Buschelman MatScalar *ca; 49494e3eecaSKris Buschelman 49594e3eecaSKris Buschelman PetscFunctionBegin; 49694e3eecaSKris Buschelman 49794e3eecaSKris Buschelman /* some error checking which could be moved into interface layer */ 49894e3eecaSKris Buschelman if (aishift || pishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported."); 49994e3eecaSKris Buschelman if (pn!=am) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pn,am); 50094e3eecaSKris Buschelman if (am!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",am, an); 50194e3eecaSKris Buschelman 50294e3eecaSKris Buschelman /* Set up timers */ 50394e3eecaSKris Buschelman if (!logkey_matapplypapt_symbolic) { 50494e3eecaSKris Buschelman ierr = PetscLogEventRegister(&logkey_matapplypapt_symbolic,"MatApplyPAPt_Symbolic",MAT_COOKIE);CHKERRQ(ierr); 50594e3eecaSKris Buschelman } 50694e3eecaSKris Buschelman ierr = PetscLogEventBegin(logkey_matapplypapt_symbolic,A,P,0,0);CHKERRQ(ierr); 50794e3eecaSKris Buschelman 50894e3eecaSKris Buschelman /* Create ij structure of P^T */ 50994e3eecaSKris Buschelman ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 51094e3eecaSKris Buschelman 51194e3eecaSKris Buschelman /* Allocate ci array, arrays for fill computation and */ 51294e3eecaSKris Buschelman /* free space for accumulating nonzero column info */ 51394e3eecaSKris Buschelman ierr = PetscMalloc(((pm+1)*1)*sizeof(int),&ci);CHKERRQ(ierr); 51494e3eecaSKris Buschelman ci[0] = 0; 51594e3eecaSKris Buschelman 51694e3eecaSKris Buschelman ierr = PetscMalloc((2*an+2*pm+1)*sizeof(int),&padenserow);CHKERRQ(ierr); 51794e3eecaSKris Buschelman ierr = PetscMemzero(padenserow,(2*an+2*pm+1)*sizeof(int));CHKERRQ(ierr); 51894e3eecaSKris Buschelman pasparserow = padenserow + an; 51994e3eecaSKris Buschelman denserow = pasparserow + an; 52094e3eecaSKris Buschelman sparserow = denserow + pm; 52194e3eecaSKris Buschelman 52294e3eecaSKris Buschelman /* Set initial free space to be nnz(A) scaled by aspect ratio of Pt. */ 52394e3eecaSKris Buschelman /* This should be reasonable if sparsity of PAPt is similar to that of A. */ 52494e3eecaSKris Buschelman ierr = GetMoreSpace((ai[am]/pn)*pm,&free_space); 52594e3eecaSKris Buschelman current_space = free_space; 52694e3eecaSKris Buschelman 52794e3eecaSKris Buschelman /* Determine fill for each row of C: */ 52894e3eecaSKris Buschelman for (i=0;i<pm;i++) { 52994e3eecaSKris Buschelman pnzi = pi[i+1] - pi[i]; 53094e3eecaSKris Buschelman panzi = 0; 53194e3eecaSKris Buschelman /* Get symbolic sparse row of PA: */ 53294e3eecaSKris Buschelman for (j=0;j<pnzi;j++) { 53394e3eecaSKris Buschelman arow = *pj++; 53494e3eecaSKris Buschelman anzj = ai[arow+1] - ai[arow]; 53594e3eecaSKris Buschelman ajj = aj + ai[arow]; 53694e3eecaSKris Buschelman for (k=0;k<anzj;k++) { 53794e3eecaSKris Buschelman if (!padenserow[ajj[k]]) { 53894e3eecaSKris Buschelman padenserow[ajj[k]] = -1; 53994e3eecaSKris Buschelman pasparserow[panzi++] = ajj[k]; 54094e3eecaSKris Buschelman } 54194e3eecaSKris Buschelman } 54294e3eecaSKris Buschelman } 54394e3eecaSKris Buschelman /* Using symbolic row of PA, determine symbolic row of C: */ 54494e3eecaSKris Buschelman paj = pasparserow; 54594e3eecaSKris Buschelman cnzi = 0; 54694e3eecaSKris Buschelman for (j=0;j<panzi;j++) { 54794e3eecaSKris Buschelman ptrow = *paj++; 54894e3eecaSKris Buschelman ptnzj = pti[ptrow+1] - pti[ptrow]; 54994e3eecaSKris Buschelman ptjj = ptj + pti[ptrow]; 55094e3eecaSKris Buschelman for (k=0;k<ptnzj;k++) { 55194e3eecaSKris Buschelman if (!denserow[ptjj[k]]) { 55294e3eecaSKris Buschelman denserow[ptjj[k]] = -1; 55394e3eecaSKris Buschelman sparserow[cnzi++] = ptjj[k]; 55494e3eecaSKris Buschelman } 55594e3eecaSKris Buschelman } 55694e3eecaSKris Buschelman } 55794e3eecaSKris Buschelman 55894e3eecaSKris Buschelman /* sort sparse representation */ 55994e3eecaSKris Buschelman ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 56094e3eecaSKris Buschelman 56194e3eecaSKris Buschelman /* If free space is not available, make more free space */ 56294e3eecaSKris Buschelman /* Double the amount of total space in the list */ 56394e3eecaSKris Buschelman if (current_space->local_remaining<cnzi) { 56494e3eecaSKris Buschelman ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 56594e3eecaSKris Buschelman } 56694e3eecaSKris Buschelman 56794e3eecaSKris Buschelman /* Copy data into free space, and zero out dense row */ 56894e3eecaSKris Buschelman ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 56994e3eecaSKris Buschelman current_space->array += cnzi; 57094e3eecaSKris Buschelman current_space->local_used += cnzi; 57194e3eecaSKris Buschelman current_space->local_remaining -= cnzi; 57294e3eecaSKris Buschelman 57394e3eecaSKris Buschelman for (j=0;j<panzi;j++) { 57494e3eecaSKris Buschelman padenserow[pasparserow[j]] = 0; 57594e3eecaSKris Buschelman } 57694e3eecaSKris Buschelman for (j=0;j<cnzi;j++) { 57794e3eecaSKris Buschelman denserow[sparserow[j]] = 0; 57894e3eecaSKris Buschelman } 57994e3eecaSKris Buschelman ci[i+1] = ci[i] + cnzi; 58094e3eecaSKris Buschelman } 58194e3eecaSKris Buschelman /* column indices are in the list of free space */ 58294e3eecaSKris Buschelman /* Allocate space for cj, initialize cj, and */ 58394e3eecaSKris Buschelman /* destroy list of free space and other temporary array(s) */ 58494e3eecaSKris Buschelman ierr = PetscMalloc((ci[pm]+1)*sizeof(int),&cj);CHKERRQ(ierr); 585*70f19b1fSKris Buschelman ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 58694e3eecaSKris Buschelman ierr = PetscFree(padenserow);CHKERRQ(ierr); 58794e3eecaSKris Buschelman 58894e3eecaSKris Buschelman /* Allocate space for ca */ 58994e3eecaSKris Buschelman ierr = PetscMalloc((ci[pm]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 59094e3eecaSKris Buschelman ierr = PetscMemzero(ca,(ci[pm]+1)*sizeof(MatScalar));CHKERRQ(ierr); 59194e3eecaSKris Buschelman 59294e3eecaSKris Buschelman /* put together the new matrix */ 59394e3eecaSKris Buschelman ierr = MatCreateSeqAIJWithArrays(A->comm,pm,pm,ci,cj,ca,C);CHKERRQ(ierr); 59494e3eecaSKris Buschelman 59594e3eecaSKris Buschelman /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 59694e3eecaSKris Buschelman /* Since these are PETSc arrays, change flags to free them as necessary. */ 59794e3eecaSKris Buschelman c = (Mat_SeqAIJ *)((*C)->data); 59894e3eecaSKris Buschelman c->freedata = PETSC_TRUE; 59994e3eecaSKris Buschelman c->nonew = 0; 60094e3eecaSKris Buschelman 60194e3eecaSKris Buschelman /* Clean up. */ 602*70f19b1fSKris Buschelman ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 60394e3eecaSKris Buschelman 60494e3eecaSKris Buschelman ierr = PetscLogEventEnd(logkey_matapplypapt_symbolic,A,P,0,0);CHKERRQ(ierr); 60594e3eecaSKris Buschelman PetscFunctionReturn(0); 60694e3eecaSKris Buschelman } 60794e3eecaSKris Buschelman 60894e3eecaSKris Buschelman /* 60994e3eecaSKris Buschelman MatApplyPAPt_Numeric_SeqAIJ - Forms the numeric product of two SeqAIJ matrices 61094e3eecaSKris Buschelman C = P * A * P^T; 61194e3eecaSKris Buschelman Note: C must have been created by calling MatApplyPAPt_Symbolic_SeqAIJ. 61294e3eecaSKris Buschelman */ 61394e3eecaSKris Buschelman #undef __FUNCT__ 614*70f19b1fSKris Buschelman #define __FUNCT__ "MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ" 615*70f19b1fSKris Buschelman int MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) { 61694e3eecaSKris Buschelman int ierr,flops=0; 61794e3eecaSKris Buschelman Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 61894e3eecaSKris Buschelman Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 61994e3eecaSKris Buschelman Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 62094e3eecaSKris Buschelman int aishift=a->indexshift,pishift=p->indexshift,cishift=c->indexshift; 62194e3eecaSKris Buschelman int *ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj=p->j,*paj,*pajdense,*ptj; 62294e3eecaSKris Buschelman int *ci=c->i,*cj=c->j; 62394e3eecaSKris Buschelman int an=A->N,am=A->M,pn=P->N,pm=P->M,cn=C->N,cm=C->M; 62494e3eecaSKris Buschelman int i,j,k,k1,k2,pnzi,anzj,panzj,arow,ptcol,ptnzj,cnzi; 62594e3eecaSKris Buschelman MatScalar *aa=a->a,*pa=p->a,*pta=p->a,*ptaj,*paa,*aaj,*ca=c->a,sum; 62694e3eecaSKris Buschelman 62794e3eecaSKris Buschelman PetscFunctionBegin; 62894e3eecaSKris Buschelman 62994e3eecaSKris Buschelman /* This error checking should be unnecessary if the symbolic was performed */ 63094e3eecaSKris Buschelman if (aishift || pishift || cishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported."); 63194e3eecaSKris Buschelman if (pm!=cm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pm,cm); 63294e3eecaSKris Buschelman if (pn!=am) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pn,am); 63394e3eecaSKris Buschelman if (am!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",am, an); 63494e3eecaSKris Buschelman if (pm!=cn) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pm, cn); 63594e3eecaSKris Buschelman 63694e3eecaSKris Buschelman /* Set up timers */ 63794e3eecaSKris Buschelman if (!logkey_matapplypapt_numeric) { 63894e3eecaSKris Buschelman ierr = PetscLogEventRegister(&logkey_matapplypapt_numeric,"MatApplyPAPt_Numeric",MAT_COOKIE);CHKERRQ(ierr); 63994e3eecaSKris Buschelman } 64094e3eecaSKris Buschelman ierr = PetscLogEventBegin(logkey_matapplypapt_numeric,A,P,C,0);CHKERRQ(ierr); 64194e3eecaSKris Buschelman 64294e3eecaSKris Buschelman ierr = PetscMalloc(an*(sizeof(MatScalar)+2*sizeof(int)),&paa);CHKERRQ(ierr); 64394e3eecaSKris Buschelman ierr = PetscMemzero(paa,an*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr); 64494e3eecaSKris Buschelman ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 64594e3eecaSKris Buschelman 64694e3eecaSKris Buschelman paj = (int *)(paa + an); 64794e3eecaSKris Buschelman pajdense = paj + an; 64894e3eecaSKris Buschelman 64994e3eecaSKris Buschelman for (i=0;i<pm;i++) { 65094e3eecaSKris Buschelman /* Form sparse row of P*A */ 65194e3eecaSKris Buschelman pnzi = pi[i+1] - pi[i]; 65294e3eecaSKris Buschelman panzj = 0; 65394e3eecaSKris Buschelman for (j=0;j<pnzi;j++) { 65494e3eecaSKris Buschelman arow = *pj++; 65594e3eecaSKris Buschelman anzj = ai[arow+1] - ai[arow]; 65694e3eecaSKris Buschelman ajj = aj + ai[arow]; 65794e3eecaSKris Buschelman aaj = aa + ai[arow]; 65894e3eecaSKris Buschelman for (k=0;k<anzj;k++) { 65994e3eecaSKris Buschelman if (!pajdense[ajj[k]]) { 66094e3eecaSKris Buschelman pajdense[ajj[k]] = -1; 66194e3eecaSKris Buschelman paj[panzj++] = ajj[k]; 66294e3eecaSKris Buschelman } 66394e3eecaSKris Buschelman paa[ajj[k]] += (*pa)*aaj[k]; 66494e3eecaSKris Buschelman } 66594e3eecaSKris Buschelman flops += 2*anzj; 66694e3eecaSKris Buschelman pa++; 66794e3eecaSKris Buschelman } 66894e3eecaSKris Buschelman 66994e3eecaSKris Buschelman /* Sort the j index array for quick sparse axpy. */ 67094e3eecaSKris Buschelman ierr = PetscSortInt(panzj,paj);CHKERRQ(ierr); 67194e3eecaSKris Buschelman 67294e3eecaSKris Buschelman /* Compute P*A*P^T using sparse inner products. */ 67394e3eecaSKris Buschelman /* Take advantage of pre-computed (i,j) of C for locations of non-zeros. */ 67494e3eecaSKris Buschelman cnzi = ci[i+1] - ci[i]; 67594e3eecaSKris Buschelman for (j=0;j<cnzi;j++) { 67694e3eecaSKris Buschelman /* Form sparse inner product of current row of P*A with (*cj++) col of P^T. */ 67794e3eecaSKris Buschelman ptcol = *cj++; 67894e3eecaSKris Buschelman ptnzj = pi[ptcol+1] - pi[ptcol]; 67994e3eecaSKris Buschelman ptj = pjj + pi[ptcol]; 68094e3eecaSKris Buschelman ptaj = pta + pi[ptcol]; 68194e3eecaSKris Buschelman sum = 0.; 68294e3eecaSKris Buschelman k1 = 0; 68394e3eecaSKris Buschelman k2 = 0; 68494e3eecaSKris Buschelman while ((k1<panzj) && (k2<ptnzj)) { 68594e3eecaSKris Buschelman if (paj[k1]==ptj[k2]) { 68694e3eecaSKris Buschelman sum += paa[paj[k1++]]*pta[k2++]; 68794e3eecaSKris Buschelman } else if (paj[k1] < ptj[k2]) { 68894e3eecaSKris Buschelman k1++; 68994e3eecaSKris Buschelman } else /* if (paj[k1] > ptj[k2]) */ { 69094e3eecaSKris Buschelman k2++; 69194e3eecaSKris Buschelman } 69294e3eecaSKris Buschelman } 69394e3eecaSKris Buschelman *ca++ = sum; 69494e3eecaSKris Buschelman } 69594e3eecaSKris Buschelman 69694e3eecaSKris Buschelman /* Zero the current row info for P*A */ 69794e3eecaSKris Buschelman for (j=0;j<panzj;j++) { 69894e3eecaSKris Buschelman paa[paj[j]] = 0.; 69994e3eecaSKris Buschelman pajdense[paj[j]] = 0; 70094e3eecaSKris Buschelman } 70194e3eecaSKris Buschelman } 70294e3eecaSKris Buschelman 70394e3eecaSKris Buschelman ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 70494e3eecaSKris Buschelman ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 70594e3eecaSKris Buschelman ierr = PetscLogFlops(flops);CHKERRQ(ierr); 70694e3eecaSKris Buschelman ierr = PetscLogEventEnd(logkey_matapplypapt_numeric,A,P,C,0);CHKERRQ(ierr); 70794e3eecaSKris Buschelman PetscFunctionReturn(0); 70894e3eecaSKris Buschelman } 70994e3eecaSKris Buschelman 71094e3eecaSKris Buschelman #undef __FUNCT__ 711*70f19b1fSKris Buschelman #define __FUNCT__ "MatApplyPAPt_SeqAIJ_SeqAIJ" 712*70f19b1fSKris Buschelman int MatApplyPAPt_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C) { 71394e3eecaSKris Buschelman int ierr; 71494e3eecaSKris Buschelman 71594e3eecaSKris Buschelman PetscFunctionBegin; 71694e3eecaSKris Buschelman if (!logkey_matapplypapt) { 71794e3eecaSKris Buschelman ierr = PetscLogEventRegister(&logkey_matapplypapt,"MatApplyPAPt",MAT_COOKIE);CHKERRQ(ierr); 71894e3eecaSKris Buschelman } 71994e3eecaSKris Buschelman ierr = PetscLogEventBegin(logkey_matapplypapt,A,P,0,0);CHKERRQ(ierr); 720*70f19b1fSKris Buschelman ierr = MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ(A,P,C);CHKERRQ(ierr); 721*70f19b1fSKris Buschelman ierr = MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr); 72294e3eecaSKris Buschelman ierr = PetscLogEventEnd(logkey_matapplypapt,A,P,0,0);CHKERRQ(ierr); 72394e3eecaSKris Buschelman PetscFunctionReturn(0); 72494e3eecaSKris Buschelman } 725