1d50806bdSBarry Smith /*$Id: matmatmult.c,v 1.15 2001/09/07 20:04:44 buschelm Exp $*/ 2d50806bdSBarry Smith /* 32c9ce0e5SKris Buschelman Defines matrix-matrix product routines for pairs of SeqAIJ matrices 4d50806bdSBarry Smith C = A * B 594e3eecaSKris Buschelman C = P * A * P^T 6d50806bdSBarry Smith */ 7d50806bdSBarry Smith 8d50806bdSBarry Smith #include "src/mat/impls/aij/seq/aij.h" 970f19b1fSKris Buschelman #include "src/mat/utils/freespace.h" 10d50806bdSBarry Smith 112216b3a4SKris Buschelman static int logkey_matmatmult = 0; 122216b3a4SKris Buschelman static int logkey_matmatmult_symbolic = 0; 132216b3a4SKris Buschelman static int logkey_matmatmult_numeric = 0; 142216b3a4SKris Buschelman 1594e3eecaSKris Buschelman static int logkey_matapplypapt = 0; 1694e3eecaSKris Buschelman static int logkey_matapplypapt_symbolic = 0; 1794e3eecaSKris Buschelman static int logkey_matapplypapt_numeric = 0; 1894e3eecaSKris Buschelman 19d50806bdSBarry Smith /* 2094e3eecaSKris Buschelman MatMatMult_Symbolic_SeqAIJ_SeqAIJ - Forms the symbolic product of two SeqAIJ matrices 21d50806bdSBarry Smith C = A * B; 22d50806bdSBarry Smith 2394e3eecaSKris Buschelman Note: C is assumed to be uncreated. 24d50806bdSBarry Smith If this is not the case, Destroy C before calling this routine. 25d50806bdSBarry Smith */ 26*58c24d83SHong Zhang #ifdef USE_INTSORT 27*58c24d83SHong Zhang /* 28*58c24d83SHong Zhang This roution is modified by the one below for better performance. 29*58c24d83SHong Zhang The changes are: 30*58c24d83SHong Zhang -- PetscSortInt() is replace by a linked list 31*58c24d83SHong Zhang -- malloc larger Initial FreeSpace 32*58c24d83SHong Zhang */ 33d50806bdSBarry Smith #undef __FUNCT__ 3494e3eecaSKris Buschelman #define __FUNCT__ "MatMatMult_Symbolic_SeqAIJ_SeqAIJ" 3594e3eecaSKris Buschelman int MatMatMult_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat *C) 36d50806bdSBarry Smith { 37d50806bdSBarry Smith int ierr; 38d50806bdSBarry Smith FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 39d50806bdSBarry Smith Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 40d50806bdSBarry Smith int aishift=a->indexshift,bishift=b->indexshift; 41d50806bdSBarry Smith int *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj; 4294e3eecaSKris Buschelman int *ci,*cj,*denserow,*sparserow; 43d50806bdSBarry Smith int an=A->N,am=A->M,bn=B->N,bm=B->M; 44d50806bdSBarry Smith int i,j,k,anzi,brow,bnzj,cnzi; 45d50806bdSBarry Smith MatScalar *ca; 46d50806bdSBarry Smith 47d50806bdSBarry Smith PetscFunctionBegin; 48d50806bdSBarry Smith /* some error checking which could be moved into interface layer */ 49d50806bdSBarry Smith if (aishift || bishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported."); 50d50806bdSBarry Smith if (an!=bm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",an,bm); 51d50806bdSBarry Smith 5294e3eecaSKris Buschelman /* Set up timers */ 53d50806bdSBarry Smith if (!logkey_matmatmult_symbolic) { 54d50806bdSBarry Smith ierr = PetscLogEventRegister(&logkey_matmatmult_symbolic,"MatMatMult_Symbolic",MAT_COOKIE);CHKERRQ(ierr); 55d50806bdSBarry Smith } 56d50806bdSBarry Smith ierr = PetscLogEventBegin(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr); 57d50806bdSBarry Smith 58d50806bdSBarry Smith /* Set up */ 59d50806bdSBarry Smith /* Allocate ci array, arrays for fill computation and */ 60d50806bdSBarry Smith /* free space for accumulating nonzero column info */ 61d50806bdSBarry Smith ierr = PetscMalloc(((am+1)+1)*sizeof(int),&ci);CHKERRQ(ierr); 62d50806bdSBarry Smith ci[0] = 0; 63d50806bdSBarry Smith 6494e3eecaSKris Buschelman ierr = PetscMalloc((2*bn+1)*sizeof(int),&denserow);CHKERRQ(ierr); 6594e3eecaSKris Buschelman ierr = PetscMemzero(denserow,(2*bn+1)*sizeof(int));CHKERRQ(ierr); 6694e3eecaSKris Buschelman sparserow = denserow + bn; 67d50806bdSBarry Smith 68d50806bdSBarry Smith /* Initial FreeSpace size is nnz(B)=bi[bm] */ 69d50806bdSBarry Smith ierr = GetMoreSpace(bi[bm],&free_space);CHKERRQ(ierr); 70d50806bdSBarry Smith current_space = free_space; 71d50806bdSBarry Smith 7294e3eecaSKris Buschelman /* Determine symbolic info for each row of the product: */ 73d50806bdSBarry Smith for (i=0;i<am;i++) { 74d50806bdSBarry Smith anzi = ai[i+1] - ai[i]; 75d50806bdSBarry Smith cnzi = 0; 76d50806bdSBarry Smith for (j=0;j<anzi;j++) { 77d50806bdSBarry Smith brow = *aj++; 78d50806bdSBarry Smith bnzj = bi[brow+1] - bi[brow]; 79d50806bdSBarry Smith bjj = bj + bi[brow]; 80d50806bdSBarry Smith for (k=0;k<bnzj;k++) { 81d50806bdSBarry Smith /* If column is not marked, mark it in compressed and uncompressed locations. */ 82d50806bdSBarry Smith /* For simplicity, leave uncompressed row unsorted until finished with row, */ 83d50806bdSBarry Smith /* and increment nonzero count for this row. */ 8494e3eecaSKris Buschelman if (!denserow[bjj[k]]) { 8594e3eecaSKris Buschelman denserow[bjj[k]] = -1; 8694e3eecaSKris Buschelman sparserow[cnzi++] = bjj[k]; 87d50806bdSBarry Smith } 88d50806bdSBarry Smith } 89d50806bdSBarry Smith } 90d50806bdSBarry Smith 9194e3eecaSKris Buschelman /* sort sparserow */ 9294e3eecaSKris Buschelman ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 93d50806bdSBarry Smith 94d50806bdSBarry Smith /* If free space is not available, make more free space */ 95d50806bdSBarry Smith /* Double the amount of total space in the list */ 96d50806bdSBarry Smith if (current_space->local_remaining<cnzi) { 97d50806bdSBarry Smith ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 98d50806bdSBarry Smith } 99d50806bdSBarry Smith 10094e3eecaSKris Buschelman /* Copy data into free space, and zero out denserow */ 10194e3eecaSKris Buschelman ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 102d50806bdSBarry Smith current_space->array += cnzi; 103d50806bdSBarry Smith current_space->local_used += cnzi; 104d50806bdSBarry Smith current_space->local_remaining -= cnzi; 105d50806bdSBarry Smith for (j=0;j<cnzi;j++) { 10694e3eecaSKris Buschelman denserow[sparserow[j]] = 0; 107d50806bdSBarry Smith } 108d50806bdSBarry Smith ci[i+1] = ci[i] + cnzi; 109d50806bdSBarry Smith } 110d50806bdSBarry Smith 11194e3eecaSKris Buschelman /* Column indices are in the list of free space */ 112d50806bdSBarry Smith /* Allocate space for cj, initialize cj, and */ 113d50806bdSBarry Smith /* destroy list of free space and other temporary array(s) */ 114d50806bdSBarry Smith ierr = PetscMalloc((ci[am]+1)*sizeof(int),&cj);CHKERRQ(ierr); 11570f19b1fSKris Buschelman ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 11694e3eecaSKris Buschelman ierr = PetscFree(denserow);CHKERRQ(ierr); 117d50806bdSBarry Smith 118d50806bdSBarry Smith /* Allocate space for ca */ 119d50806bdSBarry Smith ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 120d50806bdSBarry Smith ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 121d50806bdSBarry Smith 122d50806bdSBarry Smith /* put together the new matrix */ 123d50806bdSBarry Smith ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr); 124d50806bdSBarry Smith 125d50806bdSBarry Smith /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 126d50806bdSBarry Smith /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 127d50806bdSBarry Smith c = (Mat_SeqAIJ *)((*C)->data); 128d50806bdSBarry Smith c->freedata = PETSC_TRUE; 129d50806bdSBarry Smith c->nonew = 0; 130d50806bdSBarry Smith 131d50806bdSBarry Smith ierr = PetscLogEventEnd(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr); 132d50806bdSBarry Smith PetscFunctionReturn(0); 133d50806bdSBarry Smith } 134*58c24d83SHong Zhang #endif /* USE_INTSORT */ 135*58c24d83SHong Zhang 136*58c24d83SHong Zhang #undef __FUNCT__ 137*58c24d83SHong Zhang #define __FUNCT__ "MatMatMult_Symbolic_SeqAIJ_SeqAIJ" 138*58c24d83SHong Zhang int MatMatMult_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat *C) 139*58c24d83SHong Zhang { 140*58c24d83SHong Zhang int ierr; 141*58c24d83SHong Zhang FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 142*58c24d83SHong Zhang Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 143*58c24d83SHong Zhang int aishift=a->indexshift,bishift=b->indexshift; 144*58c24d83SHong Zhang int *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj; 145*58c24d83SHong Zhang int *ci,*cj,*lnk,idx0,idx,bcol; 146*58c24d83SHong Zhang int an=A->N,am=A->M,bn=B->N,bm=B->M; 147*58c24d83SHong Zhang int i,j,k,anzi,brow,bnzj,cnzi; 148*58c24d83SHong Zhang MatScalar *ca; 149*58c24d83SHong Zhang PetscLogDouble t0,t1,etime=0.0; 150*58c24d83SHong Zhang 151*58c24d83SHong Zhang PetscFunctionBegin; 152*58c24d83SHong Zhang /* some error checking which could be moved into interface layer */ 153*58c24d83SHong Zhang if (aishift || bishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported."); 154*58c24d83SHong Zhang if (an!=bm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",an,bm); 155*58c24d83SHong Zhang 156*58c24d83SHong Zhang /* Set up timers */ 157*58c24d83SHong Zhang if (!logkey_matmatmult_symbolic) { 158*58c24d83SHong Zhang ierr = PetscLogEventRegister(&logkey_matmatmult_symbolic,"MatMatMult_Symbolic",MAT_COOKIE);CHKERRQ(ierr); 159*58c24d83SHong Zhang } 160*58c24d83SHong Zhang ierr = PetscLogEventBegin(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr); 161*58c24d83SHong Zhang 162*58c24d83SHong Zhang /* Set up */ 163*58c24d83SHong Zhang /* Allocate ci array, arrays for fill computation and */ 164*58c24d83SHong Zhang /* free space for accumulating nonzero column info */ 165*58c24d83SHong Zhang ierr = PetscMalloc(((am+1)+1)*sizeof(int),&ci);CHKERRQ(ierr); 166*58c24d83SHong Zhang ci[0] = 0; 167*58c24d83SHong Zhang 168*58c24d83SHong Zhang ierr = PetscMalloc((bn+1)*sizeof(int),&lnk);CHKERRQ(ierr); 169*58c24d83SHong Zhang for (i=0; i<bn; i++) lnk[i] = -1; 170*58c24d83SHong Zhang 171*58c24d83SHong Zhang /* Initial FreeSpace size is nnz(B)=4*bi[bm] */ 172*58c24d83SHong Zhang ierr = GetMoreSpace(4*bi[bm],&free_space);CHKERRQ(ierr); 173*58c24d83SHong Zhang current_space = free_space; 174*58c24d83SHong Zhang 175*58c24d83SHong Zhang /* Determine symbolic info for each row of the product: */ 176*58c24d83SHong Zhang for (i=0;i<am;i++) { 177*58c24d83SHong Zhang anzi = ai[i+1] - ai[i]; 178*58c24d83SHong Zhang cnzi = 0; 179*58c24d83SHong Zhang lnk[bn] = bn; 180*58c24d83SHong Zhang for (j=0;j<anzi;j++) { 181*58c24d83SHong Zhang brow = *aj++; 182*58c24d83SHong Zhang bnzj = bi[brow+1] - bi[brow]; 183*58c24d83SHong Zhang bjj = bj + bi[brow]; 184*58c24d83SHong Zhang idx = bn; 185*58c24d83SHong Zhang for (k=0;k<bnzj;k++) { 186*58c24d83SHong Zhang bcol = bjj[k]; 187*58c24d83SHong Zhang if (lnk[bcol] == -1) { /* new col */ 188*58c24d83SHong Zhang if (k>0) idx = bjj[k-1]; 189*58c24d83SHong Zhang do { 190*58c24d83SHong Zhang idx0 = idx; 191*58c24d83SHong Zhang idx = lnk[idx0]; 192*58c24d83SHong Zhang } while (bcol > idx); 193*58c24d83SHong Zhang lnk[idx0] = bcol; 194*58c24d83SHong Zhang lnk[bcol] = idx; 195*58c24d83SHong Zhang cnzi++; 196*58c24d83SHong Zhang } 197*58c24d83SHong Zhang } 198*58c24d83SHong Zhang } 199*58c24d83SHong Zhang 200*58c24d83SHong Zhang /* If free space is not available, make more free space */ 201*58c24d83SHong Zhang /* Double the amount of total space in the list */ 202*58c24d83SHong Zhang if (current_space->local_remaining<cnzi) { 203*58c24d83SHong Zhang printf("...%d -th row, double space ...\n",i); 204*58c24d83SHong Zhang ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 205*58c24d83SHong Zhang } 206*58c24d83SHong Zhang 207*58c24d83SHong Zhang /* Copy data into free space, and zero out denserow and lnk */ 208*58c24d83SHong Zhang idx = bn; 209*58c24d83SHong Zhang for (j=0; j<cnzi; j++){ 210*58c24d83SHong Zhang idx0 = idx; 211*58c24d83SHong Zhang idx = lnk[idx0]; 212*58c24d83SHong Zhang *current_space->array++ = idx; 213*58c24d83SHong Zhang lnk[idx0] = -1; 214*58c24d83SHong Zhang } 215*58c24d83SHong Zhang lnk[idx] = -1; 216*58c24d83SHong Zhang 217*58c24d83SHong Zhang current_space->local_used += cnzi; 218*58c24d83SHong Zhang current_space->local_remaining -= cnzi; 219*58c24d83SHong Zhang 220*58c24d83SHong Zhang ci[i+1] = ci[i] + cnzi; 221*58c24d83SHong Zhang } 222*58c24d83SHong Zhang 223*58c24d83SHong Zhang /* Column indices are in the list of free space */ 224*58c24d83SHong Zhang /* Allocate space for cj, initialize cj, and */ 225*58c24d83SHong Zhang /* destroy list of free space and other temporary array(s) */ 226*58c24d83SHong Zhang ierr = PetscMalloc((ci[am]+1)*sizeof(int),&cj);CHKERRQ(ierr); 227*58c24d83SHong Zhang ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 228*58c24d83SHong Zhang ierr = PetscFree(lnk);CHKERRQ(ierr); 229*58c24d83SHong Zhang 230*58c24d83SHong Zhang /* Allocate space for ca */ 231*58c24d83SHong Zhang ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 232*58c24d83SHong Zhang ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 233*58c24d83SHong Zhang 234*58c24d83SHong Zhang /* put together the new matrix */ 235*58c24d83SHong Zhang ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr); 236*58c24d83SHong Zhang 237*58c24d83SHong Zhang /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 238*58c24d83SHong Zhang /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 239*58c24d83SHong Zhang c = (Mat_SeqAIJ *)((*C)->data); 240*58c24d83SHong Zhang c->freedata = PETSC_TRUE; 241*58c24d83SHong Zhang c->nonew = 0; 242*58c24d83SHong Zhang 243*58c24d83SHong Zhang ierr = PetscLogEventEnd(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr); 244*58c24d83SHong Zhang PetscFunctionReturn(0); 245*58c24d83SHong Zhang } 246d50806bdSBarry Smith 247d50806bdSBarry Smith /* 24894e3eecaSKris Buschelman MatMatMult_Numeric_SeqAIJ_SeqAIJ - Forms the numeric product of two SeqAIJ matrices 249d50806bdSBarry Smith C=A*B; 25094e3eecaSKris Buschelman Note: C must have been created by calling MatMatMult_Symbolic_SeqAIJ_SeqAIJ. 251d50806bdSBarry Smith */ 252d50806bdSBarry Smith #undef __FUNCT__ 25394e3eecaSKris Buschelman #define __FUNCT__ "MatMatMult_Numeric_SeqAIJ_SeqAIJ" 25494e3eecaSKris Buschelman int MatMatMult_Numeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 255d50806bdSBarry Smith { 25694e3eecaSKris Buschelman int ierr,flops=0; 257d50806bdSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 258d50806bdSBarry Smith Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 259d50806bdSBarry Smith Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 260d50806bdSBarry Smith int aishift=a->indexshift,bishift=b->indexshift,cishift=c->indexshift; 261d50806bdSBarry Smith int *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 262d50806bdSBarry Smith int an=A->N,am=A->M,bn=B->N,bm=B->M,cn=C->N,cm=C->M; 26394e3eecaSKris Buschelman int i,j,k,anzi,bnzi,cnzi,brow; 264d50806bdSBarry Smith MatScalar *aa=a->a,*ba=b->a,*baj,*ca=c->a,*temp; 265d50806bdSBarry Smith 266d50806bdSBarry Smith PetscFunctionBegin; 267d50806bdSBarry Smith 268d50806bdSBarry Smith /* This error checking should be unnecessary if the symbolic was performed */ 269d50806bdSBarry Smith if (aishift || bishift || cishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported."); 270d50806bdSBarry Smith if (am!=cm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",am,cm); 271d50806bdSBarry Smith if (an!=bm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",an,bm); 272d50806bdSBarry Smith if (bn!=cn) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",bn,cn); 273d50806bdSBarry Smith 27494e3eecaSKris Buschelman /* Set up timers */ 275d50806bdSBarry Smith if (!logkey_matmatmult_numeric) { 276d50806bdSBarry Smith ierr = PetscLogEventRegister(&logkey_matmatmult_numeric,"MatMatMult_Numeric",MAT_COOKIE);CHKERRQ(ierr); 277d50806bdSBarry Smith } 278d50806bdSBarry Smith ierr = PetscLogEventBegin(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr); 27994e3eecaSKris Buschelman 280d50806bdSBarry Smith /* Allocate temp accumulation space to avoid searching for nonzero columns in C */ 281d50806bdSBarry Smith ierr = PetscMalloc((cn+1)*sizeof(MatScalar),&temp);CHKERRQ(ierr); 282d50806bdSBarry Smith ierr = PetscMemzero(temp,cn*sizeof(MatScalar));CHKERRQ(ierr); 283d50806bdSBarry Smith /* Traverse A row-wise. */ 284d50806bdSBarry Smith /* Build the ith row in C by summing over nonzero columns in A, */ 285d50806bdSBarry Smith /* the rows of B corresponding to nonzeros of A. */ 286d50806bdSBarry Smith for (i=0;i<am;i++) { 287d50806bdSBarry Smith anzi = ai[i+1] - ai[i]; 288d50806bdSBarry Smith for (j=0;j<anzi;j++) { 289d50806bdSBarry Smith brow = *aj++; 290d50806bdSBarry Smith bnzi = bi[brow+1] - bi[brow]; 291d50806bdSBarry Smith bjj = bj + bi[brow]; 292d50806bdSBarry Smith baj = ba + bi[brow]; 293d50806bdSBarry Smith for (k=0;k<bnzi;k++) { 294d50806bdSBarry Smith temp[bjj[k]] += (*aa)*baj[k]; 295d50806bdSBarry Smith } 296d50806bdSBarry Smith flops += 2*bnzi; 297d50806bdSBarry Smith aa++; 298d50806bdSBarry Smith } 299d50806bdSBarry Smith /* Store row back into C, and re-zero temp */ 300d50806bdSBarry Smith cnzi = ci[i+1] - ci[i]; 301d50806bdSBarry Smith for (j=0;j<cnzi;j++) { 302d50806bdSBarry Smith ca[j] = temp[cj[j]]; 303d50806bdSBarry Smith temp[cj[j]] = 0.0; 304d50806bdSBarry Smith } 305d50806bdSBarry Smith ca += cnzi; 306d50806bdSBarry Smith cj += cnzi; 307d50806bdSBarry Smith } 308716bacf3SKris Buschelman ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 309716bacf3SKris Buschelman ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 310716bacf3SKris Buschelman 311d50806bdSBarry Smith /* Free temp */ 312d50806bdSBarry Smith ierr = PetscFree(temp);CHKERRQ(ierr); 313d50806bdSBarry Smith ierr = PetscLogFlops(flops);CHKERRQ(ierr); 314d50806bdSBarry Smith ierr = PetscLogEventEnd(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr); 315d50806bdSBarry Smith PetscFunctionReturn(0); 316d50806bdSBarry Smith } 317d50806bdSBarry Smith 318d50806bdSBarry Smith #undef __FUNCT__ 319d50806bdSBarry Smith #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ" 320d50806bdSBarry Smith int MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat *C) { 321d50806bdSBarry Smith int ierr; 322d50806bdSBarry Smith 323d50806bdSBarry Smith PetscFunctionBegin; 3242216b3a4SKris Buschelman if (!logkey_matmatmult) { 3252216b3a4SKris Buschelman ierr = PetscLogEventRegister(&logkey_matmatmult,"MatMatMult",MAT_COOKIE);CHKERRQ(ierr); 3262216b3a4SKris Buschelman } 3272216b3a4SKris Buschelman ierr = PetscLogEventBegin(logkey_matmatmult,A,B,0,0);CHKERRQ(ierr); 32894e3eecaSKris Buschelman ierr = MatMatMult_Symbolic_SeqAIJ_SeqAIJ(A,B,C);CHKERRQ(ierr); 32994e3eecaSKris Buschelman ierr = MatMatMult_Numeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 3302216b3a4SKris Buschelman ierr = PetscLogEventEnd(logkey_matmatmult,A,B,0,0);CHKERRQ(ierr); 331d50806bdSBarry Smith PetscFunctionReturn(0); 332d50806bdSBarry Smith } 33394e3eecaSKris Buschelman 33494e3eecaSKris Buschelman 33594e3eecaSKris Buschelman /* 33670f19b1fSKris Buschelman MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ - Forms the symbolic product of two SeqAIJ matrices 33794e3eecaSKris Buschelman C = P * A * P^T; 33894e3eecaSKris Buschelman 33994e3eecaSKris Buschelman Note: C is assumed to be uncreated. 34094e3eecaSKris Buschelman If this is not the case, Destroy C before calling this routine. 34194e3eecaSKris Buschelman */ 34294e3eecaSKris Buschelman #undef __FUNCT__ 34370f19b1fSKris Buschelman #define __FUNCT__ "MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ" 34470f19b1fSKris Buschelman int MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C) { 34594e3eecaSKris Buschelman /* Note: This code is virtually identical to that of MatApplyPtAP_SeqAIJ_Symbolic */ 34694e3eecaSKris Buschelman /* and MatMatMult_SeqAIJ_SeqAIJ_Symbolic. Perhaps they could be merged nicely. */ 34794e3eecaSKris Buschelman int ierr; 34894e3eecaSKris Buschelman FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 34994e3eecaSKris Buschelman Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 35094e3eecaSKris Buschelman int aishift=a->indexshift,pishift=p->indexshift; 35194e3eecaSKris Buschelman int *ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pti,*ptj,*ptjj; 35294e3eecaSKris Buschelman int *ci,*cj,*paj,*padenserow,*pasparserow,*denserow,*sparserow; 35394e3eecaSKris Buschelman int an=A->N,am=A->M,pn=P->N,pm=P->M; 35494e3eecaSKris Buschelman int i,j,k,pnzi,arow,anzj,panzi,ptrow,ptnzj,cnzi; 35594e3eecaSKris Buschelman MatScalar *ca; 35694e3eecaSKris Buschelman 35794e3eecaSKris Buschelman PetscFunctionBegin; 35894e3eecaSKris Buschelman 35994e3eecaSKris Buschelman /* some error checking which could be moved into interface layer */ 36094e3eecaSKris Buschelman if (aishift || pishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported."); 36194e3eecaSKris Buschelman if (pn!=am) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pn,am); 36294e3eecaSKris Buschelman if (am!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",am, an); 36394e3eecaSKris Buschelman 36494e3eecaSKris Buschelman /* Set up timers */ 36594e3eecaSKris Buschelman if (!logkey_matapplypapt_symbolic) { 36694e3eecaSKris Buschelman ierr = PetscLogEventRegister(&logkey_matapplypapt_symbolic,"MatApplyPAPt_Symbolic",MAT_COOKIE);CHKERRQ(ierr); 36794e3eecaSKris Buschelman } 36894e3eecaSKris Buschelman ierr = PetscLogEventBegin(logkey_matapplypapt_symbolic,A,P,0,0);CHKERRQ(ierr); 36994e3eecaSKris Buschelman 37094e3eecaSKris Buschelman /* Create ij structure of P^T */ 37194e3eecaSKris Buschelman ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 37294e3eecaSKris Buschelman 37394e3eecaSKris Buschelman /* Allocate ci array, arrays for fill computation and */ 37494e3eecaSKris Buschelman /* free space for accumulating nonzero column info */ 37594e3eecaSKris Buschelman ierr = PetscMalloc(((pm+1)*1)*sizeof(int),&ci);CHKERRQ(ierr); 37694e3eecaSKris Buschelman ci[0] = 0; 37794e3eecaSKris Buschelman 37894e3eecaSKris Buschelman ierr = PetscMalloc((2*an+2*pm+1)*sizeof(int),&padenserow);CHKERRQ(ierr); 37994e3eecaSKris Buschelman ierr = PetscMemzero(padenserow,(2*an+2*pm+1)*sizeof(int));CHKERRQ(ierr); 38094e3eecaSKris Buschelman pasparserow = padenserow + an; 38194e3eecaSKris Buschelman denserow = pasparserow + an; 38294e3eecaSKris Buschelman sparserow = denserow + pm; 38394e3eecaSKris Buschelman 38494e3eecaSKris Buschelman /* Set initial free space to be nnz(A) scaled by aspect ratio of Pt. */ 38594e3eecaSKris Buschelman /* This should be reasonable if sparsity of PAPt is similar to that of A. */ 38694e3eecaSKris Buschelman ierr = GetMoreSpace((ai[am]/pn)*pm,&free_space); 38794e3eecaSKris Buschelman current_space = free_space; 38894e3eecaSKris Buschelman 38994e3eecaSKris Buschelman /* Determine fill for each row of C: */ 39094e3eecaSKris Buschelman for (i=0;i<pm;i++) { 39194e3eecaSKris Buschelman pnzi = pi[i+1] - pi[i]; 39294e3eecaSKris Buschelman panzi = 0; 39394e3eecaSKris Buschelman /* Get symbolic sparse row of PA: */ 39494e3eecaSKris Buschelman for (j=0;j<pnzi;j++) { 39594e3eecaSKris Buschelman arow = *pj++; 39694e3eecaSKris Buschelman anzj = ai[arow+1] - ai[arow]; 39794e3eecaSKris Buschelman ajj = aj + ai[arow]; 39894e3eecaSKris Buschelman for (k=0;k<anzj;k++) { 39994e3eecaSKris Buschelman if (!padenserow[ajj[k]]) { 40094e3eecaSKris Buschelman padenserow[ajj[k]] = -1; 40194e3eecaSKris Buschelman pasparserow[panzi++] = ajj[k]; 40294e3eecaSKris Buschelman } 40394e3eecaSKris Buschelman } 40494e3eecaSKris Buschelman } 40594e3eecaSKris Buschelman /* Using symbolic row of PA, determine symbolic row of C: */ 40694e3eecaSKris Buschelman paj = pasparserow; 40794e3eecaSKris Buschelman cnzi = 0; 40894e3eecaSKris Buschelman for (j=0;j<panzi;j++) { 40994e3eecaSKris Buschelman ptrow = *paj++; 41094e3eecaSKris Buschelman ptnzj = pti[ptrow+1] - pti[ptrow]; 41194e3eecaSKris Buschelman ptjj = ptj + pti[ptrow]; 41294e3eecaSKris Buschelman for (k=0;k<ptnzj;k++) { 41394e3eecaSKris Buschelman if (!denserow[ptjj[k]]) { 41494e3eecaSKris Buschelman denserow[ptjj[k]] = -1; 41594e3eecaSKris Buschelman sparserow[cnzi++] = ptjj[k]; 41694e3eecaSKris Buschelman } 41794e3eecaSKris Buschelman } 41894e3eecaSKris Buschelman } 41994e3eecaSKris Buschelman 42094e3eecaSKris Buschelman /* sort sparse representation */ 42194e3eecaSKris Buschelman ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 42294e3eecaSKris Buschelman 42394e3eecaSKris Buschelman /* If free space is not available, make more free space */ 42494e3eecaSKris Buschelman /* Double the amount of total space in the list */ 42594e3eecaSKris Buschelman if (current_space->local_remaining<cnzi) { 42694e3eecaSKris Buschelman ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 42794e3eecaSKris Buschelman } 42894e3eecaSKris Buschelman 42994e3eecaSKris Buschelman /* Copy data into free space, and zero out dense row */ 43094e3eecaSKris Buschelman ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 43194e3eecaSKris Buschelman current_space->array += cnzi; 43294e3eecaSKris Buschelman current_space->local_used += cnzi; 43394e3eecaSKris Buschelman current_space->local_remaining -= cnzi; 43494e3eecaSKris Buschelman 43594e3eecaSKris Buschelman for (j=0;j<panzi;j++) { 43694e3eecaSKris Buschelman padenserow[pasparserow[j]] = 0; 43794e3eecaSKris Buschelman } 43894e3eecaSKris Buschelman for (j=0;j<cnzi;j++) { 43994e3eecaSKris Buschelman denserow[sparserow[j]] = 0; 44094e3eecaSKris Buschelman } 44194e3eecaSKris Buschelman ci[i+1] = ci[i] + cnzi; 44294e3eecaSKris Buschelman } 44394e3eecaSKris Buschelman /* column indices are in the list of free space */ 44494e3eecaSKris Buschelman /* Allocate space for cj, initialize cj, and */ 44594e3eecaSKris Buschelman /* destroy list of free space and other temporary array(s) */ 44694e3eecaSKris Buschelman ierr = PetscMalloc((ci[pm]+1)*sizeof(int),&cj);CHKERRQ(ierr); 44770f19b1fSKris Buschelman ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 44894e3eecaSKris Buschelman ierr = PetscFree(padenserow);CHKERRQ(ierr); 44994e3eecaSKris Buschelman 45094e3eecaSKris Buschelman /* Allocate space for ca */ 45194e3eecaSKris Buschelman ierr = PetscMalloc((ci[pm]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 45294e3eecaSKris Buschelman ierr = PetscMemzero(ca,(ci[pm]+1)*sizeof(MatScalar));CHKERRQ(ierr); 45394e3eecaSKris Buschelman 45494e3eecaSKris Buschelman /* put together the new matrix */ 45594e3eecaSKris Buschelman ierr = MatCreateSeqAIJWithArrays(A->comm,pm,pm,ci,cj,ca,C);CHKERRQ(ierr); 45694e3eecaSKris Buschelman 45794e3eecaSKris Buschelman /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 45894e3eecaSKris Buschelman /* Since these are PETSc arrays, change flags to free them as necessary. */ 45994e3eecaSKris Buschelman c = (Mat_SeqAIJ *)((*C)->data); 46094e3eecaSKris Buschelman c->freedata = PETSC_TRUE; 46194e3eecaSKris Buschelman c->nonew = 0; 46294e3eecaSKris Buschelman 46394e3eecaSKris Buschelman /* Clean up. */ 46470f19b1fSKris Buschelman ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 46594e3eecaSKris Buschelman 46694e3eecaSKris Buschelman ierr = PetscLogEventEnd(logkey_matapplypapt_symbolic,A,P,0,0);CHKERRQ(ierr); 46794e3eecaSKris Buschelman PetscFunctionReturn(0); 46894e3eecaSKris Buschelman } 46994e3eecaSKris Buschelman 47094e3eecaSKris Buschelman /* 47194e3eecaSKris Buschelman MatApplyPAPt_Numeric_SeqAIJ - Forms the numeric product of two SeqAIJ matrices 47294e3eecaSKris Buschelman C = P * A * P^T; 47394e3eecaSKris Buschelman Note: C must have been created by calling MatApplyPAPt_Symbolic_SeqAIJ. 47494e3eecaSKris Buschelman */ 47594e3eecaSKris Buschelman #undef __FUNCT__ 47670f19b1fSKris Buschelman #define __FUNCT__ "MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ" 47770f19b1fSKris Buschelman int MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) { 47894e3eecaSKris Buschelman int ierr,flops=0; 47994e3eecaSKris Buschelman Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 48094e3eecaSKris Buschelman Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 48194e3eecaSKris Buschelman Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 48294e3eecaSKris Buschelman int aishift=a->indexshift,pishift=p->indexshift,cishift=c->indexshift; 48394e3eecaSKris Buschelman int *ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj=p->j,*paj,*pajdense,*ptj; 48494e3eecaSKris Buschelman int *ci=c->i,*cj=c->j; 48594e3eecaSKris Buschelman int an=A->N,am=A->M,pn=P->N,pm=P->M,cn=C->N,cm=C->M; 48694e3eecaSKris Buschelman int i,j,k,k1,k2,pnzi,anzj,panzj,arow,ptcol,ptnzj,cnzi; 48794e3eecaSKris Buschelman MatScalar *aa=a->a,*pa=p->a,*pta=p->a,*ptaj,*paa,*aaj,*ca=c->a,sum; 48894e3eecaSKris Buschelman 48994e3eecaSKris Buschelman PetscFunctionBegin; 49094e3eecaSKris Buschelman 49194e3eecaSKris Buschelman /* This error checking should be unnecessary if the symbolic was performed */ 49294e3eecaSKris Buschelman if (aishift || pishift || cishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported."); 49394e3eecaSKris Buschelman if (pm!=cm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pm,cm); 49494e3eecaSKris Buschelman if (pn!=am) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pn,am); 49594e3eecaSKris Buschelman if (am!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",am, an); 49694e3eecaSKris Buschelman if (pm!=cn) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pm, cn); 49794e3eecaSKris Buschelman 49894e3eecaSKris Buschelman /* Set up timers */ 49994e3eecaSKris Buschelman if (!logkey_matapplypapt_numeric) { 50094e3eecaSKris Buschelman ierr = PetscLogEventRegister(&logkey_matapplypapt_numeric,"MatApplyPAPt_Numeric",MAT_COOKIE);CHKERRQ(ierr); 50194e3eecaSKris Buschelman } 50294e3eecaSKris Buschelman ierr = PetscLogEventBegin(logkey_matapplypapt_numeric,A,P,C,0);CHKERRQ(ierr); 50394e3eecaSKris Buschelman 50494e3eecaSKris Buschelman ierr = PetscMalloc(an*(sizeof(MatScalar)+2*sizeof(int)),&paa);CHKERRQ(ierr); 50594e3eecaSKris Buschelman ierr = PetscMemzero(paa,an*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr); 50694e3eecaSKris Buschelman ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 50794e3eecaSKris Buschelman 50894e3eecaSKris Buschelman paj = (int *)(paa + an); 50994e3eecaSKris Buschelman pajdense = paj + an; 51094e3eecaSKris Buschelman 51194e3eecaSKris Buschelman for (i=0;i<pm;i++) { 51294e3eecaSKris Buschelman /* Form sparse row of P*A */ 51394e3eecaSKris Buschelman pnzi = pi[i+1] - pi[i]; 51494e3eecaSKris Buschelman panzj = 0; 51594e3eecaSKris Buschelman for (j=0;j<pnzi;j++) { 51694e3eecaSKris Buschelman arow = *pj++; 51794e3eecaSKris Buschelman anzj = ai[arow+1] - ai[arow]; 51894e3eecaSKris Buschelman ajj = aj + ai[arow]; 51994e3eecaSKris Buschelman aaj = aa + ai[arow]; 52094e3eecaSKris Buschelman for (k=0;k<anzj;k++) { 52194e3eecaSKris Buschelman if (!pajdense[ajj[k]]) { 52294e3eecaSKris Buschelman pajdense[ajj[k]] = -1; 52394e3eecaSKris Buschelman paj[panzj++] = ajj[k]; 52494e3eecaSKris Buschelman } 52594e3eecaSKris Buschelman paa[ajj[k]] += (*pa)*aaj[k]; 52694e3eecaSKris Buschelman } 52794e3eecaSKris Buschelman flops += 2*anzj; 52894e3eecaSKris Buschelman pa++; 52994e3eecaSKris Buschelman } 53094e3eecaSKris Buschelman 53194e3eecaSKris Buschelman /* Sort the j index array for quick sparse axpy. */ 53294e3eecaSKris Buschelman ierr = PetscSortInt(panzj,paj);CHKERRQ(ierr); 53394e3eecaSKris Buschelman 53494e3eecaSKris Buschelman /* Compute P*A*P^T using sparse inner products. */ 53594e3eecaSKris Buschelman /* Take advantage of pre-computed (i,j) of C for locations of non-zeros. */ 53694e3eecaSKris Buschelman cnzi = ci[i+1] - ci[i]; 53794e3eecaSKris Buschelman for (j=0;j<cnzi;j++) { 53894e3eecaSKris Buschelman /* Form sparse inner product of current row of P*A with (*cj++) col of P^T. */ 53994e3eecaSKris Buschelman ptcol = *cj++; 54094e3eecaSKris Buschelman ptnzj = pi[ptcol+1] - pi[ptcol]; 54194e3eecaSKris Buschelman ptj = pjj + pi[ptcol]; 54294e3eecaSKris Buschelman ptaj = pta + pi[ptcol]; 54394e3eecaSKris Buschelman sum = 0.; 54494e3eecaSKris Buschelman k1 = 0; 54594e3eecaSKris Buschelman k2 = 0; 54694e3eecaSKris Buschelman while ((k1<panzj) && (k2<ptnzj)) { 54794e3eecaSKris Buschelman if (paj[k1]==ptj[k2]) { 548cdaf1259SKris Buschelman sum += paa[paj[k1++]]*ptaj[k2++]; 54994e3eecaSKris Buschelman } else if (paj[k1] < ptj[k2]) { 55094e3eecaSKris Buschelman k1++; 55194e3eecaSKris Buschelman } else /* if (paj[k1] > ptj[k2]) */ { 55294e3eecaSKris Buschelman k2++; 55394e3eecaSKris Buschelman } 55494e3eecaSKris Buschelman } 55594e3eecaSKris Buschelman *ca++ = sum; 55694e3eecaSKris Buschelman } 55794e3eecaSKris Buschelman 55894e3eecaSKris Buschelman /* Zero the current row info for P*A */ 55994e3eecaSKris Buschelman for (j=0;j<panzj;j++) { 56094e3eecaSKris Buschelman paa[paj[j]] = 0.; 56194e3eecaSKris Buschelman pajdense[paj[j]] = 0; 56294e3eecaSKris Buschelman } 56394e3eecaSKris Buschelman } 56494e3eecaSKris Buschelman 56594e3eecaSKris Buschelman ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 56694e3eecaSKris Buschelman ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 56794e3eecaSKris Buschelman ierr = PetscLogFlops(flops);CHKERRQ(ierr); 56894e3eecaSKris Buschelman ierr = PetscLogEventEnd(logkey_matapplypapt_numeric,A,P,C,0);CHKERRQ(ierr); 56994e3eecaSKris Buschelman PetscFunctionReturn(0); 57094e3eecaSKris Buschelman } 57194e3eecaSKris Buschelman 57294e3eecaSKris Buschelman #undef __FUNCT__ 57370f19b1fSKris Buschelman #define __FUNCT__ "MatApplyPAPt_SeqAIJ_SeqAIJ" 57470f19b1fSKris Buschelman int MatApplyPAPt_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C) { 57594e3eecaSKris Buschelman int ierr; 57694e3eecaSKris Buschelman 57794e3eecaSKris Buschelman PetscFunctionBegin; 57894e3eecaSKris Buschelman if (!logkey_matapplypapt) { 57994e3eecaSKris Buschelman ierr = PetscLogEventRegister(&logkey_matapplypapt,"MatApplyPAPt",MAT_COOKIE);CHKERRQ(ierr); 58094e3eecaSKris Buschelman } 58194e3eecaSKris Buschelman ierr = PetscLogEventBegin(logkey_matapplypapt,A,P,0,0);CHKERRQ(ierr); 58270f19b1fSKris Buschelman ierr = MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ(A,P,C);CHKERRQ(ierr); 58370f19b1fSKris Buschelman ierr = MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr); 58494e3eecaSKris Buschelman ierr = PetscLogEventEnd(logkey_matapplypapt,A,P,0,0);CHKERRQ(ierr); 58594e3eecaSKris Buschelman PetscFunctionReturn(0); 58694e3eecaSKris Buschelman } 587