1d50806bdSBarry Smith /*$Id: matmatmult.c,v 1.15 2001/09/07 20:04:44 buschelm Exp $*/ 2d50806bdSBarry Smith /* 3*94e3eecaSKris Buschelman Defines a matrix-matrix product routines for pairs of SeqAIJ matrices 4d50806bdSBarry Smith C = A * B 5*94e3eecaSKris Buschelman C = P^T * A * P 6*94e3eecaSKris Buschelman C = P * A * P^T 7d50806bdSBarry Smith */ 8d50806bdSBarry Smith 9d50806bdSBarry Smith #include "src/mat/impls/aij/seq/aij.h" 10d50806bdSBarry Smith 112216b3a4SKris Buschelman static int logkey_matmatmult = 0; 122216b3a4SKris Buschelman static int logkey_matmatmult_symbolic = 0; 132216b3a4SKris Buschelman static int logkey_matmatmult_numeric = 0; 142216b3a4SKris Buschelman 15*94e3eecaSKris Buschelman static int logkey_matgetsymtranspose = 0; 16*94e3eecaSKris Buschelman static int logkey_mattranspose = 0; 17*94e3eecaSKris Buschelman 182216b3a4SKris Buschelman static int logkey_matapplyptap = 0; 192216b3a4SKris Buschelman static int logkey_matapplyptap_symbolic = 0; 202216b3a4SKris Buschelman static int logkey_matapplyptap_numeric = 0; 212216b3a4SKris Buschelman 22*94e3eecaSKris Buschelman static int logkey_matapplypapt = 0; 23*94e3eecaSKris Buschelman static int logkey_matapplypapt_symbolic = 0; 24*94e3eecaSKris Buschelman static int logkey_matapplypapt_numeric = 0; 25*94e3eecaSKris Buschelman 26d50806bdSBarry Smith typedef struct _Space *FreeSpaceList; 27d50806bdSBarry Smith typedef struct _Space { 28d50806bdSBarry Smith FreeSpaceList more_space; 29d50806bdSBarry Smith int *array; 30d50806bdSBarry Smith int *array_head; 31d50806bdSBarry Smith int total_array_size; 32d50806bdSBarry Smith int local_used; 33d50806bdSBarry Smith int local_remaining; 34d50806bdSBarry Smith } FreeSpace; 35d50806bdSBarry Smith 36d50806bdSBarry Smith #undef __FUNCT__ 37d50806bdSBarry Smith #define __FUNCT__ "GetMoreSpace" 382216b3a4SKris Buschelman int GetMoreSpace(int size,FreeSpaceList *list) { 39d50806bdSBarry Smith FreeSpaceList a; 40d50806bdSBarry Smith int ierr; 41d50806bdSBarry Smith 42d50806bdSBarry Smith PetscFunctionBegin; 43d50806bdSBarry Smith ierr = PetscMalloc(sizeof(FreeSpace),&a);CHKERRQ(ierr); 44d50806bdSBarry Smith ierr = PetscMalloc(size*sizeof(int),&(a->array_head));CHKERRQ(ierr); 45d50806bdSBarry Smith a->array = a->array_head; 46d50806bdSBarry Smith a->local_remaining = size; 47d50806bdSBarry Smith a->local_used = 0; 48d50806bdSBarry Smith a->total_array_size = 0; 49d50806bdSBarry Smith a->more_space = NULL; 50d50806bdSBarry Smith 51d50806bdSBarry Smith if (*list) { 52d50806bdSBarry Smith (*list)->more_space = a; 53d50806bdSBarry Smith a->total_array_size = (*list)->total_array_size; 54d50806bdSBarry Smith } 55d50806bdSBarry Smith 56d50806bdSBarry Smith a->total_array_size += size; 57d50806bdSBarry Smith *list = a; 58d50806bdSBarry Smith PetscFunctionReturn(0); 59d50806bdSBarry Smith } 60d50806bdSBarry Smith 61d50806bdSBarry Smith #undef __FUNCT__ 62d50806bdSBarry Smith #define __FUNCT__ "MakeSpaceContiguous" 63d50806bdSBarry Smith int MakeSpaceContiguous(int *space,FreeSpaceList *head) { 64d50806bdSBarry Smith FreeSpaceList a; 65d50806bdSBarry Smith int ierr; 66d50806bdSBarry Smith 67d50806bdSBarry Smith PetscFunctionBegin; 68d50806bdSBarry Smith while ((*head)!=NULL) { 69d50806bdSBarry Smith a = (*head)->more_space; 70d50806bdSBarry Smith ierr = PetscMemcpy(space,(*head)->array_head,((*head)->local_used)*sizeof(int));CHKERRQ(ierr); 71d50806bdSBarry Smith space += (*head)->local_used; 72d50806bdSBarry Smith ierr = PetscFree((*head)->array_head);CHKERRQ(ierr); 73d50806bdSBarry Smith ierr = PetscFree(*head);CHKERRQ(ierr); 74d50806bdSBarry Smith *head = a; 75d50806bdSBarry Smith } 76d50806bdSBarry Smith PetscFunctionReturn(0); 77d50806bdSBarry Smith } 78d50806bdSBarry Smith 79d50806bdSBarry Smith /* 80*94e3eecaSKris Buschelman MatMatMult_Symbolic_SeqAIJ_SeqAIJ - Forms the symbolic product of two SeqAIJ matrices 81d50806bdSBarry Smith C = A * B; 82d50806bdSBarry Smith 83*94e3eecaSKris Buschelman Note: C is assumed to be uncreated. 84d50806bdSBarry Smith If this is not the case, Destroy C before calling this routine. 85d50806bdSBarry Smith */ 86d50806bdSBarry Smith #undef __FUNCT__ 87*94e3eecaSKris Buschelman #define __FUNCT__ "MatMatMult_Symbolic_SeqAIJ_SeqAIJ" 88*94e3eecaSKris Buschelman int MatMatMult_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat *C) 89d50806bdSBarry Smith { 90d50806bdSBarry Smith int ierr; 91d50806bdSBarry Smith FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 92d50806bdSBarry Smith Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 93d50806bdSBarry Smith int aishift=a->indexshift,bishift=b->indexshift; 94d50806bdSBarry Smith int *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj; 95*94e3eecaSKris Buschelman int *ci,*cj,*denserow,*sparserow; 96d50806bdSBarry Smith int an=A->N,am=A->M,bn=B->N,bm=B->M; 97d50806bdSBarry Smith int i,j,k,anzi,brow,bnzj,cnzi; 98d50806bdSBarry Smith MatScalar *ca; 99d50806bdSBarry Smith 100d50806bdSBarry Smith PetscFunctionBegin; 101d50806bdSBarry Smith /* some error checking which could be moved into interface layer */ 102d50806bdSBarry Smith if (aishift || bishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported."); 103d50806bdSBarry Smith if (an!=bm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",an,bm); 104d50806bdSBarry Smith 105*94e3eecaSKris Buschelman /* Set up timers */ 106d50806bdSBarry Smith if (!logkey_matmatmult_symbolic) { 107d50806bdSBarry Smith ierr = PetscLogEventRegister(&logkey_matmatmult_symbolic,"MatMatMult_Symbolic",MAT_COOKIE);CHKERRQ(ierr); 108d50806bdSBarry Smith } 109d50806bdSBarry Smith ierr = PetscLogEventBegin(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr); 110d50806bdSBarry Smith 111d50806bdSBarry Smith /* Set up */ 112d50806bdSBarry Smith /* Allocate ci array, arrays for fill computation and */ 113d50806bdSBarry Smith /* free space for accumulating nonzero column info */ 114d50806bdSBarry Smith ierr = PetscMalloc(((am+1)+1)*sizeof(int),&ci);CHKERRQ(ierr); 115d50806bdSBarry Smith ci[0] = 0; 116d50806bdSBarry Smith 117*94e3eecaSKris Buschelman ierr = PetscMalloc((2*bn+1)*sizeof(int),&denserow);CHKERRQ(ierr); 118*94e3eecaSKris Buschelman ierr = PetscMemzero(denserow,(2*bn+1)*sizeof(int));CHKERRQ(ierr); 119*94e3eecaSKris Buschelman sparserow = denserow + bn; 120d50806bdSBarry Smith 121d50806bdSBarry Smith /* Initial FreeSpace size is nnz(B)=bi[bm] */ 122*94e3eecaSKris Buschelman /* No idea what is most reasonable here. */ 123d50806bdSBarry Smith ierr = GetMoreSpace(bi[bm],&free_space);CHKERRQ(ierr); 124d50806bdSBarry Smith current_space = free_space; 125d50806bdSBarry Smith 126*94e3eecaSKris Buschelman /* Determine symbolic info for each row of the product: */ 127d50806bdSBarry Smith for (i=0;i<am;i++) { 128d50806bdSBarry Smith anzi = ai[i+1] - ai[i]; 129d50806bdSBarry Smith cnzi = 0; 130d50806bdSBarry Smith for (j=0;j<anzi;j++) { 131d50806bdSBarry Smith brow = *aj++; 132d50806bdSBarry Smith bnzj = bi[brow+1] - bi[brow]; 133d50806bdSBarry Smith bjj = bj + bi[brow]; 134d50806bdSBarry Smith for (k=0;k<bnzj;k++) { 135d50806bdSBarry Smith /* If column is not marked, mark it in compressed and uncompressed locations. */ 136d50806bdSBarry Smith /* For simplicity, leave uncompressed row unsorted until finished with row, */ 137d50806bdSBarry Smith /* and increment nonzero count for this row. */ 138*94e3eecaSKris Buschelman if (!denserow[bjj[k]]) { 139*94e3eecaSKris Buschelman denserow[bjj[k]] = -1; 140*94e3eecaSKris Buschelman sparserow[cnzi++] = bjj[k]; 141d50806bdSBarry Smith } 142d50806bdSBarry Smith } 143d50806bdSBarry Smith } 144d50806bdSBarry Smith 145*94e3eecaSKris Buschelman /* sort sparserow */ 146*94e3eecaSKris Buschelman ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 147d50806bdSBarry Smith 148d50806bdSBarry Smith /* If free space is not available, make more free space */ 149d50806bdSBarry Smith /* Double the amount of total space in the list */ 150d50806bdSBarry Smith if (current_space->local_remaining<cnzi) { 151d50806bdSBarry Smith ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 152d50806bdSBarry Smith } 153d50806bdSBarry Smith 154*94e3eecaSKris Buschelman /* Copy data into free space, and zero out denserow */ 155*94e3eecaSKris Buschelman ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 156d50806bdSBarry Smith current_space->array += cnzi; 157d50806bdSBarry Smith current_space->local_used += cnzi; 158d50806bdSBarry Smith current_space->local_remaining -= cnzi; 159d50806bdSBarry Smith for (j=0;j<cnzi;j++) { 160*94e3eecaSKris Buschelman denserow[sparserow[j]] = 0; 161d50806bdSBarry Smith } 162d50806bdSBarry Smith ci[i+1] = ci[i] + cnzi; 163d50806bdSBarry Smith } 164d50806bdSBarry Smith 165*94e3eecaSKris Buschelman /* Column indices are in the list of free space */ 166d50806bdSBarry Smith /* Allocate space for cj, initialize cj, and */ 167d50806bdSBarry Smith /* destroy list of free space and other temporary array(s) */ 168d50806bdSBarry Smith ierr = PetscMalloc((ci[am]+1)*sizeof(int),&cj);CHKERRQ(ierr); 169d50806bdSBarry Smith ierr = MakeSpaceContiguous(cj,&free_space);CHKERRQ(ierr); 170*94e3eecaSKris Buschelman ierr = PetscFree(denserow);CHKERRQ(ierr); 171d50806bdSBarry Smith 172d50806bdSBarry Smith /* Allocate space for ca */ 173d50806bdSBarry Smith ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 174d50806bdSBarry Smith ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 175d50806bdSBarry Smith 176d50806bdSBarry Smith /* put together the new matrix */ 177d50806bdSBarry Smith ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr); 178d50806bdSBarry Smith 179d50806bdSBarry Smith /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 180d50806bdSBarry Smith /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 181d50806bdSBarry Smith c = (Mat_SeqAIJ *)((*C)->data); 182d50806bdSBarry Smith c->freedata = PETSC_TRUE; 183d50806bdSBarry Smith c->nonew = 0; 184d50806bdSBarry Smith 185d50806bdSBarry Smith ierr = PetscLogEventEnd(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr); 186d50806bdSBarry Smith PetscFunctionReturn(0); 187d50806bdSBarry Smith } 188d50806bdSBarry Smith 189d50806bdSBarry Smith /* 190*94e3eecaSKris Buschelman MatMatMult_Numeric_SeqAIJ_SeqAIJ - Forms the numeric product of two SeqAIJ matrices 191d50806bdSBarry Smith C=A*B; 192*94e3eecaSKris Buschelman Note: C must have been created by calling MatMatMult_Symbolic_SeqAIJ_SeqAIJ. 193d50806bdSBarry Smith */ 194d50806bdSBarry Smith #undef __FUNCT__ 195*94e3eecaSKris Buschelman #define __FUNCT__ "MatMatMult_Numeric_SeqAIJ_SeqAIJ" 196*94e3eecaSKris Buschelman int MatMatMult_Numeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 197d50806bdSBarry Smith { 198*94e3eecaSKris Buschelman int ierr,flops=0; 199d50806bdSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 200d50806bdSBarry Smith Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 201d50806bdSBarry Smith Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 202d50806bdSBarry Smith int aishift=a->indexshift,bishift=b->indexshift,cishift=c->indexshift; 203d50806bdSBarry Smith int *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 204d50806bdSBarry Smith int an=A->N,am=A->M,bn=B->N,bm=B->M,cn=C->N,cm=C->M; 205*94e3eecaSKris Buschelman int i,j,k,anzi,bnzi,cnzi,brow; 206d50806bdSBarry Smith MatScalar *aa=a->a,*ba=b->a,*baj,*ca=c->a,*temp; 207d50806bdSBarry Smith 208d50806bdSBarry Smith PetscFunctionBegin; 209d50806bdSBarry Smith 210d50806bdSBarry Smith /* This error checking should be unnecessary if the symbolic was performed */ 211d50806bdSBarry Smith if (aishift || bishift || cishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported."); 212d50806bdSBarry Smith if (am!=cm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",am,cm); 213d50806bdSBarry Smith if (an!=bm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",an,bm); 214d50806bdSBarry Smith if (bn!=cn) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",bn,cn); 215d50806bdSBarry Smith 216*94e3eecaSKris Buschelman /* Set up timers */ 217d50806bdSBarry Smith if (!logkey_matmatmult_numeric) { 218d50806bdSBarry Smith ierr = PetscLogEventRegister(&logkey_matmatmult_numeric,"MatMatMult_Numeric",MAT_COOKIE);CHKERRQ(ierr); 219d50806bdSBarry Smith } 220d50806bdSBarry Smith ierr = PetscLogEventBegin(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr); 221*94e3eecaSKris Buschelman 222d50806bdSBarry Smith /* Allocate temp accumulation space to avoid searching for nonzero columns in C */ 223d50806bdSBarry Smith ierr = PetscMalloc((cn+1)*sizeof(MatScalar),&temp);CHKERRQ(ierr); 224d50806bdSBarry Smith ierr = PetscMemzero(temp,cn*sizeof(MatScalar));CHKERRQ(ierr); 225d50806bdSBarry Smith /* Traverse A row-wise. */ 226d50806bdSBarry Smith /* Build the ith row in C by summing over nonzero columns in A, */ 227d50806bdSBarry Smith /* the rows of B corresponding to nonzeros of A. */ 228d50806bdSBarry Smith for (i=0;i<am;i++) { 229d50806bdSBarry Smith anzi = ai[i+1] - ai[i]; 230d50806bdSBarry Smith for (j=0;j<anzi;j++) { 231d50806bdSBarry Smith brow = *aj++; 232d50806bdSBarry Smith bnzi = bi[brow+1] - bi[brow]; 233d50806bdSBarry Smith bjj = bj + bi[brow]; 234d50806bdSBarry Smith baj = ba + bi[brow]; 235d50806bdSBarry Smith for (k=0;k<bnzi;k++) { 236d50806bdSBarry Smith temp[bjj[k]] += (*aa)*baj[k]; 237d50806bdSBarry Smith } 238d50806bdSBarry Smith flops += 2*bnzi; 239d50806bdSBarry Smith aa++; 240d50806bdSBarry Smith } 241d50806bdSBarry Smith /* Store row back into C, and re-zero temp */ 242d50806bdSBarry Smith cnzi = ci[i+1] - ci[i]; 243d50806bdSBarry Smith for (j=0;j<cnzi;j++) { 244d50806bdSBarry Smith ca[j] = temp[cj[j]]; 245d50806bdSBarry Smith temp[cj[j]] = 0.0; 246d50806bdSBarry Smith } 247d50806bdSBarry Smith ca += cnzi; 248d50806bdSBarry Smith cj += cnzi; 249d50806bdSBarry Smith } 250716bacf3SKris Buschelman ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 251716bacf3SKris Buschelman ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 252716bacf3SKris Buschelman 253d50806bdSBarry Smith /* Free temp */ 254d50806bdSBarry Smith ierr = PetscFree(temp);CHKERRQ(ierr); 255d50806bdSBarry Smith ierr = PetscLogFlops(flops);CHKERRQ(ierr); 256d50806bdSBarry Smith ierr = PetscLogEventEnd(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr); 257d50806bdSBarry Smith PetscFunctionReturn(0); 258d50806bdSBarry Smith } 259d50806bdSBarry Smith 260d50806bdSBarry Smith #undef __FUNCT__ 261d50806bdSBarry Smith #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ" 262d50806bdSBarry Smith int MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat *C) { 263d50806bdSBarry Smith int ierr; 264d50806bdSBarry Smith 265d50806bdSBarry Smith PetscFunctionBegin; 2662216b3a4SKris Buschelman if (!logkey_matmatmult) { 2672216b3a4SKris Buschelman ierr = PetscLogEventRegister(&logkey_matmatmult,"MatMatMult",MAT_COOKIE);CHKERRQ(ierr); 2682216b3a4SKris Buschelman } 2692216b3a4SKris Buschelman ierr = PetscLogEventBegin(logkey_matmatmult,A,B,0,0);CHKERRQ(ierr); 270*94e3eecaSKris Buschelman ierr = MatMatMult_Symbolic_SeqAIJ_SeqAIJ(A,B,C);CHKERRQ(ierr); 271*94e3eecaSKris Buschelman ierr = MatMatMult_Numeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 2722216b3a4SKris Buschelman ierr = PetscLogEventEnd(logkey_matmatmult,A,B,0,0);CHKERRQ(ierr); 273d50806bdSBarry Smith PetscFunctionReturn(0); 274d50806bdSBarry Smith } 275d50806bdSBarry Smith 276d50806bdSBarry Smith #undef __FUNCT__ 277*94e3eecaSKris Buschelman #define __FUNCT__ "MatGetSymbolicTranspose_SeqIJ" 278*94e3eecaSKris Buschelman int MatGetSymbolicTranspose_SeqAIJ(Mat A,int *Ati[],int *Atj[]) { 279*94e3eecaSKris Buschelman int ierr,i,j,anzj; 280*94e3eecaSKris Buschelman Mat_SeqAIJ *a=(Mat_SeqAIJ *)A->data; 281*94e3eecaSKris Buschelman int aishift = a->indexshift,an=A->N,am=A->M; 282*94e3eecaSKris Buschelman int *ati,*atj,*atfill,*ai=a->i,*aj=a->j; 283*94e3eecaSKris Buschelman 284*94e3eecaSKris Buschelman PetscFunctionBegin; 285*94e3eecaSKris Buschelman 286*94e3eecaSKris Buschelman ierr = PetscLogInfo(A,"Getting Symbolic Transpose.\n");CHKERRQ(ierr); 287*94e3eecaSKris Buschelman if (aishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported."); 288*94e3eecaSKris Buschelman 289*94e3eecaSKris Buschelman /* Set up timers */ 290*94e3eecaSKris Buschelman if (!logkey_matgetsymtranspose) { 291*94e3eecaSKris Buschelman ierr = PetscLogEventRegister(&logkey_matgetsymtranspose,"MatGetSymbolicTranspose",MAT_COOKIE);CHKERRQ(ierr); 292*94e3eecaSKris Buschelman } 293*94e3eecaSKris Buschelman ierr = PetscLogEventBegin(logkey_matgetsymtranspose,A,0,0,0);CHKERRQ(ierr); 294*94e3eecaSKris Buschelman 295*94e3eecaSKris Buschelman /* Allocate space for symbolic transpose info and work array */ 296*94e3eecaSKris Buschelman ierr = PetscMalloc((an+1)*sizeof(int),&ati);CHKERRQ(ierr); 297*94e3eecaSKris Buschelman ierr = PetscMalloc(ai[am]*sizeof(int),&atj);CHKERRQ(ierr); 298*94e3eecaSKris Buschelman ierr = PetscMalloc(an*sizeof(int),&atfill);CHKERRQ(ierr); 299*94e3eecaSKris Buschelman ierr = PetscMemzero(ati,(an+1)*sizeof(int));CHKERRQ(ierr); 300*94e3eecaSKris Buschelman 301*94e3eecaSKris Buschelman /* Walk through aj and count ## of non-zeros in each row of A^T. */ 302*94e3eecaSKris Buschelman /* Note: offset by 1 for fast conversion into csr format. */ 303*94e3eecaSKris Buschelman for (i=0;i<ai[am];i++) { 304*94e3eecaSKris Buschelman ati[aj[i]+1] += 1; 305*94e3eecaSKris Buschelman } 306*94e3eecaSKris Buschelman /* Form ati for csr format of A^T. */ 307*94e3eecaSKris Buschelman for (i=0;i<an;i++) { 308*94e3eecaSKris Buschelman ati[i+1] += ati[i]; 309*94e3eecaSKris Buschelman } 310*94e3eecaSKris Buschelman 311*94e3eecaSKris Buschelman /* Copy ati into atfill so we have locations of the next free space in atj */ 312*94e3eecaSKris Buschelman ierr = PetscMemcpy(atfill,ati,an*sizeof(int));CHKERRQ(ierr); 313*94e3eecaSKris Buschelman 314*94e3eecaSKris Buschelman /* Walk through A row-wise and mark nonzero entries of A^T. */ 315*94e3eecaSKris Buschelman for (i=0;i<am;i++) { 316*94e3eecaSKris Buschelman anzj = ai[i+1] - ai[i]; 317*94e3eecaSKris Buschelman for (j=0;j<anzj;j++) { 318*94e3eecaSKris Buschelman atj[atfill[*aj]] = i; 319*94e3eecaSKris Buschelman atfill[*aj++] += 1; 320*94e3eecaSKris Buschelman } 321*94e3eecaSKris Buschelman } 322*94e3eecaSKris Buschelman 323*94e3eecaSKris Buschelman /* Clean up temporary space and complete requests. */ 324*94e3eecaSKris Buschelman ierr = PetscFree(atfill);CHKERRQ(ierr); 325*94e3eecaSKris Buschelman *Ati = ati; 326*94e3eecaSKris Buschelman *Atj = atj; 327*94e3eecaSKris Buschelman 328*94e3eecaSKris Buschelman ierr = PetscLogEventEnd(logkey_matgetsymtranspose,A,0,0,0);CHKERRQ(ierr); 329*94e3eecaSKris Buschelman PetscFunctionReturn(0); 330*94e3eecaSKris Buschelman } 331*94e3eecaSKris Buschelman 332*94e3eecaSKris Buschelman extern int MatTranspose_SeqAIJ(Mat A,Mat *B); 333*94e3eecaSKris Buschelman 334*94e3eecaSKris Buschelman #undef __FUNCT__ 335*94e3eecaSKris Buschelman #define __FUNCT__ "MatTranspose_SeqIJ_FAST" 336*94e3eecaSKris Buschelman int MatTranspose_SeqAIJ_FAST(Mat A,Mat *B) { 337*94e3eecaSKris Buschelman int ierr,i,j,anzj; 338*94e3eecaSKris Buschelman Mat At; 339*94e3eecaSKris Buschelman Mat_SeqAIJ *a=(Mat_SeqAIJ *)A->data,*at; 340*94e3eecaSKris Buschelman int aishift = a->indexshift,an=A->N,am=A->M; 341*94e3eecaSKris Buschelman int *ati,*atj,*atfill,*ai=a->i,*aj=a->j; 342*94e3eecaSKris Buschelman MatScalar *ata,*aa=a->a; 343*94e3eecaSKris Buschelman PetscFunctionBegin; 344*94e3eecaSKris Buschelman 345*94e3eecaSKris Buschelman if (aishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported."); 346*94e3eecaSKris Buschelman 347*94e3eecaSKris Buschelman /* Set up timers */ 348*94e3eecaSKris Buschelman if (!logkey_mattranspose) { 349*94e3eecaSKris Buschelman ierr = PetscLogEventRegister(&logkey_mattranspose,"MatTranspose_SeqAIJ_FAST",MAT_COOKIE);CHKERRQ(ierr); 350*94e3eecaSKris Buschelman } 351*94e3eecaSKris Buschelman ierr = PetscLogEventBegin(logkey_mattranspose,A,0,0,0);CHKERRQ(ierr); 352*94e3eecaSKris Buschelman 353*94e3eecaSKris Buschelman /* Allocate space for symbolic transpose info and work array */ 354*94e3eecaSKris Buschelman ierr = PetscMalloc((an+1)*sizeof(int),&ati);CHKERRQ(ierr); 355*94e3eecaSKris Buschelman ierr = PetscMalloc(ai[am]*sizeof(int),&atj);CHKERRQ(ierr); 356*94e3eecaSKris Buschelman ierr = PetscMalloc(ai[am]*sizeof(MatScalar),&ata);CHKERRQ(ierr); 357*94e3eecaSKris Buschelman ierr = PetscMalloc(an*sizeof(int),&atfill);CHKERRQ(ierr); 358*94e3eecaSKris Buschelman ierr = PetscMemzero(ati,(an+1)*sizeof(int));CHKERRQ(ierr); 359*94e3eecaSKris Buschelman /* Walk through aj and count ## of non-zeros in each row of A^T. */ 360*94e3eecaSKris Buschelman /* Note: offset by 1 for fast conversion into csr format. */ 361*94e3eecaSKris Buschelman for (i=0;i<ai[am];i++) { 362*94e3eecaSKris Buschelman ati[aj[i]+1] += 1; 363*94e3eecaSKris Buschelman } 364*94e3eecaSKris Buschelman /* Form ati for csr format of A^T. */ 365*94e3eecaSKris Buschelman for (i=0;i<an;i++) { 366*94e3eecaSKris Buschelman ati[i+1] += ati[i]; 367*94e3eecaSKris Buschelman } 368*94e3eecaSKris Buschelman 369*94e3eecaSKris Buschelman /* Copy ati into atfill so we have locations of the next free space in atj */ 370*94e3eecaSKris Buschelman ierr = PetscMemcpy(atfill,ati,an*sizeof(int));CHKERRQ(ierr); 371*94e3eecaSKris Buschelman 372*94e3eecaSKris Buschelman /* Walk through A row-wise and mark nonzero entries of A^T. */ 373*94e3eecaSKris Buschelman for (i=0;i<am;i++) { 374*94e3eecaSKris Buschelman anzj = ai[i+1] - ai[i]; 375*94e3eecaSKris Buschelman for (j=0;j<anzj;j++) { 376*94e3eecaSKris Buschelman atj[atfill[*aj]] = i; 377*94e3eecaSKris Buschelman ata[atfill[*aj]] = *aa++; 378*94e3eecaSKris Buschelman atfill[*aj++] += 1; 379*94e3eecaSKris Buschelman } 380*94e3eecaSKris Buschelman } 381*94e3eecaSKris Buschelman 382*94e3eecaSKris Buschelman /* Clean up temporary space and complete requests. */ 383*94e3eecaSKris Buschelman ierr = PetscFree(atfill);CHKERRQ(ierr); 384*94e3eecaSKris Buschelman ierr = MatCreateSeqAIJWithArrays(A->comm,an,am,ati,atj,ata,&At);CHKERRQ(ierr); 385*94e3eecaSKris Buschelman at = (Mat_SeqAIJ *)(At->data); 386*94e3eecaSKris Buschelman at->freedata = PETSC_TRUE; 387*94e3eecaSKris Buschelman at->nonew = 0; 388*94e3eecaSKris Buschelman if (B) { 389*94e3eecaSKris Buschelman *B = At; 390*94e3eecaSKris Buschelman } else { 391*94e3eecaSKris Buschelman ierr = MatHeaderCopy(A,At); 392*94e3eecaSKris Buschelman } 393*94e3eecaSKris Buschelman ierr = PetscLogEventEnd(logkey_mattranspose,A,0,0,0);CHKERRQ(ierr); 394*94e3eecaSKris Buschelman PetscFunctionReturn(0); 395*94e3eecaSKris Buschelman } 396*94e3eecaSKris Buschelman 397*94e3eecaSKris Buschelman #undef __FUNCT__ 398*94e3eecaSKris Buschelman #define __FUNCT__ "MatRestoreSymbolicTranspose" 399*94e3eecaSKris Buschelman int MatRestoreSymbolicTranspose(Mat A,int *ati[],int *atj[]) { 400*94e3eecaSKris Buschelman int ierr; 401*94e3eecaSKris Buschelman 402*94e3eecaSKris Buschelman PetscFunctionBegin; 403*94e3eecaSKris Buschelman ierr = PetscLogInfo(A,"Restoring Symbolic Transpose.\n");CHKERRQ(ierr); 404*94e3eecaSKris Buschelman ierr = PetscFree(*ati);CHKERRQ(ierr); 405*94e3eecaSKris Buschelman ati = PETSC_NULL; 406*94e3eecaSKris Buschelman ierr = PetscFree(*atj);CHKERRQ(ierr); 407*94e3eecaSKris Buschelman atj = PETSC_NULL; 408*94e3eecaSKris Buschelman PetscFunctionReturn(0); 409*94e3eecaSKris Buschelman } 410*94e3eecaSKris Buschelman 411*94e3eecaSKris Buschelman /* 412*94e3eecaSKris Buschelman MatApplyPtAP_Symbolic_SeqAIJ - Forms the symbolic product of two SeqAIJ matrices 413*94e3eecaSKris Buschelman C = P^T * A * P; 414*94e3eecaSKris Buschelman 415*94e3eecaSKris Buschelman Note: C is assumed to be uncreated. 416*94e3eecaSKris Buschelman If this is not the case, Destroy C before calling this routine. 417*94e3eecaSKris Buschelman */ 418*94e3eecaSKris Buschelman #undef __FUNCT__ 419*94e3eecaSKris Buschelman #define __FUNCT__ "MatApplyPtAP_Symbolic_SeqAIJ" 420*94e3eecaSKris Buschelman int MatApplyPtAP_Symbolic_SeqAIJ(Mat A,Mat P,Mat *C) { 421d50806bdSBarry Smith int ierr; 422d50806bdSBarry Smith FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 423d50806bdSBarry Smith Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 424d50806bdSBarry Smith int aishift=a->indexshift,pishift=p->indexshift; 425*94e3eecaSKris Buschelman int *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 426*94e3eecaSKris Buschelman int *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 427d50806bdSBarry Smith int an=A->N,am=A->M,pn=P->N,pm=P->M; 428d50806bdSBarry Smith int i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 429d50806bdSBarry Smith MatScalar *ca; 430d50806bdSBarry Smith 431d50806bdSBarry Smith PetscFunctionBegin; 432d50806bdSBarry Smith 433d50806bdSBarry Smith /* some error checking which could be moved into interface layer */ 434d50806bdSBarry Smith if (aishift || pishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported."); 435d50806bdSBarry Smith if (pm!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pm,an); 436d50806bdSBarry Smith if (am!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",am, an); 437d50806bdSBarry Smith 438*94e3eecaSKris Buschelman /* Set up timers */ 439d50806bdSBarry Smith if (!logkey_matapplyptap_symbolic) { 440d50806bdSBarry Smith ierr = PetscLogEventRegister(&logkey_matapplyptap_symbolic,"MatApplyPtAP_Symbolic",MAT_COOKIE);CHKERRQ(ierr); 441d50806bdSBarry Smith } 442d50806bdSBarry Smith ierr = PetscLogEventBegin(logkey_matapplyptap_symbolic,A,P,0,0);CHKERRQ(ierr); 443d50806bdSBarry Smith 444*94e3eecaSKris Buschelman /* Get ij structure of P^T */ 445*94e3eecaSKris Buschelman ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 446*94e3eecaSKris Buschelman ptJ=ptj; 447d50806bdSBarry Smith 448d50806bdSBarry Smith /* Allocate ci array, arrays for fill computation and */ 449d50806bdSBarry Smith /* free space for accumulating nonzero column info */ 450d50806bdSBarry Smith ierr = PetscMalloc(((pn+1)*1)*sizeof(int),&ci);CHKERRQ(ierr); 451d50806bdSBarry Smith ci[0] = 0; 452d50806bdSBarry Smith 453*94e3eecaSKris Buschelman ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr); 454*94e3eecaSKris Buschelman ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr); 455*94e3eecaSKris Buschelman ptasparserow = ptadenserow + an; 456*94e3eecaSKris Buschelman denserow = ptasparserow + an; 457*94e3eecaSKris Buschelman sparserow = denserow + pn; 458d50806bdSBarry Smith 459d50806bdSBarry Smith /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 460*94e3eecaSKris Buschelman /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 461716bacf3SKris Buschelman ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 462d50806bdSBarry Smith current_space = free_space; 463d50806bdSBarry Smith 464*94e3eecaSKris Buschelman /* Determine symbolic info for each row of C: */ 465d50806bdSBarry Smith for (i=0;i<pn;i++) { 466d50806bdSBarry Smith ptnzi = pti[i+1] - pti[i]; 467d50806bdSBarry Smith ptanzi = 0; 468*94e3eecaSKris Buschelman /* Determine symbolic row of PtA: */ 469d50806bdSBarry Smith for (j=0;j<ptnzi;j++) { 470*94e3eecaSKris Buschelman arow = *ptJ++; 471d50806bdSBarry Smith anzj = ai[arow+1] - ai[arow]; 472d50806bdSBarry Smith ajj = aj + ai[arow]; 473d50806bdSBarry Smith for (k=0;k<anzj;k++) { 474*94e3eecaSKris Buschelman if (!ptadenserow[ajj[k]]) { 475*94e3eecaSKris Buschelman ptadenserow[ajj[k]] = -1; 476*94e3eecaSKris Buschelman ptasparserow[ptanzi++] = ajj[k]; 477d50806bdSBarry Smith } 478d50806bdSBarry Smith } 479d50806bdSBarry Smith } 480*94e3eecaSKris Buschelman /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 481*94e3eecaSKris Buschelman ptaj = ptasparserow; 482d50806bdSBarry Smith cnzi = 0; 483d50806bdSBarry Smith for (j=0;j<ptanzi;j++) { 484d50806bdSBarry Smith prow = *ptaj++; 485d50806bdSBarry Smith pnzj = pi[prow+1] - pi[prow]; 486d50806bdSBarry Smith pjj = pj + pi[prow]; 487d50806bdSBarry Smith for (k=0;k<pnzj;k++) { 488*94e3eecaSKris Buschelman if (!denserow[pjj[k]]) { 489*94e3eecaSKris Buschelman denserow[pjj[k]] = -1; 490*94e3eecaSKris Buschelman sparserow[cnzi++] = pjj[k]; 491d50806bdSBarry Smith } 492d50806bdSBarry Smith } 493d50806bdSBarry Smith } 494d50806bdSBarry Smith 495*94e3eecaSKris Buschelman /* sort sparserow */ 496*94e3eecaSKris Buschelman ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 497d50806bdSBarry Smith 498d50806bdSBarry Smith /* If free space is not available, make more free space */ 499d50806bdSBarry Smith /* Double the amount of total space in the list */ 500d50806bdSBarry Smith if (current_space->local_remaining<cnzi) { 501d50806bdSBarry Smith ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 502d50806bdSBarry Smith } 503d50806bdSBarry Smith 504*94e3eecaSKris Buschelman /* Copy data into free space, and zero out denserows */ 505*94e3eecaSKris Buschelman ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 506d50806bdSBarry Smith current_space->array += cnzi; 507d50806bdSBarry Smith current_space->local_used += cnzi; 508d50806bdSBarry Smith current_space->local_remaining -= cnzi; 509d50806bdSBarry Smith 510d50806bdSBarry Smith for (j=0;j<ptanzi;j++) { 511*94e3eecaSKris Buschelman ptadenserow[ptasparserow[j]] = 0; 512d50806bdSBarry Smith } 513d50806bdSBarry Smith for (j=0;j<cnzi;j++) { 514*94e3eecaSKris Buschelman denserow[sparserow[j]] = 0; 515d50806bdSBarry Smith } 516d50806bdSBarry Smith /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 517d50806bdSBarry Smith /* For now, we will recompute what is needed. */ 518d50806bdSBarry Smith ci[i+1] = ci[i] + cnzi; 519d50806bdSBarry Smith } 520d50806bdSBarry Smith /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 521d50806bdSBarry Smith /* Allocate space for cj, initialize cj, and */ 522d50806bdSBarry Smith /* destroy list of free space and other temporary array(s) */ 523d50806bdSBarry Smith ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr); 524d50806bdSBarry Smith ierr = MakeSpaceContiguous(cj,&free_space);CHKERRQ(ierr); 525*94e3eecaSKris Buschelman ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 526d50806bdSBarry Smith 527d50806bdSBarry Smith /* Allocate space for ca */ 528d50806bdSBarry Smith ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 529d50806bdSBarry Smith ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 530d50806bdSBarry Smith 531d50806bdSBarry Smith /* put together the new matrix */ 532d50806bdSBarry Smith ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 533d50806bdSBarry Smith 534d50806bdSBarry Smith /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 535d50806bdSBarry Smith /* Since these are PETSc arrays, change flags to free them as necessary. */ 536d50806bdSBarry Smith c = (Mat_SeqAIJ *)((*C)->data); 537d50806bdSBarry Smith c->freedata = PETSC_TRUE; 538d50806bdSBarry Smith c->nonew = 0; 539d50806bdSBarry Smith 540d50806bdSBarry Smith /* Clean up. */ 541*94e3eecaSKris Buschelman ierr = MatRestoreSymbolicTranspose(P,&pti,&ptj);CHKERRQ(ierr); 542d50806bdSBarry Smith 543d50806bdSBarry Smith ierr = PetscLogEventEnd(logkey_matapplyptap_symbolic,A,P,0,0);CHKERRQ(ierr); 544d50806bdSBarry Smith PetscFunctionReturn(0); 545d50806bdSBarry Smith } 546d50806bdSBarry Smith 547*94e3eecaSKris Buschelman /* 548*94e3eecaSKris Buschelman MatApplyPtAP_Numeric_SeqAIJ - Forms the numeric product of two SeqAIJ matrices 549*94e3eecaSKris Buschelman C = P^T * A * P; 550*94e3eecaSKris Buschelman Note: C must have been created by calling MatApplyPtAP_Symbolic_SeqAIJ. 551*94e3eecaSKris Buschelman */ 552d50806bdSBarry Smith #undef __FUNCT__ 553*94e3eecaSKris Buschelman #define __FUNCT__ "MatApplyPtAP_Numeric_SeqAIJ" 554*94e3eecaSKris Buschelman int MatApplyPtAP_Numeric_SeqAIJ(Mat A,Mat P,Mat C) { 555*94e3eecaSKris Buschelman int ierr,flops=0; 556d50806bdSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 557d50806bdSBarry Smith Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 558d50806bdSBarry Smith Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 559d50806bdSBarry Smith int aishift=a->indexshift,pishift=p->indexshift,cishift=c->indexshift; 560716bacf3SKris Buschelman int *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 561716bacf3SKris Buschelman int *ci=c->i,*cj=c->j,*cjj; 562d50806bdSBarry Smith int an=A->N,am=A->M,pn=P->N,pm=P->M,cn=C->N,cm=C->M; 563*94e3eecaSKris Buschelman int i,j,k,anzi,pnzi,apnzj,nextap,pnzj,cnzj,prow,crow; 564d50806bdSBarry Smith MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 565d50806bdSBarry Smith 566d50806bdSBarry Smith PetscFunctionBegin; 567d50806bdSBarry Smith 568d50806bdSBarry Smith /* This error checking should be unnecessary if the symbolic was performed */ 569d50806bdSBarry Smith if (aishift || pishift || cishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported."); 570d50806bdSBarry Smith if (pn!=cm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pn,cm); 571d50806bdSBarry Smith if (pm!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pm,an); 572d50806bdSBarry Smith if (am!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",am, an); 573d50806bdSBarry Smith if (pn!=cn) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pn, cn); 574d50806bdSBarry Smith 575*94e3eecaSKris Buschelman /* Set up timers */ 576d50806bdSBarry Smith if (!logkey_matapplyptap_numeric) { 577d50806bdSBarry Smith ierr = PetscLogEventRegister(&logkey_matapplyptap_numeric,"MatApplyPtAP_Numeric",MAT_COOKIE);CHKERRQ(ierr); 578d50806bdSBarry Smith } 579d50806bdSBarry Smith ierr = PetscLogEventBegin(logkey_matapplyptap_numeric,A,P,C,0);CHKERRQ(ierr); 580d50806bdSBarry Smith 581716bacf3SKris Buschelman ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(int)),&apa);CHKERRQ(ierr); 582716bacf3SKris Buschelman ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr); 583d50806bdSBarry Smith ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 584d50806bdSBarry Smith 585716bacf3SKris Buschelman apj = (int *)(apa + cn); 586716bacf3SKris Buschelman apjdense = apj + cn; 587716bacf3SKris Buschelman 588d50806bdSBarry Smith for (i=0;i<am;i++) { 589d50806bdSBarry Smith /* Form sparse row of A*P */ 590d50806bdSBarry Smith anzi = ai[i+1] - ai[i]; 591d50806bdSBarry Smith apnzj = 0; 592d50806bdSBarry Smith for (j=0;j<anzi;j++) { 593d50806bdSBarry Smith prow = *aj++; 594d50806bdSBarry Smith pnzj = pi[prow+1] - pi[prow]; 595d50806bdSBarry Smith pjj = pj + pi[prow]; 596d50806bdSBarry Smith paj = pa + pi[prow]; 597d50806bdSBarry Smith for (k=0;k<pnzj;k++) { 598716bacf3SKris Buschelman if (!apjdense[pjj[k]]) { 599716bacf3SKris Buschelman apjdense[pjj[k]] = -1; 600d50806bdSBarry Smith apj[apnzj++] = pjj[k]; 601d50806bdSBarry Smith } 602d50806bdSBarry Smith apa[pjj[k]] += (*aa)*paj[k]; 603d50806bdSBarry Smith } 604d50806bdSBarry Smith flops += 2*pnzj; 605d50806bdSBarry Smith aa++; 606d50806bdSBarry Smith } 607d50806bdSBarry Smith 608d50806bdSBarry Smith /* Sort the j index array for quick sparse axpy. */ 609d50806bdSBarry Smith ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 610d50806bdSBarry Smith 611d50806bdSBarry Smith /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 612d50806bdSBarry Smith pnzi = pi[i+1] - pi[i]; 613d50806bdSBarry Smith for (j=0;j<pnzi;j++) { 614*94e3eecaSKris Buschelman nextap = 0; 615d50806bdSBarry Smith crow = *pJ++; 616d50806bdSBarry Smith cnzj = ci[crow+1] - ci[crow]; 617d50806bdSBarry Smith cjj = cj + ci[crow]; 618d50806bdSBarry Smith caj = ca + ci[crow]; 619*94e3eecaSKris Buschelman /* Perform sparse axpy operation. Note cjj includes apj. */ 620716bacf3SKris Buschelman for (k=0;nextap<apnzj;k++) { 621d50806bdSBarry Smith if (cjj[k]==apj[nextap]) { 622d50806bdSBarry Smith caj[k] += (*pA)*apa[apj[nextap++]]; 623d50806bdSBarry Smith } 624d50806bdSBarry Smith } 625d50806bdSBarry Smith flops += 2*apnzj; 626d50806bdSBarry Smith pA++; 627d50806bdSBarry Smith } 628d50806bdSBarry Smith 629716bacf3SKris Buschelman /* Zero the current row info for A*P */ 630d50806bdSBarry Smith for (j=0;j<apnzj;j++) { 631d50806bdSBarry Smith apa[apj[j]] = 0.; 632716bacf3SKris Buschelman apjdense[apj[j]] = 0; 633d50806bdSBarry Smith } 634d50806bdSBarry Smith } 6352216b3a4SKris Buschelman 6362216b3a4SKris Buschelman /* Assemble the final matrix and clean up */ 6372216b3a4SKris Buschelman ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6382216b3a4SKris Buschelman ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 639d50806bdSBarry Smith ierr = PetscFree(apa);CHKERRQ(ierr); 640d50806bdSBarry Smith ierr = PetscLogFlops(flops);CHKERRQ(ierr); 641d50806bdSBarry Smith ierr = PetscLogEventEnd(logkey_matapplyptap_numeric,A,P,C,0);CHKERRQ(ierr); 6422216b3a4SKris Buschelman 643d50806bdSBarry Smith PetscFunctionReturn(0); 644d50806bdSBarry Smith } 645d50806bdSBarry Smith 646*94e3eecaSKris Buschelman 647d50806bdSBarry Smith #undef __FUNCT__ 648d50806bdSBarry Smith #define __FUNCT__ "MatApplyPtAP_SeqAIJ" 649d50806bdSBarry Smith int MatApplyPtAP_SeqAIJ(Mat A,Mat P,Mat *C) { 650d50806bdSBarry Smith int ierr; 651d50806bdSBarry Smith 652d50806bdSBarry Smith PetscFunctionBegin; 653716bacf3SKris Buschelman if (!logkey_matapplyptap) { 654716bacf3SKris Buschelman ierr = PetscLogEventRegister(&logkey_matapplyptap,"MatApplyPtAP",MAT_COOKIE);CHKERRQ(ierr); 655716bacf3SKris Buschelman } 6562216b3a4SKris Buschelman ierr = PetscLogEventBegin(logkey_matapplyptap,A,P,0,0);CHKERRQ(ierr); 657*94e3eecaSKris Buschelman 658*94e3eecaSKris Buschelman ierr = MatApplyPtAP_Symbolic_SeqAIJ(A,P,C);CHKERRQ(ierr); 659*94e3eecaSKris Buschelman ierr = MatApplyPtAP_Numeric_SeqAIJ(A,P,*C);CHKERRQ(ierr); 660*94e3eecaSKris Buschelman 6612216b3a4SKris Buschelman ierr = PetscLogEventEnd(logkey_matapplyptap,A,P,0,0);CHKERRQ(ierr); 662d50806bdSBarry Smith PetscFunctionReturn(0); 663d50806bdSBarry Smith } 664*94e3eecaSKris Buschelman 665*94e3eecaSKris Buschelman /* 666*94e3eecaSKris Buschelman MatApplyPAPt_Symbolic_SeqAIJ - Forms the symbolic product of two SeqAIJ matrices 667*94e3eecaSKris Buschelman C = P * A * P^T; 668*94e3eecaSKris Buschelman 669*94e3eecaSKris Buschelman Note: C is assumed to be uncreated. 670*94e3eecaSKris Buschelman If this is not the case, Destroy C before calling this routine. 671*94e3eecaSKris Buschelman */ 672*94e3eecaSKris Buschelman #undef __FUNCT__ 673*94e3eecaSKris Buschelman #define __FUNCT__ "MatApplyPAPt_Symbolic_SeqAIJ" 674*94e3eecaSKris Buschelman int MatApplyPAPt_Symbolic_SeqAIJ(Mat A,Mat P,Mat *C) { 675*94e3eecaSKris Buschelman /* Note: This code is virtually identical to that of MatApplyPtAP_SeqAIJ_Symbolic */ 676*94e3eecaSKris Buschelman /* and MatMatMult_SeqAIJ_SeqAIJ_Symbolic. Perhaps they could be merged nicely. */ 677*94e3eecaSKris Buschelman int ierr; 678*94e3eecaSKris Buschelman FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 679*94e3eecaSKris Buschelman Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 680*94e3eecaSKris Buschelman int aishift=a->indexshift,pishift=p->indexshift; 681*94e3eecaSKris Buschelman int *ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pti,*ptj,*ptjj; 682*94e3eecaSKris Buschelman int *ci,*cj,*paj,*padenserow,*pasparserow,*denserow,*sparserow; 683*94e3eecaSKris Buschelman int an=A->N,am=A->M,pn=P->N,pm=P->M; 684*94e3eecaSKris Buschelman int i,j,k,pnzi,arow,anzj,panzi,ptrow,ptnzj,cnzi; 685*94e3eecaSKris Buschelman MatScalar *ca; 686*94e3eecaSKris Buschelman 687*94e3eecaSKris Buschelman PetscFunctionBegin; 688*94e3eecaSKris Buschelman 689*94e3eecaSKris Buschelman /* some error checking which could be moved into interface layer */ 690*94e3eecaSKris Buschelman if (aishift || pishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported."); 691*94e3eecaSKris Buschelman if (pn!=am) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pn,am); 692*94e3eecaSKris Buschelman if (am!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",am, an); 693*94e3eecaSKris Buschelman 694*94e3eecaSKris Buschelman /* Set up timers */ 695*94e3eecaSKris Buschelman if (!logkey_matapplypapt_symbolic) { 696*94e3eecaSKris Buschelman ierr = PetscLogEventRegister(&logkey_matapplypapt_symbolic,"MatApplyPAPt_Symbolic",MAT_COOKIE);CHKERRQ(ierr); 697*94e3eecaSKris Buschelman } 698*94e3eecaSKris Buschelman ierr = PetscLogEventBegin(logkey_matapplypapt_symbolic,A,P,0,0);CHKERRQ(ierr); 699*94e3eecaSKris Buschelman 700*94e3eecaSKris Buschelman /* Create ij structure of P^T */ 701*94e3eecaSKris Buschelman ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 702*94e3eecaSKris Buschelman 703*94e3eecaSKris Buschelman /* Allocate ci array, arrays for fill computation and */ 704*94e3eecaSKris Buschelman /* free space for accumulating nonzero column info */ 705*94e3eecaSKris Buschelman ierr = PetscMalloc(((pm+1)*1)*sizeof(int),&ci);CHKERRQ(ierr); 706*94e3eecaSKris Buschelman ci[0] = 0; 707*94e3eecaSKris Buschelman 708*94e3eecaSKris Buschelman ierr = PetscMalloc((2*an+2*pm+1)*sizeof(int),&padenserow);CHKERRQ(ierr); 709*94e3eecaSKris Buschelman ierr = PetscMemzero(padenserow,(2*an+2*pm+1)*sizeof(int));CHKERRQ(ierr); 710*94e3eecaSKris Buschelman pasparserow = padenserow + an; 711*94e3eecaSKris Buschelman denserow = pasparserow + an; 712*94e3eecaSKris Buschelman sparserow = denserow + pm; 713*94e3eecaSKris Buschelman 714*94e3eecaSKris Buschelman /* Set initial free space to be nnz(A) scaled by aspect ratio of Pt. */ 715*94e3eecaSKris Buschelman /* This should be reasonable if sparsity of PAPt is similar to that of A. */ 716*94e3eecaSKris Buschelman ierr = GetMoreSpace((ai[am]/pn)*pm,&free_space); 717*94e3eecaSKris Buschelman current_space = free_space; 718*94e3eecaSKris Buschelman 719*94e3eecaSKris Buschelman /* Determine fill for each row of C: */ 720*94e3eecaSKris Buschelman for (i=0;i<pm;i++) { 721*94e3eecaSKris Buschelman pnzi = pi[i+1] - pi[i]; 722*94e3eecaSKris Buschelman panzi = 0; 723*94e3eecaSKris Buschelman /* Get symbolic sparse row of PA: */ 724*94e3eecaSKris Buschelman for (j=0;j<pnzi;j++) { 725*94e3eecaSKris Buschelman arow = *pj++; 726*94e3eecaSKris Buschelman anzj = ai[arow+1] - ai[arow]; 727*94e3eecaSKris Buschelman ajj = aj + ai[arow]; 728*94e3eecaSKris Buschelman for (k=0;k<anzj;k++) { 729*94e3eecaSKris Buschelman if (!padenserow[ajj[k]]) { 730*94e3eecaSKris Buschelman padenserow[ajj[k]] = -1; 731*94e3eecaSKris Buschelman pasparserow[panzi++] = ajj[k]; 732*94e3eecaSKris Buschelman } 733*94e3eecaSKris Buschelman } 734*94e3eecaSKris Buschelman } 735*94e3eecaSKris Buschelman /* Using symbolic row of PA, determine symbolic row of C: */ 736*94e3eecaSKris Buschelman paj = pasparserow; 737*94e3eecaSKris Buschelman cnzi = 0; 738*94e3eecaSKris Buschelman for (j=0;j<panzi;j++) { 739*94e3eecaSKris Buschelman ptrow = *paj++; 740*94e3eecaSKris Buschelman ptnzj = pti[ptrow+1] - pti[ptrow]; 741*94e3eecaSKris Buschelman ptjj = ptj + pti[ptrow]; 742*94e3eecaSKris Buschelman for (k=0;k<ptnzj;k++) { 743*94e3eecaSKris Buschelman if (!denserow[ptjj[k]]) { 744*94e3eecaSKris Buschelman denserow[ptjj[k]] = -1; 745*94e3eecaSKris Buschelman sparserow[cnzi++] = ptjj[k]; 746*94e3eecaSKris Buschelman } 747*94e3eecaSKris Buschelman } 748*94e3eecaSKris Buschelman } 749*94e3eecaSKris Buschelman 750*94e3eecaSKris Buschelman /* sort sparse representation */ 751*94e3eecaSKris Buschelman ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 752*94e3eecaSKris Buschelman 753*94e3eecaSKris Buschelman /* If free space is not available, make more free space */ 754*94e3eecaSKris Buschelman /* Double the amount of total space in the list */ 755*94e3eecaSKris Buschelman if (current_space->local_remaining<cnzi) { 756*94e3eecaSKris Buschelman ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 757*94e3eecaSKris Buschelman } 758*94e3eecaSKris Buschelman 759*94e3eecaSKris Buschelman /* Copy data into free space, and zero out dense row */ 760*94e3eecaSKris Buschelman ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 761*94e3eecaSKris Buschelman current_space->array += cnzi; 762*94e3eecaSKris Buschelman current_space->local_used += cnzi; 763*94e3eecaSKris Buschelman current_space->local_remaining -= cnzi; 764*94e3eecaSKris Buschelman 765*94e3eecaSKris Buschelman for (j=0;j<panzi;j++) { 766*94e3eecaSKris Buschelman padenserow[pasparserow[j]] = 0; 767*94e3eecaSKris Buschelman } 768*94e3eecaSKris Buschelman for (j=0;j<cnzi;j++) { 769*94e3eecaSKris Buschelman denserow[sparserow[j]] = 0; 770*94e3eecaSKris Buschelman } 771*94e3eecaSKris Buschelman ci[i+1] = ci[i] + cnzi; 772*94e3eecaSKris Buschelman } 773*94e3eecaSKris Buschelman /* column indices are in the list of free space */ 774*94e3eecaSKris Buschelman /* Allocate space for cj, initialize cj, and */ 775*94e3eecaSKris Buschelman /* destroy list of free space and other temporary array(s) */ 776*94e3eecaSKris Buschelman ierr = PetscMalloc((ci[pm]+1)*sizeof(int),&cj);CHKERRQ(ierr); 777*94e3eecaSKris Buschelman ierr = MakeSpaceContiguous(cj,&free_space);CHKERRQ(ierr); 778*94e3eecaSKris Buschelman ierr = PetscFree(padenserow);CHKERRQ(ierr); 779*94e3eecaSKris Buschelman 780*94e3eecaSKris Buschelman /* Allocate space for ca */ 781*94e3eecaSKris Buschelman ierr = PetscMalloc((ci[pm]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 782*94e3eecaSKris Buschelman ierr = PetscMemzero(ca,(ci[pm]+1)*sizeof(MatScalar));CHKERRQ(ierr); 783*94e3eecaSKris Buschelman 784*94e3eecaSKris Buschelman /* put together the new matrix */ 785*94e3eecaSKris Buschelman ierr = MatCreateSeqAIJWithArrays(A->comm,pm,pm,ci,cj,ca,C);CHKERRQ(ierr); 786*94e3eecaSKris Buschelman 787*94e3eecaSKris Buschelman /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 788*94e3eecaSKris Buschelman /* Since these are PETSc arrays, change flags to free them as necessary. */ 789*94e3eecaSKris Buschelman c = (Mat_SeqAIJ *)((*C)->data); 790*94e3eecaSKris Buschelman c->freedata = PETSC_TRUE; 791*94e3eecaSKris Buschelman c->nonew = 0; 792*94e3eecaSKris Buschelman 793*94e3eecaSKris Buschelman /* Clean up. */ 794*94e3eecaSKris Buschelman ierr = MatRestoreSymbolicTranspose(P,&pti,&ptj);CHKERRQ(ierr); 795*94e3eecaSKris Buschelman 796*94e3eecaSKris Buschelman ierr = PetscLogEventEnd(logkey_matapplypapt_symbolic,A,P,0,0);CHKERRQ(ierr); 797*94e3eecaSKris Buschelman PetscFunctionReturn(0); 798*94e3eecaSKris Buschelman } 799*94e3eecaSKris Buschelman 800*94e3eecaSKris Buschelman /* 801*94e3eecaSKris Buschelman MatApplyPAPt_Numeric_SeqAIJ - Forms the numeric product of two SeqAIJ matrices 802*94e3eecaSKris Buschelman C = P * A * P^T; 803*94e3eecaSKris Buschelman Note: C must have been created by calling MatApplyPAPt_Symbolic_SeqAIJ. 804*94e3eecaSKris Buschelman */ 805*94e3eecaSKris Buschelman #undef __FUNCT__ 806*94e3eecaSKris Buschelman #define __FUNCT__ "MatApplyPAPt_Numeric_SeqAIJ" 807*94e3eecaSKris Buschelman int MatApplyPAPt_Numeric_SeqAIJ(Mat A,Mat P,Mat C) { 808*94e3eecaSKris Buschelman int ierr,flops=0; 809*94e3eecaSKris Buschelman Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 810*94e3eecaSKris Buschelman Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 811*94e3eecaSKris Buschelman Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 812*94e3eecaSKris Buschelman int aishift=a->indexshift,pishift=p->indexshift,cishift=c->indexshift; 813*94e3eecaSKris Buschelman int *ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj=p->j,*paj,*pajdense,*ptj; 814*94e3eecaSKris Buschelman int *ci=c->i,*cj=c->j; 815*94e3eecaSKris Buschelman int an=A->N,am=A->M,pn=P->N,pm=P->M,cn=C->N,cm=C->M; 816*94e3eecaSKris Buschelman int i,j,k,k1,k2,pnzi,anzj,panzj,arow,ptcol,ptnzj,cnzi; 817*94e3eecaSKris Buschelman MatScalar *aa=a->a,*pa=p->a,*pta=p->a,*ptaj,*paa,*aaj,*ca=c->a,sum; 818*94e3eecaSKris Buschelman 819*94e3eecaSKris Buschelman PetscFunctionBegin; 820*94e3eecaSKris Buschelman 821*94e3eecaSKris Buschelman /* This error checking should be unnecessary if the symbolic was performed */ 822*94e3eecaSKris Buschelman if (aishift || pishift || cishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported."); 823*94e3eecaSKris Buschelman if (pm!=cm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pm,cm); 824*94e3eecaSKris Buschelman if (pn!=am) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pn,am); 825*94e3eecaSKris Buschelman if (am!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",am, an); 826*94e3eecaSKris Buschelman if (pm!=cn) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pm, cn); 827*94e3eecaSKris Buschelman 828*94e3eecaSKris Buschelman /* Set up timers */ 829*94e3eecaSKris Buschelman if (!logkey_matapplypapt_numeric) { 830*94e3eecaSKris Buschelman ierr = PetscLogEventRegister(&logkey_matapplypapt_numeric,"MatApplyPAPt_Numeric",MAT_COOKIE);CHKERRQ(ierr); 831*94e3eecaSKris Buschelman } 832*94e3eecaSKris Buschelman ierr = PetscLogEventBegin(logkey_matapplypapt_numeric,A,P,C,0);CHKERRQ(ierr); 833*94e3eecaSKris Buschelman 834*94e3eecaSKris Buschelman ierr = PetscMalloc(an*(sizeof(MatScalar)+2*sizeof(int)),&paa);CHKERRQ(ierr); 835*94e3eecaSKris Buschelman ierr = PetscMemzero(paa,an*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr); 836*94e3eecaSKris Buschelman ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 837*94e3eecaSKris Buschelman 838*94e3eecaSKris Buschelman paj = (int *)(paa + an); 839*94e3eecaSKris Buschelman pajdense = paj + an; 840*94e3eecaSKris Buschelman 841*94e3eecaSKris Buschelman for (i=0;i<pm;i++) { 842*94e3eecaSKris Buschelman /* Form sparse row of P*A */ 843*94e3eecaSKris Buschelman pnzi = pi[i+1] - pi[i]; 844*94e3eecaSKris Buschelman panzj = 0; 845*94e3eecaSKris Buschelman for (j=0;j<pnzi;j++) { 846*94e3eecaSKris Buschelman arow = *pj++; 847*94e3eecaSKris Buschelman anzj = ai[arow+1] - ai[arow]; 848*94e3eecaSKris Buschelman ajj = aj + ai[arow]; 849*94e3eecaSKris Buschelman aaj = aa + ai[arow]; 850*94e3eecaSKris Buschelman for (k=0;k<anzj;k++) { 851*94e3eecaSKris Buschelman if (!pajdense[ajj[k]]) { 852*94e3eecaSKris Buschelman pajdense[ajj[k]] = -1; 853*94e3eecaSKris Buschelman paj[panzj++] = ajj[k]; 854*94e3eecaSKris Buschelman } 855*94e3eecaSKris Buschelman paa[ajj[k]] += (*pa)*aaj[k]; 856*94e3eecaSKris Buschelman } 857*94e3eecaSKris Buschelman flops += 2*anzj; 858*94e3eecaSKris Buschelman pa++; 859*94e3eecaSKris Buschelman } 860*94e3eecaSKris Buschelman 861*94e3eecaSKris Buschelman /* Sort the j index array for quick sparse axpy. */ 862*94e3eecaSKris Buschelman ierr = PetscSortInt(panzj,paj);CHKERRQ(ierr); 863*94e3eecaSKris Buschelman 864*94e3eecaSKris Buschelman /* Compute P*A*P^T using sparse inner products. */ 865*94e3eecaSKris Buschelman /* Take advantage of pre-computed (i,j) of C for locations of non-zeros. */ 866*94e3eecaSKris Buschelman cnzi = ci[i+1] - ci[i]; 867*94e3eecaSKris Buschelman for (j=0;j<cnzi;j++) { 868*94e3eecaSKris Buschelman /* Form sparse inner product of current row of P*A with (*cj++) col of P^T. */ 869*94e3eecaSKris Buschelman ptcol = *cj++; 870*94e3eecaSKris Buschelman ptnzj = pi[ptcol+1] - pi[ptcol]; 871*94e3eecaSKris Buschelman ptj = pjj + pi[ptcol]; 872*94e3eecaSKris Buschelman ptaj = pta + pi[ptcol]; 873*94e3eecaSKris Buschelman sum = 0.; 874*94e3eecaSKris Buschelman k1 = 0; 875*94e3eecaSKris Buschelman k2 = 0; 876*94e3eecaSKris Buschelman while ((k1<panzj) && (k2<ptnzj)) { 877*94e3eecaSKris Buschelman if (paj[k1]==ptj[k2]) { 878*94e3eecaSKris Buschelman sum += paa[paj[k1++]]*pta[k2++]; 879*94e3eecaSKris Buschelman } else if (paj[k1] < ptj[k2]) { 880*94e3eecaSKris Buschelman k1++; 881*94e3eecaSKris Buschelman } else /* if (paj[k1] > ptj[k2]) */ { 882*94e3eecaSKris Buschelman k2++; 883*94e3eecaSKris Buschelman } 884*94e3eecaSKris Buschelman } 885*94e3eecaSKris Buschelman *ca++ = sum; 886*94e3eecaSKris Buschelman } 887*94e3eecaSKris Buschelman 888*94e3eecaSKris Buschelman /* Zero the current row info for P*A */ 889*94e3eecaSKris Buschelman for (j=0;j<panzj;j++) { 890*94e3eecaSKris Buschelman paa[paj[j]] = 0.; 891*94e3eecaSKris Buschelman pajdense[paj[j]] = 0; 892*94e3eecaSKris Buschelman } 893*94e3eecaSKris Buschelman } 894*94e3eecaSKris Buschelman 895*94e3eecaSKris Buschelman ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 896*94e3eecaSKris Buschelman ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 897*94e3eecaSKris Buschelman ierr = PetscLogFlops(flops);CHKERRQ(ierr); 898*94e3eecaSKris Buschelman ierr = PetscLogEventEnd(logkey_matapplypapt_numeric,A,P,C,0);CHKERRQ(ierr); 899*94e3eecaSKris Buschelman PetscFunctionReturn(0); 900*94e3eecaSKris Buschelman } 901*94e3eecaSKris Buschelman 902*94e3eecaSKris Buschelman #undef __FUNCT__ 903*94e3eecaSKris Buschelman #define __FUNCT__ "MatApplyPAPt_SeqAIJ" 904*94e3eecaSKris Buschelman int MatApplyPAPt_SeqAIJ(Mat A,Mat P,Mat *C) { 905*94e3eecaSKris Buschelman int ierr; 906*94e3eecaSKris Buschelman 907*94e3eecaSKris Buschelman PetscFunctionBegin; 908*94e3eecaSKris Buschelman if (!logkey_matapplypapt) { 909*94e3eecaSKris Buschelman ierr = PetscLogEventRegister(&logkey_matapplypapt,"MatApplyPAPt",MAT_COOKIE);CHKERRQ(ierr); 910*94e3eecaSKris Buschelman } 911*94e3eecaSKris Buschelman ierr = PetscLogEventBegin(logkey_matapplypapt,A,P,0,0);CHKERRQ(ierr); 912*94e3eecaSKris Buschelman ierr = MatApplyPAPt_Symbolic_SeqAIJ(A,P,C);CHKERRQ(ierr); 913*94e3eecaSKris Buschelman ierr = MatApplyPAPt_Numeric_SeqAIJ(A,P,*C);CHKERRQ(ierr); 914*94e3eecaSKris Buschelman ierr = PetscLogEventEnd(logkey_matapplypapt,A,P,0,0);CHKERRQ(ierr); 915*94e3eecaSKris Buschelman PetscFunctionReturn(0); 916*94e3eecaSKris Buschelman } 917