1*eb9c0419SKris Buschelman /* 2*eb9c0419SKris Buschelman Defines projective product routines where A is a SeqAIJ matrix 3*eb9c0419SKris Buschelman C = P^T * A * P 4*eb9c0419SKris Buschelman */ 5*eb9c0419SKris Buschelman 6*eb9c0419SKris Buschelman #include "src/mat/impls/aij/seq/aij.h" 7*eb9c0419SKris Buschelman #include "src/mat/utils/freespace.h" 8*eb9c0419SKris Buschelman 9*eb9c0419SKris Buschelman int MatSeqAIJPtAP(Mat,Mat,Mat*); 10*eb9c0419SKris Buschelman int MatSeqAIJPtAPSymbolic(Mat,Mat,Mat*); 11*eb9c0419SKris Buschelman int MatSeqAIJPtAPNumeric(Mat,Mat,Mat); 12*eb9c0419SKris Buschelman 13*eb9c0419SKris Buschelman static int MATSeqAIJ_PtAP = 0; 14*eb9c0419SKris Buschelman static int MATSeqAIJ_PtAPSymbolic = 0; 15*eb9c0419SKris Buschelman static int MATSeqAIJ_PtAPNumeric = 0; 16*eb9c0419SKris Buschelman 17*eb9c0419SKris Buschelman /* 18*eb9c0419SKris Buschelman MatSeqAIJPtAP - Creates the SeqAIJ matrix product, C, 19*eb9c0419SKris Buschelman of SeqAIJ matrix A and matrix P: 20*eb9c0419SKris Buschelman C = P^T * A * P; 21*eb9c0419SKris Buschelman 22*eb9c0419SKris Buschelman Note: C is assumed to be uncreated. 23*eb9c0419SKris Buschelman If this is not the case, Destroy C before calling this routine. 24*eb9c0419SKris Buschelman */ 25*eb9c0419SKris Buschelman #undef __FUNCT__ 26*eb9c0419SKris Buschelman #define __FUNCT__ "MatSeqAIJPtAP" 27*eb9c0419SKris Buschelman int MatSeqAIJPtAP(Mat A,Mat P,Mat *C) { 28*eb9c0419SKris Buschelman int ierr; 29*eb9c0419SKris Buschelman char funct[80]; 30*eb9c0419SKris Buschelman 31*eb9c0419SKris Buschelman PetscFunctionBegin; 32*eb9c0419SKris Buschelman 33*eb9c0419SKris Buschelman ierr = PetscLogEventBegin(MATSeqAIJ_PtAP,A,P,0,0);CHKERRQ(ierr); 34*eb9c0419SKris Buschelman 35*eb9c0419SKris Buschelman ierr = MatSeqAIJPtAPSymbolic(A,P,C);CHKERRQ(ierr); 36*eb9c0419SKris Buschelman 37*eb9c0419SKris Buschelman /* Avoid additional error checking included in */ 38*eb9c0419SKris Buschelman /* ierr = MatSeqAIJApplyPtAPNumeric(A,P,*C);CHKERRQ(ierr); */ 39*eb9c0419SKris Buschelman 40*eb9c0419SKris Buschelman /* Query A for ApplyPtAPNumeric implementation based on types of P */ 41*eb9c0419SKris Buschelman ierr = PetscStrcpy(funct,"MatApplyPtAPNumeric_seqaij_");CHKERRQ(ierr); 42*eb9c0419SKris Buschelman ierr = PetscStrcat(funct,P->type_name);CHKERRQ(ierr); 43*eb9c0419SKris Buschelman ierr = PetscTryMethod(A,funct,(Mat,Mat,Mat),(A,P,*C));CHKERRQ(ierr); 44*eb9c0419SKris Buschelman 45*eb9c0419SKris Buschelman ierr = PetscLogEventEnd(MATSeqAIJ_PtAP,A,P,0,0);CHKERRQ(ierr); 46*eb9c0419SKris Buschelman 47*eb9c0419SKris Buschelman PetscFunctionReturn(0); 48*eb9c0419SKris Buschelman } 49*eb9c0419SKris Buschelman 50*eb9c0419SKris Buschelman /* 51*eb9c0419SKris Buschelman MatSeqAIJPtAPSymbolic - Creates the (i,j) structure of the SeqAIJ matrix product, C, 52*eb9c0419SKris Buschelman of SeqAIJ matrix A and matrix P, according to: 53*eb9c0419SKris Buschelman C = P^T * A * P; 54*eb9c0419SKris Buschelman 55*eb9c0419SKris Buschelman Note: C is assumed to be uncreated. 56*eb9c0419SKris Buschelman If this is not the case, Destroy C before calling this routine. 57*eb9c0419SKris Buschelman */ 58*eb9c0419SKris Buschelman #undef __FUNCT__ 59*eb9c0419SKris Buschelman #define __FUNCT__ "MatSeqAIJPtAPSymbolic" 60*eb9c0419SKris Buschelman int MatSeqAIJPtAPSymbolic(Mat A,Mat P,Mat *C) { 61*eb9c0419SKris Buschelman int ierr; 62*eb9c0419SKris Buschelman char funct[80]; 63*eb9c0419SKris Buschelman 64*eb9c0419SKris Buschelman PetscFunctionBegin; 65*eb9c0419SKris Buschelman 66*eb9c0419SKris Buschelman PetscValidPointer(C); 67*eb9c0419SKris Buschelman 68*eb9c0419SKris Buschelman PetscValidHeaderSpecific(A,MAT_COOKIE); 69*eb9c0419SKris Buschelman PetscValidType(A); 70*eb9c0419SKris Buschelman MatPreallocated(A); 71*eb9c0419SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 72*eb9c0419SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 73*eb9c0419SKris Buschelman 74*eb9c0419SKris Buschelman PetscValidHeaderSpecific(P,MAT_COOKIE); 75*eb9c0419SKris Buschelman PetscValidType(P); 76*eb9c0419SKris Buschelman MatPreallocated(P); 77*eb9c0419SKris Buschelman if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 78*eb9c0419SKris Buschelman if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 79*eb9c0419SKris Buschelman 80*eb9c0419SKris Buschelman if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 81*eb9c0419SKris Buschelman if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 82*eb9c0419SKris Buschelman 83*eb9c0419SKris Buschelman /* Query A for ApplyPtAP implementation based on types of P */ 84*eb9c0419SKris Buschelman ierr = PetscStrcpy(funct,"MatApplyPtAPSymbolic_seqaij_");CHKERRQ(ierr); 85*eb9c0419SKris Buschelman ierr = PetscStrcat(funct,P->type_name);CHKERRQ(ierr); 86*eb9c0419SKris Buschelman ierr = PetscTryMethod(A,funct,(Mat,Mat,Mat*),(A,P,C));CHKERRQ(ierr); 87*eb9c0419SKris Buschelman 88*eb9c0419SKris Buschelman PetscFunctionReturn(0); 89*eb9c0419SKris Buschelman } 90*eb9c0419SKris Buschelman 91*eb9c0419SKris Buschelman EXTERN_C_BEGIN 92*eb9c0419SKris Buschelman #undef __FUNCT__ 93*eb9c0419SKris Buschelman #define __FUNCT__ "MatApplyPtAPSymbolic_SeqAIJ_SeqAIJ" 94*eb9c0419SKris Buschelman int MatApplyPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C) { 95*eb9c0419SKris Buschelman int ierr; 96*eb9c0419SKris Buschelman FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 97*eb9c0419SKris Buschelman Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 98*eb9c0419SKris Buschelman int aishift=a->indexshift,pishift=p->indexshift; 99*eb9c0419SKris Buschelman int *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 100*eb9c0419SKris Buschelman int *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 101*eb9c0419SKris Buschelman int an=A->N,am=A->M,pn=P->N,pm=P->M; 102*eb9c0419SKris Buschelman int i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 103*eb9c0419SKris Buschelman MatScalar *ca; 104*eb9c0419SKris Buschelman 105*eb9c0419SKris Buschelman PetscFunctionBegin; 106*eb9c0419SKris Buschelman 107*eb9c0419SKris Buschelman /* some error checking which could be moved into interface layer */ 108*eb9c0419SKris Buschelman if (aishift || pishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported."); 109*eb9c0419SKris Buschelman 110*eb9c0419SKris Buschelman /* Start timer */ 111*eb9c0419SKris Buschelman ierr = PetscLogEventBegin(MATSeqAIJ_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 112*eb9c0419SKris Buschelman 113*eb9c0419SKris Buschelman /* Get ij structure of P^T */ 114*eb9c0419SKris Buschelman ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 115*eb9c0419SKris Buschelman ptJ=ptj; 116*eb9c0419SKris Buschelman 117*eb9c0419SKris Buschelman /* Allocate ci array, arrays for fill computation and */ 118*eb9c0419SKris Buschelman /* free space for accumulating nonzero column info */ 119*eb9c0419SKris Buschelman ierr = PetscMalloc(((pn+1)*1)*sizeof(int),&ci);CHKERRQ(ierr); 120*eb9c0419SKris Buschelman ci[0] = 0; 121*eb9c0419SKris Buschelman 122*eb9c0419SKris Buschelman ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr); 123*eb9c0419SKris Buschelman ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr); 124*eb9c0419SKris Buschelman ptasparserow = ptadenserow + an; 125*eb9c0419SKris Buschelman denserow = ptasparserow + an; 126*eb9c0419SKris Buschelman sparserow = denserow + pn; 127*eb9c0419SKris Buschelman 128*eb9c0419SKris Buschelman /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 129*eb9c0419SKris Buschelman /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 130*eb9c0419SKris Buschelman ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 131*eb9c0419SKris Buschelman current_space = free_space; 132*eb9c0419SKris Buschelman 133*eb9c0419SKris Buschelman /* Determine symbolic info for each row of C: */ 134*eb9c0419SKris Buschelman for (i=0;i<pn;i++) { 135*eb9c0419SKris Buschelman ptnzi = pti[i+1] - pti[i]; 136*eb9c0419SKris Buschelman ptanzi = 0; 137*eb9c0419SKris Buschelman /* Determine symbolic row of PtA: */ 138*eb9c0419SKris Buschelman for (j=0;j<ptnzi;j++) { 139*eb9c0419SKris Buschelman arow = *ptJ++; 140*eb9c0419SKris Buschelman anzj = ai[arow+1] - ai[arow]; 141*eb9c0419SKris Buschelman ajj = aj + ai[arow]; 142*eb9c0419SKris Buschelman for (k=0;k<anzj;k++) { 143*eb9c0419SKris Buschelman if (!ptadenserow[ajj[k]]) { 144*eb9c0419SKris Buschelman ptadenserow[ajj[k]] = -1; 145*eb9c0419SKris Buschelman ptasparserow[ptanzi++] = ajj[k]; 146*eb9c0419SKris Buschelman } 147*eb9c0419SKris Buschelman } 148*eb9c0419SKris Buschelman } 149*eb9c0419SKris Buschelman /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 150*eb9c0419SKris Buschelman ptaj = ptasparserow; 151*eb9c0419SKris Buschelman cnzi = 0; 152*eb9c0419SKris Buschelman for (j=0;j<ptanzi;j++) { 153*eb9c0419SKris Buschelman prow = *ptaj++; 154*eb9c0419SKris Buschelman pnzj = pi[prow+1] - pi[prow]; 155*eb9c0419SKris Buschelman pjj = pj + pi[prow]; 156*eb9c0419SKris Buschelman for (k=0;k<pnzj;k++) { 157*eb9c0419SKris Buschelman if (!denserow[pjj[k]]) { 158*eb9c0419SKris Buschelman denserow[pjj[k]] = -1; 159*eb9c0419SKris Buschelman sparserow[cnzi++] = pjj[k]; 160*eb9c0419SKris Buschelman } 161*eb9c0419SKris Buschelman } 162*eb9c0419SKris Buschelman } 163*eb9c0419SKris Buschelman 164*eb9c0419SKris Buschelman /* sort sparserow */ 165*eb9c0419SKris Buschelman ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 166*eb9c0419SKris Buschelman 167*eb9c0419SKris Buschelman /* If free space is not available, make more free space */ 168*eb9c0419SKris Buschelman /* Double the amount of total space in the list */ 169*eb9c0419SKris Buschelman if (current_space->local_remaining<cnzi) { 170*eb9c0419SKris Buschelman ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 171*eb9c0419SKris Buschelman } 172*eb9c0419SKris Buschelman 173*eb9c0419SKris Buschelman /* Copy data into free space, and zero out denserows */ 174*eb9c0419SKris Buschelman ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 175*eb9c0419SKris Buschelman current_space->array += cnzi; 176*eb9c0419SKris Buschelman current_space->local_used += cnzi; 177*eb9c0419SKris Buschelman current_space->local_remaining -= cnzi; 178*eb9c0419SKris Buschelman 179*eb9c0419SKris Buschelman for (j=0;j<ptanzi;j++) { 180*eb9c0419SKris Buschelman ptadenserow[ptasparserow[j]] = 0; 181*eb9c0419SKris Buschelman } 182*eb9c0419SKris Buschelman for (j=0;j<cnzi;j++) { 183*eb9c0419SKris Buschelman denserow[sparserow[j]] = 0; 184*eb9c0419SKris Buschelman } 185*eb9c0419SKris Buschelman /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 186*eb9c0419SKris Buschelman /* For now, we will recompute what is needed. */ 187*eb9c0419SKris Buschelman ci[i+1] = ci[i] + cnzi; 188*eb9c0419SKris Buschelman } 189*eb9c0419SKris Buschelman /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 190*eb9c0419SKris Buschelman /* Allocate space for cj, initialize cj, and */ 191*eb9c0419SKris Buschelman /* destroy list of free space and other temporary array(s) */ 192*eb9c0419SKris Buschelman ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr); 193*eb9c0419SKris Buschelman ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 194*eb9c0419SKris Buschelman ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 195*eb9c0419SKris Buschelman 196*eb9c0419SKris Buschelman /* Allocate space for ca */ 197*eb9c0419SKris Buschelman ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 198*eb9c0419SKris Buschelman ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 199*eb9c0419SKris Buschelman 200*eb9c0419SKris Buschelman /* put together the new matrix */ 201*eb9c0419SKris Buschelman ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 202*eb9c0419SKris Buschelman 203*eb9c0419SKris Buschelman /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 204*eb9c0419SKris Buschelman /* Since these are PETSc arrays, change flags to free them as necessary. */ 205*eb9c0419SKris Buschelman c = (Mat_SeqAIJ *)((*C)->data); 206*eb9c0419SKris Buschelman c->freedata = PETSC_TRUE; 207*eb9c0419SKris Buschelman c->nonew = 0; 208*eb9c0419SKris Buschelman 209*eb9c0419SKris Buschelman /* Clean up. */ 210*eb9c0419SKris Buschelman ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 211*eb9c0419SKris Buschelman 212*eb9c0419SKris Buschelman ierr = PetscLogEventEnd(MATSeqAIJ_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 213*eb9c0419SKris Buschelman PetscFunctionReturn(0); 214*eb9c0419SKris Buschelman } 215*eb9c0419SKris Buschelman EXTERN_C_END 216*eb9c0419SKris Buschelman 217*eb9c0419SKris Buschelman /* 218*eb9c0419SKris Buschelman MatSeqAIJPtAPNumeric - Computes the SeqAIJ matrix product, C, 219*eb9c0419SKris Buschelman of SeqAIJ matrix A and matrix P, according to: 220*eb9c0419SKris Buschelman C = P^T * A * P 221*eb9c0419SKris Buschelman Note: C must have been created by calling MatSeqAIJApplyPtAPSymbolic. 222*eb9c0419SKris Buschelman */ 223*eb9c0419SKris Buschelman #undef __FUNCT__ 224*eb9c0419SKris Buschelman #define __FUNCT__ "MatSeqAIJPtAPNumeric" 225*eb9c0419SKris Buschelman int MatSeqAIJPtAPNumeric(Mat A,Mat P,Mat C) { 226*eb9c0419SKris Buschelman int ierr; 227*eb9c0419SKris Buschelman char funct[80]; 228*eb9c0419SKris Buschelman 229*eb9c0419SKris Buschelman PetscFunctionBegin; 230*eb9c0419SKris Buschelman 231*eb9c0419SKris Buschelman PetscValidHeaderSpecific(A,MAT_COOKIE); 232*eb9c0419SKris Buschelman PetscValidType(A); 233*eb9c0419SKris Buschelman MatPreallocated(A); 234*eb9c0419SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 235*eb9c0419SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 236*eb9c0419SKris Buschelman 237*eb9c0419SKris Buschelman PetscValidHeaderSpecific(P,MAT_COOKIE); 238*eb9c0419SKris Buschelman PetscValidType(P); 239*eb9c0419SKris Buschelman MatPreallocated(P); 240*eb9c0419SKris Buschelman if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 241*eb9c0419SKris Buschelman if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 242*eb9c0419SKris Buschelman 243*eb9c0419SKris Buschelman PetscValidHeaderSpecific(C,MAT_COOKIE); 244*eb9c0419SKris Buschelman PetscValidType(C); 245*eb9c0419SKris Buschelman MatPreallocated(C); 246*eb9c0419SKris Buschelman if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 247*eb9c0419SKris Buschelman if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 248*eb9c0419SKris Buschelman 249*eb9c0419SKris Buschelman if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M); 250*eb9c0419SKris Buschelman if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 251*eb9c0419SKris Buschelman if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 252*eb9c0419SKris Buschelman if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N); 253*eb9c0419SKris Buschelman 254*eb9c0419SKris Buschelman /* Query A for ApplyPtAP implementation based on types of P */ 255*eb9c0419SKris Buschelman ierr = PetscStrcpy(funct,"MatApplyPtAPNumeric_seqaij_");CHKERRQ(ierr); 256*eb9c0419SKris Buschelman ierr = PetscStrcat(funct,P->type_name);CHKERRQ(ierr); 257*eb9c0419SKris Buschelman ierr = PetscTryMethod(A,funct,(Mat,Mat,Mat),(A,P,C));CHKERRQ(ierr); 258*eb9c0419SKris Buschelman 259*eb9c0419SKris Buschelman PetscFunctionReturn(0); 260*eb9c0419SKris Buschelman } 261*eb9c0419SKris Buschelman 262*eb9c0419SKris Buschelman EXTERN_C_BEGIN 263*eb9c0419SKris Buschelman #undef __FUNCT__ 264*eb9c0419SKris Buschelman #define __FUNCT__ "MatApplyPtAPNumeric_SeqAIJ_SeqAIJ" 265*eb9c0419SKris Buschelman int MatApplyPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) { 266*eb9c0419SKris Buschelman int ierr,flops=0; 267*eb9c0419SKris Buschelman Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 268*eb9c0419SKris Buschelman Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 269*eb9c0419SKris Buschelman Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 270*eb9c0419SKris Buschelman int aishift=a->indexshift,pishift=p->indexshift,cishift=c->indexshift; 271*eb9c0419SKris Buschelman int *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 272*eb9c0419SKris Buschelman int *ci=c->i,*cj=c->j,*cjj; 273*eb9c0419SKris Buschelman int am=A->M,cn=C->N,cm=C->M; 274*eb9c0419SKris Buschelman int i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 275*eb9c0419SKris Buschelman MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 276*eb9c0419SKris Buschelman 277*eb9c0419SKris Buschelman PetscFunctionBegin; 278*eb9c0419SKris Buschelman 279*eb9c0419SKris Buschelman /* Currently not for shifted matrices! */ 280*eb9c0419SKris Buschelman if (aishift || pishift || cishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported."); 281*eb9c0419SKris Buschelman 282*eb9c0419SKris Buschelman ierr = PetscLogEventBegin(MATSeqAIJ_PtAPNumeric,A,P,C,0);CHKERRQ(ierr); 283*eb9c0419SKris Buschelman 284*eb9c0419SKris Buschelman /* Allocate temporary array for storage of one row of A*P */ 285*eb9c0419SKris Buschelman ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(int)),&apa);CHKERRQ(ierr); 286*eb9c0419SKris Buschelman ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr); 287*eb9c0419SKris Buschelman 288*eb9c0419SKris Buschelman apj = (int *)(apa + cn); 289*eb9c0419SKris Buschelman apjdense = apj + cn; 290*eb9c0419SKris Buschelman 291*eb9c0419SKris Buschelman /* Clear old values in C */ 292*eb9c0419SKris Buschelman ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 293*eb9c0419SKris Buschelman 294*eb9c0419SKris Buschelman for (i=0;i<am;i++) { 295*eb9c0419SKris Buschelman /* Form sparse row of A*P */ 296*eb9c0419SKris Buschelman anzi = ai[i+1] - ai[i]; 297*eb9c0419SKris Buschelman apnzj = 0; 298*eb9c0419SKris Buschelman for (j=0;j<anzi;j++) { 299*eb9c0419SKris Buschelman prow = *aj++; 300*eb9c0419SKris Buschelman pnzj = pi[prow+1] - pi[prow]; 301*eb9c0419SKris Buschelman pjj = pj + pi[prow]; 302*eb9c0419SKris Buschelman paj = pa + pi[prow]; 303*eb9c0419SKris Buschelman for (k=0;k<pnzj;k++) { 304*eb9c0419SKris Buschelman if (!apjdense[pjj[k]]) { 305*eb9c0419SKris Buschelman apjdense[pjj[k]] = -1; 306*eb9c0419SKris Buschelman apj[apnzj++] = pjj[k]; 307*eb9c0419SKris Buschelman } 308*eb9c0419SKris Buschelman apa[pjj[k]] += (*aa)*paj[k]; 309*eb9c0419SKris Buschelman } 310*eb9c0419SKris Buschelman flops += 2*pnzj; 311*eb9c0419SKris Buschelman aa++; 312*eb9c0419SKris Buschelman } 313*eb9c0419SKris Buschelman 314*eb9c0419SKris Buschelman /* Sort the j index array for quick sparse axpy. */ 315*eb9c0419SKris Buschelman ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 316*eb9c0419SKris Buschelman 317*eb9c0419SKris Buschelman /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 318*eb9c0419SKris Buschelman pnzi = pi[i+1] - pi[i]; 319*eb9c0419SKris Buschelman for (j=0;j<pnzi;j++) { 320*eb9c0419SKris Buschelman nextap = 0; 321*eb9c0419SKris Buschelman crow = *pJ++; 322*eb9c0419SKris Buschelman cjj = cj + ci[crow]; 323*eb9c0419SKris Buschelman caj = ca + ci[crow]; 324*eb9c0419SKris Buschelman /* Perform sparse axpy operation. Note cjj includes apj. */ 325*eb9c0419SKris Buschelman for (k=0;nextap<apnzj;k++) { 326*eb9c0419SKris Buschelman if (cjj[k]==apj[nextap]) { 327*eb9c0419SKris Buschelman caj[k] += (*pA)*apa[apj[nextap++]]; 328*eb9c0419SKris Buschelman } 329*eb9c0419SKris Buschelman } 330*eb9c0419SKris Buschelman flops += 2*apnzj; 331*eb9c0419SKris Buschelman pA++; 332*eb9c0419SKris Buschelman } 333*eb9c0419SKris Buschelman 334*eb9c0419SKris Buschelman /* Zero the current row info for A*P */ 335*eb9c0419SKris Buschelman for (j=0;j<apnzj;j++) { 336*eb9c0419SKris Buschelman apa[apj[j]] = 0.; 337*eb9c0419SKris Buschelman apjdense[apj[j]] = 0; 338*eb9c0419SKris Buschelman } 339*eb9c0419SKris Buschelman } 340*eb9c0419SKris Buschelman 341*eb9c0419SKris Buschelman /* Assemble the final matrix and clean up */ 342*eb9c0419SKris Buschelman ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 343*eb9c0419SKris Buschelman ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 344*eb9c0419SKris Buschelman ierr = PetscFree(apa);CHKERRQ(ierr); 345*eb9c0419SKris Buschelman ierr = PetscLogFlops(flops);CHKERRQ(ierr); 346*eb9c0419SKris Buschelman ierr = PetscLogEventEnd(MATSeqAIJ_PtAPNumeric,A,P,C,0);CHKERRQ(ierr); 347*eb9c0419SKris Buschelman 348*eb9c0419SKris Buschelman PetscFunctionReturn(0); 349*eb9c0419SKris Buschelman } 350*eb9c0419SKris Buschelman EXTERN_C_END 351*eb9c0419SKris Buschelman 352*eb9c0419SKris Buschelman #undef __FUNCT__ 353*eb9c0419SKris Buschelman #define __FUNCT__ "RegisterApplyPtAPRoutines_Private" 354*eb9c0419SKris Buschelman int RegisterApplyPtAPRoutines_Private(Mat A) { 355*eb9c0419SKris Buschelman int ierr; 356*eb9c0419SKris Buschelman 357*eb9c0419SKris Buschelman PetscFunctionBegin; 358*eb9c0419SKris Buschelman 359*eb9c0419SKris Buschelman if (!MATSeqAIJ_PtAP) { 360*eb9c0419SKris Buschelman ierr = PetscLogEventRegister(&MATSeqAIJ_PtAP,"MatSeqAIJApplyPtAP",MAT_COOKIE);CHKERRQ(ierr); 361*eb9c0419SKris Buschelman } 362*eb9c0419SKris Buschelman 363*eb9c0419SKris Buschelman if (!MATSeqAIJ_PtAPSymbolic) { 364*eb9c0419SKris Buschelman ierr = PetscLogEventRegister(&MATSeqAIJ_PtAPSymbolic,"MatSeqAIJApplyPtAPSymbolic",MAT_COOKIE);CHKERRQ(ierr); 365*eb9c0419SKris Buschelman } 366*eb9c0419SKris Buschelman ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatApplyPtAPSymbolic_seqaij_seqaij", 367*eb9c0419SKris Buschelman "MatApplyPtAPSymbolic_SeqAIJ_SeqAIJ", 368*eb9c0419SKris Buschelman MatApplyPtAPSymbolic_SeqAIJ_SeqAIJ);CHKERRQ(ierr); 369*eb9c0419SKris Buschelman 370*eb9c0419SKris Buschelman if (!MATSeqAIJ_PtAPNumeric) { 371*eb9c0419SKris Buschelman ierr = PetscLogEventRegister(&MATSeqAIJ_PtAPNumeric,"MatSeqAIJApplyPtAPNumeric",MAT_COOKIE);CHKERRQ(ierr); 372*eb9c0419SKris Buschelman } 373*eb9c0419SKris Buschelman ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatApplyPtAPNumeric_seqaij_seqaij", 374*eb9c0419SKris Buschelman "MatApplyPtAPNumeric_SeqAIJ_SeqAIJ", 375*eb9c0419SKris Buschelman MatApplyPtAPNumeric_SeqAIJ_SeqAIJ);CHKERRQ(ierr); 376*eb9c0419SKris Buschelman PetscFunctionReturn(0); 377*eb9c0419SKris Buschelman } 378