1 /* 2 Defines projective product routines where A is a AIJ matrix 3 C = P^T * A * P 4 */ 5 6 #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 7 #include "src/mat/utils/freespace.h" 8 #include "src/mat/impls/aij/mpi/mpiaij.h" 9 10 EXTERN int RegisterMatMatMultRoutines_Private(Mat); 11 12 static int MAT_PtAPSymbolic = 0; 13 static int MAT_PtAPNumeric = 0; 14 15 #undef __FUNCT__ 16 #define __FUNCT__ "MatPtAP" 17 /*@ 18 MatPtAP - Creates the matrix projection C = P^T * A * P 19 20 Collective on Mat 21 22 Input Parameters: 23 + A - the matrix 24 - P - the projection matrix 25 26 Output Parameters: 27 . C - the product matrix 28 29 Notes: 30 C will be created and must be destroyed by the user with MatDestroy(). 31 32 This routine is currently only implemented for pairs of SeqAIJ matrices and classes 33 which inherit from SeqAIJ. C will be of type MATSEQAIJ. 34 35 Level: intermediate 36 37 .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult() 38 @*/ 39 int MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) { 40 int ierr; 41 42 PetscFunctionBegin; 43 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 44 PetscValidType(A,1); 45 MatPreallocated(A); 46 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 47 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 48 PetscValidHeaderSpecific(P,MAT_COOKIE,2); 49 PetscValidType(P,2); 50 MatPreallocated(P); 51 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 52 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 53 PetscValidPointer(C,3); 54 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 55 56 if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 57 58 ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 59 ierr = (*A->ops->matptap)(A,P,scall,fill,C);CHKERRQ(ierr); 60 ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 61 PetscFunctionReturn(0); 62 } 63 64 #undef __FUNCT__ 65 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ" 66 int MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 67 { 68 int ierr; 69 PetscFunctionBegin; 70 if (scall == MAT_INITIAL_MATRIX){ 71 ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr); 72 } 73 ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr); 74 PetscFunctionReturn(0); 75 } 76 77 #undef __FUNCT__ 78 #define __FUNCT__ "MatPtAPSymbolic" 79 /*@ 80 MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P 81 82 Collective on Mat 83 84 Input Parameters: 85 + A - the matrix 86 - P - the projection matrix 87 88 Output Parameters: 89 . C - the (i,j) structure of the product matrix 90 91 Notes: 92 C will be created and must be destroyed by the user with MatDestroy(). 93 94 This routine is currently only implemented for pairs of SeqAIJ matrices and classes 95 which inherit from SeqAIJ. C will be of type MATSEQAIJ. The product is computed using 96 this (i,j) structure by calling MatPtAPNumeric(). 97 98 Level: intermediate 99 100 .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic() 101 @*/ 102 int MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C) { 103 int ierr; 104 char funct[80]; 105 int (*f)(Mat,Mat,Mat*); 106 107 PetscFunctionBegin; 108 109 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 110 PetscValidType(A,1); 111 MatPreallocated(A); 112 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 113 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 114 115 PetscValidHeaderSpecific(P,MAT_COOKIE,2); 116 PetscValidType(P,2); 117 MatPreallocated(P); 118 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 119 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 120 121 PetscValidPointer(C,3); 122 123 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 124 if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 125 126 /* Currently only _seqaij_seqaij is implemented, so just query for it. */ 127 /* When other implementations exist, attack the multiple dispatch problem. */ 128 ierr = PetscStrcpy(funct,"MatPtAPSymbolic_seqaij_seqaij");CHKERRQ(ierr); 129 ierr = PetscObjectQueryFunction((PetscObject)P,funct,(PetscVoidFunction)&f);CHKERRQ(ierr); 130 if (!f) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic is not supported for P of type %s",P->type_name); 131 ierr = PetscObjectQueryFunction((PetscObject)A,funct,(PetscVoidFunction)&f);CHKERRQ(ierr); 132 if (!f) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic is not supported for A of type %s",A->type_name); 133 134 ierr = (*f)(A,P,C);CHKERRQ(ierr); 135 136 PetscFunctionReturn(0); 137 } 138 139 EXTERN_C_BEGIN 140 #undef __FUNCT__ 141 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ" 142 int MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C) { 143 int ierr; 144 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 145 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 146 int *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 147 int *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 148 int an=A->N,am=A->M,pn=P->N,pm=P->M; 149 int i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 150 MatScalar *ca; 151 152 PetscFunctionBegin; 153 154 /* Start timer */ 155 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 156 157 /* Get ij structure of P^T */ 158 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 159 ptJ=ptj; 160 161 /* Allocate ci array, arrays for fill computation and */ 162 /* free space for accumulating nonzero column info */ 163 ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr); 164 ci[0] = 0; 165 166 ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr); 167 ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr); 168 ptasparserow = ptadenserow + an; 169 denserow = ptasparserow + an; 170 sparserow = denserow + pn; 171 172 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 173 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 174 ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 175 current_space = free_space; 176 177 /* Determine symbolic info for each row of C: */ 178 for (i=0;i<pn;i++) { 179 ptnzi = pti[i+1] - pti[i]; 180 ptanzi = 0; 181 /* Determine symbolic row of PtA: */ 182 for (j=0;j<ptnzi;j++) { 183 arow = *ptJ++; 184 anzj = ai[arow+1] - ai[arow]; 185 ajj = aj + ai[arow]; 186 for (k=0;k<anzj;k++) { 187 if (!ptadenserow[ajj[k]]) { 188 ptadenserow[ajj[k]] = -1; 189 ptasparserow[ptanzi++] = ajj[k]; 190 } 191 } 192 } 193 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 194 ptaj = ptasparserow; 195 cnzi = 0; 196 for (j=0;j<ptanzi;j++) { 197 prow = *ptaj++; 198 pnzj = pi[prow+1] - pi[prow]; 199 pjj = pj + pi[prow]; 200 for (k=0;k<pnzj;k++) { 201 if (!denserow[pjj[k]]) { 202 denserow[pjj[k]] = -1; 203 sparserow[cnzi++] = pjj[k]; 204 } 205 } 206 } 207 208 /* sort sparserow */ 209 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 210 211 /* If free space is not available, make more free space */ 212 /* Double the amount of total space in the list */ 213 if (current_space->local_remaining<cnzi) { 214 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 215 } 216 217 /* Copy data into free space, and zero out denserows */ 218 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 219 current_space->array += cnzi; 220 current_space->local_used += cnzi; 221 current_space->local_remaining -= cnzi; 222 223 for (j=0;j<ptanzi;j++) { 224 ptadenserow[ptasparserow[j]] = 0; 225 } 226 for (j=0;j<cnzi;j++) { 227 denserow[sparserow[j]] = 0; 228 } 229 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 230 /* For now, we will recompute what is needed. */ 231 ci[i+1] = ci[i] + cnzi; 232 } 233 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 234 /* Allocate space for cj, initialize cj, and */ 235 /* destroy list of free space and other temporary array(s) */ 236 ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr); 237 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 238 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 239 240 /* Allocate space for ca */ 241 ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 242 ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 243 244 /* put together the new matrix */ 245 ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 246 247 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 248 /* Since these are PETSc arrays, change flags to free them as necessary. */ 249 c = (Mat_SeqAIJ *)((*C)->data); 250 c->freedata = PETSC_TRUE; 251 c->nonew = 0; 252 253 /* Clean up. */ 254 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 255 256 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 257 PetscFunctionReturn(0); 258 } 259 EXTERN_C_END 260 261 #include "src/mat/impls/maij/maij.h" 262 EXTERN_C_BEGIN 263 #undef __FUNCT__ 264 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 265 int MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C) { 266 /* This routine requires testing -- I don't think it works. */ 267 int ierr; 268 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 269 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 270 Mat P=pp->AIJ; 271 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 272 int *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 273 int *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 274 int an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof; 275 int i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 276 MatScalar *ca; 277 278 PetscFunctionBegin; 279 /* Start timer */ 280 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 281 282 /* Get ij structure of P^T */ 283 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 284 285 /* Allocate ci array, arrays for fill computation and */ 286 /* free space for accumulating nonzero column info */ 287 ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr); 288 ci[0] = 0; 289 290 ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr); 291 ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr); 292 ptasparserow = ptadenserow + an; 293 denserow = ptasparserow + an; 294 sparserow = denserow + pn; 295 296 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 297 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 298 ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 299 current_space = free_space; 300 301 /* Determine symbolic info for each row of C: */ 302 for (i=0;i<pn/ppdof;i++) { 303 ptnzi = pti[i+1] - pti[i]; 304 ptanzi = 0; 305 ptJ = ptj + pti[i]; 306 for (dof=0;dof<ppdof;dof++) { 307 /* Determine symbolic row of PtA: */ 308 for (j=0;j<ptnzi;j++) { 309 arow = ptJ[j] + dof; 310 anzj = ai[arow+1] - ai[arow]; 311 ajj = aj + ai[arow]; 312 for (k=0;k<anzj;k++) { 313 if (!ptadenserow[ajj[k]]) { 314 ptadenserow[ajj[k]] = -1; 315 ptasparserow[ptanzi++] = ajj[k]; 316 } 317 } 318 } 319 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 320 ptaj = ptasparserow; 321 cnzi = 0; 322 for (j=0;j<ptanzi;j++) { 323 pdof = *ptaj%dof; 324 prow = (*ptaj++)/dof; 325 pnzj = pi[prow+1] - pi[prow]; 326 pjj = pj + pi[prow]; 327 for (k=0;k<pnzj;k++) { 328 if (!denserow[pjj[k]+pdof]) { 329 denserow[pjj[k]+pdof] = -1; 330 sparserow[cnzi++] = pjj[k]+pdof; 331 } 332 } 333 } 334 335 /* sort sparserow */ 336 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 337 338 /* If free space is not available, make more free space */ 339 /* Double the amount of total space in the list */ 340 if (current_space->local_remaining<cnzi) { 341 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 342 } 343 344 /* Copy data into free space, and zero out denserows */ 345 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 346 current_space->array += cnzi; 347 current_space->local_used += cnzi; 348 current_space->local_remaining -= cnzi; 349 350 for (j=0;j<ptanzi;j++) { 351 ptadenserow[ptasparserow[j]] = 0; 352 } 353 for (j=0;j<cnzi;j++) { 354 denserow[sparserow[j]] = 0; 355 } 356 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 357 /* For now, we will recompute what is needed. */ 358 ci[i+1+dof] = ci[i+dof] + cnzi; 359 } 360 } 361 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 362 /* Allocate space for cj, initialize cj, and */ 363 /* destroy list of free space and other temporary array(s) */ 364 ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr); 365 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 366 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 367 368 /* Allocate space for ca */ 369 ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 370 ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 371 372 /* put together the new matrix */ 373 ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 374 375 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 376 /* Since these are PETSc arrays, change flags to free them as necessary. */ 377 c = (Mat_SeqAIJ *)((*C)->data); 378 c->freedata = PETSC_TRUE; 379 c->nonew = 0; 380 381 /* Clean up. */ 382 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 383 384 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 385 PetscFunctionReturn(0); 386 } 387 EXTERN_C_END 388 389 #undef __FUNCT__ 390 #define __FUNCT__ "MatPtAPNumeric" 391 /*@ 392 MatPtAPNumeric - Computes the matrix projection C = P^T * A * P 393 394 Collective on Mat 395 396 Input Parameters: 397 + A - the matrix 398 - P - the projection matrix 399 400 Output Parameters: 401 . C - the product matrix 402 403 Notes: 404 C must have been created by calling MatPtAPSymbolic and must be destroyed by 405 the user using MatDeatroy(). 406 407 This routine is currently only implemented for pairs of SeqAIJ matrices and classes 408 which inherit from SeqAIJ. C will be of type MATSEQAIJ. 409 410 Level: intermediate 411 412 .seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric() 413 @*/ 414 int MatPtAPNumeric(Mat A,Mat P,Mat C) { 415 int ierr; 416 char funct[80]; 417 int (*f)(Mat,Mat,Mat); 418 419 PetscFunctionBegin; 420 421 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 422 PetscValidType(A,1); 423 MatPreallocated(A); 424 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 425 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 426 427 PetscValidHeaderSpecific(P,MAT_COOKIE,2); 428 PetscValidType(P,2); 429 MatPreallocated(P); 430 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 431 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 432 433 PetscValidHeaderSpecific(C,MAT_COOKIE,3); 434 PetscValidType(C,3); 435 MatPreallocated(C); 436 if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 437 if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 438 439 if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M); 440 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 441 if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 442 if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N); 443 444 /* Currently only _seqaij_seqaij is implemented, so just query for it. */ 445 /* When other implementations exist, attack the multiple dispatch problem. */ 446 ierr = PetscStrcpy(funct,"MatPtAPNumeric_seqaij_seqaij");CHKERRQ(ierr); 447 ierr = PetscObjectQueryFunction((PetscObject)P,funct,(PetscVoidFunction)&f);CHKERRQ(ierr); 448 if (!f) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric is not supported for P of type %s",P->type_name); 449 ierr = PetscObjectQueryFunction((PetscObject)A,funct,(PetscVoidFunction)&f);CHKERRQ(ierr); 450 if (!f) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric is not supported for A of type %s",A->type_name); 451 452 ierr = (*f)(A,P,C);CHKERRQ(ierr); 453 454 PetscFunctionReturn(0); 455 } 456 457 EXTERN_C_BEGIN 458 #undef __FUNCT__ 459 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ" 460 int MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) { 461 int ierr,flops=0; 462 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 463 Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 464 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 465 int *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 466 int *ci=c->i,*cj=c->j,*cjj; 467 int am=A->M,cn=C->N,cm=C->M; 468 int i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 469 MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 470 471 PetscFunctionBegin; 472 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,C,0);CHKERRQ(ierr); 473 474 /* Allocate temporary array for storage of one row of A*P */ 475 ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(int)),&apa);CHKERRQ(ierr); 476 ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr); 477 478 apj = (int*)(apa + cn); 479 apjdense = apj + cn; 480 481 /* Clear old values in C */ 482 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 483 484 for (i=0;i<am;i++) { 485 /* Form sparse row of A*P */ 486 anzi = ai[i+1] - ai[i]; 487 apnzj = 0; 488 for (j=0;j<anzi;j++) { 489 prow = *aj++; 490 pnzj = pi[prow+1] - pi[prow]; 491 pjj = pj + pi[prow]; 492 paj = pa + pi[prow]; 493 for (k=0;k<pnzj;k++) { 494 if (!apjdense[pjj[k]]) { 495 apjdense[pjj[k]] = -1; 496 apj[apnzj++] = pjj[k]; 497 } 498 apa[pjj[k]] += (*aa)*paj[k]; 499 } 500 flops += 2*pnzj; 501 aa++; 502 } 503 504 /* Sort the j index array for quick sparse axpy. */ 505 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 506 507 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 508 pnzi = pi[i+1] - pi[i]; 509 for (j=0;j<pnzi;j++) { 510 nextap = 0; 511 crow = *pJ++; 512 cjj = cj + ci[crow]; 513 caj = ca + ci[crow]; 514 /* Perform sparse axpy operation. Note cjj includes apj. */ 515 for (k=0;nextap<apnzj;k++) { 516 if (cjj[k]==apj[nextap]) { 517 caj[k] += (*pA)*apa[apj[nextap++]]; 518 } 519 } 520 flops += 2*apnzj; 521 pA++; 522 } 523 524 /* Zero the current row info for A*P */ 525 for (j=0;j<apnzj;j++) { 526 apa[apj[j]] = 0.; 527 apjdense[apj[j]] = 0; 528 } 529 } 530 531 /* Assemble the final matrix and clean up */ 532 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 533 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 534 ierr = PetscFree(apa);CHKERRQ(ierr); 535 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 536 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,C,0);CHKERRQ(ierr); 537 538 PetscFunctionReturn(0); 539 } 540 EXTERN_C_END 541 542 #undef __FUNCT__ 543 #define __FUNCT__ "RegisterPtAPRoutines_Private" 544 int RegisterPtAPRoutines_Private(Mat A) { 545 int ierr; 546 547 PetscFunctionBegin; 548 if (!MAT_PtAPSymbolic) { 549 ierr = PetscLogEventRegister(&MAT_PtAPSymbolic,"MatPtAPSymbolic",MAT_COOKIE);CHKERRQ(ierr); 550 } 551 if (!MAT_PtAPNumeric) { 552 ierr = PetscLogEventRegister(&MAT_PtAPNumeric,"MatPtAPNumeric",MAT_COOKIE);CHKERRQ(ierr); 553 } 554 ierr = RegisterMatMatMultRoutines_Private(A);CHKERRQ(ierr); 555 556 PetscFunctionReturn(0); 557 } 558