1 /* 2 Defines projective product routines where A is a SeqAIJ matrix 3 C = P^T * A * P 4 */ 5 6 #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 7 #include "src/mat/utils/freespace.h" 8 #include "petscbt.h" 9 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatPtAP" 12 /*@ 13 MatPtAP - Creates the matrix projection C = P^T * A * P 14 15 Collective on Mat 16 17 Input Parameters: 18 + A - the matrix 19 . P - the projection matrix 20 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 21 - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P)) 22 23 Output Parameters: 24 . C - the product matrix 25 26 Notes: 27 C will be created and must be destroyed by the user with MatDestroy(). 28 29 This routine is currently only implemented for pairs of SeqAIJ matrices and classes 30 which inherit from SeqAIJ. C will be of type MATSEQAIJ. 31 32 Level: intermediate 33 34 .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult() 35 @*/ 36 PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 37 { 38 PetscErrorCode ierr; 39 PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *); 40 PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *); 41 42 PetscFunctionBegin; 43 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 44 PetscValidType(A,1); 45 MatPreallocated(A); 46 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 47 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 48 PetscValidHeaderSpecific(P,MAT_COOKIE,2); 49 PetscValidType(P,2); 50 MatPreallocated(P); 51 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 52 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 53 PetscValidPointer(C,3); 54 55 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->M,A->N); 56 57 if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 58 59 /* For now, we do not dispatch based on the type of A and P */ 60 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 61 fA = A->ops->ptap; 62 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name); 63 fP = P->ops->ptap; 64 if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name); 65 if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 66 67 ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 68 ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr); 69 ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 70 PetscFunctionReturn(0); 71 } 72 73 #undef __FUNCT__ 74 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ" 75 PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 76 { 77 PetscErrorCode ierr; 78 79 PetscFunctionBegin; 80 if (scall == MAT_INITIAL_MATRIX){ 81 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 82 ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr); 83 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 84 } 85 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 86 ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr); 87 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 88 PetscFunctionReturn(0); 89 } 90 91 /* 92 MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P 93 94 Collective on Mat 95 96 Input Parameters: 97 + A - the matrix 98 - P - the projection matrix 99 100 Output Parameters: 101 . C - the (i,j) structure of the product matrix 102 103 Notes: 104 C will be created and must be destroyed by the user with MatDestroy(). 105 106 This routine is currently only implemented for pairs of SeqAIJ matrices and classes 107 which inherit from SeqAIJ. C will be of type MATSEQAIJ. The product is computed using 108 this (i,j) structure by calling MatPtAPNumeric(). 109 110 Level: intermediate 111 112 .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic() 113 */ 114 #undef __FUNCT__ 115 #define __FUNCT__ "MatPtAPSymbolic" 116 PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C) 117 { 118 PetscErrorCode ierr; 119 PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*); 120 PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*); 121 122 PetscFunctionBegin; 123 124 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 125 PetscValidType(A,1); 126 MatPreallocated(A); 127 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 128 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 129 130 PetscValidHeaderSpecific(P,MAT_COOKIE,2); 131 PetscValidType(P,2); 132 MatPreallocated(P); 133 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 134 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 135 136 PetscValidPointer(C,3); 137 138 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->M,A->N); 139 if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",A->M,A->N); 140 141 /* For now, we do not dispatch based on the type of A and P */ 142 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 143 fA = A->ops->ptapsymbolic; 144 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name); 145 fP = P->ops->ptapsymbolic; 146 if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name); 147 if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 148 149 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 150 ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr); 151 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 152 153 PetscFunctionReturn(0); 154 } 155 156 #undef __FUNCT__ 157 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ" 158 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C) 159 { 160 PetscErrorCode ierr; 161 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 162 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*p = (Mat_SeqAIJ*)P->data,*c; 163 PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 164 PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*ptaj; 165 PetscInt an=A->N,am=A->M,pn=P->N,pm=P->M; 166 PetscInt i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk; 167 MatScalar *ca; 168 PetscBT lnkbt; 169 170 PetscFunctionBegin; 171 /* Get ij structure of P^T */ 172 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 173 ptJ=ptj; 174 175 /* Allocate ci array, arrays for fill computation and */ 176 /* free space for accumulating nonzero column info */ 177 ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 178 ci[0] = 0; 179 180 ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 181 ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 182 ptasparserow = ptadenserow + an; 183 184 /* create and initialize a linked list */ 185 nlnk = pn+1; 186 ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 187 188 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 189 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 190 ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 191 current_space = free_space; 192 193 /* Determine symbolic info for each row of C: */ 194 for (i=0;i<pn;i++) { 195 ptnzi = pti[i+1] - pti[i]; 196 ptanzi = 0; 197 /* Determine symbolic row of PtA: */ 198 for (j=0;j<ptnzi;j++) { 199 arow = *ptJ++; 200 anzj = ai[arow+1] - ai[arow]; 201 ajj = aj + ai[arow]; 202 for (k=0;k<anzj;k++) { 203 if (!ptadenserow[ajj[k]]) { 204 ptadenserow[ajj[k]] = -1; 205 ptasparserow[ptanzi++] = ajj[k]; 206 } 207 } 208 } 209 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 210 ptaj = ptasparserow; 211 cnzi = 0; 212 for (j=0;j<ptanzi;j++) { 213 prow = *ptaj++; 214 pnzj = pi[prow+1] - pi[prow]; 215 pjj = pj + pi[prow]; 216 /* add non-zero cols of P into the sorted linked list lnk */ 217 ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 218 cnzi += nlnk; 219 } 220 221 /* If free space is not available, make more free space */ 222 /* Double the amount of total space in the list */ 223 if (current_space->local_remaining<cnzi) { 224 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 225 } 226 227 /* Copy data into free space, and zero out denserows */ 228 ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 229 current_space->array += cnzi; 230 current_space->local_used += cnzi; 231 current_space->local_remaining -= cnzi; 232 233 for (j=0;j<ptanzi;j++) { 234 ptadenserow[ptasparserow[j]] = 0; 235 } 236 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 237 /* For now, we will recompute what is needed. */ 238 ci[i+1] = ci[i] + cnzi; 239 } 240 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 241 /* Allocate space for cj, initialize cj, and */ 242 /* destroy list of free space and other temporary array(s) */ 243 ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 244 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 245 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 246 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 247 248 /* Allocate space for ca */ 249 ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 250 ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 251 252 /* put together the new matrix */ 253 ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 254 255 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 256 /* Since these are PETSc arrays, change flags to free them as necessary. */ 257 c = (Mat_SeqAIJ *)((*C)->data); 258 c->freedata = PETSC_TRUE; 259 c->nonew = 0; 260 261 /* Clean up. */ 262 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 263 264 PetscFunctionReturn(0); 265 } 266 267 #include "src/mat/impls/maij/maij.h" 268 EXTERN_C_BEGIN 269 #undef __FUNCT__ 270 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 271 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C) 272 { 273 /* This routine requires testing -- I don't think it works. */ 274 PetscErrorCode ierr; 275 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 276 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 277 Mat P=pp->AIJ; 278 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 279 PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 280 PetscInt *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 281 PetscInt an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof; 282 PetscInt i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 283 MatScalar *ca; 284 285 PetscFunctionBegin; 286 /* Start timer */ 287 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 288 289 /* Get ij structure of P^T */ 290 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 291 292 /* Allocate ci array, arrays for fill computation and */ 293 /* free space for accumulating nonzero column info */ 294 ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 295 ci[0] = 0; 296 297 ierr = PetscMalloc((2*pn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 298 ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 299 ptasparserow = ptadenserow + an; 300 denserow = ptasparserow + an; 301 sparserow = denserow + pn; 302 303 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 304 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 305 ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 306 current_space = free_space; 307 308 /* Determine symbolic info for each row of C: */ 309 for (i=0;i<pn/ppdof;i++) { 310 ptnzi = pti[i+1] - pti[i]; 311 ptanzi = 0; 312 ptJ = ptj + pti[i]; 313 for (dof=0;dof<ppdof;dof++) { 314 /* Determine symbolic row of PtA: */ 315 for (j=0;j<ptnzi;j++) { 316 arow = ptJ[j] + dof; 317 anzj = ai[arow+1] - ai[arow]; 318 ajj = aj + ai[arow]; 319 for (k=0;k<anzj;k++) { 320 if (!ptadenserow[ajj[k]]) { 321 ptadenserow[ajj[k]] = -1; 322 ptasparserow[ptanzi++] = ajj[k]; 323 } 324 } 325 } 326 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 327 ptaj = ptasparserow; 328 cnzi = 0; 329 for (j=0;j<ptanzi;j++) { 330 pdof = *ptaj%dof; 331 prow = (*ptaj++)/dof; 332 pnzj = pi[prow+1] - pi[prow]; 333 pjj = pj + pi[prow]; 334 for (k=0;k<pnzj;k++) { 335 if (!denserow[pjj[k]+pdof]) { 336 denserow[pjj[k]+pdof] = -1; 337 sparserow[cnzi++] = pjj[k]+pdof; 338 } 339 } 340 } 341 342 /* sort sparserow */ 343 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 344 345 /* If free space is not available, make more free space */ 346 /* Double the amount of total space in the list */ 347 if (current_space->local_remaining<cnzi) { 348 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 349 } 350 351 /* Copy data into free space, and zero out denserows */ 352 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 353 current_space->array += cnzi; 354 current_space->local_used += cnzi; 355 current_space->local_remaining -= cnzi; 356 357 for (j=0;j<ptanzi;j++) { 358 ptadenserow[ptasparserow[j]] = 0; 359 } 360 for (j=0;j<cnzi;j++) { 361 denserow[sparserow[j]] = 0; 362 } 363 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 364 /* For now, we will recompute what is needed. */ 365 ci[i+1+dof] = ci[i+dof] + cnzi; 366 } 367 } 368 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 369 /* Allocate space for cj, initialize cj, and */ 370 /* destroy list of free space and other temporary array(s) */ 371 ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 372 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 373 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 374 375 /* Allocate space for ca */ 376 ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 377 ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 378 379 /* put together the new matrix */ 380 ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 381 382 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 383 /* Since these are PETSc arrays, change flags to free them as necessary. */ 384 c = (Mat_SeqAIJ *)((*C)->data); 385 c->freedata = PETSC_TRUE; 386 c->nonew = 0; 387 388 /* Clean up. */ 389 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 390 391 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 392 PetscFunctionReturn(0); 393 } 394 EXTERN_C_END 395 396 /* 397 MatPtAPNumeric - Computes the matrix projection C = P^T * A * P 398 399 Collective on Mat 400 401 Input Parameters: 402 + A - the matrix 403 - P - the projection matrix 404 405 Output Parameters: 406 . C - the product matrix 407 408 Notes: 409 C must have been created by calling MatPtAPSymbolic and must be destroyed by 410 the user using MatDeatroy(). 411 412 This routine is currently only implemented for pairs of AIJ matrices and classes 413 which inherit from AIJ. C will be of type MATAIJ. 414 415 Level: intermediate 416 417 .seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric() 418 */ 419 #undef __FUNCT__ 420 #define __FUNCT__ "MatPtAPNumeric" 421 PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C) 422 { 423 PetscErrorCode ierr; 424 PetscErrorCode (*fA)(Mat,Mat,Mat); 425 PetscErrorCode (*fP)(Mat,Mat,Mat); 426 427 PetscFunctionBegin; 428 429 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 430 PetscValidType(A,1); 431 MatPreallocated(A); 432 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 433 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 434 435 PetscValidHeaderSpecific(P,MAT_COOKIE,2); 436 PetscValidType(P,2); 437 MatPreallocated(P); 438 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 439 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 440 441 PetscValidHeaderSpecific(C,MAT_COOKIE,3); 442 PetscValidType(C,3); 443 MatPreallocated(C); 444 if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 445 if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 446 447 if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->N,C->M); 448 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->M,A->N); 449 if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",A->M,A->N); 450 if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->N,C->N); 451 452 /* For now, we do not dispatch based on the type of A and P */ 453 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 454 fA = A->ops->ptapnumeric; 455 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name); 456 fP = P->ops->ptapnumeric; 457 if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name); 458 if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 459 460 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 461 ierr = (*fA)(A,P,C);CHKERRQ(ierr); 462 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 463 464 PetscFunctionReturn(0); 465 } 466 467 #undef __FUNCT__ 468 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ" 469 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) 470 { 471 PetscErrorCode ierr; 472 PetscInt flops=0; 473 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 474 Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 475 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 476 PetscInt *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 477 PetscInt *ci=c->i,*cj=c->j,*cjj; 478 PetscInt am=A->M,cn=C->N,cm=C->M; 479 PetscInt i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 480 MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 481 482 PetscFunctionBegin; 483 /* Allocate temporary array for storage of one row of A*P */ 484 ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 485 ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 486 487 apj = (PetscInt *)(apa + cn); 488 apjdense = apj + cn; 489 490 /* Clear old values in C */ 491 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 492 493 for (i=0;i<am;i++) { 494 /* Form sparse row of A*P */ 495 anzi = ai[i+1] - ai[i]; 496 apnzj = 0; 497 for (j=0;j<anzi;j++) { 498 prow = *aj++; 499 pnzj = pi[prow+1] - pi[prow]; 500 pjj = pj + pi[prow]; 501 paj = pa + pi[prow]; 502 for (k=0;k<pnzj;k++) { 503 if (!apjdense[pjj[k]]) { 504 apjdense[pjj[k]] = -1; 505 apj[apnzj++] = pjj[k]; 506 } 507 apa[pjj[k]] += (*aa)*paj[k]; 508 } 509 flops += 2*pnzj; 510 aa++; 511 } 512 513 /* Sort the j index array for quick sparse axpy. */ 514 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 515 516 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 517 pnzi = pi[i+1] - pi[i]; 518 for (j=0;j<pnzi;j++) { 519 nextap = 0; 520 crow = *pJ++; 521 cjj = cj + ci[crow]; 522 caj = ca + ci[crow]; 523 /* Perform sparse axpy operation. Note cjj includes apj. */ 524 for (k=0;nextap<apnzj;k++) { 525 if (cjj[k]==apj[nextap]) { 526 caj[k] += (*pA)*apa[apj[nextap++]]; 527 } 528 } 529 flops += 2*apnzj; 530 pA++; 531 } 532 533 /* Zero the current row info for A*P */ 534 for (j=0;j<apnzj;j++) { 535 apa[apj[j]] = 0.; 536 apjdense[apj[j]] = 0; 537 } 538 } 539 540 /* Assemble the final matrix and clean up */ 541 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 542 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 543 ierr = PetscFree(apa);CHKERRQ(ierr); 544 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 545 546 PetscFunctionReturn(0); 547 } 548