1 /* 2 Defines projective product routines where A is a AIJ matrix 3 C = P^T * A * P 4 */ 5 6 #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 7 #include "src/mat/utils/freespace.h" 8 #include "src/mat/impls/aij/mpi/mpiaij.h" 9 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatPtAP" 12 /*@ 13 MatPtAP - Creates the matrix projection C = P^T * A * P 14 15 Collective on Mat 16 17 Input Parameters: 18 + A - the matrix 19 . P - the projection matrix 20 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 21 - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P)) 22 23 Output Parameters: 24 . C - the product matrix 25 26 Notes: 27 C will be created and must be destroyed by the user with MatDestroy(). 28 29 This routine is currently only implemented for pairs of SeqAIJ matrices and classes 30 which inherit from SeqAIJ. C will be of type MATSEQAIJ. 31 32 Level: intermediate 33 34 .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult() 35 @*/ 36 PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) { 37 PetscErrorCode ierr; 38 PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *); 39 PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *); 40 41 PetscFunctionBegin; 42 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 43 PetscValidType(A,1); 44 MatPreallocated(A); 45 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 46 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 47 PetscValidHeaderSpecific(P,MAT_COOKIE,2); 48 PetscValidType(P,2); 49 MatPreallocated(P); 50 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 51 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 52 PetscValidPointer(C,3); 53 54 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 55 56 if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 57 58 /* For now, we do not dispatch based on the type of A and P */ 59 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 60 fA = A->ops->ptap; 61 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name); 62 fP = P->ops->ptap; 63 if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name); 64 if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 65 66 ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 67 ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr); 68 ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 69 PetscFunctionReturn(0); 70 } 71 72 EXTERN PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,MatReuse,PetscReal,int,int,Mat*); 73 EXTERN PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,PetscReal,int,int,Mat*); 74 EXTERN PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,int,int,Mat); 75 76 #undef __FUNCT__ 77 #define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ" 78 PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 79 { 80 PetscErrorCode ierr; 81 82 PetscFunctionBegin; 83 if (scall == MAT_INITIAL_MATRIX){ 84 ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */ 85 } else if (scall == MAT_REUSE_MATRIX){ 86 ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr); 87 } else { 88 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall); 89 } 90 PetscFunctionReturn(0); 91 } 92 93 #undef __FUNCT__ 94 #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ" 95 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C) 96 { 97 PetscErrorCode ierr; 98 Mat P_seq,A_loc,C_seq; 99 int prstart,prend,m=P->m; 100 IS isrowp,iscolp; 101 102 PetscFunctionBegin; 103 /* get P_seq = submatrix of P by taking rows of P that equal to nonzero col of A */ 104 ierr = MatGetBrowsOfAcols(A,P,MAT_INITIAL_MATRIX,&isrowp,&iscolp,&prstart,&P_seq);CHKERRQ(ierr); 105 ierr = ISDestroy(iscolp);CHKERRQ(ierr); 106 107 /* get A_loc = submatrix of A by taking all local rows of A */ 108 ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,PETSC_NULL,&isrowp,&A_loc);CHKERRQ(ierr); 109 ierr = ISDestroy(isrowp);CHKERRQ(ierr); 110 111 /* compute C_seq = P_loc^T * A_loc * P_seq */ 112 prend = prstart + m; 113 ierr = MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(A_loc,P_seq,MAT_INITIAL_MATRIX,fill,prstart,prend,&C_seq);CHKERRQ(ierr); 114 ierr = MatDestroy(P_seq);CHKERRQ(ierr); 115 ierr = MatDestroy(A_loc);CHKERRQ(ierr); 116 117 /* add C_seq into mpi C */ 118 ierr = MatMerge_SeqsToMPI(A->comm,C_seq,P->n,P->n,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr); 119 120 PetscFunctionReturn(0); 121 } 122 123 #undef __FUNCT__ 124 #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ" 125 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C) 126 { 127 PetscErrorCode ierr; 128 Mat P_seq,A_loc,C_seq; 129 int prstart,prend,m=P->m; 130 IS isrowp,iscolp; 131 Mat_Merge_SeqsToMPI *merge; 132 PetscObjectContainer container; 133 134 PetscFunctionBegin; 135 ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 136 if (container) { 137 ierr = PetscObjectContainerGetPointer(container,(void *)&merge);CHKERRQ(ierr); 138 } 139 140 /* get P_seq = submatrix of P by taking rows of P that equal to nonzero col of A */ 141 ierr = MatGetBrowsOfAcols(A,P,MAT_INITIAL_MATRIX,&isrowp,&iscolp,&prstart,&P_seq);CHKERRQ(ierr); 142 ierr = ISDestroy(iscolp);CHKERRQ(ierr); 143 144 /* get A_loc = submatrix of A by taking all local rows of A */ 145 ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,PETSC_NULL,&isrowp,&A_loc);CHKERRQ(ierr); 146 ierr = ISDestroy(isrowp);CHKERRQ(ierr); 147 148 /* compute C_seq = P_loc^T * A_loc * P_seq */ 149 prend = prstart + m; 150 C_seq = merge->C_seq; 151 ierr = MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(A_loc,P_seq,MAT_REUSE_MATRIX,1.0,prstart,prend,&C_seq);CHKERRQ(ierr); 152 ierr = MatDestroy(P_seq);CHKERRQ(ierr); 153 ierr = MatDestroy(A_loc);CHKERRQ(ierr); 154 155 /* add C_seq into mpi C */ 156 ierr = MatMerge_SeqsToMPI(A->comm,C_seq,P->n,P->n,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 157 158 PetscFunctionReturn(0); 159 } 160 161 #undef __FUNCT__ 162 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ" 163 PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 164 { 165 PetscErrorCode ierr; 166 PetscFunctionBegin; 167 if (scall == MAT_INITIAL_MATRIX){ 168 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 169 ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr); 170 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 171 } 172 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 173 ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr); 174 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 175 PetscFunctionReturn(0); 176 } 177 178 #undef __FUNCT__ 179 #define __FUNCT__ "MatPtAPSymbolic" 180 /* 181 MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P 182 183 Collective on Mat 184 185 Input Parameters: 186 + A - the matrix 187 - P - the projection matrix 188 189 Output Parameters: 190 . C - the (i,j) structure of the product matrix 191 192 Notes: 193 C will be created and must be destroyed by the user with MatDestroy(). 194 195 This routine is currently only implemented for pairs of SeqAIJ matrices and classes 196 which inherit from SeqAIJ. C will be of type MATSEQAIJ. The product is computed using 197 this (i,j) structure by calling MatPtAPNumeric(). 198 199 Level: intermediate 200 201 .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic() 202 */ 203 PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C) { 204 PetscErrorCode ierr; 205 PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*); 206 PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*); 207 208 PetscFunctionBegin; 209 210 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 211 PetscValidType(A,1); 212 MatPreallocated(A); 213 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 214 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 215 216 PetscValidHeaderSpecific(P,MAT_COOKIE,2); 217 PetscValidType(P,2); 218 MatPreallocated(P); 219 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 220 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 221 222 PetscValidPointer(C,3); 223 224 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 225 if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 226 227 /* For now, we do not dispatch based on the type of A and P */ 228 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 229 fA = A->ops->ptapsymbolic; 230 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name); 231 fP = P->ops->ptapsymbolic; 232 if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name); 233 if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 234 235 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 236 ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr); 237 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 238 239 PetscFunctionReturn(0); 240 } 241 242 typedef struct { 243 Mat symAP; 244 } Mat_PtAPstruct; 245 246 EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat); 247 248 #undef __FUNCT__ 249 #define __FUNCT__ "MatDestroy_SeqAIJ_PtAP" 250 PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A) 251 { 252 PetscErrorCode ierr; 253 Mat_PtAPstruct *ptap=(Mat_PtAPstruct*)A->spptr; 254 255 PetscFunctionBegin; 256 ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr); 257 ierr = PetscFree(ptap);CHKERRQ(ierr); 258 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 259 PetscFunctionReturn(0); 260 } 261 262 #undef __FUNCT__ 263 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ" 264 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C) { 265 PetscErrorCode ierr; 266 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 267 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 268 int *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 269 int *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 270 int an=A->N,am=A->M,pn=P->N,pm=P->M; 271 int i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 272 MatScalar *ca; 273 274 PetscFunctionBegin; 275 /* Get ij structure of P^T */ 276 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 277 ptJ=ptj; 278 279 /* Allocate ci array, arrays for fill computation and */ 280 /* free space for accumulating nonzero column info */ 281 ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr); 282 ci[0] = 0; 283 284 ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr); 285 ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr); 286 ptasparserow = ptadenserow + an; 287 denserow = ptasparserow + an; 288 sparserow = denserow + pn; 289 290 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 291 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 292 ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 293 current_space = free_space; 294 295 /* Determine symbolic info for each row of C: */ 296 for (i=0;i<pn;i++) { 297 ptnzi = pti[i+1] - pti[i]; 298 ptanzi = 0; 299 /* Determine symbolic row of PtA: */ 300 for (j=0;j<ptnzi;j++) { 301 arow = *ptJ++; 302 anzj = ai[arow+1] - ai[arow]; 303 ajj = aj + ai[arow]; 304 for (k=0;k<anzj;k++) { 305 if (!ptadenserow[ajj[k]]) { 306 ptadenserow[ajj[k]] = -1; 307 ptasparserow[ptanzi++] = ajj[k]; 308 } 309 } 310 } 311 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 312 ptaj = ptasparserow; 313 cnzi = 0; 314 for (j=0;j<ptanzi;j++) { 315 prow = *ptaj++; 316 pnzj = pi[prow+1] - pi[prow]; 317 pjj = pj + pi[prow]; 318 for (k=0;k<pnzj;k++) { 319 if (!denserow[pjj[k]]) { 320 denserow[pjj[k]] = -1; 321 sparserow[cnzi++] = pjj[k]; 322 } 323 } 324 } 325 326 /* sort sparserow */ 327 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 328 329 /* If free space is not available, make more free space */ 330 /* Double the amount of total space in the list */ 331 if (current_space->local_remaining<cnzi) { 332 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 333 } 334 335 /* Copy data into free space, and zero out denserows */ 336 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 337 current_space->array += cnzi; 338 current_space->local_used += cnzi; 339 current_space->local_remaining -= cnzi; 340 341 for (j=0;j<ptanzi;j++) { 342 ptadenserow[ptasparserow[j]] = 0; 343 } 344 for (j=0;j<cnzi;j++) { 345 denserow[sparserow[j]] = 0; 346 } 347 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 348 /* For now, we will recompute what is needed. */ 349 ci[i+1] = ci[i] + cnzi; 350 } 351 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 352 /* Allocate space for cj, initialize cj, and */ 353 /* destroy list of free space and other temporary array(s) */ 354 ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr); 355 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 356 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 357 358 /* Allocate space for ca */ 359 ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 360 ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 361 362 /* put together the new matrix */ 363 ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 364 365 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 366 /* Since these are PETSc arrays, change flags to free them as necessary. */ 367 c = (Mat_SeqAIJ *)((*C)->data); 368 c->freedata = PETSC_TRUE; 369 c->nonew = 0; 370 371 /* Clean up. */ 372 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 373 374 PetscFunctionReturn(0); 375 } 376 377 #include "src/mat/impls/maij/maij.h" 378 EXTERN_C_BEGIN 379 #undef __FUNCT__ 380 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 381 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C) { 382 /* This routine requires testing -- I don't think it works. */ 383 PetscErrorCode ierr; 384 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 385 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 386 Mat P=pp->AIJ; 387 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 388 int *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 389 int *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 390 int an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof; 391 int i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 392 MatScalar *ca; 393 394 PetscFunctionBegin; 395 /* Start timer */ 396 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 397 398 /* Get ij structure of P^T */ 399 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 400 401 /* Allocate ci array, arrays for fill computation and */ 402 /* free space for accumulating nonzero column info */ 403 ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr); 404 ci[0] = 0; 405 406 ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr); 407 ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr); 408 ptasparserow = ptadenserow + an; 409 denserow = ptasparserow + an; 410 sparserow = denserow + pn; 411 412 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 413 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 414 ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 415 current_space = free_space; 416 417 /* Determine symbolic info for each row of C: */ 418 for (i=0;i<pn/ppdof;i++) { 419 ptnzi = pti[i+1] - pti[i]; 420 ptanzi = 0; 421 ptJ = ptj + pti[i]; 422 for (dof=0;dof<ppdof;dof++) { 423 /* Determine symbolic row of PtA: */ 424 for (j=0;j<ptnzi;j++) { 425 arow = ptJ[j] + dof; 426 anzj = ai[arow+1] - ai[arow]; 427 ajj = aj + ai[arow]; 428 for (k=0;k<anzj;k++) { 429 if (!ptadenserow[ajj[k]]) { 430 ptadenserow[ajj[k]] = -1; 431 ptasparserow[ptanzi++] = ajj[k]; 432 } 433 } 434 } 435 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 436 ptaj = ptasparserow; 437 cnzi = 0; 438 for (j=0;j<ptanzi;j++) { 439 pdof = *ptaj%dof; 440 prow = (*ptaj++)/dof; 441 pnzj = pi[prow+1] - pi[prow]; 442 pjj = pj + pi[prow]; 443 for (k=0;k<pnzj;k++) { 444 if (!denserow[pjj[k]+pdof]) { 445 denserow[pjj[k]+pdof] = -1; 446 sparserow[cnzi++] = pjj[k]+pdof; 447 } 448 } 449 } 450 451 /* sort sparserow */ 452 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 453 454 /* If free space is not available, make more free space */ 455 /* Double the amount of total space in the list */ 456 if (current_space->local_remaining<cnzi) { 457 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 458 } 459 460 /* Copy data into free space, and zero out denserows */ 461 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 462 current_space->array += cnzi; 463 current_space->local_used += cnzi; 464 current_space->local_remaining -= cnzi; 465 466 for (j=0;j<ptanzi;j++) { 467 ptadenserow[ptasparserow[j]] = 0; 468 } 469 for (j=0;j<cnzi;j++) { 470 denserow[sparserow[j]] = 0; 471 } 472 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 473 /* For now, we will recompute what is needed. */ 474 ci[i+1+dof] = ci[i+dof] + cnzi; 475 } 476 } 477 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 478 /* Allocate space for cj, initialize cj, and */ 479 /* destroy list of free space and other temporary array(s) */ 480 ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr); 481 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 482 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 483 484 /* Allocate space for ca */ 485 ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 486 ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 487 488 /* put together the new matrix */ 489 ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 490 491 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 492 /* Since these are PETSc arrays, change flags to free them as necessary. */ 493 c = (Mat_SeqAIJ *)((*C)->data); 494 c->freedata = PETSC_TRUE; 495 c->nonew = 0; 496 497 /* Clean up. */ 498 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 499 500 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 501 PetscFunctionReturn(0); 502 } 503 EXTERN_C_END 504 505 #undef __FUNCT__ 506 #define __FUNCT__ "MatPtAPNumeric" 507 /* 508 MatPtAPNumeric - Computes the matrix projection C = P^T * A * P 509 510 Collective on Mat 511 512 Input Parameters: 513 + A - the matrix 514 - P - the projection matrix 515 516 Output Parameters: 517 . C - the product matrix 518 519 Notes: 520 C must have been created by calling MatPtAPSymbolic and must be destroyed by 521 the user using MatDeatroy(). 522 523 This routine is currently only implemented for pairs of AIJ matrices and classes 524 which inherit from AIJ. C will be of type MATAIJ. 525 526 Level: intermediate 527 528 .seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric() 529 */ 530 PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C) { 531 PetscErrorCode ierr; 532 PetscErrorCode (*fA)(Mat,Mat,Mat); 533 PetscErrorCode (*fP)(Mat,Mat,Mat); 534 535 PetscFunctionBegin; 536 537 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 538 PetscValidType(A,1); 539 MatPreallocated(A); 540 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 541 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 542 543 PetscValidHeaderSpecific(P,MAT_COOKIE,2); 544 PetscValidType(P,2); 545 MatPreallocated(P); 546 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 547 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 548 549 PetscValidHeaderSpecific(C,MAT_COOKIE,3); 550 PetscValidType(C,3); 551 MatPreallocated(C); 552 if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 553 if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 554 555 if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M); 556 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 557 if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 558 if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N); 559 560 /* For now, we do not dispatch based on the type of A and P */ 561 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 562 fA = A->ops->ptapnumeric; 563 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name); 564 fP = P->ops->ptapnumeric; 565 if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name); 566 if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 567 568 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 569 ierr = (*fA)(A,P,C);CHKERRQ(ierr); 570 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 571 572 PetscFunctionReturn(0); 573 } 574 575 #undef __FUNCT__ 576 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ" 577 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) 578 { 579 PetscErrorCode ierr; 580 int flops=0; 581 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 582 Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 583 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 584 int *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 585 int *ci=c->i,*cj=c->j,*cjj; 586 int am=A->M,cn=C->N,cm=C->M; 587 int i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 588 MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 589 590 PetscFunctionBegin; 591 /* Allocate temporary array for storage of one row of A*P */ 592 ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(int)),&apa);CHKERRQ(ierr); 593 ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr); 594 595 apj = (int *)(apa + cn); 596 apjdense = apj + cn; 597 598 /* Clear old values in C */ 599 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 600 601 for (i=0;i<am;i++) { 602 /* Form sparse row of A*P */ 603 anzi = ai[i+1] - ai[i]; 604 apnzj = 0; 605 for (j=0;j<anzi;j++) { 606 prow = *aj++; 607 pnzj = pi[prow+1] - pi[prow]; 608 pjj = pj + pi[prow]; 609 paj = pa + pi[prow]; 610 for (k=0;k<pnzj;k++) { 611 if (!apjdense[pjj[k]]) { 612 apjdense[pjj[k]] = -1; 613 apj[apnzj++] = pjj[k]; 614 } 615 apa[pjj[k]] += (*aa)*paj[k]; 616 } 617 flops += 2*pnzj; 618 aa++; 619 } 620 621 /* Sort the j index array for quick sparse axpy. */ 622 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 623 624 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 625 pnzi = pi[i+1] - pi[i]; 626 for (j=0;j<pnzi;j++) { 627 nextap = 0; 628 crow = *pJ++; 629 cjj = cj + ci[crow]; 630 caj = ca + ci[crow]; 631 /* Perform sparse axpy operation. Note cjj includes apj. */ 632 for (k=0;nextap<apnzj;k++) { 633 if (cjj[k]==apj[nextap]) { 634 caj[k] += (*pA)*apa[apj[nextap++]]; 635 } 636 } 637 flops += 2*apnzj; 638 pA++; 639 } 640 641 /* Zero the current row info for A*P */ 642 for (j=0;j<apnzj;j++) { 643 apa[apj[j]] = 0.; 644 apjdense[apj[j]] = 0; 645 } 646 } 647 648 /* Assemble the final matrix and clean up */ 649 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 650 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 651 ierr = PetscFree(apa);CHKERRQ(ierr); 652 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 653 654 PetscFunctionReturn(0); 655 } 656 657 /* Compute C = P[rstart:rend,:]^T * A * P of seqaij matrices - used by MatPtAP_MPIAIJ_MPIAIJ() */ 658 659 #undef __FUNCT__ 660 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt" 661 /*@ 662 MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt - Creates C = P[rstart:rend,:]^T * A * P of seqaij matrices, 663 used by MatPtAP_MPIAIJ_MPIAIJ() 664 665 Collective on Mat 666 667 Input Parameters: 668 + A - the matrix in seqaij format 669 . P - the projection matrix in seqaij format 670 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 671 . fill - expected fill (not being used when scall==MAT_REUSE_MATRIX) 672 . prstart, prend - the starting and ending-1-th row of the matrix P to be used for transpose 673 674 Output Parameters: 675 . C - the product matrix in seqaij format 676 677 Level: developer 678 679 .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult() 680 @*/ 681 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,int prstart,int prend,Mat C) 682 { 683 PetscErrorCode ierr; 684 int flops=0; 685 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 686 Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 687 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 688 int *ai=a->i,*aj=a->j,*apj,*apjdense; 689 int *pi=p->i,*pj=p->j,*pJ=p->j+pi[prstart],*pjj; 690 int *ci=c->i,*cj=c->j,*cjj; 691 int am=A->M,cn=C->N,cm=C->M; 692 int i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 693 MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 694 695 PetscFunctionBegin; 696 pA=p->a+pi[prstart]; 697 /* Allocate temporary array for storage of one row of A*P */ 698 ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(int)),&apa);CHKERRQ(ierr); 699 ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr); 700 701 apj = (int *)(apa + cn); 702 apjdense = apj + cn; 703 704 /* Clear old values in C */ 705 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 706 707 for (i=0;i<am;i++) { 708 /* Form sparse row of A*P */ 709 anzi = ai[i+1] - ai[i]; 710 apnzj = 0; 711 for (j=0;j<anzi;j++) { 712 prow = *aj++; 713 pnzj = pi[prow+1] - pi[prow]; 714 pjj = pj + pi[prow]; 715 paj = pa + pi[prow]; 716 for (k=0;k<pnzj;k++) { 717 if (!apjdense[pjj[k]]) { 718 apjdense[pjj[k]] = -1; 719 apj[apnzj++] = pjj[k]; 720 } 721 apa[pjj[k]] += (*aa)*paj[k]; 722 } 723 flops += 2*pnzj; 724 aa++; 725 } 726 727 /* Sort the j index array for quick sparse axpy. */ 728 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 729 730 /* Compute P[[prstart:prend,:]^T*A*P using outer product (P^T)[:,j+prstart]*(A*P)[j,:]. */ 731 pnzi = pi[i+1+prstart] - pi[i+prstart]; 732 for (j=0;j<pnzi;j++) { 733 nextap = 0; 734 crow = *pJ++; 735 cjj = cj + ci[crow]; 736 caj = ca + ci[crow]; 737 /* Perform sparse axpy operation. Note cjj includes apj. */ 738 for (k=0;nextap<apnzj;k++) { 739 if (cjj[k]==apj[nextap]) { 740 caj[k] += (*pA)*apa[apj[nextap++]]; 741 } 742 } 743 flops += 2*apnzj; 744 pA++; 745 } 746 747 /* Zero the current row info for A*P */ 748 for (j=0;j<apnzj;j++) { 749 apa[apj[j]] = 0.; 750 apjdense[apj[j]] = 0; 751 } 752 } 753 754 /* Assemble the final matrix and clean up */ 755 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 756 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 757 ierr = PetscFree(apa);CHKERRQ(ierr); 758 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 759 760 PetscFunctionReturn(0); 761 } 762 763 #undef __FUNCT__ 764 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt" 765 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,PetscReal fill,int prstart,int prend,Mat *C) { 766 PetscErrorCode ierr; 767 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 768 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 769 int *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 770 int *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 771 int an=A->N,am=A->M,pn=P->N,pm=P->M; 772 int i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 773 MatScalar *ca; 774 Mat *psub,P_sub; 775 IS isrow,iscol; 776 int m = prend - prstart; 777 778 PetscFunctionBegin; 779 /* Get ij structure of P[rstart:rend,:]^T */ 780 ierr = ISCreateStride(PETSC_COMM_SELF,m,prstart,1,&isrow);CHKERRQ(ierr); 781 ierr = ISCreateStride(PETSC_COMM_SELF,P->n,0,1,&iscol);CHKERRQ(ierr); 782 ierr = MatGetSubMatrices(P,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&psub);CHKERRQ(ierr); 783 ierr = ISDestroy(isrow);CHKERRQ(ierr); 784 ierr = ISDestroy(iscol);CHKERRQ(ierr); 785 P_sub = psub[0]; 786 ierr = MatGetSymbolicTranspose_SeqAIJ(P_sub,&pti,&ptj);CHKERRQ(ierr); 787 ierr = MatDestroyMatrices(1,&psub);CHKERRQ(ierr); 788 ptJ=ptj; 789 790 /* Allocate ci array, arrays for fill computation and */ 791 /* free space for accumulating nonzero column info */ 792 ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr); 793 ci[0] = 0; 794 795 ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr); 796 ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr); 797 ptasparserow = ptadenserow + an; 798 denserow = ptasparserow + an; 799 sparserow = denserow + pn; 800 801 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 802 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 803 ierr = GetMoreSpace((int)(fill*ai[am]/pm)*pn,&free_space); 804 current_space = free_space; 805 806 /* Determine symbolic info for each row of C: */ 807 for (i=0;i<pn;i++) { 808 ptnzi = pti[i+1] - pti[i]; 809 ptanzi = 0; 810 /* Determine symbolic row of PtA_reduced: */ 811 for (j=0;j<ptnzi;j++) { 812 arow = *ptJ++; 813 anzj = ai[arow+1] - ai[arow]; 814 ajj = aj + ai[arow]; 815 for (k=0;k<anzj;k++) { 816 if (!ptadenserow[ajj[k]]) { 817 ptadenserow[ajj[k]] = -1; 818 ptasparserow[ptanzi++] = ajj[k]; 819 } 820 } 821 } 822 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 823 ptaj = ptasparserow; 824 cnzi = 0; 825 for (j=0;j<ptanzi;j++) { 826 prow = *ptaj++; 827 pnzj = pi[prow+1] - pi[prow]; 828 pjj = pj + pi[prow]; 829 for (k=0;k<pnzj;k++) { 830 if (!denserow[pjj[k]]) { 831 denserow[pjj[k]] = -1; 832 sparserow[cnzi++] = pjj[k]; 833 } 834 } 835 } 836 837 /* sort sparserow */ 838 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 839 840 /* If free space is not available, make more free space */ 841 /* Double the amount of total space in the list */ 842 if (current_space->local_remaining<cnzi) { 843 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 844 } 845 846 /* Copy data into free space, and zero out denserows */ 847 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 848 current_space->array += cnzi; 849 current_space->local_used += cnzi; 850 current_space->local_remaining -= cnzi; 851 852 for (j=0;j<ptanzi;j++) { 853 ptadenserow[ptasparserow[j]] = 0; 854 } 855 for (j=0;j<cnzi;j++) { 856 denserow[sparserow[j]] = 0; 857 } 858 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 859 /* For now, we will recompute what is needed. */ 860 ci[i+1] = ci[i] + cnzi; 861 } 862 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 863 /* Allocate space for cj, initialize cj, and */ 864 /* destroy list of free space and other temporary array(s) */ 865 ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr); 866 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 867 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 868 869 /* Allocate space for ca */ 870 ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 871 ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 872 873 /* put together the new matrix */ 874 ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 875 876 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 877 /* Since these are PETSc arrays, change flags to free them as necessary. */ 878 c = (Mat_SeqAIJ *)((*C)->data); 879 c->freedata = PETSC_TRUE; 880 c->nonew = 0; 881 882 /* Clean up. */ 883 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 884 885 PetscFunctionReturn(0); 886 } 887 888 #undef __FUNCT__ 889 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ_ReducedPt" 890 PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,MatReuse scall,PetscReal fill,int prstart,int prend,Mat *C) 891 { 892 PetscErrorCode ierr; 893 PetscFunctionBegin; 894 if (A->m != prend-prstart) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->m,prend-prstart); 895 if (prend-prstart > P->m) SETERRQ2(PETSC_ERR_ARG_SIZ," prend-prstart %d cannot be larger than P->m %d",prend-prstart,P->m); 896 if (scall == MAT_INITIAL_MATRIX){ 897 ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(A,P,fill,prstart,prend,C);CHKERRQ(ierr); 898 } 899 900 ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(A,P,prstart,prend,*C);CHKERRQ(ierr); 901 902 PetscFunctionReturn(0); 903 } 904 905