1 /* 2 Defines projective product routines where A is a AIJ matrix 3 C = P^T * A * P 4 */ 5 6 #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 7 #include "src/mat/utils/freespace.h" 8 #include "src/mat/impls/aij/mpi/mpiaij.h" 9 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatPtAP" 12 /*@ 13 MatPtAP - Creates the matrix projection C = P^T * A * P 14 15 Collective on Mat 16 17 Input Parameters: 18 + A - the matrix 19 . P - the projection matrix 20 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 21 - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P)) 22 23 Output Parameters: 24 . C - the product matrix 25 26 Notes: 27 C will be created and must be destroyed by the user with MatDestroy(). 28 29 This routine is currently only implemented for pairs of SeqAIJ matrices and classes 30 which inherit from SeqAIJ. C will be of type MATSEQAIJ. 31 32 Level: intermediate 33 34 .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult() 35 @*/ 36 PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) { 37 PetscErrorCode ierr; 38 PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *); 39 PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *); 40 41 PetscFunctionBegin; 42 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 43 PetscValidType(A,1); 44 MatPreallocated(A); 45 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 46 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 47 PetscValidHeaderSpecific(P,MAT_COOKIE,2); 48 PetscValidType(P,2); 49 MatPreallocated(P); 50 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 51 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 52 PetscValidPointer(C,3); 53 54 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 55 56 if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 57 58 /* For now, we do not dispatch based on the type of A and P */ 59 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 60 fA = A->ops->ptap; 61 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name); 62 fP = P->ops->ptap; 63 if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name); 64 if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 65 66 ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 67 ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr); 68 ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 69 PetscFunctionReturn(0); 70 } 71 72 EXTERN PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,MatReuse,PetscReal,int,int,Mat*); 73 EXTERN PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,PetscReal,int,int,Mat*); 74 EXTERN PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,int,int,Mat); 75 76 #undef __FUNCT__ 77 #define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ" 78 PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 79 { 80 PetscErrorCode ierr; 81 82 PetscFunctionBegin; 83 if (scall == MAT_INITIAL_MATRIX){ 84 ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */ 85 } else if (scall == MAT_REUSE_MATRIX){ 86 ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr); 87 } else { 88 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall); 89 } 90 PetscFunctionReturn(0); 91 } 92 93 #undef __FUNCT__ 94 #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ" 95 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C) 96 { 97 PetscErrorCode ierr; 98 Mat P_seq,A_loc,C_seq; 99 int prstart,prend,m=P->m; 100 IS isrowp,iscolp; 101 102 PetscFunctionBegin; 103 /* get P_seq = submatrix of P by taking rows of P that equal to nonzero col of A */ 104 ierr = MatGetBrowsOfAcols(A,P,MAT_INITIAL_MATRIX,&isrowp,&iscolp,&prstart,&P_seq);CHKERRQ(ierr); 105 ierr = ISDestroy(iscolp);CHKERRQ(ierr); 106 107 /* get A_loc = submatrix of A by taking all local rows of A */ 108 ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,PETSC_NULL,&isrowp,&A_loc);CHKERRQ(ierr); 109 ierr = ISDestroy(isrowp);CHKERRQ(ierr); 110 111 /* compute C_seq = P_loc^T * A_loc * P_seq */ 112 prend = prstart + m; 113 ierr = MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(A_loc,P_seq,MAT_INITIAL_MATRIX,fill,prstart,prend,&C_seq);CHKERRQ(ierr); 114 ierr = MatDestroy(P_seq);CHKERRQ(ierr); 115 ierr = MatDestroy(A_loc);CHKERRQ(ierr); 116 117 /* add C_seq into mpi C */ 118 ierr = MatMerge_SeqsToMPI(A->comm,C_seq,P->n,P->n,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr); 119 /* ierr = MatDestroy(C_seq);CHKERRQ(ierr); */ 120 121 PetscFunctionReturn(0); 122 } 123 124 #undef __FUNCT__ 125 #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ" 126 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C) 127 { 128 PetscErrorCode ierr; 129 Mat P_seq,A_loc,C_seq; 130 int prstart,prend,m=P->m; 131 IS isrowp,iscolp; 132 Mat_Merge_SeqsToMPI *merge=(Mat_Merge_SeqsToMPI*)C->spptr; 133 134 PetscFunctionBegin; 135 /* get P_seq = submatrix of P by taking rows of P that equal to nonzero col of A */ 136 ierr = MatGetBrowsOfAcols(A,P,MAT_INITIAL_MATRIX,&isrowp,&iscolp,&prstart,&P_seq);CHKERRQ(ierr); 137 ierr = ISDestroy(iscolp);CHKERRQ(ierr); 138 139 /* get A_loc = submatrix of A by taking all local rows of A */ 140 ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,PETSC_NULL,&isrowp,&A_loc);CHKERRQ(ierr); 141 ierr = ISDestroy(isrowp);CHKERRQ(ierr); 142 143 /* compute C_seq = P_loc^T * A_loc * P_seq */ 144 prend = prstart + m; 145 C_seq = merge->C_seq; 146 ierr = MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(A_loc,P_seq,MAT_REUSE_MATRIX,1.0,prstart,prend,&C_seq);CHKERRQ(ierr); 147 ierr = MatDestroy(P_seq);CHKERRQ(ierr); 148 ierr = MatDestroy(A_loc);CHKERRQ(ierr); 149 150 /* add C_seq into mpi C */ 151 ierr = MatMerge_SeqsToMPI(A->comm,C_seq,P->n,P->n,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 152 153 PetscFunctionReturn(0); 154 } 155 156 #undef __FUNCT__ 157 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ" 158 PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 159 { 160 PetscErrorCode ierr; 161 PetscFunctionBegin; 162 if (scall == MAT_INITIAL_MATRIX){ 163 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 164 ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr); 165 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 166 } 167 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 168 ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr); 169 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 170 PetscFunctionReturn(0); 171 } 172 173 #undef __FUNCT__ 174 #define __FUNCT__ "MatPtAPSymbolic" 175 /* 176 MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P 177 178 Collective on Mat 179 180 Input Parameters: 181 + A - the matrix 182 - P - the projection matrix 183 184 Output Parameters: 185 . C - the (i,j) structure of the product matrix 186 187 Notes: 188 C will be created and must be destroyed by the user with MatDestroy(). 189 190 This routine is currently only implemented for pairs of SeqAIJ matrices and classes 191 which inherit from SeqAIJ. C will be of type MATSEQAIJ. The product is computed using 192 this (i,j) structure by calling MatPtAPNumeric(). 193 194 Level: intermediate 195 196 .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic() 197 */ 198 PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C) { 199 PetscErrorCode ierr; 200 PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*); 201 PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*); 202 203 PetscFunctionBegin; 204 205 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 206 PetscValidType(A,1); 207 MatPreallocated(A); 208 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 209 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 210 211 PetscValidHeaderSpecific(P,MAT_COOKIE,2); 212 PetscValidType(P,2); 213 MatPreallocated(P); 214 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 215 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 216 217 PetscValidPointer(C,3); 218 219 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 220 if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 221 222 /* For now, we do not dispatch based on the type of A and P */ 223 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 224 fA = A->ops->ptapsymbolic; 225 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name); 226 fP = P->ops->ptapsymbolic; 227 if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name); 228 if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 229 230 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 231 ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr); 232 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 233 234 PetscFunctionReturn(0); 235 } 236 237 typedef struct { 238 Mat symAP; 239 } Mat_PtAPstruct; 240 241 EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat); 242 243 #undef __FUNCT__ 244 #define __FUNCT__ "MatDestroy_SeqAIJ_PtAP" 245 PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A) 246 { 247 PetscErrorCode ierr; 248 Mat_PtAPstruct *ptap=(Mat_PtAPstruct*)A->spptr; 249 250 PetscFunctionBegin; 251 ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr); 252 ierr = PetscFree(ptap);CHKERRQ(ierr); 253 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 254 PetscFunctionReturn(0); 255 } 256 257 #undef __FUNCT__ 258 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ" 259 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C) { 260 PetscErrorCode ierr; 261 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 262 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 263 int *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 264 int *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 265 int an=A->N,am=A->M,pn=P->N,pm=P->M; 266 int i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 267 MatScalar *ca; 268 269 PetscFunctionBegin; 270 /* Get ij structure of P^T */ 271 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 272 ptJ=ptj; 273 274 /* Allocate ci array, arrays for fill computation and */ 275 /* free space for accumulating nonzero column info */ 276 ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr); 277 ci[0] = 0; 278 279 ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr); 280 ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr); 281 ptasparserow = ptadenserow + an; 282 denserow = ptasparserow + an; 283 sparserow = denserow + pn; 284 285 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 286 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 287 ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 288 current_space = free_space; 289 290 /* Determine symbolic info for each row of C: */ 291 for (i=0;i<pn;i++) { 292 ptnzi = pti[i+1] - pti[i]; 293 ptanzi = 0; 294 /* Determine symbolic row of PtA: */ 295 for (j=0;j<ptnzi;j++) { 296 arow = *ptJ++; 297 anzj = ai[arow+1] - ai[arow]; 298 ajj = aj + ai[arow]; 299 for (k=0;k<anzj;k++) { 300 if (!ptadenserow[ajj[k]]) { 301 ptadenserow[ajj[k]] = -1; 302 ptasparserow[ptanzi++] = ajj[k]; 303 } 304 } 305 } 306 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 307 ptaj = ptasparserow; 308 cnzi = 0; 309 for (j=0;j<ptanzi;j++) { 310 prow = *ptaj++; 311 pnzj = pi[prow+1] - pi[prow]; 312 pjj = pj + pi[prow]; 313 for (k=0;k<pnzj;k++) { 314 if (!denserow[pjj[k]]) { 315 denserow[pjj[k]] = -1; 316 sparserow[cnzi++] = pjj[k]; 317 } 318 } 319 } 320 321 /* sort sparserow */ 322 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 323 324 /* If free space is not available, make more free space */ 325 /* Double the amount of total space in the list */ 326 if (current_space->local_remaining<cnzi) { 327 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 328 } 329 330 /* Copy data into free space, and zero out denserows */ 331 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 332 current_space->array += cnzi; 333 current_space->local_used += cnzi; 334 current_space->local_remaining -= cnzi; 335 336 for (j=0;j<ptanzi;j++) { 337 ptadenserow[ptasparserow[j]] = 0; 338 } 339 for (j=0;j<cnzi;j++) { 340 denserow[sparserow[j]] = 0; 341 } 342 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 343 /* For now, we will recompute what is needed. */ 344 ci[i+1] = ci[i] + cnzi; 345 } 346 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 347 /* Allocate space for cj, initialize cj, and */ 348 /* destroy list of free space and other temporary array(s) */ 349 ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr); 350 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 351 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 352 353 /* Allocate space for ca */ 354 ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 355 ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 356 357 /* put together the new matrix */ 358 ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 359 360 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 361 /* Since these are PETSc arrays, change flags to free them as necessary. */ 362 c = (Mat_SeqAIJ *)((*C)->data); 363 c->freedata = PETSC_TRUE; 364 c->nonew = 0; 365 366 /* Clean up. */ 367 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 368 369 PetscFunctionReturn(0); 370 } 371 372 #include "src/mat/impls/maij/maij.h" 373 EXTERN_C_BEGIN 374 #undef __FUNCT__ 375 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 376 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C) { 377 /* This routine requires testing -- I don't think it works. */ 378 PetscErrorCode ierr; 379 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 380 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 381 Mat P=pp->AIJ; 382 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 383 int *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 384 int *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 385 int an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof; 386 int i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 387 MatScalar *ca; 388 389 PetscFunctionBegin; 390 /* Start timer */ 391 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 392 393 /* Get ij structure of P^T */ 394 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 395 396 /* Allocate ci array, arrays for fill computation and */ 397 /* free space for accumulating nonzero column info */ 398 ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr); 399 ci[0] = 0; 400 401 ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr); 402 ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr); 403 ptasparserow = ptadenserow + an; 404 denserow = ptasparserow + an; 405 sparserow = denserow + pn; 406 407 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 408 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 409 ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 410 current_space = free_space; 411 412 /* Determine symbolic info for each row of C: */ 413 for (i=0;i<pn/ppdof;i++) { 414 ptnzi = pti[i+1] - pti[i]; 415 ptanzi = 0; 416 ptJ = ptj + pti[i]; 417 for (dof=0;dof<ppdof;dof++) { 418 /* Determine symbolic row of PtA: */ 419 for (j=0;j<ptnzi;j++) { 420 arow = ptJ[j] + dof; 421 anzj = ai[arow+1] - ai[arow]; 422 ajj = aj + ai[arow]; 423 for (k=0;k<anzj;k++) { 424 if (!ptadenserow[ajj[k]]) { 425 ptadenserow[ajj[k]] = -1; 426 ptasparserow[ptanzi++] = ajj[k]; 427 } 428 } 429 } 430 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 431 ptaj = ptasparserow; 432 cnzi = 0; 433 for (j=0;j<ptanzi;j++) { 434 pdof = *ptaj%dof; 435 prow = (*ptaj++)/dof; 436 pnzj = pi[prow+1] - pi[prow]; 437 pjj = pj + pi[prow]; 438 for (k=0;k<pnzj;k++) { 439 if (!denserow[pjj[k]+pdof]) { 440 denserow[pjj[k]+pdof] = -1; 441 sparserow[cnzi++] = pjj[k]+pdof; 442 } 443 } 444 } 445 446 /* sort sparserow */ 447 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 448 449 /* If free space is not available, make more free space */ 450 /* Double the amount of total space in the list */ 451 if (current_space->local_remaining<cnzi) { 452 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 453 } 454 455 /* Copy data into free space, and zero out denserows */ 456 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 457 current_space->array += cnzi; 458 current_space->local_used += cnzi; 459 current_space->local_remaining -= cnzi; 460 461 for (j=0;j<ptanzi;j++) { 462 ptadenserow[ptasparserow[j]] = 0; 463 } 464 for (j=0;j<cnzi;j++) { 465 denserow[sparserow[j]] = 0; 466 } 467 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 468 /* For now, we will recompute what is needed. */ 469 ci[i+1+dof] = ci[i+dof] + cnzi; 470 } 471 } 472 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 473 /* Allocate space for cj, initialize cj, and */ 474 /* destroy list of free space and other temporary array(s) */ 475 ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr); 476 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 477 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 478 479 /* Allocate space for ca */ 480 ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 481 ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 482 483 /* put together the new matrix */ 484 ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 485 486 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 487 /* Since these are PETSc arrays, change flags to free them as necessary. */ 488 c = (Mat_SeqAIJ *)((*C)->data); 489 c->freedata = PETSC_TRUE; 490 c->nonew = 0; 491 492 /* Clean up. */ 493 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 494 495 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 496 PetscFunctionReturn(0); 497 } 498 EXTERN_C_END 499 500 #undef __FUNCT__ 501 #define __FUNCT__ "MatPtAPNumeric" 502 /* 503 MatPtAPNumeric - Computes the matrix projection C = P^T * A * P 504 505 Collective on Mat 506 507 Input Parameters: 508 + A - the matrix 509 - P - the projection matrix 510 511 Output Parameters: 512 . C - the product matrix 513 514 Notes: 515 C must have been created by calling MatPtAPSymbolic and must be destroyed by 516 the user using MatDeatroy(). 517 518 This routine is currently only implemented for pairs of AIJ matrices and classes 519 which inherit from AIJ. C will be of type MATAIJ. 520 521 Level: intermediate 522 523 .seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric() 524 */ 525 PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C) { 526 PetscErrorCode ierr; 527 PetscErrorCode (*fA)(Mat,Mat,Mat); 528 PetscErrorCode (*fP)(Mat,Mat,Mat); 529 530 PetscFunctionBegin; 531 532 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 533 PetscValidType(A,1); 534 MatPreallocated(A); 535 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 536 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 537 538 PetscValidHeaderSpecific(P,MAT_COOKIE,2); 539 PetscValidType(P,2); 540 MatPreallocated(P); 541 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 542 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 543 544 PetscValidHeaderSpecific(C,MAT_COOKIE,3); 545 PetscValidType(C,3); 546 MatPreallocated(C); 547 if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 548 if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 549 550 if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M); 551 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 552 if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 553 if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N); 554 555 /* For now, we do not dispatch based on the type of A and P */ 556 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 557 fA = A->ops->ptapnumeric; 558 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name); 559 fP = P->ops->ptapnumeric; 560 if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name); 561 if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 562 563 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 564 ierr = (*fA)(A,P,C);CHKERRQ(ierr); 565 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 566 567 PetscFunctionReturn(0); 568 } 569 570 #undef __FUNCT__ 571 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ" 572 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) 573 { 574 PetscErrorCode ierr; 575 int flops=0; 576 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 577 Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 578 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 579 int *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 580 int *ci=c->i,*cj=c->j,*cjj; 581 int am=A->M,cn=C->N,cm=C->M; 582 int i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 583 MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 584 585 PetscFunctionBegin; 586 /* Allocate temporary array for storage of one row of A*P */ 587 ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(int)),&apa);CHKERRQ(ierr); 588 ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr); 589 590 apj = (int *)(apa + cn); 591 apjdense = apj + cn; 592 593 /* Clear old values in C */ 594 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 595 596 for (i=0;i<am;i++) { 597 /* Form sparse row of A*P */ 598 anzi = ai[i+1] - ai[i]; 599 apnzj = 0; 600 for (j=0;j<anzi;j++) { 601 prow = *aj++; 602 pnzj = pi[prow+1] - pi[prow]; 603 pjj = pj + pi[prow]; 604 paj = pa + pi[prow]; 605 for (k=0;k<pnzj;k++) { 606 if (!apjdense[pjj[k]]) { 607 apjdense[pjj[k]] = -1; 608 apj[apnzj++] = pjj[k]; 609 } 610 apa[pjj[k]] += (*aa)*paj[k]; 611 } 612 flops += 2*pnzj; 613 aa++; 614 } 615 616 /* Sort the j index array for quick sparse axpy. */ 617 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 618 619 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 620 pnzi = pi[i+1] - pi[i]; 621 for (j=0;j<pnzi;j++) { 622 nextap = 0; 623 crow = *pJ++; 624 cjj = cj + ci[crow]; 625 caj = ca + ci[crow]; 626 /* Perform sparse axpy operation. Note cjj includes apj. */ 627 for (k=0;nextap<apnzj;k++) { 628 if (cjj[k]==apj[nextap]) { 629 caj[k] += (*pA)*apa[apj[nextap++]]; 630 } 631 } 632 flops += 2*apnzj; 633 pA++; 634 } 635 636 /* Zero the current row info for A*P */ 637 for (j=0;j<apnzj;j++) { 638 apa[apj[j]] = 0.; 639 apjdense[apj[j]] = 0; 640 } 641 } 642 643 /* Assemble the final matrix and clean up */ 644 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 645 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 646 ierr = PetscFree(apa);CHKERRQ(ierr); 647 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 648 649 PetscFunctionReturn(0); 650 } 651 652 /* Compute C = P[rstart:rend,:]^T * A * P of seqaij matrices - used by MatPtAP_MPIAIJ_MPIAIJ() */ 653 654 #undef __FUNCT__ 655 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt" 656 /*@ 657 MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt - Creates C = P[rstart:rend,:]^T * A * P of seqaij matrices, 658 used by MatPtAP_MPIAIJ_MPIAIJ() 659 660 Collective on Mat 661 662 Input Parameters: 663 + A - the matrix in seqaij format 664 . P - the projection matrix in seqaij format 665 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 666 . fill - expected fill (not being used when scall==MAT_REUSE_MATRIX) 667 . prstart, prend - the starting and ending-1-th row of the matrix P to be used for transpose 668 669 Output Parameters: 670 . C - the product matrix in seqaij format 671 672 Level: developer 673 674 .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult() 675 @*/ 676 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,int prstart,int prend,Mat C) 677 { 678 PetscErrorCode ierr; 679 int flops=0; 680 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 681 Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 682 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 683 int *ai=a->i,*aj=a->j,*apj,*apjdense; 684 int *pi=p->i,*pj=p->j,*pJ=p->j+pi[prstart],*pjj; 685 int *ci=c->i,*cj=c->j,*cjj; 686 int am=A->M,cn=C->N,cm=C->M; 687 int i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 688 MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 689 690 PetscFunctionBegin; 691 pA=p->a+pi[prstart]; 692 /* Allocate temporary array for storage of one row of A*P */ 693 ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(int)),&apa);CHKERRQ(ierr); 694 ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr); 695 696 apj = (int *)(apa + cn); 697 apjdense = apj + cn; 698 699 /* Clear old values in C */ 700 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 701 702 for (i=0;i<am;i++) { 703 /* Form sparse row of A*P */ 704 anzi = ai[i+1] - ai[i]; 705 apnzj = 0; 706 for (j=0;j<anzi;j++) { 707 prow = *aj++; 708 pnzj = pi[prow+1] - pi[prow]; 709 pjj = pj + pi[prow]; 710 paj = pa + pi[prow]; 711 for (k=0;k<pnzj;k++) { 712 if (!apjdense[pjj[k]]) { 713 apjdense[pjj[k]] = -1; 714 apj[apnzj++] = pjj[k]; 715 } 716 apa[pjj[k]] += (*aa)*paj[k]; 717 } 718 flops += 2*pnzj; 719 aa++; 720 } 721 722 /* Sort the j index array for quick sparse axpy. */ 723 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 724 725 /* Compute P[[prstart:prend,:]^T*A*P using outer product (P^T)[:,j+prstart]*(A*P)[j,:]. */ 726 pnzi = pi[i+1+prstart] - pi[i+prstart]; 727 for (j=0;j<pnzi;j++) { 728 nextap = 0; 729 crow = *pJ++; 730 cjj = cj + ci[crow]; 731 caj = ca + ci[crow]; 732 /* Perform sparse axpy operation. Note cjj includes apj. */ 733 for (k=0;nextap<apnzj;k++) { 734 if (cjj[k]==apj[nextap]) { 735 caj[k] += (*pA)*apa[apj[nextap++]]; 736 } 737 } 738 flops += 2*apnzj; 739 pA++; 740 } 741 742 /* Zero the current row info for A*P */ 743 for (j=0;j<apnzj;j++) { 744 apa[apj[j]] = 0.; 745 apjdense[apj[j]] = 0; 746 } 747 } 748 749 /* Assemble the final matrix and clean up */ 750 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 751 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 752 ierr = PetscFree(apa);CHKERRQ(ierr); 753 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 754 755 PetscFunctionReturn(0); 756 } 757 758 #undef __FUNCT__ 759 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt" 760 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,PetscReal fill,int prstart,int prend,Mat *C) { 761 PetscErrorCode ierr; 762 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 763 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 764 int *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 765 int *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 766 int an=A->N,am=A->M,pn=P->N,pm=P->M; 767 int i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 768 MatScalar *ca; 769 Mat *psub,P_sub; 770 IS isrow,iscol; 771 int m = prend - prstart; 772 773 PetscFunctionBegin; 774 /* Get ij structure of P[rstart:rend,:]^T */ 775 ierr = ISCreateStride(PETSC_COMM_SELF,m,prstart,1,&isrow);CHKERRQ(ierr); 776 ierr = ISCreateStride(PETSC_COMM_SELF,P->n,0,1,&iscol);CHKERRQ(ierr); 777 ierr = MatGetSubMatrices(P,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&psub);CHKERRQ(ierr); 778 ierr = ISDestroy(isrow);CHKERRQ(ierr); 779 ierr = ISDestroy(iscol);CHKERRQ(ierr); 780 P_sub = psub[0]; 781 ierr = MatGetSymbolicTranspose_SeqAIJ(P_sub,&pti,&ptj);CHKERRQ(ierr); 782 ierr = MatDestroyMatrices(1,&psub);CHKERRQ(ierr); 783 ptJ=ptj; 784 785 /* Allocate ci array, arrays for fill computation and */ 786 /* free space for accumulating nonzero column info */ 787 ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr); 788 ci[0] = 0; 789 790 ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr); 791 ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr); 792 ptasparserow = ptadenserow + an; 793 denserow = ptasparserow + an; 794 sparserow = denserow + pn; 795 796 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 797 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 798 ierr = GetMoreSpace((int)(fill*ai[am]/pm)*pn,&free_space); 799 current_space = free_space; 800 801 /* Determine symbolic info for each row of C: */ 802 for (i=0;i<pn;i++) { 803 ptnzi = pti[i+1] - pti[i]; 804 ptanzi = 0; 805 /* Determine symbolic row of PtA_reduced: */ 806 for (j=0;j<ptnzi;j++) { 807 arow = *ptJ++; 808 anzj = ai[arow+1] - ai[arow]; 809 ajj = aj + ai[arow]; 810 for (k=0;k<anzj;k++) { 811 if (!ptadenserow[ajj[k]]) { 812 ptadenserow[ajj[k]] = -1; 813 ptasparserow[ptanzi++] = ajj[k]; 814 } 815 } 816 } 817 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 818 ptaj = ptasparserow; 819 cnzi = 0; 820 for (j=0;j<ptanzi;j++) { 821 prow = *ptaj++; 822 pnzj = pi[prow+1] - pi[prow]; 823 pjj = pj + pi[prow]; 824 for (k=0;k<pnzj;k++) { 825 if (!denserow[pjj[k]]) { 826 denserow[pjj[k]] = -1; 827 sparserow[cnzi++] = pjj[k]; 828 } 829 } 830 } 831 832 /* sort sparserow */ 833 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 834 835 /* If free space is not available, make more free space */ 836 /* Double the amount of total space in the list */ 837 if (current_space->local_remaining<cnzi) { 838 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 839 } 840 841 /* Copy data into free space, and zero out denserows */ 842 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 843 current_space->array += cnzi; 844 current_space->local_used += cnzi; 845 current_space->local_remaining -= cnzi; 846 847 for (j=0;j<ptanzi;j++) { 848 ptadenserow[ptasparserow[j]] = 0; 849 } 850 for (j=0;j<cnzi;j++) { 851 denserow[sparserow[j]] = 0; 852 } 853 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 854 /* For now, we will recompute what is needed. */ 855 ci[i+1] = ci[i] + cnzi; 856 } 857 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 858 /* Allocate space for cj, initialize cj, and */ 859 /* destroy list of free space and other temporary array(s) */ 860 ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr); 861 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 862 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 863 864 /* Allocate space for ca */ 865 ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 866 ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 867 868 /* put together the new matrix */ 869 ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 870 871 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 872 /* Since these are PETSc arrays, change flags to free them as necessary. */ 873 c = (Mat_SeqAIJ *)((*C)->data); 874 c->freedata = PETSC_TRUE; 875 c->nonew = 0; 876 877 /* Clean up. */ 878 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 879 880 PetscFunctionReturn(0); 881 } 882 883 #undef __FUNCT__ 884 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ_ReducedPt" 885 PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,MatReuse scall,PetscReal fill,int prstart,int prend,Mat *C) 886 { 887 PetscErrorCode ierr; 888 PetscFunctionBegin; 889 if (A->m != prend-prstart) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->m,prend-prstart); 890 if (prend-prstart > P->m) SETERRQ2(PETSC_ERR_ARG_SIZ," prend-prstart %d cannot be larger than P->m %d",prend-prstart,P->m); 891 if (scall == MAT_INITIAL_MATRIX){ 892 ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(A,P,fill,prstart,prend,C);CHKERRQ(ierr); 893 } 894 895 ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(A,P,prstart,prend,*C);CHKERRQ(ierr); 896 897 PetscFunctionReturn(0); 898 } 899 900