1 /* 2 Defines projective product routines where A is a AIJ matrix 3 C = P^T * A * P 4 */ 5 6 #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 7 #include "src/mat/utils/freespace.h" 8 #include "src/mat/impls/aij/mpi/mpiaij.h" 9 #include "petscbt.h" 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatPtAP" 13 /*@ 14 MatPtAP - Creates the matrix projection C = P^T * A * P 15 16 Collective on Mat 17 18 Input Parameters: 19 + A - the matrix 20 . P - the projection matrix 21 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 22 - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P)) 23 24 Output Parameters: 25 . C - the product matrix 26 27 Notes: 28 C will be created and must be destroyed by the user with MatDestroy(). 29 30 This routine is currently only implemented for pairs of SeqAIJ matrices and classes 31 which inherit from SeqAIJ. C will be of type MATSEQAIJ. 32 33 Level: intermediate 34 35 .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult() 36 @*/ 37 PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 38 { 39 PetscErrorCode ierr; 40 PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *); 41 PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *); 42 43 PetscFunctionBegin; 44 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 45 PetscValidType(A,1); 46 MatPreallocated(A); 47 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 48 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 49 PetscValidHeaderSpecific(P,MAT_COOKIE,2); 50 PetscValidType(P,2); 51 MatPreallocated(P); 52 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 53 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 54 PetscValidPointer(C,3); 55 56 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 57 58 if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 59 60 /* For now, we do not dispatch based on the type of A and P */ 61 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 62 fA = A->ops->ptap; 63 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name); 64 fP = P->ops->ptap; 65 if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name); 66 if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 67 68 ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 69 ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr); 70 ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 71 PetscFunctionReturn(0); 72 } 73 74 EXTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,PetscReal,Mat*); 75 EXTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,Mat C); 76 77 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 78 #undef __FUNCT__ 79 #define __FUNCT__ "MatDestroy_MPIAIJ_PtAP" 80 PetscErrorCode MatDestroy_MPIAIJ_PtAP(Mat A) 81 { 82 PetscErrorCode ierr; 83 Mat_MatMatMultMPI *ptap; 84 PetscObjectContainer container; 85 86 PetscFunctionBegin; 87 ierr = PetscObjectQuery((PetscObject)A,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr); 88 if (container) { 89 ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr); 90 ierr = MatDestroy(ptap->B_loc);CHKERRQ(ierr); 91 ierr = MatDestroy(ptap->B_oth);CHKERRQ(ierr); 92 ierr = ISDestroy(ptap->isrowb);CHKERRQ(ierr); 93 ierr = ISDestroy(ptap->iscolb);CHKERRQ(ierr); 94 95 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 96 ierr = PetscObjectCompose((PetscObject)A,"MatPtAPMPI",0);CHKERRQ(ierr); 97 } 98 ierr = PetscFree(ptap);CHKERRQ(ierr); 99 100 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 101 PetscFunctionReturn(0); 102 } 103 104 #undef __FUNCT__ 105 #define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ" 106 PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 107 { 108 PetscErrorCode ierr; 109 Mat_MatMatMultMPI *ptap; 110 PetscObjectContainer container; 111 112 PetscFunctionBegin; 113 if (scall == MAT_INITIAL_MATRIX){ 114 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 115 ierr = PetscNew(Mat_MatMatMultMPI,&ptap);CHKERRQ(ierr); 116 117 /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */ 118 ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr); 119 120 /* get P_loc by taking all local rows of P */ 121 ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr); 122 123 /* attach the supporting struct to P for reuse */ 124 P->ops->destroy = MatDestroy_MPIAIJ_PtAP; 125 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 126 ierr = PetscObjectContainerSetPointer(container,ptap);CHKERRQ(ierr); 127 ierr = PetscObjectCompose((PetscObject)P,"MatPtAPMPI",(PetscObject)container);CHKERRQ(ierr); 128 129 /* now, compute symbolic local P^T*A*P */ 130 ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */ 131 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 132 } else if (scall == MAT_REUSE_MATRIX){ 133 ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr); 134 if (container) { 135 ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr); 136 } else { 137 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 138 } 139 140 /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */ 141 ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr); 142 143 /* get P_loc by taking all local rows of P */ 144 ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr); 145 146 } else { 147 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall); 148 } 149 /* now, compute numeric local P^T*A*P */ 150 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 151 ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr); 152 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 153 154 PetscFunctionReturn(0); 155 } 156 157 #undef __FUNCT__ 158 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ" 159 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C) 160 { 161 PetscErrorCode ierr; 162 Mat C_seq; 163 Mat_MatMatMultMPI *ptap; 164 PetscObjectContainer container; 165 166 PetscFunctionBegin; 167 168 ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr); 169 if (container) { 170 ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr); 171 } else { 172 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 173 } 174 /* compute C_seq = P_loc^T * A_loc * P_seq */ 175 ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(A,ptap->B_loc,ptap->B_oth,ptap->brstart,fill,&C_seq);CHKERRQ(ierr); 176 177 /* add C_seq into mpi C */ 178 ierr = MatMerge_SeqsToMPISymbolic(A->comm,C_seq,P->n,P->n,C);CHKERRQ(ierr); 179 180 PetscFunctionReturn(0); 181 } 182 183 #undef __FUNCT__ 184 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ" 185 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C) 186 { 187 PetscErrorCode ierr; 188 Mat_Merge_SeqsToMPI *merge; 189 Mat_MatMatMultMPI *ptap; 190 PetscObjectContainer cont_merge,cont_ptap; 191 192 PetscFunctionBegin; 193 ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&cont_merge);CHKERRQ(ierr); 194 if (cont_merge) { 195 ierr = PetscObjectContainerGetPointer(cont_merge,(void **)&merge);CHKERRQ(ierr); 196 } else { 197 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix C does not posses an object container"); 198 } 199 ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&cont_ptap);CHKERRQ(ierr); 200 if (cont_ptap) { 201 ierr = PetscObjectContainerGetPointer(cont_ptap,(void **)&ptap);CHKERRQ(ierr); 202 } else { 203 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 204 } 205 ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(A,ptap->B_loc,ptap->B_oth,ptap->brstart,merge->C_seq);CHKERRQ(ierr); 206 ierr = MatMerge_SeqsToMPINumeric(merge->C_seq,C);CHKERRQ(ierr); 207 PetscFunctionReturn(0); 208 } 209 210 #undef __FUNCT__ 211 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ" 212 PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 213 { 214 PetscErrorCode ierr; 215 216 PetscFunctionBegin; 217 if (scall == MAT_INITIAL_MATRIX){ 218 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 219 ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr); 220 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 221 } 222 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 223 ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr); 224 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 225 PetscFunctionReturn(0); 226 } 227 228 /* 229 MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P 230 231 Collective on Mat 232 233 Input Parameters: 234 + A - the matrix 235 - P - the projection matrix 236 237 Output Parameters: 238 . C - the (i,j) structure of the product matrix 239 240 Notes: 241 C will be created and must be destroyed by the user with MatDestroy(). 242 243 This routine is currently only implemented for pairs of SeqAIJ matrices and classes 244 which inherit from SeqAIJ. C will be of type MATSEQAIJ. The product is computed using 245 this (i,j) structure by calling MatPtAPNumeric(). 246 247 Level: intermediate 248 249 .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic() 250 */ 251 #undef __FUNCT__ 252 #define __FUNCT__ "MatPtAPSymbolic" 253 PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C) 254 { 255 PetscErrorCode ierr; 256 PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*); 257 PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*); 258 259 PetscFunctionBegin; 260 261 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 262 PetscValidType(A,1); 263 MatPreallocated(A); 264 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 265 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 266 267 PetscValidHeaderSpecific(P,MAT_COOKIE,2); 268 PetscValidType(P,2); 269 MatPreallocated(P); 270 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 271 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 272 273 PetscValidPointer(C,3); 274 275 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 276 if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 277 278 /* For now, we do not dispatch based on the type of A and P */ 279 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 280 fA = A->ops->ptapsymbolic; 281 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name); 282 fP = P->ops->ptapsymbolic; 283 if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name); 284 if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 285 286 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 287 ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr); 288 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 289 290 PetscFunctionReturn(0); 291 } 292 293 typedef struct { 294 Mat symAP; 295 } Mat_PtAPstruct; 296 297 EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat); 298 299 #undef __FUNCT__ 300 #define __FUNCT__ "MatDestroy_SeqAIJ_PtAP" 301 PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A) 302 { 303 PetscErrorCode ierr; 304 Mat_PtAPstruct *ptap=(Mat_PtAPstruct*)A->spptr; 305 306 PetscFunctionBegin; 307 ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr); 308 ierr = PetscFree(ptap);CHKERRQ(ierr); 309 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 310 PetscFunctionReturn(0); 311 } 312 313 #undef __FUNCT__ 314 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ" 315 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C) 316 { 317 PetscErrorCode ierr; 318 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 319 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*p = (Mat_SeqAIJ*)P->data,*c; 320 PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 321 PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*ptaj; 322 PetscInt an=A->N,am=A->M,pn=P->N,pm=P->M; 323 PetscInt i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk; 324 MatScalar *ca; 325 PetscBT lnkbt; 326 327 PetscFunctionBegin; 328 /* Get ij structure of P^T */ 329 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 330 ptJ=ptj; 331 332 /* Allocate ci array, arrays for fill computation and */ 333 /* free space for accumulating nonzero column info */ 334 ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 335 ci[0] = 0; 336 337 ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 338 ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 339 ptasparserow = ptadenserow + an; 340 341 /* create and initialize a linked list */ 342 nlnk = pn+1; 343 ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 344 345 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 346 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 347 ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 348 current_space = free_space; 349 350 /* Determine symbolic info for each row of C: */ 351 for (i=0;i<pn;i++) { 352 ptnzi = pti[i+1] - pti[i]; 353 ptanzi = 0; 354 /* Determine symbolic row of PtA: */ 355 for (j=0;j<ptnzi;j++) { 356 arow = *ptJ++; 357 anzj = ai[arow+1] - ai[arow]; 358 ajj = aj + ai[arow]; 359 for (k=0;k<anzj;k++) { 360 if (!ptadenserow[ajj[k]]) { 361 ptadenserow[ajj[k]] = -1; 362 ptasparserow[ptanzi++] = ajj[k]; 363 } 364 } 365 } 366 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 367 ptaj = ptasparserow; 368 cnzi = 0; 369 for (j=0;j<ptanzi;j++) { 370 prow = *ptaj++; 371 pnzj = pi[prow+1] - pi[prow]; 372 pjj = pj + pi[prow]; 373 /* add non-zero cols of P into the sorted linked list lnk */ 374 ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 375 cnzi += nlnk; 376 } 377 378 /* If free space is not available, make more free space */ 379 /* Double the amount of total space in the list */ 380 if (current_space->local_remaining<cnzi) { 381 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 382 } 383 384 /* Copy data into free space, and zero out denserows */ 385 ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 386 current_space->array += cnzi; 387 current_space->local_used += cnzi; 388 current_space->local_remaining -= cnzi; 389 390 for (j=0;j<ptanzi;j++) { 391 ptadenserow[ptasparserow[j]] = 0; 392 } 393 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 394 /* For now, we will recompute what is needed. */ 395 ci[i+1] = ci[i] + cnzi; 396 } 397 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 398 /* Allocate space for cj, initialize cj, and */ 399 /* destroy list of free space and other temporary array(s) */ 400 ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 401 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 402 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 403 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 404 405 /* Allocate space for ca */ 406 ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 407 ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 408 409 /* put together the new matrix */ 410 ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 411 412 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 413 /* Since these are PETSc arrays, change flags to free them as necessary. */ 414 c = (Mat_SeqAIJ *)((*C)->data); 415 c->freedata = PETSC_TRUE; 416 c->nonew = 0; 417 418 /* Clean up. */ 419 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 420 421 PetscFunctionReturn(0); 422 } 423 424 #include "src/mat/impls/maij/maij.h" 425 EXTERN_C_BEGIN 426 #undef __FUNCT__ 427 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 428 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C) 429 { 430 /* This routine requires testing -- I don't think it works. */ 431 PetscErrorCode ierr; 432 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 433 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 434 Mat P=pp->AIJ; 435 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 436 PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 437 PetscInt *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 438 PetscInt an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof; 439 PetscInt i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 440 MatScalar *ca; 441 442 PetscFunctionBegin; 443 /* Start timer */ 444 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 445 446 /* Get ij structure of P^T */ 447 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 448 449 /* Allocate ci array, arrays for fill computation and */ 450 /* free space for accumulating nonzero column info */ 451 ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 452 ci[0] = 0; 453 454 ierr = PetscMalloc((2*pn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 455 ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 456 ptasparserow = ptadenserow + an; 457 denserow = ptasparserow + an; 458 sparserow = denserow + pn; 459 460 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 461 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 462 ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 463 current_space = free_space; 464 465 /* Determine symbolic info for each row of C: */ 466 for (i=0;i<pn/ppdof;i++) { 467 ptnzi = pti[i+1] - pti[i]; 468 ptanzi = 0; 469 ptJ = ptj + pti[i]; 470 for (dof=0;dof<ppdof;dof++) { 471 /* Determine symbolic row of PtA: */ 472 for (j=0;j<ptnzi;j++) { 473 arow = ptJ[j] + dof; 474 anzj = ai[arow+1] - ai[arow]; 475 ajj = aj + ai[arow]; 476 for (k=0;k<anzj;k++) { 477 if (!ptadenserow[ajj[k]]) { 478 ptadenserow[ajj[k]] = -1; 479 ptasparserow[ptanzi++] = ajj[k]; 480 } 481 } 482 } 483 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 484 ptaj = ptasparserow; 485 cnzi = 0; 486 for (j=0;j<ptanzi;j++) { 487 pdof = *ptaj%dof; 488 prow = (*ptaj++)/dof; 489 pnzj = pi[prow+1] - pi[prow]; 490 pjj = pj + pi[prow]; 491 for (k=0;k<pnzj;k++) { 492 if (!denserow[pjj[k]+pdof]) { 493 denserow[pjj[k]+pdof] = -1; 494 sparserow[cnzi++] = pjj[k]+pdof; 495 } 496 } 497 } 498 499 /* sort sparserow */ 500 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 501 502 /* If free space is not available, make more free space */ 503 /* Double the amount of total space in the list */ 504 if (current_space->local_remaining<cnzi) { 505 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 506 } 507 508 /* Copy data into free space, and zero out denserows */ 509 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 510 current_space->array += cnzi; 511 current_space->local_used += cnzi; 512 current_space->local_remaining -= cnzi; 513 514 for (j=0;j<ptanzi;j++) { 515 ptadenserow[ptasparserow[j]] = 0; 516 } 517 for (j=0;j<cnzi;j++) { 518 denserow[sparserow[j]] = 0; 519 } 520 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 521 /* For now, we will recompute what is needed. */ 522 ci[i+1+dof] = ci[i+dof] + cnzi; 523 } 524 } 525 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 526 /* Allocate space for cj, initialize cj, and */ 527 /* destroy list of free space and other temporary array(s) */ 528 ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 529 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 530 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 531 532 /* Allocate space for ca */ 533 ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 534 ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 535 536 /* put together the new matrix */ 537 ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 538 539 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 540 /* Since these are PETSc arrays, change flags to free them as necessary. */ 541 c = (Mat_SeqAIJ *)((*C)->data); 542 c->freedata = PETSC_TRUE; 543 c->nonew = 0; 544 545 /* Clean up. */ 546 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 547 548 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 549 PetscFunctionReturn(0); 550 } 551 EXTERN_C_END 552 553 /* 554 MatPtAPNumeric - Computes the matrix projection C = P^T * A * P 555 556 Collective on Mat 557 558 Input Parameters: 559 + A - the matrix 560 - P - the projection matrix 561 562 Output Parameters: 563 . C - the product matrix 564 565 Notes: 566 C must have been created by calling MatPtAPSymbolic and must be destroyed by 567 the user using MatDeatroy(). 568 569 This routine is currently only implemented for pairs of AIJ matrices and classes 570 which inherit from AIJ. C will be of type MATAIJ. 571 572 Level: intermediate 573 574 .seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric() 575 */ 576 #undef __FUNCT__ 577 #define __FUNCT__ "MatPtAPNumeric" 578 PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C) 579 { 580 PetscErrorCode ierr; 581 PetscErrorCode (*fA)(Mat,Mat,Mat); 582 PetscErrorCode (*fP)(Mat,Mat,Mat); 583 584 PetscFunctionBegin; 585 586 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 587 PetscValidType(A,1); 588 MatPreallocated(A); 589 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 590 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 591 592 PetscValidHeaderSpecific(P,MAT_COOKIE,2); 593 PetscValidType(P,2); 594 MatPreallocated(P); 595 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 596 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 597 598 PetscValidHeaderSpecific(C,MAT_COOKIE,3); 599 PetscValidType(C,3); 600 MatPreallocated(C); 601 if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 602 if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 603 604 if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M); 605 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 606 if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 607 if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N); 608 609 /* For now, we do not dispatch based on the type of A and P */ 610 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 611 fA = A->ops->ptapnumeric; 612 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name); 613 fP = P->ops->ptapnumeric; 614 if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name); 615 if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 616 617 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 618 ierr = (*fA)(A,P,C);CHKERRQ(ierr); 619 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 620 621 PetscFunctionReturn(0); 622 } 623 624 #undef __FUNCT__ 625 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ" 626 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) 627 { 628 PetscErrorCode ierr; 629 PetscInt flops=0; 630 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 631 Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 632 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 633 PetscInt *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 634 PetscInt *ci=c->i,*cj=c->j,*cjj; 635 PetscInt am=A->M,cn=C->N,cm=C->M; 636 PetscInt i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 637 MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 638 639 PetscFunctionBegin; 640 /* Allocate temporary array for storage of one row of A*P */ 641 ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 642 ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 643 644 apj = (PetscInt *)(apa + cn); 645 apjdense = apj + cn; 646 647 /* Clear old values in C */ 648 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 649 650 for (i=0;i<am;i++) { 651 /* Form sparse row of A*P */ 652 anzi = ai[i+1] - ai[i]; 653 apnzj = 0; 654 for (j=0;j<anzi;j++) { 655 prow = *aj++; 656 pnzj = pi[prow+1] - pi[prow]; 657 pjj = pj + pi[prow]; 658 paj = pa + pi[prow]; 659 for (k=0;k<pnzj;k++) { 660 if (!apjdense[pjj[k]]) { 661 apjdense[pjj[k]] = -1; 662 apj[apnzj++] = pjj[k]; 663 } 664 apa[pjj[k]] += (*aa)*paj[k]; 665 } 666 flops += 2*pnzj; 667 aa++; 668 } 669 670 /* Sort the j index array for quick sparse axpy. */ 671 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 672 673 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 674 pnzi = pi[i+1] - pi[i]; 675 for (j=0;j<pnzi;j++) { 676 nextap = 0; 677 crow = *pJ++; 678 cjj = cj + ci[crow]; 679 caj = ca + ci[crow]; 680 /* Perform sparse axpy operation. Note cjj includes apj. */ 681 for (k=0;nextap<apnzj;k++) { 682 if (cjj[k]==apj[nextap]) { 683 caj[k] += (*pA)*apa[apj[nextap++]]; 684 } 685 } 686 flops += 2*apnzj; 687 pA++; 688 } 689 690 /* Zero the current row info for A*P */ 691 for (j=0;j<apnzj;j++) { 692 apa[apj[j]] = 0.; 693 apjdense[apj[j]] = 0; 694 } 695 } 696 697 /* Assemble the final matrix and clean up */ 698 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 699 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 700 ierr = PetscFree(apa);CHKERRQ(ierr); 701 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 702 703 PetscFunctionReturn(0); 704 } 705 706 /* Compute local C = P_loc^T * A * P - used by MatPtAP_MPIAIJ_MPIAIJ() */ 707 static PetscEvent logkey_matptapnumeric_local = 0; 708 #undef __FUNCT__ 709 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ_Local" 710 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,Mat C) 711 { 712 PetscErrorCode ierr; 713 PetscInt flops=0; 714 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 715 Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data; 716 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 717 Mat_SeqAIJ *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data; 718 PetscInt *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j; 719 PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col; 720 PetscInt *pJ=pj_loc,*pjj; 721 PetscInt *ci=c->i,*cj=c->j,*cjj; 722 PetscInt am=A->m,cn=C->N,cm=C->M; 723 PetscInt i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 724 MatScalar *ada=ad->a,*aoa=ao->a,*apa,*paj,*ca=c->a,*caj; 725 MatScalar *pa_loc=p_loc->a,*pA=pa_loc,*pa_oth=p_oth->a; 726 727 PetscFunctionBegin; 728 if (!logkey_matptapnumeric_local) { 729 ierr = PetscLogEventRegister(&logkey_matptapnumeric_local,"MatPtAPNumeric_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);CHKERRQ(ierr); 730 } 731 ierr = PetscLogEventBegin(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr); 732 733 /* Allocate temporary array for storage of one row of A*P */ 734 ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 735 ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 736 apj = (PetscInt *)(apa + cn); 737 apjdense = apj + cn; 738 739 /* Clear old values in C */ 740 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 741 742 for (i=0;i<am;i++) { 743 /* Form i-th sparse row of A*P */ 744 apnzj = 0; 745 /* diagonal portion of A */ 746 anzi = adi[i+1] - adi[i]; 747 for (j=0;j<anzi;j++) { 748 prow = *adj; 749 adj++; 750 pnzj = pi_loc[prow+1] - pi_loc[prow]; 751 pjj = pj_loc + pi_loc[prow]; 752 paj = pa_loc + pi_loc[prow]; 753 for (k=0;k<pnzj;k++) { 754 if (!apjdense[pjj[k]]) { 755 apjdense[pjj[k]] = -1; 756 apj[apnzj++] = pjj[k]; 757 } 758 apa[pjj[k]] += (*ada)*paj[k]; 759 } 760 flops += 2*pnzj; 761 ada++; 762 } 763 /* off-diagonal portion of A */ 764 anzi = aoi[i+1] - aoi[i]; 765 for (j=0;j<anzi;j++) { 766 col = a->garray[*aoj]; 767 if (col < cstart){ 768 prow = *aoj; 769 } else if (col >= cend){ 770 prow = *aoj; 771 } else { 772 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map"); 773 } 774 aoj++; 775 pnzj = pi_oth[prow+1] - pi_oth[prow]; 776 pjj = pj_oth + pi_oth[prow]; 777 paj = pa_oth + pi_oth[prow]; 778 for (k=0;k<pnzj;k++) { 779 if (!apjdense[pjj[k]]) { 780 apjdense[pjj[k]] = -1; 781 apj[apnzj++] = pjj[k]; 782 } 783 apa[pjj[k]] += (*aoa)*paj[k]; 784 } 785 flops += 2*pnzj; 786 aoa++; 787 } 788 /* Sort the j index array for quick sparse axpy. */ 789 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 790 791 /* Compute P_loc[i,:]^T*AP[i,:] using outer product */ 792 pnzi = pi_loc[i+1] - pi_loc[i]; 793 for (j=0;j<pnzi;j++) { 794 nextap = 0; 795 crow = *pJ++; 796 cjj = cj + ci[crow]; 797 caj = ca + ci[crow]; 798 /* Perform sparse axpy operation. Note cjj includes apj. */ 799 for (k=0;nextap<apnzj;k++) { 800 if (cjj[k]==apj[nextap]) { 801 caj[k] += (*pA)*apa[apj[nextap++]]; 802 } 803 } 804 flops += 2*apnzj; 805 pA++; 806 } 807 808 /* Zero the current row info for A*P */ 809 for (j=0;j<apnzj;j++) { 810 apa[apj[j]] = 0.; 811 apjdense[apj[j]] = 0; 812 } 813 } 814 815 /* Assemble the final matrix and clean up */ 816 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 817 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 818 ierr = PetscFree(apa);CHKERRQ(ierr); 819 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 820 ierr = PetscLogEventEnd(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr); 821 PetscFunctionReturn(0); 822 } 823 static PetscEvent logkey_matptapsymbolic_local = 0; 824 #undef __FUNCT__ 825 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local" 826 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,PetscReal fill,Mat *C) 827 { 828 PetscErrorCode ierr; 829 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 830 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 831 Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,*c; 832 PetscInt *pti,*ptj,*ptJ,*ajj,*pjj; 833 PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col; 834 PetscInt *ci,*cj,*ptaj; 835 PetscInt an=A->N,am=A->m,pN=P_loc->N; 836 PetscInt i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi; 837 PetscInt pm=P_loc->m,nlnk,*lnk; 838 MatScalar *ca; 839 PetscBT lnkbt; 840 PetscInt prend,nprow_loc,nprow_oth; 841 PetscInt *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth; 842 Mat_SeqAIJ *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data; 843 PetscInt *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j; 844 845 PetscFunctionBegin; 846 if (!logkey_matptapsymbolic_local) { 847 ierr = PetscLogEventRegister(&logkey_matptapsymbolic_local,"MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local",MAT_COOKIE); 848 } 849 ierr = PetscLogEventBegin(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr); 850 851 prend = prstart + pm; 852 853 /* get ij structure of P_loc^T */ 854 ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr); 855 ptJ=ptj; 856 857 /* Allocate ci array, arrays for fill computation and */ 858 /* free space for accumulating nonzero column info */ 859 ierr = PetscMalloc((pN+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 860 ci[0] = 0; 861 862 ierr = PetscMalloc((4*an+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr); 863 ierr = PetscMemzero(ptadenserow_loc,(4*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 864 ptasparserow_loc = ptadenserow_loc + an; 865 ptadenserow_oth = ptasparserow_loc + an; 866 ptasparserow_oth = ptadenserow_oth + an; 867 868 /* create and initialize a linked list */ 869 nlnk = pN+1; 870 ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 871 872 /* Set initial free space to be nnz(A) scaled by aspect ratio of P (P->M=A->N). */ 873 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 874 nnz = adi[am] + aoi[am]; 875 ierr = GetMoreSpace((PetscInt)(fill*nnz/A->N)*pN,&free_space); 876 current_space = free_space; 877 878 /* determine symbolic info for each row of C: */ 879 for (i=0;i<pN;i++) { 880 ptnzi = pti[i+1] - pti[i]; 881 nprow_loc = 0; nprow_oth = 0; 882 /* i-th row of symbolic P_loc^T*A_loc: */ 883 for (j=0;j<ptnzi;j++) { 884 arow = *ptJ++; 885 /* diagonal portion of A */ 886 anzj = adi[arow+1] - adi[arow]; 887 ajj = adj + adi[arow]; 888 for (k=0;k<anzj;k++) { 889 col = ajj[k]+prstart; 890 if (!ptadenserow_loc[col]) { 891 ptadenserow_loc[col] = -1; 892 ptasparserow_loc[nprow_loc++] = col; 893 } 894 } 895 /* off-diagonal portion of A */ 896 anzj = aoi[arow+1] - aoi[arow]; 897 ajj = aoj + aoi[arow]; 898 for (k=0;k<anzj;k++) { 899 col = a->garray[ajj[k]]; /* global col */ 900 if (col < cstart){ 901 col = ajj[k]; 902 } else if (col >= cend){ 903 col = ajj[k] + pm; 904 } else { 905 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map"); 906 } 907 if (!ptadenserow_oth[col]) { 908 ptadenserow_oth[col] = -1; 909 ptasparserow_oth[nprow_oth++] = col; 910 } 911 } 912 } 913 914 /* using symbolic info of local PtA, determine symbolic info for row of C: */ 915 cnzi = 0; 916 /* rows of P_loc */ 917 ptaj = ptasparserow_loc; 918 for (j=0; j<nprow_loc; j++) { 919 prow = *ptaj++; 920 prow -= prstart; /* rm */ 921 pnzj = pi_loc[prow+1] - pi_loc[prow]; 922 pjj = pj_loc + pi_loc[prow]; 923 /* add non-zero cols of P into the sorted linked list lnk */ 924 ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 925 cnzi += nlnk; 926 } 927 /* rows of P_oth */ 928 ptaj = ptasparserow_oth; 929 for (j=0; j<nprow_oth; j++) { 930 prow = *ptaj++; 931 if (prow >= prend) prow -= pm; /* rm */ 932 pnzj = pi_oth[prow+1] - pi_oth[prow]; 933 pjj = pj_oth + pi_oth[prow]; 934 /* add non-zero cols of P into the sorted linked list lnk */ 935 ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 936 cnzi += nlnk; 937 } 938 939 /* If free space is not available, make more free space */ 940 /* Double the amount of total space in the list */ 941 if (current_space->local_remaining<cnzi) { 942 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 943 } 944 945 /* Copy data into free space, and zero out denserows */ 946 ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 947 current_space->array += cnzi; 948 current_space->local_used += cnzi; 949 current_space->local_remaining -= cnzi; 950 951 for (j=0;j<nprow_loc; j++) { 952 ptadenserow_loc[ptasparserow_loc[j]] = 0; 953 } 954 for (j=0;j<nprow_oth; j++) { 955 ptadenserow_oth[ptasparserow_oth[j]] = 0; 956 } 957 958 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 959 /* For now, we will recompute what is needed. */ 960 ci[i+1] = ci[i] + cnzi; 961 } 962 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 963 /* Allocate space for cj, initialize cj, and */ 964 /* destroy list of free space and other temporary array(s) */ 965 ierr = PetscMalloc((ci[pN]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 966 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 967 ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr); 968 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 969 970 /* Allocate space for ca */ 971 ierr = PetscMalloc((ci[pN]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 972 ierr = PetscMemzero(ca,(ci[pN]+1)*sizeof(MatScalar));CHKERRQ(ierr); 973 974 /* put together the new matrix */ 975 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pN,pN,ci,cj,ca,C);CHKERRQ(ierr); 976 977 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 978 /* Since these are PETSc arrays, change flags to free them as necessary. */ 979 c = (Mat_SeqAIJ *)((*C)->data); 980 c->freedata = PETSC_TRUE; 981 c->nonew = 0; 982 983 /* Clean up. */ 984 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr); 985 986 ierr = PetscLogEventEnd(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr); 987 PetscFunctionReturn(0); 988 } 989