1 2 /* 3 Defines matrix-matrix product routines for pairs of SeqAIJ matrices 4 C = A * B 5 */ 6 7 #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 8 #include <../src/mat/utils/freespace.h> 9 #include <petscbt.h> 10 #include <../src/mat/impls/dense/seq/dense.h> /*I "petscmat.h" I*/ 11 /* 12 #define DEBUG_MATMATMULT 13 */ 14 EXTERN_C_BEGIN 15 #undef __FUNCT__ 16 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ" 17 PetscErrorCode MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 18 { 19 PetscErrorCode ierr; 20 <<<<<<< local 21 PetscBool scalable; 22 ======= 23 PetscBool scalable=PETSC_FALSE,scalable_new; 24 >>>>>>> other 25 26 PetscFunctionBegin; 27 if (scall == MAT_INITIAL_MATRIX){ 28 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 29 <<<<<<< local 30 ierr = PetscObjectOptionsBegin((PetscObject)A);CHKERRQ(ierr); 31 scalable=PETSC_FALSE; 32 ierr = PetscOptionsBool("-matmatmult_scalable","Use a scalable but slower C=A*B","",scalable,&scalable,PETSC_NULL);CHKERRQ(ierr); 33 ierr = PetscOptionsEnd();CHKERRQ(ierr); 34 if (scalable){ 35 ======= 36 ierr = PetscOptionsGetBool(PETSC_NULL,"-matmatmult_scalable",&scalable,PETSC_NULL);CHKERRQ(ierr); 37 ierr = PetscOptionsGetBool(PETSC_NULL,"-matmatmult_scalable_new",&scalable_new,PETSC_NULL);CHKERRQ(ierr); 38 if (scalable_new){ 39 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_new(A,B,fill,C);CHKERRQ(ierr); 40 } else if (scalable) { 41 >>>>>>> other 42 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(A,B,fill,C);CHKERRQ(ierr); 43 }else { 44 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 45 } 46 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 47 } 48 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 49 ierr = (*(*C)->ops->matmultnumeric)(A,B,*C);CHKERRQ(ierr); 50 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 51 PetscFunctionReturn(0); 52 } 53 EXTERN_C_END 54 55 /* 56 MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ - Get symbolic structure of C=A*B 57 Input Parameter: 58 . am, Ai, Aj - number of rows and structure of A 59 . bm, bn, Bi, Bj - number of rows, columns, and structure of B 60 . fill - filll ratio See MatMatMult() 61 62 Output Parameter: 63 . Ci, Cj - structure of C = A*B 64 . nspacedouble - number of extra mallocs 65 */ 66 #undef __FUNCT__ 67 #define __FUNCT__ "MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ" 68 PetscErrorCode MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,PetscInt *Ci[],PetscInt *Cj[],PetscInt *nspacedouble) 69 { 70 PetscErrorCode ierr; 71 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 72 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 73 const PetscInt *Ai=a->i,*Aj=a->j,*Bi=b->i,*Bj=b->j,am=A->rmap->N,bm=B->rmap->N,bn=B->cmap->N; 74 const PetscInt *aj=Aj,*bi=Bi,*bj=Bj,*bjj; 75 PetscInt *ci,*cj; 76 PetscInt i,j,anzi,brow,bnzj,cnzi,nlnk,*lnk,ndouble=0; 77 PetscBT lnkbt; 78 79 PetscFunctionBegin; 80 /* Allocate ci array, arrays for fill computation and */ 81 /* free space for accumulating nonzero column info */ 82 ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 83 ci[0] = 0; 84 85 /* create and initialize a linked list */ 86 nlnk = bn+1; 87 ierr = PetscLLCreate(bn,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 88 89 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 90 ierr = PetscFreeSpaceGet((PetscInt)(fill*(Ai[am]+Bi[bm])),&free_space);CHKERRQ(ierr); 91 current_space = free_space; 92 93 /* Determine ci and cj */ 94 for (i=0; i<am; i++) { 95 anzi = Ai[i+1] - Ai[i]; 96 cnzi = 0; 97 aj = Aj + Ai[i]; 98 for (j=0; j<anzi; j++){ 99 brow = aj[j]; 100 bnzj = bi[brow+1] - bi[brow]; 101 bjj = bj + bi[brow]; 102 /* add non-zero cols of B into the sorted linked list lnk */ 103 ierr = PetscLLAddSorted(bnzj,bjj,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 104 cnzi += nlnk; 105 } 106 107 /* If free space is not available, make more free space */ 108 /* Double the amount of total space in the list */ 109 if (current_space->local_remaining<cnzi) { 110 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 111 ndouble++; 112 } 113 114 /* Copy data into free space, then initialize lnk */ 115 ierr = PetscLLClean(bn,bn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 116 current_space->array += cnzi; 117 current_space->local_used += cnzi; 118 current_space->local_remaining -= cnzi; 119 ci[i+1] = ci[i] + cnzi; 120 } 121 122 /* Column indices are in the list of free space */ 123 /* Allocate space for cj, initialize cj, and */ 124 /* destroy list of free space and other temporary array(s) */ 125 ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 126 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 127 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 128 129 *Ci = ci; 130 *Cj = cj; 131 *nspacedouble = ndouble; 132 PetscFunctionReturn(0); 133 } 134 135 #undef __FUNCT__ 136 #define __FUNCT__ "MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_Scalable" 137 PetscErrorCode MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,PetscReal fill,PetscInt *Ci[],PetscInt *Cj[],PetscInt *nspacedouble) 138 { 139 PetscErrorCode ierr; 140 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 141 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 142 const PetscInt *Ai=a->i,*Aj=a->j,*Bi=b->i,*Bj=b->j,am=A->rmap->N,bm=B->rmap->N,bn=B->cmap->N; 143 const PetscInt *aj=Aj,*bi=Bi,*bj; 144 PetscInt *ci,*cj; 145 PetscInt i,j,anzi,brow,bnzj,cnzi,crmax,ndouble=0; 146 PetscBT bt; 147 PetscInt nlnk_max,lnk_max=bn,*lnk,*nlnk; 148 149 PetscFunctionBegin; 150 /* Allocate ci array, arrays for fill computation and */ 151 /* free space for accumulating nonzero column info */ 152 ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 153 ci[0] = 0; 154 155 /* create and initialize a condensed linked list */ 156 crmax = a->rmax*b->rmax; 157 nlnk_max = PetscMin(crmax,bn); 158 if (!nlnk_max && bn) { 159 nlnk_max = bn; /* in case rmax is not defined for A or B */ 160 #if defined(PETSC_USE_DEBUGGING) 161 ierr = PetscPrintf(PETSC_COMM_SELF,"Warning: Armax or Brmax is not defined in MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_Scalable!\n"); 162 #endif 163 } 164 #if defined(DEBUG_MATMATMULT) 165 ierr = (PETSC_COMM_SELF,"LLCondensedCreate nlnk_max=%d, bn %d, crmax %d\n",nlnk_max,bn,crmax); 166 #endif 167 ierr = PetscLLCondensedCreate(nlnk_max,lnk_max,lnk,nlnk,bt);CHKERRQ(ierr); 168 169 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 170 ierr = PetscFreeSpaceGet((PetscInt)(fill*(Ai[am]+Bi[bm])),&free_space);CHKERRQ(ierr); 171 current_space = free_space; 172 173 /* Determine ci and cj */ 174 for (i=0; i<am; i++) { 175 anzi = Ai[i+1] - Ai[i]; 176 cnzi = 0; 177 aj = Aj + Ai[i]; 178 for (j=0; j<anzi; j++){ 179 brow = aj[j]; 180 bnzj = bi[brow+1] - bi[brow]; 181 bj = Bj + bi[brow]; 182 /* add non-zero cols of B into the condensed sorted linked list lnk */ 183 ierr = PetscLLCondensedAddSorted(nlnk_max,lnk_max,bnzj,bj,nlnk,lnk,bt);CHKERRQ(ierr); 184 } 185 cnzi = *nlnk; 186 ci[i+1] = ci[i] + cnzi; 187 188 /* If free space is not available, make more free space */ 189 /* Double the amount of total space in the list */ 190 if (current_space->local_remaining<cnzi) { 191 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 192 ndouble++; 193 } 194 195 /* Copy data into free space, then initialize lnk */ 196 ierr = PetscLLCondensedClean(nlnk_max,lnk_max,cnzi,current_space->array,nlnk,lnk,bt);CHKERRQ(ierr); 197 current_space->array += cnzi; 198 current_space->local_used += cnzi; 199 current_space->local_remaining -= cnzi; 200 } 201 202 /* Column indices are in the list of free space */ 203 /* Allocate and copy column indices to cj, then destroy list of free space, lnk and bt */ 204 ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 205 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 206 ierr = PetscLLCondensedDestroy(lnk,bt);CHKERRQ(ierr); 207 208 *Ci = ci; 209 *Cj = cj; 210 *nspacedouble = ndouble; 211 PetscFunctionReturn(0); 212 } 213 214 /* 215 Does not use bitarray 216 */ 217 #undef __FUNCT__ 218 #define __FUNCT__ "MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_Scalable_new" 219 PetscErrorCode MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_Scalable_new(Mat A,Mat B,PetscReal fill,PetscInt *Ci[],PetscInt *Cj[],PetscInt *nspacedouble) 220 { 221 PetscErrorCode ierr; 222 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 223 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 224 const PetscInt *Ai=a->i,*Aj=a->j,*Bi=b->i,*Bj=b->j,am=A->rmap->N,bm=B->rmap->N,bn=B->cmap->N; 225 const PetscInt *aj=Aj,*bi=Bi,*bj; 226 PetscInt *ci,*cj; 227 PetscInt i,j,anzi,brow,bnzj,cnzi,crmax,ndouble=0; 228 PetscInt lnk_max=bn,*lnk; 229 230 PetscFunctionBegin; 231 /* Allocate ci array, arrays for fill computation and */ 232 /* free space for accumulating nonzero column info */ 233 ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 234 ci[0] = 0; 235 236 /* create and initialize a condensed linked list */ 237 crmax = a->rmax*b->rmax; 238 lnk_max = PetscMin(crmax,bn); 239 if (!lnk_max && bn) { 240 lnk_max = bn; /* in case rmax is not defined for A or B */ 241 #if defined(PETSC_USE_DEBUGGING) 242 ierr = PetscPrintf(PETSC_COMM_SELF,"Warning: Armax or Brmax is not defined in MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_Scalable!\n"); 243 #endif 244 } 245 #if defined(DEBUG_MATMATMULT) 246 ierr = (PETSC_COMM_SELF,"LLCondensedCreate lnk_max=%d, bn %d, crmax %d\n",lnk_max,bn,crmax); 247 #endif 248 ierr = PetscLLCondensedCreate_new(lnk_max,lnk);CHKERRQ(ierr); 249 250 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 251 ierr = PetscFreeSpaceGet((PetscInt)(fill*(Ai[am]+Bi[bm])),&free_space);CHKERRQ(ierr); 252 current_space = free_space; 253 254 /* Determine ci and cj */ 255 for (i=0; i<am; i++) { 256 anzi = Ai[i+1] - Ai[i]; 257 cnzi = 0; 258 aj = Aj + Ai[i]; 259 for (j=0; j<anzi; j++){ 260 brow = aj[j]; 261 bnzj = bi[brow+1] - bi[brow]; 262 bj = Bj + bi[brow]; 263 /* add non-zero cols of B into the condensed sorted linked list lnk */ 264 /* ierr = PetscLLCondensedView(lnk_max,lnk_max,lnk,lnk);CHKERRQ(ierr); */ 265 ierr = PetscLLCondensedAddSorted_new(bnzj,bj,lnk);CHKERRQ(ierr); 266 } 267 cnzi = *lnk; 268 ci[i+1] = ci[i] + cnzi; 269 270 /* If free space is not available, make more free space */ 271 /* Double the amount of total space in the list */ 272 if (current_space->local_remaining<cnzi) { 273 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 274 ndouble++; 275 } 276 277 /* Copy data into free space, then initialize lnk */ 278 /* ierr = PetscLLCondensedView(lnk_max,lnk_max,lnk,lnk);CHKERRQ(ierr); */ 279 ierr = PetscLLCondensedClean_new(lnk_max,cnzi,current_space->array,lnk);CHKERRQ(ierr); 280 current_space->array += cnzi; 281 current_space->local_used += cnzi; 282 current_space->local_remaining -= cnzi; 283 } 284 285 /* Column indices are in the list of free space */ 286 /* Allocate and copy column indices to cj, then destroy list of free space, lnk and bt */ 287 ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 288 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 289 ierr = PetscLLCondensedDestroy_new(lnk);CHKERRQ(ierr); 290 291 *Ci = ci; 292 *Cj = cj; 293 *nspacedouble = ndouble; 294 PetscFunctionReturn(0); 295 } 296 297 #undef __FUNCT__ 298 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ" 299 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 300 { 301 PetscErrorCode ierr; 302 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 303 PetscInt *ai=a->i,*bi=b->i,*ci,*cj; 304 PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N,nspacedouble; 305 MatScalar *ca; 306 PetscReal afill; 307 308 PetscFunctionBegin; 309 #if defined(DEBUG_MATMATMULT) 310 ierr = PetscPrintf(PETSC_COMM_SELF,"MatMatMultSymbolic_SeqAIJ_SeqAIJ...\n");CHKERRQ(ierr); 311 #endif 312 /* Get ci and cj */ 313 ierr = MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ(A,B,fill,&ci,&cj,&nspacedouble);CHKERRQ(ierr); 314 315 /* Allocate space for ca */ 316 ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 317 ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 318 319 /* put together the new symbolic matrix */ 320 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr); 321 322 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 323 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 324 c = (Mat_SeqAIJ *)((*C)->data); 325 c->free_a = PETSC_TRUE; 326 c->free_ij = PETSC_TRUE; 327 c->nonew = 0; 328 (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; /* fast, needs non-scalable O(bn) array 'abdense' */ 329 ierr = PetscMalloc(bn*sizeof(PetscScalar),&c->matmult_abdense);CHKERRQ(ierr); 330 ierr = PetscMemzero(c->matmult_abdense,bn*sizeof(PetscScalar));CHKERRQ(ierr); 331 332 /* set MatInfo */ 333 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 334 if (afill < 1.0) afill = 1.0; 335 c->maxnz = ci[am]; 336 c->nz = ci[am]; 337 (*C)->info.mallocs = nspacedouble; 338 (*C)->info.fill_ratio_given = fill; 339 (*C)->info.fill_ratio_needed = afill; 340 341 #if defined(PETSC_USE_INFO) 342 if (ci[am]) { 343 ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",nspacedouble,fill,afill);CHKERRQ(ierr); 344 ierr = PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);CHKERRQ(ierr); 345 } else { 346 ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr); 347 } 348 #endif 349 PetscFunctionReturn(0); 350 } 351 352 #undef __FUNCT__ 353 #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqAIJ" 354 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 355 { 356 PetscErrorCode ierr; 357 PetscLogDouble flops=0.0; 358 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 359 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 360 Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 361 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 362 PetscInt am=A->rmap->n,cm=C->rmap->n; 363 PetscInt i,j,k,anzi,bnzi,cnzi,brow; 364 PetscScalar *aa=a->a,*ba=b->a,*baj,*ca=c->a,valtmp; 365 PetscScalar *ab_dense=c->matmult_abdense; 366 367 PetscFunctionBegin; 368 /* clean old values in C */ 369 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 370 /* Traverse A row-wise. */ 371 /* Build the ith row in C by summing over nonzero columns in A, */ 372 /* the rows of B corresponding to nonzeros of A. */ 373 for (i=0; i<am; i++) { 374 anzi = ai[i+1] - ai[i]; 375 for (j=0; j<anzi; j++) { 376 brow = aj[j]; 377 bnzi = bi[brow+1] - bi[brow]; 378 bjj = bj + bi[brow]; 379 baj = ba + bi[brow]; 380 /* perform dense axpy */ 381 valtmp = aa[j]; 382 for (k=0; k<bnzi; k++) { 383 ab_dense[bjj[k]] += valtmp*baj[k]; 384 } 385 flops += 2*bnzi; 386 } 387 aj += anzi; aa += anzi; 388 389 cnzi = ci[i+1] - ci[i]; 390 for (k=0; k<cnzi; k++) { 391 ca[k] += ab_dense[cj[k]]; 392 ab_dense[cj[k]] = 0.0; /* zero ab_dense */ 393 } 394 flops += cnzi; 395 cj += cnzi; ca += cnzi; 396 } 397 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 398 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 399 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 400 PetscFunctionReturn(0); 401 } 402 403 #undef __FUNCT__ 404 #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable" 405 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,Mat C) 406 { 407 PetscErrorCode ierr; 408 PetscLogDouble flops=0.0; 409 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 410 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 411 Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 412 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 413 PetscInt am=A->rmap->N,cm=C->rmap->N; 414 PetscInt i,j,k,anzi,bnzi,cnzi,brow; 415 PetscScalar *aa=a->a,*ba=b->a,*baj,*ca=c->a,valtmp; 416 PetscInt nextb; 417 418 PetscFunctionBegin; 419 #if defined(DEBUG_MATMATMULT) 420 ierr = PetscPrintf(PETSC_COMM_SELF,"MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable...\n");CHKERRQ(ierr); 421 #endif 422 /* clean old values in C */ 423 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 424 /* Traverse A row-wise. */ 425 /* Build the ith row in C by summing over nonzero columns in A, */ 426 /* the rows of B corresponding to nonzeros of A. */ 427 for (i=0;i<am;i++) { 428 anzi = ai[i+1] - ai[i]; 429 cnzi = ci[i+1] - ci[i]; 430 for (j=0;j<anzi;j++) { 431 brow = aj[j]; 432 bnzi = bi[brow+1] - bi[brow]; 433 bjj = bj + bi[brow]; 434 baj = ba + bi[brow]; 435 /* perform sparse axpy */ 436 valtmp = aa[j]; 437 nextb = 0; 438 for (k=0; nextb<bnzi; k++) { 439 if (cj[k] == bjj[nextb]){ /* ccol == bcol */ 440 ca[k] += valtmp*baj[nextb++]; 441 } 442 } 443 flops += 2*bnzi; 444 } 445 aj += anzi; aa += anzi; 446 cj += cnzi; ca += cnzi; 447 } 448 449 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 450 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 451 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 452 PetscFunctionReturn(0); 453 } 454 455 #undef __FUNCT__ 456 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_new" 457 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_new(Mat A,Mat B,PetscReal fill,Mat *C) 458 { 459 PetscErrorCode ierr; 460 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 461 PetscInt *ai=a->i,*bi=b->i,*ci,*cj; 462 PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N,nspacedouble; 463 MatScalar *ca; 464 PetscReal afill; 465 466 PetscFunctionBegin; 467 #if defined(DEBUG_MATMATMULT) 468 ierr = PetscPrintf(PETSC_COMM_SELF,"MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable Armax %d, Brmax %d\n",a->rmax,b->rmax);CHKERRQ(ierr); 469 #endif 470 /* Get ci and cj */ 471 ierr = MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_Scalable_new(A,B,fill,&ci,&cj,&nspacedouble);CHKERRQ(ierr); 472 473 /* Allocate space for ca */ 474 ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 475 ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 476 477 /* put together the new symbolic matrix */ 478 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr); 479 480 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 481 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 482 c = (Mat_SeqAIJ *)((*C)->data); 483 c->free_a = PETSC_TRUE; 484 c->free_ij = PETSC_TRUE; 485 c->nonew = 0; 486 (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable; /* slower, less memory */ 487 488 /* set MatInfo */ 489 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 490 if (afill < 1.0) afill = 1.0; 491 c->maxnz = ci[am]; 492 c->nz = ci[am]; 493 (*C)->info.mallocs = nspacedouble; 494 (*C)->info.fill_ratio_given = fill; 495 (*C)->info.fill_ratio_needed = afill; 496 497 #if defined(PETSC_USE_INFO) 498 if (ci[am]) { 499 ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",nspacedouble,fill,afill);CHKERRQ(ierr); 500 ierr = PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);CHKERRQ(ierr); 501 } else { 502 ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr); 503 } 504 #endif 505 PetscFunctionReturn(0); 506 } 507 508 509 #undef __FUNCT__ 510 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable" 511 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,PetscReal fill,Mat *C) 512 { 513 PetscErrorCode ierr; 514 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 515 PetscInt *ai=a->i,*bi=b->i,*ci,*cj; 516 PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N,nspacedouble; 517 MatScalar *ca; 518 PetscReal afill; 519 520 PetscFunctionBegin; 521 #if defined(DEBUG_MATMATMULT) 522 ierr = PetscPrintf(PETSC_COMM_SELF,"MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable Armax %d, Brmax %d\n",a->rmax,b->rmax);CHKERRQ(ierr); 523 #endif 524 /* Get ci and cj */ 525 ierr = MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_Scalable(A,B,fill,&ci,&cj,&nspacedouble);CHKERRQ(ierr); 526 527 /* Allocate space for ca */ 528 ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 529 ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 530 531 /* put together the new symbolic matrix */ 532 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr); 533 534 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 535 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 536 c = (Mat_SeqAIJ *)((*C)->data); 537 c->free_a = PETSC_TRUE; 538 c->free_ij = PETSC_TRUE; 539 c->nonew = 0; 540 (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable; /* slower, less memory */ 541 542 /* set MatInfo */ 543 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 544 if (afill < 1.0) afill = 1.0; 545 c->maxnz = ci[am]; 546 c->nz = ci[am]; 547 (*C)->info.mallocs = nspacedouble; 548 (*C)->info.fill_ratio_given = fill; 549 (*C)->info.fill_ratio_needed = afill; 550 551 #if defined(PETSC_USE_INFO) 552 if (ci[am]) { 553 ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",nspacedouble,fill,afill);CHKERRQ(ierr); 554 ierr = PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);CHKERRQ(ierr); 555 } else { 556 ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr); 557 } 558 #endif 559 PetscFunctionReturn(0); 560 } 561 562 563 /* This routine is not used. Should be removed! */ 564 #undef __FUNCT__ 565 #define __FUNCT__ "MatMatTransposeMult_SeqAIJ_SeqAIJ" 566 PetscErrorCode MatMatTransposeMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 567 { 568 PetscErrorCode ierr; 569 570 PetscFunctionBegin; 571 if (scall == MAT_INITIAL_MATRIX){ 572 ierr = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 573 } 574 ierr = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 575 PetscFunctionReturn(0); 576 } 577 578 #undef __FUNCT__ 579 #define __FUNCT__ "PetscContainerDestroy_Mat_MatMatTransMult" 580 PetscErrorCode PetscContainerDestroy_Mat_MatMatTransMult(void *ptr) 581 { 582 PetscErrorCode ierr; 583 Mat_MatMatTransMult *multtrans=(Mat_MatMatTransMult*)ptr; 584 585 PetscFunctionBegin; 586 ierr = MatTransposeColoringDestroy(&multtrans->matcoloring);CHKERRQ(ierr); 587 ierr = MatDestroy(&multtrans->Bt_den);CHKERRQ(ierr); 588 ierr = MatDestroy(&multtrans->ABt_den);CHKERRQ(ierr); 589 ierr = PetscFree(multtrans);CHKERRQ(ierr); 590 PetscFunctionReturn(0); 591 } 592 593 #undef __FUNCT__ 594 #define __FUNCT__ "MatDestroy_SeqAIJ_MatMatMultTrans" 595 PetscErrorCode MatDestroy_SeqAIJ_MatMatMultTrans(Mat A) 596 { 597 PetscErrorCode ierr; 598 PetscContainer container; 599 Mat_MatMatTransMult *multtrans=PETSC_NULL; 600 601 PetscFunctionBegin; 602 ierr = PetscObjectQuery((PetscObject)A,"Mat_MatMatTransMult",(PetscObject *)&container);CHKERRQ(ierr); 603 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 604 ierr = PetscContainerGetPointer(container,(void **)&multtrans);CHKERRQ(ierr); 605 A->ops->destroy = multtrans->destroy; 606 if (A->ops->destroy) { 607 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 608 } 609 ierr = PetscObjectCompose((PetscObject)A,"Mat_MatMatTransMult",0);CHKERRQ(ierr); 610 PetscFunctionReturn(0); 611 } 612 613 #undef __FUNCT__ 614 #define __FUNCT__ "MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ" 615 PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 616 { 617 PetscErrorCode ierr; 618 Mat Bt; 619 PetscInt *bti,*btj; 620 Mat_MatMatTransMult *multtrans; 621 PetscContainer container; 622 PetscLogDouble t0,tf,etime2=0.0; 623 624 PetscFunctionBegin; 625 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 626 /* create symbolic Bt */ 627 ierr = MatGetSymbolicTranspose_SeqAIJ(B,&bti,&btj);CHKERRQ(ierr); 628 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,B->cmap->n,B->rmap->n,bti,btj,PETSC_NULL,&Bt);CHKERRQ(ierr); 629 630 /* get symbolic C=A*Bt */ 631 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,Bt,fill,C);CHKERRQ(ierr); 632 633 /* create a supporting struct for reuse intermidiate dense matrices with matcoloring */ 634 ierr = PetscNew(Mat_MatMatTransMult,&multtrans);CHKERRQ(ierr); 635 636 /* attach the supporting struct to C */ 637 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 638 ierr = PetscContainerSetPointer(container,multtrans);CHKERRQ(ierr); 639 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_Mat_MatMatTransMult);CHKERRQ(ierr); 640 ierr = PetscObjectCompose((PetscObject)(*C),"Mat_MatMatTransMult",(PetscObject)container);CHKERRQ(ierr); 641 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 642 643 multtrans->usecoloring = PETSC_FALSE; 644 multtrans->destroy = (*C)->ops->destroy; 645 (*C)->ops->destroy = MatDestroy_SeqAIJ_MatMatMultTrans; 646 647 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 648 etime2 += tf - t0; 649 650 ierr = PetscOptionsGetBool(PETSC_NULL,"-matmattransmult_color",&multtrans->usecoloring,PETSC_NULL);CHKERRQ(ierr); 651 if (multtrans->usecoloring){ 652 /* Create MatTransposeColoring from symbolic C=A*B^T */ 653 MatTransposeColoring matcoloring; 654 ISColoring iscoloring; 655 Mat Bt_dense,C_dense; 656 PetscLogDouble etime0=0.0,etime01=0.0,etime1=0.0; 657 658 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 659 ierr = MatGetColoring(*C,MATCOLORINGLF,&iscoloring);CHKERRQ(ierr); 660 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 661 etime0 += tf - t0; 662 663 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 664 ierr = MatTransposeColoringCreate(*C,iscoloring,&matcoloring);CHKERRQ(ierr); 665 multtrans->matcoloring = matcoloring; 666 ierr = ISColoringDestroy(&iscoloring);CHKERRQ(ierr); 667 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 668 etime01 += tf - t0; 669 670 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 671 /* Create Bt_dense and C_dense = A*Bt_dense */ 672 ierr = MatCreate(PETSC_COMM_SELF,&Bt_dense);CHKERRQ(ierr); 673 ierr = MatSetSizes(Bt_dense,A->cmap->n,matcoloring->ncolors,A->cmap->n,matcoloring->ncolors);CHKERRQ(ierr); 674 ierr = MatSetType(Bt_dense,MATSEQDENSE);CHKERRQ(ierr); 675 ierr = MatSeqDenseSetPreallocation(Bt_dense,PETSC_NULL);CHKERRQ(ierr); 676 Bt_dense->assembled = PETSC_TRUE; 677 multtrans->Bt_den = Bt_dense; 678 679 ierr = MatCreate(PETSC_COMM_SELF,&C_dense);CHKERRQ(ierr); 680 ierr = MatSetSizes(C_dense,A->rmap->n,matcoloring->ncolors,A->rmap->n,matcoloring->ncolors);CHKERRQ(ierr); 681 ierr = MatSetType(C_dense,MATSEQDENSE);CHKERRQ(ierr); 682 ierr = MatSeqDenseSetPreallocation(C_dense,PETSC_NULL);CHKERRQ(ierr); 683 Bt_dense->assembled = PETSC_TRUE; 684 multtrans->ABt_den = C_dense; 685 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 686 etime1 += tf - t0; 687 688 #if defined(PETSC_USE_INFO) 689 { 690 Mat_SeqAIJ *c=(Mat_SeqAIJ*)(*C)->data; 691 ierr = PetscInfo5(*C,"Bt_dense: %D,%D; Cnz %D / (cm*ncolors %D) = %g\n",A->cmap->n,matcoloring->ncolors,c->nz,A->rmap->n*matcoloring->ncolors,(PetscReal)(c->nz)/(A->rmap->n*matcoloring->ncolors)); 692 ierr = PetscInfo5(*C,"Sym = GetColor %g + ColorCreate %g + MatDenseCreate %g + non-colorSym %g = %g\n",etime0,etime01,etime1,etime2,etime0+etime01+etime1+etime2); 693 } 694 #endif 695 } 696 /* clean up */ 697 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 698 ierr = MatRestoreSymbolicTranspose_SeqAIJ(B,&bti,&btj);CHKERRQ(ierr); 699 700 701 702 #if defined(INEFFICIENT_ALGORITHM) 703 /* The algorithm below computes am*bm sparse inner-product - inefficient! It will be deleted later. */ 704 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 705 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 706 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*ci,*cj,*acol,*bcol; 707 PetscInt am=A->rmap->N,bm=B->rmap->N; 708 PetscInt i,j,anzi,bnzj,cnzi,nlnk,*lnk,nspacedouble=0,ka,kb,index[1]; 709 MatScalar *ca; 710 PetscBT lnkbt; 711 PetscReal afill; 712 713 /* Allocate row pointer array ci */ 714 ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 715 ci[0] = 0; 716 717 /* Create and initialize a linked list for C columns */ 718 nlnk = bm+1; 719 ierr = PetscLLCreate(bm,bm,nlnk,lnk,lnkbt);CHKERRQ(ierr); 720 721 /* Initial FreeSpace with size fill*(nnz(A)+nnz(B)) */ 722 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr); 723 current_space = free_space; 724 725 /* Determine symbolic info for each row of the product A*B^T: */ 726 for (i=0; i<am; i++) { 727 anzi = ai[i+1] - ai[i]; 728 cnzi = 0; 729 acol = aj + ai[i]; 730 for (j=0; j<bm; j++){ 731 bnzj = bi[j+1] - bi[j]; 732 bcol= bj + bi[j]; 733 /* sparse inner-product c(i,j)=A[i,:]*B[j,:]^T */ 734 ka = 0; kb = 0; 735 while (ka < anzi && kb < bnzj){ 736 while (acol[ka] < bcol[kb] && ka < anzi) ka++; 737 if (ka == anzi) break; 738 while (acol[ka] > bcol[kb] && kb < bnzj) kb++; 739 if (kb == bnzj) break; 740 if (acol[ka] == bcol[kb]){ /* add nonzero c(i,j) to lnk */ 741 index[0] = j; 742 ierr = PetscLLAdd(1,index,bm,nlnk,lnk,lnkbt);CHKERRQ(ierr); 743 cnzi++; 744 break; 745 } 746 } 747 } 748 749 /* If free space is not available, make more free space */ 750 /* Double the amount of total space in the list */ 751 if (current_space->local_remaining<cnzi) { 752 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 753 nspacedouble++; 754 } 755 756 /* Copy data into free space, then initialize lnk */ 757 ierr = PetscLLClean(bm,bm,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 758 current_space->array += cnzi; 759 current_space->local_used += cnzi; 760 current_space->local_remaining -= cnzi; 761 762 ci[i+1] = ci[i] + cnzi; 763 } 764 765 766 /* Column indices are in the list of free space. 767 Allocate array cj, copy column indices to cj, and destroy list of free space */ 768 ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 769 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 770 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 771 772 /* Allocate space for ca */ 773 ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 774 ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 775 776 /* put together the new symbolic matrix */ 777 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,am,bm,ci,cj,ca,C);CHKERRQ(ierr); 778 779 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 780 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 781 c = (Mat_SeqAIJ *)((*C)->data); 782 c->free_a = PETSC_TRUE; 783 c->free_ij = PETSC_TRUE; 784 c->nonew = 0; 785 786 /* set MatInfo */ 787 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 788 if (afill < 1.0) afill = 1.0; 789 c->maxnz = ci[am]; 790 c->nz = ci[am]; 791 (*C)->info.mallocs = nspacedouble; 792 (*C)->info.fill_ratio_given = fill; 793 (*C)->info.fill_ratio_needed = afill; 794 795 #if defined(PETSC_USE_INFO) 796 if (ci[am]) { 797 ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",nspacedouble,fill,afill);CHKERRQ(ierr); 798 ierr = PetscInfo1((*C),"Use MatMatTransposeMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);CHKERRQ(ierr); 799 } else { 800 ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr); 801 } 802 #endif 803 #endif 804 PetscFunctionReturn(0); 805 } 806 807 /* #define USE_ARRAY - for sparse dot product. Slower than !USE_ARRAY */ 808 #undef __FUNCT__ 809 #define __FUNCT__ "MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ" 810 PetscErrorCode MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 811 { 812 PetscErrorCode ierr; 813 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data; 814 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,anzi,bnzj,nexta,nextb,*acol,*bcol,brow; 815 PetscInt cm=C->rmap->n,*ci=c->i,*cj=c->j,i,j,cnzi,*ccol; 816 PetscLogDouble flops=0.0; 817 MatScalar *aa=a->a,*aval,*ba=b->a,*bval,*ca=c->a,*cval; 818 Mat_MatMatTransMult *multtrans; 819 PetscContainer container; 820 #if defined(USE_ARRAY) 821 MatScalar *spdot; 822 #endif 823 824 PetscFunctionBegin; 825 ierr = PetscObjectQuery((PetscObject)C,"Mat_MatMatTransMult",(PetscObject *)&container);CHKERRQ(ierr); 826 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 827 ierr = PetscContainerGetPointer(container,(void **)&multtrans);CHKERRQ(ierr); 828 if (multtrans->usecoloring){ 829 MatTransposeColoring matcoloring = multtrans->matcoloring; 830 Mat Bt_dense; 831 PetscInt m,n; 832 PetscLogDouble t0,tf,etime0=0.0,etime1=0.0,etime2=0.0; 833 Mat C_dense = multtrans->ABt_den; 834 835 Bt_dense = multtrans->Bt_den; 836 ierr = MatGetLocalSize(Bt_dense,&m,&n);CHKERRQ(ierr); 837 838 /* Get Bt_dense by Apply MatTransposeColoring to B */ 839 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 840 ierr = MatTransColoringApplySpToDen(matcoloring,B,Bt_dense);CHKERRQ(ierr); 841 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 842 etime0 += tf - t0; 843 844 /* C_dense = A*Bt_dense */ 845 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 846 ierr = MatMatMultNumeric_SeqAIJ_SeqDense(A,Bt_dense,C_dense);CHKERRQ(ierr); 847 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 848 etime2 += tf - t0; 849 850 /* Recover C from C_dense */ 851 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 852 ierr = MatTransColoringApplyDenToSp(matcoloring,C_dense,C);CHKERRQ(ierr); 853 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 854 etime1 += tf - t0; 855 #if defined(PETSC_USE_INFO) 856 ierr = PetscInfo4(C,"Num = ColoringApply: %g %g + Mult_sp_dense %g = %g\n",etime0,etime1,etime2,etime0+etime1+etime2); 857 #endif 858 PetscFunctionReturn(0); 859 } 860 861 #if defined(USE_ARRAY) 862 /* allocate an array for implementing sparse inner-product */ 863 ierr = PetscMalloc((A->cmap->n+1)*sizeof(MatScalar),&spdot);CHKERRQ(ierr); 864 ierr = PetscMemzero(spdot,(A->cmap->n+1)*sizeof(MatScalar));CHKERRQ(ierr); 865 #endif 866 867 /* clear old values in C */ 868 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 869 870 for (i=0; i<cm; i++) { 871 anzi = ai[i+1] - ai[i]; 872 acol = aj + ai[i]; 873 aval = aa + ai[i]; 874 cnzi = ci[i+1] - ci[i]; 875 ccol = cj + ci[i]; 876 cval = ca + ci[i]; 877 for (j=0; j<cnzi; j++){ 878 brow = ccol[j]; 879 bnzj = bi[brow+1] - bi[brow]; 880 bcol = bj + bi[brow]; 881 bval = ba + bi[brow]; 882 883 /* perform sparse inner-product c(i,j)=A[i,:]*B[j,:]^T */ 884 #if defined(USE_ARRAY) 885 /* put ba to spdot array */ 886 for (nextb=0; nextb<bnzj; nextb++) spdot[bcol[nextb]] = bval[nextb]; 887 /* c(i,j)=A[i,:]*B[j,:]^T */ 888 for (nexta=0; nexta<anzi; nexta++){ 889 cval[j] += spdot[acol[nexta]]*aval[nexta]; 890 } 891 /* zero spdot array */ 892 for (nextb=0; nextb<bnzj; nextb++) spdot[bcol[nextb]] = 0.0; 893 #else 894 nexta = 0; nextb = 0; 895 while (nexta<anzi && nextb<bnzj){ 896 while (acol[nexta] < bcol[nextb] && nexta < anzi) nexta++; 897 if (nexta == anzi) break; 898 while (acol[nexta] > bcol[nextb] && nextb < bnzj) nextb++; 899 if (nextb == bnzj) break; 900 if (acol[nexta] == bcol[nextb]){ 901 cval[j] += aval[nexta]*bval[nextb]; 902 nexta++; nextb++; 903 flops += 2; 904 } 905 } 906 #endif 907 } 908 } 909 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 910 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 911 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 912 #if defined(USE_ARRAY) 913 ierr = PetscFree(spdot); 914 #endif 915 PetscFunctionReturn(0); 916 } 917 918 #undef __FUNCT__ 919 #define __FUNCT__ "MatTransposeMatMult_SeqAIJ_SeqAIJ" 920 PetscErrorCode MatTransposeMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) { 921 PetscErrorCode ierr; 922 923 PetscFunctionBegin; 924 if (scall == MAT_INITIAL_MATRIX){ 925 ierr = MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 926 } 927 ierr = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 928 PetscFunctionReturn(0); 929 } 930 931 #undef __FUNCT__ 932 #define __FUNCT__ "MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ" 933 PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 934 { 935 PetscErrorCode ierr; 936 Mat At; 937 PetscInt *ati,*atj; 938 939 PetscFunctionBegin; 940 /* create symbolic At */ 941 ierr = MatGetSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr); 942 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,A->cmap->n,A->rmap->n,ati,atj,PETSC_NULL,&At);CHKERRQ(ierr); 943 944 /* get symbolic C=At*B */ 945 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(At,B,fill,C);CHKERRQ(ierr); 946 947 /* clean up */ 948 ierr = MatDestroy(&At);CHKERRQ(ierr); 949 ierr = MatRestoreSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr); 950 PetscFunctionReturn(0); 951 } 952 953 #undef __FUNCT__ 954 #define __FUNCT__ "MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ" 955 PetscErrorCode MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 956 { 957 PetscErrorCode ierr; 958 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data; 959 PetscInt am=A->rmap->n,anzi,*ai=a->i,*aj=a->j,*bi=b->i,*bj,bnzi,nextb; 960 PetscInt cm=C->rmap->n,*ci=c->i,*cj=c->j,crow,*cjj,i,j,k; 961 PetscLogDouble flops=0.0; 962 MatScalar *aa=a->a,*ba,*ca=c->a,*caj; 963 964 PetscFunctionBegin; 965 /* clear old values in C */ 966 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 967 968 /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */ 969 for (i=0;i<am;i++) { 970 bj = b->j + bi[i]; 971 ba = b->a + bi[i]; 972 bnzi = bi[i+1] - bi[i]; 973 anzi = ai[i+1] - ai[i]; 974 for (j=0; j<anzi; j++) { 975 nextb = 0; 976 crow = *aj++; 977 cjj = cj + ci[crow]; 978 caj = ca + ci[crow]; 979 /* perform sparse axpy operation. Note cjj includes bj. */ 980 for (k=0; nextb<bnzi; k++) { 981 if (cjj[k] == *(bj+nextb)) { /* ccol == bcol */ 982 caj[k] += (*aa)*(*(ba+nextb)); 983 nextb++; 984 } 985 } 986 flops += 2*bnzi; 987 aa++; 988 } 989 } 990 991 /* Assemble the final matrix and clean up */ 992 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 993 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 994 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 995 PetscFunctionReturn(0); 996 } 997 998 EXTERN_C_BEGIN 999 #undef __FUNCT__ 1000 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqDense" 1001 PetscErrorCode MatMatMult_SeqAIJ_SeqDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 1002 { 1003 PetscErrorCode ierr; 1004 1005 PetscFunctionBegin; 1006 if (scall == MAT_INITIAL_MATRIX){ 1007 ierr = MatMatMultSymbolic_SeqAIJ_SeqDense(A,B,fill,C);CHKERRQ(ierr); 1008 } 1009 ierr = MatMatMultNumeric_SeqAIJ_SeqDense(A,B,*C);CHKERRQ(ierr); 1010 PetscFunctionReturn(0); 1011 } 1012 EXTERN_C_END 1013 1014 #undef __FUNCT__ 1015 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqDense" 1016 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqDense(Mat A,Mat B,PetscReal fill,Mat *C) 1017 { 1018 PetscErrorCode ierr; 1019 1020 PetscFunctionBegin; 1021 ierr = MatMatMultSymbolic_SeqDense_SeqDense(A,B,0.0,C);CHKERRQ(ierr); 1022 (*C)->ops->matmult = MatMatMult_SeqAIJ_SeqDense; 1023 PetscFunctionReturn(0); 1024 } 1025 1026 #undef __FUNCT__ 1027 #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqDense" 1028 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqDense(Mat A,Mat B,Mat C) 1029 { 1030 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1031 PetscErrorCode ierr; 1032 PetscScalar *b,*c,r1,r2,r3,r4,*b1,*b2,*b3,*b4; 1033 MatScalar *aa; 1034 PetscInt cm=C->rmap->n, cn=B->cmap->n, bm=B->rmap->n, col, i,j,n,*aj, am = A->rmap->n; 1035 PetscInt am2 = 2*am, am3 = 3*am, bm4 = 4*bm,colam; 1036 1037 PetscFunctionBegin; 1038 if (!cm || !cn) PetscFunctionReturn(0); 1039 if (bm != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in A %D not equal rows in B %D\n",A->cmap->n,bm); 1040 if (A->rmap->n != C->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows in C %D not equal rows in A %D\n",C->rmap->n,A->rmap->n); 1041 if (B->cmap->n != C->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in B %D not equal columns in C %D\n",B->cmap->n,C->cmap->n); 1042 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 1043 ierr = MatGetArray(C,&c);CHKERRQ(ierr); 1044 b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm; 1045 for (col=0; col<cn-4; col += 4){ /* over columns of C */ 1046 colam = col*am; 1047 for (i=0; i<am; i++) { /* over rows of C in those columns */ 1048 r1 = r2 = r3 = r4 = 0.0; 1049 n = a->i[i+1] - a->i[i]; 1050 aj = a->j + a->i[i]; 1051 aa = a->a + a->i[i]; 1052 for (j=0; j<n; j++) { 1053 r1 += (*aa)*b1[*aj]; 1054 r2 += (*aa)*b2[*aj]; 1055 r3 += (*aa)*b3[*aj]; 1056 r4 += (*aa++)*b4[*aj++]; 1057 } 1058 c[colam + i] = r1; 1059 c[colam + am + i] = r2; 1060 c[colam + am2 + i] = r3; 1061 c[colam + am3 + i] = r4; 1062 } 1063 b1 += bm4; 1064 b2 += bm4; 1065 b3 += bm4; 1066 b4 += bm4; 1067 } 1068 for (;col<cn; col++){ /* over extra columns of C */ 1069 for (i=0; i<am; i++) { /* over rows of C in those columns */ 1070 r1 = 0.0; 1071 n = a->i[i+1] - a->i[i]; 1072 aj = a->j + a->i[i]; 1073 aa = a->a + a->i[i]; 1074 1075 for (j=0; j<n; j++) { 1076 r1 += (*aa++)*b1[*aj++]; 1077 } 1078 c[col*am + i] = r1; 1079 } 1080 b1 += bm; 1081 } 1082 ierr = PetscLogFlops(cn*(2.0*a->nz));CHKERRQ(ierr); 1083 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 1084 ierr = MatRestoreArray(C,&c);CHKERRQ(ierr); 1085 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1086 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1087 PetscFunctionReturn(0); 1088 } 1089 1090 /* 1091 Note very similar to MatMult_SeqAIJ(), should generate both codes from same base 1092 */ 1093 #undef __FUNCT__ 1094 #define __FUNCT__ "MatMatMultNumericAdd_SeqAIJ_SeqDense" 1095 PetscErrorCode MatMatMultNumericAdd_SeqAIJ_SeqDense(Mat A,Mat B,Mat C) 1096 { 1097 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1098 PetscErrorCode ierr; 1099 PetscScalar *b,*c,r1,r2,r3,r4,*b1,*b2,*b3,*b4; 1100 MatScalar *aa; 1101 PetscInt cm=C->rmap->n, cn=B->cmap->n, bm=B->rmap->n, col, i,j,n,*aj, am = A->rmap->n,*ii,arm; 1102 PetscInt am2 = 2*am, am3 = 3*am, bm4 = 4*bm,colam,*ridx; 1103 1104 PetscFunctionBegin; 1105 if (!cm || !cn) PetscFunctionReturn(0); 1106 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 1107 ierr = MatGetArray(C,&c);CHKERRQ(ierr); 1108 b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm; 1109 1110 if (a->compressedrow.use){ /* use compressed row format */ 1111 for (col=0; col<cn-4; col += 4){ /* over columns of C */ 1112 colam = col*am; 1113 arm = a->compressedrow.nrows; 1114 ii = a->compressedrow.i; 1115 ridx = a->compressedrow.rindex; 1116 for (i=0; i<arm; i++) { /* over rows of C in those columns */ 1117 r1 = r2 = r3 = r4 = 0.0; 1118 n = ii[i+1] - ii[i]; 1119 aj = a->j + ii[i]; 1120 aa = a->a + ii[i]; 1121 for (j=0; j<n; j++) { 1122 r1 += (*aa)*b1[*aj]; 1123 r2 += (*aa)*b2[*aj]; 1124 r3 += (*aa)*b3[*aj]; 1125 r4 += (*aa++)*b4[*aj++]; 1126 } 1127 c[colam + ridx[i]] += r1; 1128 c[colam + am + ridx[i]] += r2; 1129 c[colam + am2 + ridx[i]] += r3; 1130 c[colam + am3 + ridx[i]] += r4; 1131 } 1132 b1 += bm4; 1133 b2 += bm4; 1134 b3 += bm4; 1135 b4 += bm4; 1136 } 1137 for (;col<cn; col++){ /* over extra columns of C */ 1138 colam = col*am; 1139 arm = a->compressedrow.nrows; 1140 ii = a->compressedrow.i; 1141 ridx = a->compressedrow.rindex; 1142 for (i=0; i<arm; i++) { /* over rows of C in those columns */ 1143 r1 = 0.0; 1144 n = ii[i+1] - ii[i]; 1145 aj = a->j + ii[i]; 1146 aa = a->a + ii[i]; 1147 1148 for (j=0; j<n; j++) { 1149 r1 += (*aa++)*b1[*aj++]; 1150 } 1151 c[col*am + ridx[i]] += r1; 1152 } 1153 b1 += bm; 1154 } 1155 } else { 1156 for (col=0; col<cn-4; col += 4){ /* over columns of C */ 1157 colam = col*am; 1158 for (i=0; i<am; i++) { /* over rows of C in those columns */ 1159 r1 = r2 = r3 = r4 = 0.0; 1160 n = a->i[i+1] - a->i[i]; 1161 aj = a->j + a->i[i]; 1162 aa = a->a + a->i[i]; 1163 for (j=0; j<n; j++) { 1164 r1 += (*aa)*b1[*aj]; 1165 r2 += (*aa)*b2[*aj]; 1166 r3 += (*aa)*b3[*aj]; 1167 r4 += (*aa++)*b4[*aj++]; 1168 } 1169 c[colam + i] += r1; 1170 c[colam + am + i] += r2; 1171 c[colam + am2 + i] += r3; 1172 c[colam + am3 + i] += r4; 1173 } 1174 b1 += bm4; 1175 b2 += bm4; 1176 b3 += bm4; 1177 b4 += bm4; 1178 } 1179 for (;col<cn; col++){ /* over extra columns of C */ 1180 for (i=0; i<am; i++) { /* over rows of C in those columns */ 1181 r1 = 0.0; 1182 n = a->i[i+1] - a->i[i]; 1183 aj = a->j + a->i[i]; 1184 aa = a->a + a->i[i]; 1185 1186 for (j=0; j<n; j++) { 1187 r1 += (*aa++)*b1[*aj++]; 1188 } 1189 c[col*am + i] += r1; 1190 } 1191 b1 += bm; 1192 } 1193 } 1194 ierr = PetscLogFlops(cn*2.0*a->nz);CHKERRQ(ierr); 1195 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 1196 ierr = MatRestoreArray(C,&c);CHKERRQ(ierr); 1197 PetscFunctionReturn(0); 1198 } 1199 1200 #undef __FUNCT__ 1201 #define __FUNCT__ "MatTransColoringApplySpToDen_SeqAIJ" 1202 PetscErrorCode MatTransColoringApplySpToDen_SeqAIJ(MatTransposeColoring coloring,Mat B,Mat Btdense) 1203 { 1204 PetscErrorCode ierr; 1205 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 1206 Mat_SeqDense *btdense = (Mat_SeqDense*)Btdense->data; 1207 PetscInt *bi=b->i,*bj=b->j; 1208 PetscInt m=Btdense->rmap->n,n=Btdense->cmap->n,j,k,l,col,anz,*btcol,brow,ncolumns; 1209 MatScalar *btval,*btval_den,*ba=b->a; 1210 PetscInt *columns=coloring->columns,*colorforcol=coloring->colorforcol,ncolors=coloring->ncolors; 1211 1212 PetscFunctionBegin; 1213 btval_den=btdense->v; 1214 ierr = PetscMemzero(btval_den,(m*n)*sizeof(MatScalar));CHKERRQ(ierr); 1215 for (k=0; k<ncolors; k++) { 1216 ncolumns = coloring->ncolumns[k]; 1217 for (l=0; l<ncolumns; l++) { /* insert a row of B to a column of Btdense */ 1218 col = *(columns + colorforcol[k] + l); 1219 btcol = bj + bi[col]; 1220 btval = ba + bi[col]; 1221 anz = bi[col+1] - bi[col]; 1222 for (j=0; j<anz; j++){ 1223 brow = btcol[j]; 1224 btval_den[brow] = btval[j]; 1225 } 1226 } 1227 btval_den += m; 1228 } 1229 PetscFunctionReturn(0); 1230 } 1231 1232 #undef __FUNCT__ 1233 #define __FUNCT__ "MatTransColoringApplyDenToSp_SeqAIJ" 1234 PetscErrorCode MatTransColoringApplyDenToSp_SeqAIJ(MatTransposeColoring matcoloring,Mat Cden,Mat Csp) 1235 { 1236 PetscErrorCode ierr; 1237 Mat_SeqAIJ *csp = (Mat_SeqAIJ*)Csp->data; 1238 PetscInt k,l,*row,*idx,m,ncolors=matcoloring->ncolors,nrows; 1239 PetscScalar *ca_den,*cp_den,*ca=csp->a; 1240 PetscInt *rows=matcoloring->rows,*spidx=matcoloring->columnsforspidx,*colorforrow=matcoloring->colorforrow; 1241 1242 PetscFunctionBegin; 1243 ierr = MatGetLocalSize(Csp,&m,PETSC_NULL);CHKERRQ(ierr); 1244 ierr = MatGetArray(Cden,&ca_den);CHKERRQ(ierr); 1245 cp_den = ca_den; 1246 for (k=0; k<ncolors; k++) { 1247 nrows = matcoloring->nrows[k]; 1248 row = rows + colorforrow[k]; 1249 idx = spidx + colorforrow[k]; 1250 for (l=0; l<nrows; l++){ 1251 ca[idx[l]] = cp_den[row[l]]; 1252 } 1253 cp_den += m; 1254 } 1255 ierr = MatRestoreArray(Cden,&ca_den);CHKERRQ(ierr); 1256 PetscFunctionReturn(0); 1257 } 1258 1259 /* 1260 MatGetColumnIJ_SeqAIJ_Color() and MatRestoreColumnIJ_SeqAIJ_Color() are customized from 1261 MatGetColumnIJ_SeqAIJ() and MatRestoreColumnIJ_SeqAIJ() by adding an output 1262 spidx[], index of a->j, to be used for setting 'columnsforspidx' in MatTransposeColoringCreate_SeqAIJ(). 1263 */ 1264 #undef __FUNCT__ 1265 #define __FUNCT__ "MatGetColumnIJ_SeqAIJ_Color" 1266 PetscErrorCode MatGetColumnIJ_SeqAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 1267 { 1268 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1269 PetscErrorCode ierr; 1270 PetscInt i,*collengths,*cia,*cja,n = A->cmap->n,m = A->rmap->n; 1271 PetscInt nz = a->i[m],row,*jj,mr,col; 1272 PetscInt *cspidx; 1273 1274 PetscFunctionBegin; 1275 *nn = n; 1276 if (!ia) PetscFunctionReturn(0); 1277 if (symmetric) { 1278 SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"MatGetColumnIJ_SeqAIJ_Color() not supported for the case symmetric"); 1279 ierr = MatToSymmetricIJ_SeqAIJ(A->rmap->n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 1280 } else { 1281 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 1282 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 1283 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 1284 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 1285 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cspidx);CHKERRQ(ierr); 1286 jj = a->j; 1287 for (i=0; i<nz; i++) { 1288 collengths[jj[i]]++; 1289 } 1290 cia[0] = oshift; 1291 for (i=0; i<n; i++) { 1292 cia[i+1] = cia[i] + collengths[i]; 1293 } 1294 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 1295 jj = a->j; 1296 for (row=0; row<m; row++) { 1297 mr = a->i[row+1] - a->i[row]; 1298 for (i=0; i<mr; i++) { 1299 col = *jj++; 1300 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 1301 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 1302 } 1303 } 1304 ierr = PetscFree(collengths);CHKERRQ(ierr); 1305 *ia = cia; *ja = cja; 1306 *spidx = cspidx; 1307 } 1308 PetscFunctionReturn(0); 1309 } 1310 1311 #undef __FUNCT__ 1312 #define __FUNCT__ "MatRestoreColumnIJ_SeqAIJ_Color" 1313 PetscErrorCode MatRestoreColumnIJ_SeqAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 1314 { 1315 PetscErrorCode ierr; 1316 1317 PetscFunctionBegin; 1318 if (!ia) PetscFunctionReturn(0); 1319 1320 ierr = PetscFree(*ia);CHKERRQ(ierr); 1321 ierr = PetscFree(*ja);CHKERRQ(ierr); 1322 ierr = PetscFree(*spidx);CHKERRQ(ierr); 1323 PetscFunctionReturn(0); 1324 } 1325 1326 #undef __FUNCT__ 1327 #define __FUNCT__ "MatTransposeColoringCreate_SeqAIJ" 1328 PetscErrorCode MatTransposeColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatTransposeColoring c) 1329 { 1330 PetscErrorCode ierr; 1331 PetscInt i,n,nrows,N,j,k,m,*row_idx,*ci,*cj,ncols,col,cm; 1332 const PetscInt *is; 1333 PetscInt nis = iscoloring->n,*rowhit,bs = 1; 1334 IS *isa; 1335 PetscBool done; 1336 PetscBool flg1,flg2; 1337 Mat_SeqAIJ *csp = (Mat_SeqAIJ*)mat->data; 1338 PetscInt *colorforrow,*rows,*rows_i,*columnsforspidx,*columnsforspidx_i,*idxhit,*spidx; 1339 PetscInt *colorforcol,*columns,*columns_i; 1340 1341 PetscFunctionBegin; 1342 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 1343 1344 /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1 */ 1345 ierr = PetscTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr); 1346 ierr = PetscTypeCompare((PetscObject)mat,MATMPIBAIJ,&flg2);CHKERRQ(ierr); 1347 if (flg1 || flg2) { 1348 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 1349 } 1350 1351 N = mat->cmap->N/bs; 1352 c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 1353 c->N = mat->cmap->N/bs; 1354 c->m = mat->rmap->N/bs; 1355 c->rstart = 0; 1356 1357 c->ncolors = nis; 1358 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 1359 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 1360 ierr = PetscMalloc2(csp->nz+1,PetscInt,&rows,csp->nz+1,PetscInt,&columnsforspidx);CHKERRQ(ierr); 1361 ierr = PetscMalloc((nis+1)*sizeof(PetscInt),&colorforrow);CHKERRQ(ierr); 1362 colorforrow[0] = 0; 1363 rows_i = rows; 1364 columnsforspidx_i = columnsforspidx; 1365 1366 ierr = PetscMalloc((nis+1)*sizeof(PetscInt),&colorforcol);CHKERRQ(ierr); 1367 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columns);CHKERRQ(ierr); 1368 colorforcol[0] = 0; 1369 columns_i = columns; 1370 1371 ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,&done);CHKERRQ(ierr); /* column-wise storage of mat */ 1372 if (!done) SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name); 1373 1374 cm = c->m; 1375 ierr = PetscMalloc((cm+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 1376 ierr = PetscMalloc((cm+1)*sizeof(PetscInt),&idxhit);CHKERRQ(ierr); 1377 for (i=0; i<nis; i++) { 1378 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 1379 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 1380 c->ncolumns[i] = n; 1381 if (n) { 1382 ierr = PetscMemcpy(columns_i,is,n*sizeof(PetscInt));CHKERRQ(ierr); 1383 } 1384 colorforcol[i+1] = colorforcol[i] + n; 1385 columns_i += n; 1386 1387 /* fast, crude version requires O(N*N) work */ 1388 ierr = PetscMemzero(rowhit,cm*sizeof(PetscInt));CHKERRQ(ierr); 1389 1390 /* loop over columns*/ 1391 for (j=0; j<n; j++) { 1392 col = is[j]; 1393 row_idx = cj + ci[col]; 1394 m = ci[col+1] - ci[col]; 1395 /* loop over columns marking them in rowhit */ 1396 for (k=0; k<m; k++) { 1397 idxhit[*row_idx] = spidx[ci[col] + k]; 1398 rowhit[*row_idx++] = col + 1; 1399 } 1400 } 1401 /* count the number of hits */ 1402 nrows = 0; 1403 for (j=0; j<cm; j++) { 1404 if (rowhit[j]) nrows++; 1405 } 1406 c->nrows[i] = nrows; 1407 colorforrow[i+1] = colorforrow[i] + nrows; 1408 1409 nrows = 0; 1410 for (j=0; j<cm; j++) { 1411 if (rowhit[j]) { 1412 rows_i[nrows] = j; 1413 columnsforspidx_i[nrows] = idxhit[j]; 1414 nrows++; 1415 } 1416 } 1417 ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 1418 rows_i += nrows; columnsforspidx_i += nrows; 1419 } 1420 ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,&done);CHKERRQ(ierr); 1421 ierr = PetscFree(rowhit);CHKERRQ(ierr); 1422 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 1423 #if defined(PETSC_USE_DEBUG) 1424 if (csp->nz != colorforrow[nis]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"csp->nz %d != colorforrow[nis] %d",csp->nz,colorforrow[nis]); 1425 #endif 1426 1427 c->colorforrow = colorforrow; 1428 c->rows = rows; 1429 c->columnsforspidx = columnsforspidx; 1430 c->colorforcol = colorforcol; 1431 c->columns = columns; 1432 ierr = PetscFree(idxhit);CHKERRQ(ierr); 1433 PetscFunctionReturn(0); 1434 } 1435