1 2 /* 3 Defines matrix-matrix product routines for pairs of SeqAIJ matrices 4 C = A * B 5 */ 6 7 #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 8 #include <../src/mat/utils/freespace.h> 9 #include <petscbt.h> 10 #include <../src/mat/impls/dense/seq/dense.h> /*I "petscmat.h" I*/ 11 /* 12 #define DEBUG_MATMATMULT 13 */ 14 EXTERN_C_BEGIN 15 #undef __FUNCT__ 16 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ" 17 PetscErrorCode MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 18 { 19 PetscErrorCode ierr; 20 PetscBool scalable=PETSC_FALSE,scalable_new=PETSC_FALSE; 21 22 PetscFunctionBegin; 23 if (scall == MAT_INITIAL_MATRIX){ 24 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 25 ierr = PetscObjectOptionsBegin((PetscObject)A);CHKERRQ(ierr); 26 scalable=PETSC_FALSE; 27 ierr = PetscOptionsBool("-matmatmult_scalable","Use a scalable but slower C=A*B","",scalable,&scalable,PETSC_NULL);CHKERRQ(ierr); 28 if (scalable){ 29 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(A,B,fill,C);CHKERRQ(ierr); 30 } 31 ierr = PetscOptionsBool("-matmatmult_scalable_new","Use a scalable but slower C=A*B","",scalable_new,&scalable_new,PETSC_NULL);CHKERRQ(ierr); 32 if (scalable_new){ 33 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_new(A,B,fill,C);CHKERRQ(ierr); 34 }else { 35 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 36 } 37 ierr = PetscOptionsEnd();CHKERRQ(ierr); 38 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 39 } 40 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 41 ierr = (*(*C)->ops->matmultnumeric)(A,B,*C);CHKERRQ(ierr); 42 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 43 PetscFunctionReturn(0); 44 } 45 EXTERN_C_END 46 47 /* 48 MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ - Get symbolic structure of C=A*B 49 Input Parameter: 50 . am, Ai, Aj - number of rows and structure of A 51 . bm, bn, Bi, Bj - number of rows, columns, and structure of B 52 . fill - filll ratio See MatMatMult() 53 54 Output Parameter: 55 . Ci, Cj - structure of C = A*B 56 . nspacedouble - number of extra mallocs 57 */ 58 #undef __FUNCT__ 59 #define __FUNCT__ "MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ" 60 PetscErrorCode MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,PetscInt *Ci[],PetscInt *Cj[],PetscInt *nspacedouble) 61 { 62 PetscErrorCode ierr; 63 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 64 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 65 const PetscInt *Ai=a->i,*Aj=a->j,*Bi=b->i,*Bj=b->j,am=A->rmap->N,bm=B->rmap->N,bn=B->cmap->N; 66 const PetscInt *aj=Aj,*bi=Bi,*bj=Bj,*bjj; 67 PetscInt *ci,*cj; 68 PetscInt i,j,anzi,brow,bnzj,cnzi,nlnk,*lnk,ndouble=0; 69 PetscBT lnkbt; 70 71 PetscFunctionBegin; 72 /* Allocate ci array, arrays for fill computation and */ 73 /* free space for accumulating nonzero column info */ 74 ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 75 ci[0] = 0; 76 77 /* create and initialize a linked list */ 78 nlnk = bn+1; 79 ierr = PetscLLCreate(bn,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 80 81 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 82 ierr = PetscFreeSpaceGet((PetscInt)(fill*(Ai[am]+Bi[bm])),&free_space);CHKERRQ(ierr); 83 current_space = free_space; 84 85 /* Determine ci and cj */ 86 for (i=0; i<am; i++) { 87 anzi = Ai[i+1] - Ai[i]; 88 cnzi = 0; 89 aj = Aj + Ai[i]; 90 for (j=0; j<anzi; j++){ 91 brow = aj[j]; 92 bnzj = bi[brow+1] - bi[brow]; 93 bjj = bj + bi[brow]; 94 /* add non-zero cols of B into the sorted linked list lnk */ 95 ierr = PetscLLAddSorted(bnzj,bjj,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 96 cnzi += nlnk; 97 } 98 99 /* If free space is not available, make more free space */ 100 /* Double the amount of total space in the list */ 101 if (current_space->local_remaining<cnzi) { 102 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 103 ndouble++; 104 } 105 106 /* Copy data into free space, then initialize lnk */ 107 ierr = PetscLLClean(bn,bn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 108 current_space->array += cnzi; 109 current_space->local_used += cnzi; 110 current_space->local_remaining -= cnzi; 111 ci[i+1] = ci[i] + cnzi; 112 } 113 114 /* Column indices are in the list of free space */ 115 /* Allocate space for cj, initialize cj, and */ 116 /* destroy list of free space and other temporary array(s) */ 117 ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 118 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 119 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 120 121 *Ci = ci; 122 *Cj = cj; 123 *nspacedouble = ndouble; 124 PetscFunctionReturn(0); 125 } 126 127 #undef __FUNCT__ 128 #define __FUNCT__ "MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_Scalable" 129 PetscErrorCode MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,PetscReal fill,PetscInt *Ci[],PetscInt *Cj[],PetscInt *nspacedouble) 130 { 131 PetscErrorCode ierr; 132 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 133 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 134 const PetscInt *Ai=a->i,*Aj=a->j,*Bi=b->i,*Bj=b->j,am=A->rmap->N,bm=B->rmap->N,bn=B->cmap->N; 135 const PetscInt *aj=Aj,*bi=Bi,*bj; 136 PetscInt *ci,*cj; 137 PetscInt i,j,anzi,brow,bnzj,cnzi,crmax,ndouble=0; 138 PetscBT bt; 139 PetscInt nlnk_max,lnk_max=bn,*lnk,*nlnk; 140 141 PetscFunctionBegin; 142 /* Allocate ci array, arrays for fill computation and */ 143 /* free space for accumulating nonzero column info */ 144 ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 145 ci[0] = 0; 146 147 /* create and initialize a condensed linked list */ 148 crmax = a->rmax*b->rmax; 149 nlnk_max = PetscMin(crmax,bn); 150 if (!nlnk_max && bn) { 151 nlnk_max = bn; /* in case rmax is not defined for A or B */ 152 #if defined(PETSC_USE_DEBUGGING) 153 ierr = PetscPrintf(PETSC_COMM_SELF,"Warning: Armax or Brmax is not defined in MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_Scalable!\n"); 154 #endif 155 } 156 #if defined(DEBUG_MATMATMULT) 157 ierr = (PETSC_COMM_SELF,"LLCondensedCreate nlnk_max=%d, bn %d, crmax %d\n",nlnk_max,bn,crmax); 158 #endif 159 ierr = PetscLLCondensedCreate(nlnk_max,lnk_max,lnk,nlnk,bt);CHKERRQ(ierr); 160 161 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 162 ierr = PetscFreeSpaceGet((PetscInt)(fill*(Ai[am]+Bi[bm])),&free_space);CHKERRQ(ierr); 163 current_space = free_space; 164 165 /* Determine ci and cj */ 166 for (i=0; i<am; i++) { 167 anzi = Ai[i+1] - Ai[i]; 168 cnzi = 0; 169 aj = Aj + Ai[i]; 170 for (j=0; j<anzi; j++){ 171 brow = aj[j]; 172 bnzj = bi[brow+1] - bi[brow]; 173 bj = Bj + bi[brow]; 174 /* add non-zero cols of B into the condensed sorted linked list lnk */ 175 ierr = PetscLLCondensedAddSorted(nlnk_max,lnk_max,bnzj,bj,nlnk,lnk,bt);CHKERRQ(ierr); 176 } 177 cnzi = *nlnk; 178 ci[i+1] = ci[i] + cnzi; 179 180 /* If free space is not available, make more free space */ 181 /* Double the amount of total space in the list */ 182 if (current_space->local_remaining<cnzi) { 183 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 184 ndouble++; 185 } 186 187 /* Copy data into free space, then initialize lnk */ 188 ierr = PetscLLCondensedClean(nlnk_max,lnk_max,cnzi,current_space->array,nlnk,lnk,bt);CHKERRQ(ierr); 189 current_space->array += cnzi; 190 current_space->local_used += cnzi; 191 current_space->local_remaining -= cnzi; 192 } 193 194 /* Column indices are in the list of free space */ 195 /* Allocate and copy column indices to cj, then destroy list of free space, lnk and bt */ 196 ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 197 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 198 ierr = PetscLLCondensedDestroy(lnk,bt);CHKERRQ(ierr); 199 200 *Ci = ci; 201 *Cj = cj; 202 *nspacedouble = ndouble; 203 PetscFunctionReturn(0); 204 } 205 206 /* 207 Does not use bitarray 208 */ 209 #undef __FUNCT__ 210 #define __FUNCT__ "MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_Scalable_new" 211 PetscErrorCode MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_Scalable_new(Mat A,Mat B,PetscReal fill,PetscInt *Ci[],PetscInt *Cj[],PetscInt *nspacedouble) 212 { 213 PetscErrorCode ierr; 214 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 215 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 216 const PetscInt *Ai=a->i,*Aj=a->j,*Bi=b->i,*Bj=b->j,am=A->rmap->N,bm=B->rmap->N,bn=B->cmap->N; 217 const PetscInt *aj=Aj,*bi=Bi,*bj; 218 PetscInt *ci,*cj; 219 PetscInt i,j,anzi,brow,bnzj,cnzi,crmax,ndouble=0; 220 PetscInt lnk_max=bn,*lnk; 221 222 PetscFunctionBegin; 223 /* Allocate ci array, arrays for fill computation and */ 224 /* free space for accumulating nonzero column info */ 225 ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 226 ci[0] = 0; 227 228 /* create and initialize a condensed linked list */ 229 crmax = a->rmax*b->rmax; 230 lnk_max = PetscMin(crmax,bn); 231 if (!lnk_max && bn) { 232 lnk_max = bn; /* in case rmax is not defined for A or B */ 233 #if defined(PETSC_USE_DEBUGGING) 234 ierr = PetscPrintf(PETSC_COMM_SELF,"Warning: Armax or Brmax is not defined in MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_Scalable!\n"); 235 #endif 236 } 237 #if defined(DEBUG_MATMATMULT) 238 ierr = (PETSC_COMM_SELF,"LLCondensedCreate lnk_max=%d, bn %d, crmax %d\n",lnk_max,bn,crmax); 239 #endif 240 ierr = PetscLLCondensedCreate_new(lnk_max,&lnk);CHKERRQ(ierr); 241 242 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 243 ierr = PetscFreeSpaceGet((PetscInt)(fill*(Ai[am]+Bi[bm])),&free_space);CHKERRQ(ierr); 244 current_space = free_space; 245 246 /* Determine ci and cj */ 247 for (i=0; i<am; i++) { 248 anzi = Ai[i+1] - Ai[i]; 249 cnzi = 0; 250 aj = Aj + Ai[i]; 251 for (j=0; j<anzi; j++){ 252 brow = aj[j]; 253 bnzj = bi[brow+1] - bi[brow]; 254 bj = Bj + bi[brow]; 255 /* add non-zero cols of B into the condensed sorted linked list lnk */ 256 /* ierr = PetscLLCondensedView(lnk_max,lnk_max,lnk,lnk);CHKERRQ(ierr); */ 257 ierr = PetscLLCondensedAddSorted_new(bnzj,bj,lnk);CHKERRQ(ierr); 258 } 259 cnzi = lnk[0]; // lnk[1]; 260 ci[i+1] = ci[i] + cnzi; 261 262 /* If free space is not available, make more free space */ 263 /* Double the amount of total space in the list */ 264 if (current_space->local_remaining<cnzi) { 265 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 266 ndouble++; 267 } 268 269 /* Copy data into free space, then initialize lnk */ 270 /* ierr = PetscLLCondensedView(lnk_max,lnk_max,lnk,lnk);CHKERRQ(ierr); */ 271 ierr = PetscLLCondensedClean_new(cnzi,current_space->array,lnk);CHKERRQ(ierr); 272 PetscIntView(cnzi,current_space->array,0); 273 current_space->array += cnzi; 274 current_space->local_used += cnzi; 275 current_space->local_remaining -= cnzi; 276 } 277 278 /* Column indices are in the list of free space */ 279 /* Allocate and copy column indices to cj, then destroy list of free space, lnk and bt */ 280 ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 281 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 282 ierr = PetscLLCondensedDestroy_new(lnk);CHKERRQ(ierr); 283 284 *Ci = ci; 285 *Cj = cj; 286 *nspacedouble = ndouble; 287 PetscFunctionReturn(0); 288 } 289 290 #undef __FUNCT__ 291 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ" 292 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 293 { 294 PetscErrorCode ierr; 295 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 296 PetscInt *ai=a->i,*bi=b->i,*ci,*cj; 297 PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N,nspacedouble; 298 MatScalar *ca; 299 PetscReal afill; 300 301 PetscFunctionBegin; 302 #if defined(DEBUG_MATMATMULT) 303 ierr = PetscPrintf(PETSC_COMM_SELF,"MatMatMultSymbolic_SeqAIJ_SeqAIJ...\n");CHKERRQ(ierr); 304 #endif 305 /* Get ci and cj */ 306 ierr = MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ(A,B,fill,&ci,&cj,&nspacedouble);CHKERRQ(ierr); 307 308 /* Allocate space for ca */ 309 ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 310 ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 311 312 /* put together the new symbolic matrix */ 313 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr); 314 315 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 316 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 317 c = (Mat_SeqAIJ *)((*C)->data); 318 c->free_a = PETSC_TRUE; 319 c->free_ij = PETSC_TRUE; 320 c->nonew = 0; 321 (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; /* fast, needs non-scalable O(bn) array 'abdense' */ 322 ierr = PetscMalloc(bn*sizeof(PetscScalar),&c->matmult_abdense);CHKERRQ(ierr); 323 ierr = PetscMemzero(c->matmult_abdense,bn*sizeof(PetscScalar));CHKERRQ(ierr); 324 325 /* set MatInfo */ 326 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 327 if (afill < 1.0) afill = 1.0; 328 c->maxnz = ci[am]; 329 c->nz = ci[am]; 330 (*C)->info.mallocs = nspacedouble; 331 (*C)->info.fill_ratio_given = fill; 332 (*C)->info.fill_ratio_needed = afill; 333 334 #if defined(PETSC_USE_INFO) 335 if (ci[am]) { 336 ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",nspacedouble,fill,afill);CHKERRQ(ierr); 337 ierr = PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);CHKERRQ(ierr); 338 } else { 339 ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr); 340 } 341 #endif 342 PetscFunctionReturn(0); 343 } 344 345 #undef __FUNCT__ 346 #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqAIJ" 347 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 348 { 349 PetscErrorCode ierr; 350 PetscLogDouble flops=0.0; 351 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 352 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 353 Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 354 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 355 PetscInt am=A->rmap->n,cm=C->rmap->n; 356 PetscInt i,j,k,anzi,bnzi,cnzi,brow; 357 PetscScalar *aa=a->a,*ba=b->a,*baj,*ca=c->a,valtmp; 358 PetscScalar *ab_dense=c->matmult_abdense; 359 360 PetscFunctionBegin; 361 /* clean old values in C */ 362 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 363 /* Traverse A row-wise. */ 364 /* Build the ith row in C by summing over nonzero columns in A, */ 365 /* the rows of B corresponding to nonzeros of A. */ 366 for (i=0; i<am; i++) { 367 anzi = ai[i+1] - ai[i]; 368 for (j=0; j<anzi; j++) { 369 brow = aj[j]; 370 bnzi = bi[brow+1] - bi[brow]; 371 bjj = bj + bi[brow]; 372 baj = ba + bi[brow]; 373 /* perform dense axpy */ 374 valtmp = aa[j]; 375 for (k=0; k<bnzi; k++) { 376 ab_dense[bjj[k]] += valtmp*baj[k]; 377 } 378 flops += 2*bnzi; 379 } 380 aj += anzi; aa += anzi; 381 382 cnzi = ci[i+1] - ci[i]; 383 for (k=0; k<cnzi; k++) { 384 ca[k] += ab_dense[cj[k]]; 385 ab_dense[cj[k]] = 0.0; /* zero ab_dense */ 386 } 387 flops += cnzi; 388 cj += cnzi; ca += cnzi; 389 } 390 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 391 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 392 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 393 PetscFunctionReturn(0); 394 } 395 396 #undef __FUNCT__ 397 #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable" 398 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,Mat C) 399 { 400 PetscErrorCode ierr; 401 PetscLogDouble flops=0.0; 402 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 403 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 404 Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 405 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 406 PetscInt am=A->rmap->N,cm=C->rmap->N; 407 PetscInt i,j,k,anzi,bnzi,cnzi,brow; 408 PetscScalar *aa=a->a,*ba=b->a,*baj,*ca=c->a,valtmp; 409 PetscInt nextb; 410 411 PetscFunctionBegin; 412 #if defined(DEBUG_MATMATMULT) 413 ierr = PetscPrintf(PETSC_COMM_SELF,"MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable...\n");CHKERRQ(ierr); 414 #endif 415 /* clean old values in C */ 416 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 417 /* Traverse A row-wise. */ 418 /* Build the ith row in C by summing over nonzero columns in A, */ 419 /* the rows of B corresponding to nonzeros of A. */ 420 for (i=0;i<am;i++) { 421 anzi = ai[i+1] - ai[i]; 422 cnzi = ci[i+1] - ci[i]; 423 for (j=0;j<anzi;j++) { 424 brow = aj[j]; 425 bnzi = bi[brow+1] - bi[brow]; 426 bjj = bj + bi[brow]; 427 baj = ba + bi[brow]; 428 /* perform sparse axpy */ 429 valtmp = aa[j]; 430 nextb = 0; 431 for (k=0; nextb<bnzi; k++) { 432 if (cj[k] == bjj[nextb]){ /* ccol == bcol */ 433 ca[k] += valtmp*baj[nextb++]; 434 } 435 } 436 flops += 2*bnzi; 437 } 438 aj += anzi; aa += anzi; 439 cj += cnzi; ca += cnzi; 440 } 441 442 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 443 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 444 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 445 PetscFunctionReturn(0); 446 } 447 448 #undef __FUNCT__ 449 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_new" 450 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_new(Mat A,Mat B,PetscReal fill,Mat *C) 451 { 452 PetscErrorCode ierr; 453 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 454 PetscInt *ai=a->i,*bi=b->i,*ci,*cj; 455 PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N,nspacedouble; 456 MatScalar *ca; 457 PetscReal afill; 458 459 PetscFunctionBegin; 460 #if defined(DEBUG_MATMATMULT) 461 ierr = PetscPrintf(PETSC_COMM_SELF,"MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable Armax %d, Brmax %d\n",a->rmax,b->rmax);CHKERRQ(ierr); 462 #endif 463 /* Get ci and cj */ 464 ierr = MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_Scalable_new(A,B,fill,&ci,&cj,&nspacedouble);CHKERRQ(ierr); 465 466 /* Allocate space for ca */ 467 ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 468 ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 469 470 /* put together the new symbolic matrix */ 471 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr); 472 473 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 474 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 475 c = (Mat_SeqAIJ *)((*C)->data); 476 c->free_a = PETSC_TRUE; 477 c->free_ij = PETSC_TRUE; 478 c->nonew = 0; 479 (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable; /* slower, less memory */ 480 481 /* set MatInfo */ 482 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 483 if (afill < 1.0) afill = 1.0; 484 c->maxnz = ci[am]; 485 c->nz = ci[am]; 486 (*C)->info.mallocs = nspacedouble; 487 (*C)->info.fill_ratio_given = fill; 488 (*C)->info.fill_ratio_needed = afill; 489 490 #if defined(PETSC_USE_INFO) 491 if (ci[am]) { 492 ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",nspacedouble,fill,afill);CHKERRQ(ierr); 493 ierr = PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);CHKERRQ(ierr); 494 } else { 495 ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr); 496 } 497 #endif 498 PetscFunctionReturn(0); 499 } 500 501 502 #undef __FUNCT__ 503 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable" 504 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,PetscReal fill,Mat *C) 505 { 506 PetscErrorCode ierr; 507 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 508 PetscInt *ai=a->i,*bi=b->i,*ci,*cj; 509 PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N,nspacedouble; 510 MatScalar *ca; 511 PetscReal afill; 512 513 PetscFunctionBegin; 514 #if defined(DEBUG_MATMATMULT) 515 ierr = PetscPrintf(PETSC_COMM_SELF,"MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable Armax %d, Brmax %d\n",a->rmax,b->rmax);CHKERRQ(ierr); 516 #endif 517 /* Get ci and cj */ 518 ierr = MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_Scalable(A,B,fill,&ci,&cj,&nspacedouble);CHKERRQ(ierr); 519 520 /* Allocate space for ca */ 521 ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 522 ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 523 524 /* put together the new symbolic matrix */ 525 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr); 526 527 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 528 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 529 c = (Mat_SeqAIJ *)((*C)->data); 530 c->free_a = PETSC_TRUE; 531 c->free_ij = PETSC_TRUE; 532 c->nonew = 0; 533 (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable; /* slower, less memory */ 534 535 /* set MatInfo */ 536 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 537 if (afill < 1.0) afill = 1.0; 538 c->maxnz = ci[am]; 539 c->nz = ci[am]; 540 (*C)->info.mallocs = nspacedouble; 541 (*C)->info.fill_ratio_given = fill; 542 (*C)->info.fill_ratio_needed = afill; 543 544 #if defined(PETSC_USE_INFO) 545 if (ci[am]) { 546 ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",nspacedouble,fill,afill);CHKERRQ(ierr); 547 ierr = PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);CHKERRQ(ierr); 548 } else { 549 ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr); 550 } 551 #endif 552 PetscFunctionReturn(0); 553 } 554 555 556 /* This routine is not used. Should be removed! */ 557 #undef __FUNCT__ 558 #define __FUNCT__ "MatMatTransposeMult_SeqAIJ_SeqAIJ" 559 PetscErrorCode MatMatTransposeMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 560 { 561 PetscErrorCode ierr; 562 563 PetscFunctionBegin; 564 if (scall == MAT_INITIAL_MATRIX){ 565 ierr = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 566 } 567 ierr = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 568 PetscFunctionReturn(0); 569 } 570 571 #undef __FUNCT__ 572 #define __FUNCT__ "PetscContainerDestroy_Mat_MatMatTransMult" 573 PetscErrorCode PetscContainerDestroy_Mat_MatMatTransMult(void *ptr) 574 { 575 PetscErrorCode ierr; 576 Mat_MatMatTransMult *multtrans=(Mat_MatMatTransMult*)ptr; 577 578 PetscFunctionBegin; 579 ierr = MatTransposeColoringDestroy(&multtrans->matcoloring);CHKERRQ(ierr); 580 ierr = MatDestroy(&multtrans->Bt_den);CHKERRQ(ierr); 581 ierr = MatDestroy(&multtrans->ABt_den);CHKERRQ(ierr); 582 ierr = PetscFree(multtrans);CHKERRQ(ierr); 583 PetscFunctionReturn(0); 584 } 585 586 #undef __FUNCT__ 587 #define __FUNCT__ "MatDestroy_SeqAIJ_MatMatMultTrans" 588 PetscErrorCode MatDestroy_SeqAIJ_MatMatMultTrans(Mat A) 589 { 590 PetscErrorCode ierr; 591 PetscContainer container; 592 Mat_MatMatTransMult *multtrans=PETSC_NULL; 593 594 PetscFunctionBegin; 595 ierr = PetscObjectQuery((PetscObject)A,"Mat_MatMatTransMult",(PetscObject *)&container);CHKERRQ(ierr); 596 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 597 ierr = PetscContainerGetPointer(container,(void **)&multtrans);CHKERRQ(ierr); 598 A->ops->destroy = multtrans->destroy; 599 if (A->ops->destroy) { 600 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 601 } 602 ierr = PetscObjectCompose((PetscObject)A,"Mat_MatMatTransMult",0);CHKERRQ(ierr); 603 PetscFunctionReturn(0); 604 } 605 606 #undef __FUNCT__ 607 #define __FUNCT__ "MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ" 608 PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 609 { 610 PetscErrorCode ierr; 611 Mat Bt; 612 PetscInt *bti,*btj; 613 Mat_MatMatTransMult *multtrans; 614 PetscContainer container; 615 PetscLogDouble t0,tf,etime2=0.0; 616 617 PetscFunctionBegin; 618 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 619 /* create symbolic Bt */ 620 ierr = MatGetSymbolicTranspose_SeqAIJ(B,&bti,&btj);CHKERRQ(ierr); 621 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,B->cmap->n,B->rmap->n,bti,btj,PETSC_NULL,&Bt);CHKERRQ(ierr); 622 623 /* get symbolic C=A*Bt */ 624 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,Bt,fill,C);CHKERRQ(ierr); 625 626 /* create a supporting struct for reuse intermidiate dense matrices with matcoloring */ 627 ierr = PetscNew(Mat_MatMatTransMult,&multtrans);CHKERRQ(ierr); 628 629 /* attach the supporting struct to C */ 630 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 631 ierr = PetscContainerSetPointer(container,multtrans);CHKERRQ(ierr); 632 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_Mat_MatMatTransMult);CHKERRQ(ierr); 633 ierr = PetscObjectCompose((PetscObject)(*C),"Mat_MatMatTransMult",(PetscObject)container);CHKERRQ(ierr); 634 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 635 636 multtrans->usecoloring = PETSC_FALSE; 637 multtrans->destroy = (*C)->ops->destroy; 638 (*C)->ops->destroy = MatDestroy_SeqAIJ_MatMatMultTrans; 639 640 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 641 etime2 += tf - t0; 642 643 ierr = PetscOptionsGetBool(PETSC_NULL,"-matmattransmult_color",&multtrans->usecoloring,PETSC_NULL);CHKERRQ(ierr); 644 if (multtrans->usecoloring){ 645 /* Create MatTransposeColoring from symbolic C=A*B^T */ 646 MatTransposeColoring matcoloring; 647 ISColoring iscoloring; 648 Mat Bt_dense,C_dense; 649 PetscLogDouble etime0=0.0,etime01=0.0,etime1=0.0; 650 651 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 652 ierr = MatGetColoring(*C,MATCOLORINGLF,&iscoloring);CHKERRQ(ierr); 653 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 654 etime0 += tf - t0; 655 656 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 657 ierr = MatTransposeColoringCreate(*C,iscoloring,&matcoloring);CHKERRQ(ierr); 658 multtrans->matcoloring = matcoloring; 659 ierr = ISColoringDestroy(&iscoloring);CHKERRQ(ierr); 660 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 661 etime01 += tf - t0; 662 663 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 664 /* Create Bt_dense and C_dense = A*Bt_dense */ 665 ierr = MatCreate(PETSC_COMM_SELF,&Bt_dense);CHKERRQ(ierr); 666 ierr = MatSetSizes(Bt_dense,A->cmap->n,matcoloring->ncolors,A->cmap->n,matcoloring->ncolors);CHKERRQ(ierr); 667 ierr = MatSetType(Bt_dense,MATSEQDENSE);CHKERRQ(ierr); 668 ierr = MatSeqDenseSetPreallocation(Bt_dense,PETSC_NULL);CHKERRQ(ierr); 669 Bt_dense->assembled = PETSC_TRUE; 670 multtrans->Bt_den = Bt_dense; 671 672 ierr = MatCreate(PETSC_COMM_SELF,&C_dense);CHKERRQ(ierr); 673 ierr = MatSetSizes(C_dense,A->rmap->n,matcoloring->ncolors,A->rmap->n,matcoloring->ncolors);CHKERRQ(ierr); 674 ierr = MatSetType(C_dense,MATSEQDENSE);CHKERRQ(ierr); 675 ierr = MatSeqDenseSetPreallocation(C_dense,PETSC_NULL);CHKERRQ(ierr); 676 Bt_dense->assembled = PETSC_TRUE; 677 multtrans->ABt_den = C_dense; 678 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 679 etime1 += tf - t0; 680 681 #if defined(PETSC_USE_INFO) 682 { 683 Mat_SeqAIJ *c=(Mat_SeqAIJ*)(*C)->data; 684 ierr = PetscInfo5(*C,"Bt_dense: %D,%D; Cnz %D / (cm*ncolors %D) = %g\n",A->cmap->n,matcoloring->ncolors,c->nz,A->rmap->n*matcoloring->ncolors,(PetscReal)(c->nz)/(A->rmap->n*matcoloring->ncolors)); 685 ierr = PetscInfo5(*C,"Sym = GetColor %g + ColorCreate %g + MatDenseCreate %g + non-colorSym %g = %g\n",etime0,etime01,etime1,etime2,etime0+etime01+etime1+etime2); 686 } 687 #endif 688 } 689 /* clean up */ 690 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 691 ierr = MatRestoreSymbolicTranspose_SeqAIJ(B,&bti,&btj);CHKERRQ(ierr); 692 693 694 695 #if defined(INEFFICIENT_ALGORITHM) 696 /* The algorithm below computes am*bm sparse inner-product - inefficient! It will be deleted later. */ 697 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 698 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 699 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*ci,*cj,*acol,*bcol; 700 PetscInt am=A->rmap->N,bm=B->rmap->N; 701 PetscInt i,j,anzi,bnzj,cnzi,nlnk,*lnk,nspacedouble=0,ka,kb,index[1]; 702 MatScalar *ca; 703 PetscBT lnkbt; 704 PetscReal afill; 705 706 /* Allocate row pointer array ci */ 707 ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 708 ci[0] = 0; 709 710 /* Create and initialize a linked list for C columns */ 711 nlnk = bm+1; 712 ierr = PetscLLCreate(bm,bm,nlnk,lnk,lnkbt);CHKERRQ(ierr); 713 714 /* Initial FreeSpace with size fill*(nnz(A)+nnz(B)) */ 715 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr); 716 current_space = free_space; 717 718 /* Determine symbolic info for each row of the product A*B^T: */ 719 for (i=0; i<am; i++) { 720 anzi = ai[i+1] - ai[i]; 721 cnzi = 0; 722 acol = aj + ai[i]; 723 for (j=0; j<bm; j++){ 724 bnzj = bi[j+1] - bi[j]; 725 bcol= bj + bi[j]; 726 /* sparse inner-product c(i,j)=A[i,:]*B[j,:]^T */ 727 ka = 0; kb = 0; 728 while (ka < anzi && kb < bnzj){ 729 while (acol[ka] < bcol[kb] && ka < anzi) ka++; 730 if (ka == anzi) break; 731 while (acol[ka] > bcol[kb] && kb < bnzj) kb++; 732 if (kb == bnzj) break; 733 if (acol[ka] == bcol[kb]){ /* add nonzero c(i,j) to lnk */ 734 index[0] = j; 735 ierr = PetscLLAdd(1,index,bm,nlnk,lnk,lnkbt);CHKERRQ(ierr); 736 cnzi++; 737 break; 738 } 739 } 740 } 741 742 /* If free space is not available, make more free space */ 743 /* Double the amount of total space in the list */ 744 if (current_space->local_remaining<cnzi) { 745 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 746 nspacedouble++; 747 } 748 749 /* Copy data into free space, then initialize lnk */ 750 ierr = PetscLLClean(bm,bm,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 751 current_space->array += cnzi; 752 current_space->local_used += cnzi; 753 current_space->local_remaining -= cnzi; 754 755 ci[i+1] = ci[i] + cnzi; 756 } 757 758 759 /* Column indices are in the list of free space. 760 Allocate array cj, copy column indices to cj, and destroy list of free space */ 761 ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 762 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 763 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 764 765 /* Allocate space for ca */ 766 ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 767 ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 768 769 /* put together the new symbolic matrix */ 770 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,am,bm,ci,cj,ca,C);CHKERRQ(ierr); 771 772 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 773 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 774 c = (Mat_SeqAIJ *)((*C)->data); 775 c->free_a = PETSC_TRUE; 776 c->free_ij = PETSC_TRUE; 777 c->nonew = 0; 778 779 /* set MatInfo */ 780 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 781 if (afill < 1.0) afill = 1.0; 782 c->maxnz = ci[am]; 783 c->nz = ci[am]; 784 (*C)->info.mallocs = nspacedouble; 785 (*C)->info.fill_ratio_given = fill; 786 (*C)->info.fill_ratio_needed = afill; 787 788 #if defined(PETSC_USE_INFO) 789 if (ci[am]) { 790 ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",nspacedouble,fill,afill);CHKERRQ(ierr); 791 ierr = PetscInfo1((*C),"Use MatMatTransposeMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);CHKERRQ(ierr); 792 } else { 793 ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr); 794 } 795 #endif 796 #endif 797 PetscFunctionReturn(0); 798 } 799 800 /* #define USE_ARRAY - for sparse dot product. Slower than !USE_ARRAY */ 801 #undef __FUNCT__ 802 #define __FUNCT__ "MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ" 803 PetscErrorCode MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 804 { 805 PetscErrorCode ierr; 806 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data; 807 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,anzi,bnzj,nexta,nextb,*acol,*bcol,brow; 808 PetscInt cm=C->rmap->n,*ci=c->i,*cj=c->j,i,j,cnzi,*ccol; 809 PetscLogDouble flops=0.0; 810 MatScalar *aa=a->a,*aval,*ba=b->a,*bval,*ca=c->a,*cval; 811 Mat_MatMatTransMult *multtrans; 812 PetscContainer container; 813 #if defined(USE_ARRAY) 814 MatScalar *spdot; 815 #endif 816 817 PetscFunctionBegin; 818 ierr = PetscObjectQuery((PetscObject)C,"Mat_MatMatTransMult",(PetscObject *)&container);CHKERRQ(ierr); 819 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 820 ierr = PetscContainerGetPointer(container,(void **)&multtrans);CHKERRQ(ierr); 821 if (multtrans->usecoloring){ 822 MatTransposeColoring matcoloring = multtrans->matcoloring; 823 Mat Bt_dense; 824 PetscInt m,n; 825 PetscLogDouble t0,tf,etime0=0.0,etime1=0.0,etime2=0.0; 826 Mat C_dense = multtrans->ABt_den; 827 828 Bt_dense = multtrans->Bt_den; 829 ierr = MatGetLocalSize(Bt_dense,&m,&n);CHKERRQ(ierr); 830 831 /* Get Bt_dense by Apply MatTransposeColoring to B */ 832 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 833 ierr = MatTransColoringApplySpToDen(matcoloring,B,Bt_dense);CHKERRQ(ierr); 834 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 835 etime0 += tf - t0; 836 837 /* C_dense = A*Bt_dense */ 838 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 839 ierr = MatMatMultNumeric_SeqAIJ_SeqDense(A,Bt_dense,C_dense);CHKERRQ(ierr); 840 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 841 etime2 += tf - t0; 842 843 /* Recover C from C_dense */ 844 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 845 ierr = MatTransColoringApplyDenToSp(matcoloring,C_dense,C);CHKERRQ(ierr); 846 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 847 etime1 += tf - t0; 848 #if defined(PETSC_USE_INFO) 849 ierr = PetscInfo4(C,"Num = ColoringApply: %g %g + Mult_sp_dense %g = %g\n",etime0,etime1,etime2,etime0+etime1+etime2); 850 #endif 851 PetscFunctionReturn(0); 852 } 853 854 #if defined(USE_ARRAY) 855 /* allocate an array for implementing sparse inner-product */ 856 ierr = PetscMalloc((A->cmap->n+1)*sizeof(MatScalar),&spdot);CHKERRQ(ierr); 857 ierr = PetscMemzero(spdot,(A->cmap->n+1)*sizeof(MatScalar));CHKERRQ(ierr); 858 #endif 859 860 /* clear old values in C */ 861 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 862 863 for (i=0; i<cm; i++) { 864 anzi = ai[i+1] - ai[i]; 865 acol = aj + ai[i]; 866 aval = aa + ai[i]; 867 cnzi = ci[i+1] - ci[i]; 868 ccol = cj + ci[i]; 869 cval = ca + ci[i]; 870 for (j=0; j<cnzi; j++){ 871 brow = ccol[j]; 872 bnzj = bi[brow+1] - bi[brow]; 873 bcol = bj + bi[brow]; 874 bval = ba + bi[brow]; 875 876 /* perform sparse inner-product c(i,j)=A[i,:]*B[j,:]^T */ 877 #if defined(USE_ARRAY) 878 /* put ba to spdot array */ 879 for (nextb=0; nextb<bnzj; nextb++) spdot[bcol[nextb]] = bval[nextb]; 880 /* c(i,j)=A[i,:]*B[j,:]^T */ 881 for (nexta=0; nexta<anzi; nexta++){ 882 cval[j] += spdot[acol[nexta]]*aval[nexta]; 883 } 884 /* zero spdot array */ 885 for (nextb=0; nextb<bnzj; nextb++) spdot[bcol[nextb]] = 0.0; 886 #else 887 nexta = 0; nextb = 0; 888 while (nexta<anzi && nextb<bnzj){ 889 while (acol[nexta] < bcol[nextb] && nexta < anzi) nexta++; 890 if (nexta == anzi) break; 891 while (acol[nexta] > bcol[nextb] && nextb < bnzj) nextb++; 892 if (nextb == bnzj) break; 893 if (acol[nexta] == bcol[nextb]){ 894 cval[j] += aval[nexta]*bval[nextb]; 895 nexta++; nextb++; 896 flops += 2; 897 } 898 } 899 #endif 900 } 901 } 902 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 903 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 904 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 905 #if defined(USE_ARRAY) 906 ierr = PetscFree(spdot); 907 #endif 908 PetscFunctionReturn(0); 909 } 910 911 #undef __FUNCT__ 912 #define __FUNCT__ "MatTransposeMatMult_SeqAIJ_SeqAIJ" 913 PetscErrorCode MatTransposeMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) { 914 PetscErrorCode ierr; 915 916 PetscFunctionBegin; 917 if (scall == MAT_INITIAL_MATRIX){ 918 ierr = MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 919 } 920 ierr = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 921 PetscFunctionReturn(0); 922 } 923 924 #undef __FUNCT__ 925 #define __FUNCT__ "MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ" 926 PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 927 { 928 PetscErrorCode ierr; 929 Mat At; 930 PetscInt *ati,*atj; 931 932 PetscFunctionBegin; 933 /* create symbolic At */ 934 ierr = MatGetSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr); 935 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,A->cmap->n,A->rmap->n,ati,atj,PETSC_NULL,&At);CHKERRQ(ierr); 936 937 /* get symbolic C=At*B */ 938 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(At,B,fill,C);CHKERRQ(ierr); 939 940 /* clean up */ 941 ierr = MatDestroy(&At);CHKERRQ(ierr); 942 ierr = MatRestoreSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr); 943 PetscFunctionReturn(0); 944 } 945 946 #undef __FUNCT__ 947 #define __FUNCT__ "MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ" 948 PetscErrorCode MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 949 { 950 PetscErrorCode ierr; 951 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data; 952 PetscInt am=A->rmap->n,anzi,*ai=a->i,*aj=a->j,*bi=b->i,*bj,bnzi,nextb; 953 PetscInt cm=C->rmap->n,*ci=c->i,*cj=c->j,crow,*cjj,i,j,k; 954 PetscLogDouble flops=0.0; 955 MatScalar *aa=a->a,*ba,*ca=c->a,*caj; 956 957 PetscFunctionBegin; 958 /* clear old values in C */ 959 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 960 961 /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */ 962 for (i=0;i<am;i++) { 963 bj = b->j + bi[i]; 964 ba = b->a + bi[i]; 965 bnzi = bi[i+1] - bi[i]; 966 anzi = ai[i+1] - ai[i]; 967 for (j=0; j<anzi; j++) { 968 nextb = 0; 969 crow = *aj++; 970 cjj = cj + ci[crow]; 971 caj = ca + ci[crow]; 972 /* perform sparse axpy operation. Note cjj includes bj. */ 973 for (k=0; nextb<bnzi; k++) { 974 if (cjj[k] == *(bj+nextb)) { /* ccol == bcol */ 975 caj[k] += (*aa)*(*(ba+nextb)); 976 nextb++; 977 } 978 } 979 flops += 2*bnzi; 980 aa++; 981 } 982 } 983 984 /* Assemble the final matrix and clean up */ 985 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 986 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 987 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 988 PetscFunctionReturn(0); 989 } 990 991 EXTERN_C_BEGIN 992 #undef __FUNCT__ 993 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqDense" 994 PetscErrorCode MatMatMult_SeqAIJ_SeqDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 995 { 996 PetscErrorCode ierr; 997 998 PetscFunctionBegin; 999 if (scall == MAT_INITIAL_MATRIX){ 1000 ierr = MatMatMultSymbolic_SeqAIJ_SeqDense(A,B,fill,C);CHKERRQ(ierr); 1001 } 1002 ierr = MatMatMultNumeric_SeqAIJ_SeqDense(A,B,*C);CHKERRQ(ierr); 1003 PetscFunctionReturn(0); 1004 } 1005 EXTERN_C_END 1006 1007 #undef __FUNCT__ 1008 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqDense" 1009 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqDense(Mat A,Mat B,PetscReal fill,Mat *C) 1010 { 1011 PetscErrorCode ierr; 1012 1013 PetscFunctionBegin; 1014 ierr = MatMatMultSymbolic_SeqDense_SeqDense(A,B,0.0,C);CHKERRQ(ierr); 1015 (*C)->ops->matmult = MatMatMult_SeqAIJ_SeqDense; 1016 PetscFunctionReturn(0); 1017 } 1018 1019 #undef __FUNCT__ 1020 #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqDense" 1021 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqDense(Mat A,Mat B,Mat C) 1022 { 1023 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1024 PetscErrorCode ierr; 1025 PetscScalar *b,*c,r1,r2,r3,r4,*b1,*b2,*b3,*b4; 1026 MatScalar *aa; 1027 PetscInt cm=C->rmap->n, cn=B->cmap->n, bm=B->rmap->n, col, i,j,n,*aj, am = A->rmap->n; 1028 PetscInt am2 = 2*am, am3 = 3*am, bm4 = 4*bm,colam; 1029 1030 PetscFunctionBegin; 1031 if (!cm || !cn) PetscFunctionReturn(0); 1032 if (bm != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in A %D not equal rows in B %D\n",A->cmap->n,bm); 1033 if (A->rmap->n != C->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows in C %D not equal rows in A %D\n",C->rmap->n,A->rmap->n); 1034 if (B->cmap->n != C->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in B %D not equal columns in C %D\n",B->cmap->n,C->cmap->n); 1035 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 1036 ierr = MatGetArray(C,&c);CHKERRQ(ierr); 1037 b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm; 1038 for (col=0; col<cn-4; col += 4){ /* over columns of C */ 1039 colam = col*am; 1040 for (i=0; i<am; i++) { /* over rows of C in those columns */ 1041 r1 = r2 = r3 = r4 = 0.0; 1042 n = a->i[i+1] - a->i[i]; 1043 aj = a->j + a->i[i]; 1044 aa = a->a + a->i[i]; 1045 for (j=0; j<n; j++) { 1046 r1 += (*aa)*b1[*aj]; 1047 r2 += (*aa)*b2[*aj]; 1048 r3 += (*aa)*b3[*aj]; 1049 r4 += (*aa++)*b4[*aj++]; 1050 } 1051 c[colam + i] = r1; 1052 c[colam + am + i] = r2; 1053 c[colam + am2 + i] = r3; 1054 c[colam + am3 + i] = r4; 1055 } 1056 b1 += bm4; 1057 b2 += bm4; 1058 b3 += bm4; 1059 b4 += bm4; 1060 } 1061 for (;col<cn; col++){ /* over extra columns of C */ 1062 for (i=0; i<am; i++) { /* over rows of C in those columns */ 1063 r1 = 0.0; 1064 n = a->i[i+1] - a->i[i]; 1065 aj = a->j + a->i[i]; 1066 aa = a->a + a->i[i]; 1067 1068 for (j=0; j<n; j++) { 1069 r1 += (*aa++)*b1[*aj++]; 1070 } 1071 c[col*am + i] = r1; 1072 } 1073 b1 += bm; 1074 } 1075 ierr = PetscLogFlops(cn*(2.0*a->nz));CHKERRQ(ierr); 1076 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 1077 ierr = MatRestoreArray(C,&c);CHKERRQ(ierr); 1078 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1079 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1080 PetscFunctionReturn(0); 1081 } 1082 1083 /* 1084 Note very similar to MatMult_SeqAIJ(), should generate both codes from same base 1085 */ 1086 #undef __FUNCT__ 1087 #define __FUNCT__ "MatMatMultNumericAdd_SeqAIJ_SeqDense" 1088 PetscErrorCode MatMatMultNumericAdd_SeqAIJ_SeqDense(Mat A,Mat B,Mat C) 1089 { 1090 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1091 PetscErrorCode ierr; 1092 PetscScalar *b,*c,r1,r2,r3,r4,*b1,*b2,*b3,*b4; 1093 MatScalar *aa; 1094 PetscInt cm=C->rmap->n, cn=B->cmap->n, bm=B->rmap->n, col, i,j,n,*aj, am = A->rmap->n,*ii,arm; 1095 PetscInt am2 = 2*am, am3 = 3*am, bm4 = 4*bm,colam,*ridx; 1096 1097 PetscFunctionBegin; 1098 if (!cm || !cn) PetscFunctionReturn(0); 1099 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 1100 ierr = MatGetArray(C,&c);CHKERRQ(ierr); 1101 b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm; 1102 1103 if (a->compressedrow.use){ /* use compressed row format */ 1104 for (col=0; col<cn-4; col += 4){ /* over columns of C */ 1105 colam = col*am; 1106 arm = a->compressedrow.nrows; 1107 ii = a->compressedrow.i; 1108 ridx = a->compressedrow.rindex; 1109 for (i=0; i<arm; i++) { /* over rows of C in those columns */ 1110 r1 = r2 = r3 = r4 = 0.0; 1111 n = ii[i+1] - ii[i]; 1112 aj = a->j + ii[i]; 1113 aa = a->a + ii[i]; 1114 for (j=0; j<n; j++) { 1115 r1 += (*aa)*b1[*aj]; 1116 r2 += (*aa)*b2[*aj]; 1117 r3 += (*aa)*b3[*aj]; 1118 r4 += (*aa++)*b4[*aj++]; 1119 } 1120 c[colam + ridx[i]] += r1; 1121 c[colam + am + ridx[i]] += r2; 1122 c[colam + am2 + ridx[i]] += r3; 1123 c[colam + am3 + ridx[i]] += r4; 1124 } 1125 b1 += bm4; 1126 b2 += bm4; 1127 b3 += bm4; 1128 b4 += bm4; 1129 } 1130 for (;col<cn; col++){ /* over extra columns of C */ 1131 colam = col*am; 1132 arm = a->compressedrow.nrows; 1133 ii = a->compressedrow.i; 1134 ridx = a->compressedrow.rindex; 1135 for (i=0; i<arm; i++) { /* over rows of C in those columns */ 1136 r1 = 0.0; 1137 n = ii[i+1] - ii[i]; 1138 aj = a->j + ii[i]; 1139 aa = a->a + ii[i]; 1140 1141 for (j=0; j<n; j++) { 1142 r1 += (*aa++)*b1[*aj++]; 1143 } 1144 c[col*am + ridx[i]] += r1; 1145 } 1146 b1 += bm; 1147 } 1148 } else { 1149 for (col=0; col<cn-4; col += 4){ /* over columns of C */ 1150 colam = col*am; 1151 for (i=0; i<am; i++) { /* over rows of C in those columns */ 1152 r1 = r2 = r3 = r4 = 0.0; 1153 n = a->i[i+1] - a->i[i]; 1154 aj = a->j + a->i[i]; 1155 aa = a->a + a->i[i]; 1156 for (j=0; j<n; j++) { 1157 r1 += (*aa)*b1[*aj]; 1158 r2 += (*aa)*b2[*aj]; 1159 r3 += (*aa)*b3[*aj]; 1160 r4 += (*aa++)*b4[*aj++]; 1161 } 1162 c[colam + i] += r1; 1163 c[colam + am + i] += r2; 1164 c[colam + am2 + i] += r3; 1165 c[colam + am3 + i] += r4; 1166 } 1167 b1 += bm4; 1168 b2 += bm4; 1169 b3 += bm4; 1170 b4 += bm4; 1171 } 1172 for (;col<cn; col++){ /* over extra columns of C */ 1173 for (i=0; i<am; i++) { /* over rows of C in those columns */ 1174 r1 = 0.0; 1175 n = a->i[i+1] - a->i[i]; 1176 aj = a->j + a->i[i]; 1177 aa = a->a + a->i[i]; 1178 1179 for (j=0; j<n; j++) { 1180 r1 += (*aa++)*b1[*aj++]; 1181 } 1182 c[col*am + i] += r1; 1183 } 1184 b1 += bm; 1185 } 1186 } 1187 ierr = PetscLogFlops(cn*2.0*a->nz);CHKERRQ(ierr); 1188 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 1189 ierr = MatRestoreArray(C,&c);CHKERRQ(ierr); 1190 PetscFunctionReturn(0); 1191 } 1192 1193 #undef __FUNCT__ 1194 #define __FUNCT__ "MatTransColoringApplySpToDen_SeqAIJ" 1195 PetscErrorCode MatTransColoringApplySpToDen_SeqAIJ(MatTransposeColoring coloring,Mat B,Mat Btdense) 1196 { 1197 PetscErrorCode ierr; 1198 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 1199 Mat_SeqDense *btdense = (Mat_SeqDense*)Btdense->data; 1200 PetscInt *bi=b->i,*bj=b->j; 1201 PetscInt m=Btdense->rmap->n,n=Btdense->cmap->n,j,k,l,col,anz,*btcol,brow,ncolumns; 1202 MatScalar *btval,*btval_den,*ba=b->a; 1203 PetscInt *columns=coloring->columns,*colorforcol=coloring->colorforcol,ncolors=coloring->ncolors; 1204 1205 PetscFunctionBegin; 1206 btval_den=btdense->v; 1207 ierr = PetscMemzero(btval_den,(m*n)*sizeof(MatScalar));CHKERRQ(ierr); 1208 for (k=0; k<ncolors; k++) { 1209 ncolumns = coloring->ncolumns[k]; 1210 for (l=0; l<ncolumns; l++) { /* insert a row of B to a column of Btdense */ 1211 col = *(columns + colorforcol[k] + l); 1212 btcol = bj + bi[col]; 1213 btval = ba + bi[col]; 1214 anz = bi[col+1] - bi[col]; 1215 for (j=0; j<anz; j++){ 1216 brow = btcol[j]; 1217 btval_den[brow] = btval[j]; 1218 } 1219 } 1220 btval_den += m; 1221 } 1222 PetscFunctionReturn(0); 1223 } 1224 1225 #undef __FUNCT__ 1226 #define __FUNCT__ "MatTransColoringApplyDenToSp_SeqAIJ" 1227 PetscErrorCode MatTransColoringApplyDenToSp_SeqAIJ(MatTransposeColoring matcoloring,Mat Cden,Mat Csp) 1228 { 1229 PetscErrorCode ierr; 1230 Mat_SeqAIJ *csp = (Mat_SeqAIJ*)Csp->data; 1231 PetscInt k,l,*row,*idx,m,ncolors=matcoloring->ncolors,nrows; 1232 PetscScalar *ca_den,*cp_den,*ca=csp->a; 1233 PetscInt *rows=matcoloring->rows,*spidx=matcoloring->columnsforspidx,*colorforrow=matcoloring->colorforrow; 1234 1235 PetscFunctionBegin; 1236 ierr = MatGetLocalSize(Csp,&m,PETSC_NULL);CHKERRQ(ierr); 1237 ierr = MatGetArray(Cden,&ca_den);CHKERRQ(ierr); 1238 cp_den = ca_den; 1239 for (k=0; k<ncolors; k++) { 1240 nrows = matcoloring->nrows[k]; 1241 row = rows + colorforrow[k]; 1242 idx = spidx + colorforrow[k]; 1243 for (l=0; l<nrows; l++){ 1244 ca[idx[l]] = cp_den[row[l]]; 1245 } 1246 cp_den += m; 1247 } 1248 ierr = MatRestoreArray(Cden,&ca_den);CHKERRQ(ierr); 1249 PetscFunctionReturn(0); 1250 } 1251 1252 /* 1253 MatGetColumnIJ_SeqAIJ_Color() and MatRestoreColumnIJ_SeqAIJ_Color() are customized from 1254 MatGetColumnIJ_SeqAIJ() and MatRestoreColumnIJ_SeqAIJ() by adding an output 1255 spidx[], index of a->j, to be used for setting 'columnsforspidx' in MatTransposeColoringCreate_SeqAIJ(). 1256 */ 1257 #undef __FUNCT__ 1258 #define __FUNCT__ "MatGetColumnIJ_SeqAIJ_Color" 1259 PetscErrorCode MatGetColumnIJ_SeqAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 1260 { 1261 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1262 PetscErrorCode ierr; 1263 PetscInt i,*collengths,*cia,*cja,n = A->cmap->n,m = A->rmap->n; 1264 PetscInt nz = a->i[m],row,*jj,mr,col; 1265 PetscInt *cspidx; 1266 1267 PetscFunctionBegin; 1268 *nn = n; 1269 if (!ia) PetscFunctionReturn(0); 1270 if (symmetric) { 1271 SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"MatGetColumnIJ_SeqAIJ_Color() not supported for the case symmetric"); 1272 ierr = MatToSymmetricIJ_SeqAIJ(A->rmap->n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 1273 } else { 1274 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 1275 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 1276 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 1277 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 1278 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cspidx);CHKERRQ(ierr); 1279 jj = a->j; 1280 for (i=0; i<nz; i++) { 1281 collengths[jj[i]]++; 1282 } 1283 cia[0] = oshift; 1284 for (i=0; i<n; i++) { 1285 cia[i+1] = cia[i] + collengths[i]; 1286 } 1287 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 1288 jj = a->j; 1289 for (row=0; row<m; row++) { 1290 mr = a->i[row+1] - a->i[row]; 1291 for (i=0; i<mr; i++) { 1292 col = *jj++; 1293 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 1294 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 1295 } 1296 } 1297 ierr = PetscFree(collengths);CHKERRQ(ierr); 1298 *ia = cia; *ja = cja; 1299 *spidx = cspidx; 1300 } 1301 PetscFunctionReturn(0); 1302 } 1303 1304 #undef __FUNCT__ 1305 #define __FUNCT__ "MatRestoreColumnIJ_SeqAIJ_Color" 1306 PetscErrorCode MatRestoreColumnIJ_SeqAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 1307 { 1308 PetscErrorCode ierr; 1309 1310 PetscFunctionBegin; 1311 if (!ia) PetscFunctionReturn(0); 1312 1313 ierr = PetscFree(*ia);CHKERRQ(ierr); 1314 ierr = PetscFree(*ja);CHKERRQ(ierr); 1315 ierr = PetscFree(*spidx);CHKERRQ(ierr); 1316 PetscFunctionReturn(0); 1317 } 1318 1319 #undef __FUNCT__ 1320 #define __FUNCT__ "MatTransposeColoringCreate_SeqAIJ" 1321 PetscErrorCode MatTransposeColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatTransposeColoring c) 1322 { 1323 PetscErrorCode ierr; 1324 PetscInt i,n,nrows,N,j,k,m,*row_idx,*ci,*cj,ncols,col,cm; 1325 const PetscInt *is; 1326 PetscInt nis = iscoloring->n,*rowhit,bs = 1; 1327 IS *isa; 1328 PetscBool done; 1329 PetscBool flg1,flg2; 1330 Mat_SeqAIJ *csp = (Mat_SeqAIJ*)mat->data; 1331 PetscInt *colorforrow,*rows,*rows_i,*columnsforspidx,*columnsforspidx_i,*idxhit,*spidx; 1332 PetscInt *colorforcol,*columns,*columns_i; 1333 1334 PetscFunctionBegin; 1335 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 1336 1337 /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1 */ 1338 ierr = PetscTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr); 1339 ierr = PetscTypeCompare((PetscObject)mat,MATMPIBAIJ,&flg2);CHKERRQ(ierr); 1340 if (flg1 || flg2) { 1341 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 1342 } 1343 1344 N = mat->cmap->N/bs; 1345 c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 1346 c->N = mat->cmap->N/bs; 1347 c->m = mat->rmap->N/bs; 1348 c->rstart = 0; 1349 1350 c->ncolors = nis; 1351 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 1352 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 1353 ierr = PetscMalloc2(csp->nz+1,PetscInt,&rows,csp->nz+1,PetscInt,&columnsforspidx);CHKERRQ(ierr); 1354 ierr = PetscMalloc((nis+1)*sizeof(PetscInt),&colorforrow);CHKERRQ(ierr); 1355 colorforrow[0] = 0; 1356 rows_i = rows; 1357 columnsforspidx_i = columnsforspidx; 1358 1359 ierr = PetscMalloc((nis+1)*sizeof(PetscInt),&colorforcol);CHKERRQ(ierr); 1360 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columns);CHKERRQ(ierr); 1361 colorforcol[0] = 0; 1362 columns_i = columns; 1363 1364 ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,&done);CHKERRQ(ierr); /* column-wise storage of mat */ 1365 if (!done) SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name); 1366 1367 cm = c->m; 1368 ierr = PetscMalloc((cm+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 1369 ierr = PetscMalloc((cm+1)*sizeof(PetscInt),&idxhit);CHKERRQ(ierr); 1370 for (i=0; i<nis; i++) { 1371 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 1372 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 1373 c->ncolumns[i] = n; 1374 if (n) { 1375 ierr = PetscMemcpy(columns_i,is,n*sizeof(PetscInt));CHKERRQ(ierr); 1376 } 1377 colorforcol[i+1] = colorforcol[i] + n; 1378 columns_i += n; 1379 1380 /* fast, crude version requires O(N*N) work */ 1381 ierr = PetscMemzero(rowhit,cm*sizeof(PetscInt));CHKERRQ(ierr); 1382 1383 /* loop over columns*/ 1384 for (j=0; j<n; j++) { 1385 col = is[j]; 1386 row_idx = cj + ci[col]; 1387 m = ci[col+1] - ci[col]; 1388 /* loop over columns marking them in rowhit */ 1389 for (k=0; k<m; k++) { 1390 idxhit[*row_idx] = spidx[ci[col] + k]; 1391 rowhit[*row_idx++] = col + 1; 1392 } 1393 } 1394 /* count the number of hits */ 1395 nrows = 0; 1396 for (j=0; j<cm; j++) { 1397 if (rowhit[j]) nrows++; 1398 } 1399 c->nrows[i] = nrows; 1400 colorforrow[i+1] = colorforrow[i] + nrows; 1401 1402 nrows = 0; 1403 for (j=0; j<cm; j++) { 1404 if (rowhit[j]) { 1405 rows_i[nrows] = j; 1406 columnsforspidx_i[nrows] = idxhit[j]; 1407 nrows++; 1408 } 1409 } 1410 ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 1411 rows_i += nrows; columnsforspidx_i += nrows; 1412 } 1413 ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,&done);CHKERRQ(ierr); 1414 ierr = PetscFree(rowhit);CHKERRQ(ierr); 1415 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 1416 #if defined(PETSC_USE_DEBUG) 1417 if (csp->nz != colorforrow[nis]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"csp->nz %d != colorforrow[nis] %d",csp->nz,colorforrow[nis]); 1418 #endif 1419 1420 c->colorforrow = colorforrow; 1421 c->rows = rows; 1422 c->columnsforspidx = columnsforspidx; 1423 c->colorforcol = colorforcol; 1424 c->columns = columns; 1425 ierr = PetscFree(idxhit);CHKERRQ(ierr); 1426 PetscFunctionReturn(0); 1427 } 1428