#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
#include <petscblaslapack.h>

/* SeqBAIJ kernels and MPIBAIJ helpers defined in other files of the BAIJ implementation */
extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
extern PetscErrorCode DisAssemble_MPIBAIJ(Mat);
extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
/*
   For each locally owned row, compute the entry of largest absolute value and,
   if idx is non-NULL, its global column index.  The diagonal block a->A and the
   off-diagonal block a->B are searched separately and the two results merged.
*/
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  /* search the diagonal block; idx[] comes back in the block's local column numbering */
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    /* shift diagonal-block column numbers to global numbering (only where a nonzero was found) */
    for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
  }

  /* repeat for the off-diagonal block into a scratch vector/index array */
  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++){
    /* keep whichever block held the larger magnitude; garray[] maps the off-diagonal
       local block column back to the global block column */
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);    /* PetscFree(0) is a no-op when idx was NULL */
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
/* Save a copy of the numerical values of both local blocks (see MatStoreValues()) */
PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
/* Restore the numerical values of both local blocks saved by MatStoreValues_MPIBAIJ() */
PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  /* table keyed on (global block column + 1); value is (local point column + 1).
     The +1 offsets let zero mean "not present". */
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  /* dense array over all global block columns; entry 0 means "not present" */
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}

/*
   Insert one point value into the diagonal block.  Expands in-place and relies on
   variables declared by the caller (rp,ap,aa,ai,aj,aimax,ailen,bs,bs2,a,...); see
   MatSetValues_MPIBAIJ().  A binary search narrows the row to <= 3 candidate block
   columns, a linear scan finds (or creates, if allowed) the block, then the point
   entry within the bs x bs block is updated (column-major: bs*cidx + ridx).
*/
#define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
a_noinsert:; \
    ailen[brow] = nrow; \
}

/*
   Same as MatSetValues_SeqBAIJ_A_Private() but for the off-diagonal block;
   uses the caller's b,bimax,bi,bilen,bj,ba variables.
*/
#define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    CHKMEMQ;\
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
b_noinsert:; \
    bilen[brow] = nrow; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
/*
   Point-wise MatSetValues() for MPIBAIJ.  Values in locally owned rows are
   inserted directly into the diagonal/off-diagonal SeqBAIJ blocks via the
   macros above; values for off-process rows are stashed and communicated at
   assembly time.
*/
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar   value;
  PetscBool   roworiented =
baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba=b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;   /* negative row indices are silently skipped */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {   /* locally owned row */
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){   /* entry lands in the diagonal block */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {   /* entry lands in the off-diagonal block */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;   /* colmap stores local point column + 1; < 0 means "not present" */
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* a brand-new off-diagonal block column: disassemble and use global indexing */
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private()
                 since DisAssemble_MPIBAIJ() replaced baij->B */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;   /* add the point offset within the block */
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {   /* off-process row: stash for communication unless forbidden */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
/*
   Block-wise MatSetValues() for MPIBAIJ: im[]/in[] are BLOCK indices and v holds
   m*n blocks of size bs x bs.  Local blocks go straight into the SeqBAIJ parts,
   off-process blocks are stashed in the block stash.
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray=baij->barray;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if(!barray) {
    /* scratch bs x bs block used when the input layout must be rearranged; cached on the matrix */
    ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* distance (in scalars) between consecutive rows of one block inside v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {   /* locally owned block row */
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          /* gather the scattered bs x bs block into the contiguous scratch buffer */
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) {
              barray[jj] = value[jj];
            }
            barray += bs;
          }
          barray -= bs2;   /* rewind to the start of the scratch block */
        }

        if (in[j] >= cstart && in[j] < cend){   /* diagonal block */
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {   /* off-diagonal block */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            /* NOTE(review): here the colmap is indexed with the BLOCK column in[j] directly,
               whereas the point-wise path uses in[j]/bs — relies on block indexing; confirm */
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;   /* convert stored point column back to a local block column */
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* new off-diagonal block column: disassemble and use the global block column */
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {   /* off-process block row */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
/* golden-ratio multiplicative hash: maps key to an index in [0,size) */
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
/*
   Hash-table variant of MatSetValues_MPIBAIJ(): installed after the first final
   assembly (see MatAssemblyEnd_MPIBAIJ).  Only already-existing nonzero blocks
   may be addressed; their storage addresses are looked up in baij->ht/baij->hd,
   so no insertion/reallocation ever happens here.
*/
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
%D",im[i],mat->rmap->N-1); 404 #endif 405 row = im[i]; 406 if (row >= rstart_orig && row < rend_orig) { 407 for (j=0; j<n; j++) { 408 col = in[j]; 409 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 410 /* Look up PetscInto the Hash Table */ 411 key = (row/bs)*Nbs+(col/bs)+1; 412 h1 = HASH(size,key,tmp); 413 414 415 idx = h1; 416 #if defined(PETSC_USE_DEBUG) 417 insert_ct++; 418 total_ct++; 419 if (HT[idx] != key) { 420 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 421 if (idx == size) { 422 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 423 if (idx == h1) { 424 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 425 } 426 } 427 } 428 #else 429 if (HT[idx] != key) { 430 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 431 if (idx == size) { 432 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 433 if (idx == h1) { 434 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 435 } 436 } 437 } 438 #endif 439 /* A HASH table entry is found, so insert the values at the correct address */ 440 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 441 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 442 } 443 } else { 444 if (!baij->donotstash) { 445 if (roworiented) { 446 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 447 } else { 448 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 449 } 450 } 451 } 452 } 453 #if defined(PETSC_USE_DEBUG) 454 baij->ht_total_ct = total_ct; 455 baij->ht_insert_ct = insert_ct; 456 #endif 457 PetscFunctionReturn(0); 458 } 459 460 #undef __FUNCT__ 461 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 462 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 463 { 464 Mat_MPIBAIJ *baij = 
(Mat_MPIBAIJ*)mat->data; 465 PetscBool roworiented = baij->roworiented; 466 PetscErrorCode ierr; 467 PetscInt i,j,ii,jj,row,col; 468 PetscInt rstart=baij->rstartbs; 469 PetscInt rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 470 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 471 PetscReal tmp; 472 MatScalar **HD = baij->hd,*baij_a; 473 const PetscScalar *v_t,*value; 474 #if defined(PETSC_USE_DEBUG) 475 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 476 #endif 477 478 PetscFunctionBegin; 479 480 if (roworiented) { 481 stepval = (n-1)*bs; 482 } else { 483 stepval = (m-1)*bs; 484 } 485 for (i=0; i<m; i++) { 486 #if defined(PETSC_USE_DEBUG) 487 if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 488 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 489 #endif 490 row = im[i]; 491 v_t = v + i*nbs2; 492 if (row >= rstart && row < rend) { 493 for (j=0; j<n; j++) { 494 col = in[j]; 495 496 /* Look up into the Hash Table */ 497 key = row*Nbs+col+1; 498 h1 = HASH(size,key,tmp); 499 500 idx = h1; 501 #if defined(PETSC_USE_DEBUG) 502 total_ct++; 503 insert_ct++; 504 if (HT[idx] != key) { 505 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 506 if (idx == size) { 507 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 508 if (idx == h1) { 509 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 510 } 511 } 512 } 513 #else 514 if (HT[idx] != key) { 515 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 516 if (idx == size) { 517 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 518 if (idx == h1) { 519 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 520 } 521 } 522 } 523 #endif 524 baij_a = HD[idx]; 525 if (roworiented) { 526 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 527 /* value = 
          value = v_t;
          v_t  += bs;   /* advance to this row's next block in the row-oriented layout */
          if (addv == ADD_VALUES) {
            /* copy/transpose the row-oriented input block into the column-major stored block */
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {   /* column-oriented input: rows of the stored block are contiguous */
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {   /* off-process block row: stash */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
/*
   Retrieve individual point entries.  Only locally owned rows are supported;
   entries in columns not present in the off-diagonal block are returned as 0.0.
*/
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){   /* diagonal block */
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {   /* off-diagonal block: translate global column through colmap */
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data --;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* column not present in the off-diagonal block => the entry is zero */
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
/*
   Computes the Frobenius, 1- (max column sum) or infinity- (max row sum) norm
   of the parallel matrix by combining partial sums from the local blocks with
   an MPI reduction.
*/
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {   /* sequential case: defer to the diagonal block */
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over both local blocks, then reduce and take the root */
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum +=
PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      /* NOTE(review): cstart is taken from rstartbs (row block start), not cstartbs;
         this assumes row and column ownership ranges coincide — confirm for
         non-square layouts */
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      /* accumulate |entry| into the global column sums, diagonal block first */
      v = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      /* off-diagonal block: garray[] gives the global block column */
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
      sum  = 0.0;
      /* one pass per block row: accumulate the bs point-row sums over both blocks */
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++){
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

/*
  Creates the hash table, and sets the table
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt       ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);   /* already built */

  /* table is oversized by 'factor' relative to the local nonzero block count */
  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
  ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A: insert each block with key = global block row * Nbs + global block col + 1,
     resolving collisions by linear probing; HD[] records the block's storage address */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++){
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;   /* count collisions for the summary statistics */
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B: same insertion, but the global block column comes from garray[] */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++){
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) {j++;}
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
/*
   Begin parallel assembly: all ranks agree on the insert mode and start
   communicating the stashed off-process point and block values.
*/
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
/*
   Finish parallel assembly: drain the point and block stashes into the local
   blocks, assemble A and B, handle cross-process disassembly, and optionally
   switch to the hash-table MatSetValues kernels.
*/
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash.
       Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* first final assembly with hashing requested: build the table and switch
     the MatSetValues kernels to the hash-table variants */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr =
PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 963 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 964 if (iascii) { 965 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 966 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 967 MatInfo info; 968 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 969 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 970 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 971 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 972 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 973 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 974 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 975 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 976 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 977 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 978 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 979 PetscFunctionReturn(0); 980 } else if (format == PETSC_VIEWER_ASCII_INFO) { 981 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 982 PetscFunctionReturn(0); 983 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 984 PetscFunctionReturn(0); 985 } 986 } 987 988 if (isdraw) { 989 PetscDraw draw; 990 PetscBool isnull; 991 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 992 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 993 } 994 995 if (size == 1) { 996 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 997 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 998 } else { 999 
/* assemble the entire matrix onto first processor. */ 1000 Mat A; 1001 Mat_SeqBAIJ *Aloc; 1002 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1003 MatScalar *a; 1004 1005 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1006 /* Perhaps this should be the type of mat? */ 1007 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1008 if (!rank) { 1009 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1010 } else { 1011 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1012 } 1013 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1014 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1015 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1016 1017 /* copy over the A part */ 1018 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1019 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1020 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1021 1022 for (i=0; i<mbs; i++) { 1023 rvals[0] = bs*(baij->rstartbs + i); 1024 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1025 for (j=ai[i]; j<ai[i+1]; j++) { 1026 col = (baij->cstartbs+aj[j])*bs; 1027 for (k=0; k<bs; k++) { 1028 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1029 col++; a += bs; 1030 } 1031 } 1032 } 1033 /* copy over the B part */ 1034 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1035 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1036 for (i=0; i<mbs; i++) { 1037 rvals[0] = bs*(baij->rstartbs + i); 1038 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1039 for (j=ai[i]; j<ai[i+1]; j++) { 1040 col = baij->garray[aj[j]]*bs; 1041 for (k=0; k<bs; k++) { 1042 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1043 col++; a += bs; 1044 } 1045 } 1046 } 1047 ierr = PetscFree(rvals);CHKERRQ(ierr); 1048 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1049 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1050 /* 1051 Everyone has to call to draw the 
matrix since the graphics waits are 1052 synchronized across all processors that share the PetscDraw object 1053 */ 1054 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1055 if (!rank) { 1056 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1057 /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/ 1058 PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ); 1059 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1060 } 1061 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1062 ierr = MatDestroy(A);CHKERRQ(ierr); 1063 } 1064 PetscFunctionReturn(0); 1065 } 1066 1067 #undef __FUNCT__ 1068 #define __FUNCT__ "MatView_MPIBAIJ_Binary" 1069 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 1070 { 1071 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)mat->data; 1072 Mat_SeqBAIJ* A = (Mat_SeqBAIJ*)a->A->data; 1073 Mat_SeqBAIJ* B = (Mat_SeqBAIJ*)a->B->data; 1074 PetscErrorCode ierr; 1075 PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen; 1076 PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll; 1077 int fd; 1078 PetscScalar *column_values; 1079 FILE *file; 1080 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1081 PetscInt message_count,flowcontrolcount; 1082 1083 PetscFunctionBegin; 1084 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1085 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1086 nz = bs2*(A->nz + B->nz); 1087 rlen = mat->rmap->n; 1088 if (!rank) { 1089 header[0] = MAT_FILE_CLASSID; 1090 header[1] = mat->rmap->N; 1091 header[2] = mat->cmap->N; 1092 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1093 ierr = 
PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1094 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1095 /* get largest number of rows any processor has */ 1096 range = mat->rmap->range; 1097 for (i=1; i<size; i++) { 1098 rlen = PetscMax(rlen,range[i+1] - range[i]); 1099 } 1100 } else { 1101 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1102 } 1103 1104 ierr = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr); 1105 /* compute lengths of each row */ 1106 for (i=0; i<a->mbs; i++) { 1107 crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1108 } 1109 /* store the row lengths to the file */ 1110 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1111 if (!rank) { 1112 MPI_Status status; 1113 ierr = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr); 1114 rlen = (range[1] - range[0])/bs; 1115 for (i=0; i<rlen; i++) { 1116 for (j=0; j<bs; j++) { 1117 row_lens[i*bs+j] = bs*crow_lens[i]; 1118 } 1119 } 1120 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1121 for (i=1; i<size; i++) { 1122 rlen = (range[i+1] - range[i])/bs; 1123 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1124 ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1125 for (k=0; k<rlen; k++) { 1126 for (j=0; j<bs; j++) { 1127 row_lens[k*bs+j] = bs*crow_lens[k]; 1128 } 1129 } 1130 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1131 } 1132 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1133 ierr = PetscFree(row_lens);CHKERRQ(ierr); 1134 } else { 1135 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1136 ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1137 ierr = 
PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1138 } 1139 ierr = PetscFree(crow_lens);CHKERRQ(ierr); 1140 1141 /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the 1142 information needed to make it for each row from a block row. This does require more communication but still not more than 1143 the communication needed for the nonzero values */ 1144 nzmax = nz; /* space a largest processor needs */ 1145 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1146 ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1147 cnt = 0; 1148 for (i=0; i<a->mbs; i++) { 1149 pcnt = cnt; 1150 for (j=B->i[i]; j<B->i[i+1]; j++) { 1151 if ( (col = garray[B->j[j]]) > cstart) break; 1152 for (l=0; l<bs; l++) { 1153 column_indices[cnt++] = bs*col+l; 1154 } 1155 } 1156 for (k=A->i[i]; k<A->i[i+1]; k++) { 1157 for (l=0; l<bs; l++) { 1158 column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 1159 } 1160 } 1161 for (; j<B->i[i+1]; j++) { 1162 for (l=0; l<bs; l++) { 1163 column_indices[cnt++] = bs*garray[B->j[j]]+l; 1164 } 1165 } 1166 len = cnt - pcnt; 1167 for (k=1; k<bs; k++) { 1168 ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr); 1169 cnt += len; 1170 } 1171 } 1172 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1173 1174 /* store the columns to the file */ 1175 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1176 if (!rank) { 1177 MPI_Status status; 1178 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1179 for (i=1; i<size; i++) { 1180 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1181 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1182 ierr = 
MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1183 ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1184 } 1185 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1186 } else { 1187 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1188 ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1189 ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1190 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1191 } 1192 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1193 1194 /* load up the numerical values */ 1195 ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1196 cnt = 0; 1197 for (i=0; i<a->mbs; i++) { 1198 rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1199 for (j=B->i[i]; j<B->i[i+1]; j++) { 1200 if ( garray[B->j[j]] > cstart) break; 1201 for (l=0; l<bs; l++) { 1202 for (ll=0; ll<bs; ll++) { 1203 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1204 } 1205 } 1206 cnt += bs; 1207 } 1208 for (k=A->i[i]; k<A->i[i+1]; k++) { 1209 for (l=0; l<bs; l++) { 1210 for (ll=0; ll<bs; ll++) { 1211 column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1212 } 1213 } 1214 cnt += bs; 1215 } 1216 for (; j<B->i[i+1]; j++) { 1217 for (l=0; l<bs; l++) { 1218 for (ll=0; ll<bs; ll++) { 1219 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1220 } 1221 } 1222 cnt += bs; 1223 } 1224 cnt += (bs-1)*rlen; 1225 } 1226 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1227 1228 /* store the column values to the file */ 1229 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1230 if (!rank) { 1231 MPI_Status status; 1232 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1233 for (i=1; i<size; i++) 
{ 1234 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1235 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1236 ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1237 ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1238 } 1239 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1240 } else { 1241 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1242 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1243 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1244 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1245 } 1246 ierr = PetscFree(column_values);CHKERRQ(ierr); 1247 1248 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1249 if (file) { 1250 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1251 } 1252 PetscFunctionReturn(0); 1253 } 1254 1255 #undef __FUNCT__ 1256 #define __FUNCT__ "MatView_MPIBAIJ" 1257 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1258 { 1259 PetscErrorCode ierr; 1260 PetscBool iascii,isdraw,issocket,isbinary; 1261 1262 PetscFunctionBegin; 1263 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1264 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1265 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1266 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1267 if (iascii || isdraw || issocket) { 1268 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1269 } else if (isbinary) { 1270 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1271 } else { 1272 SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type 
%s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1273 } 1274 PetscFunctionReturn(0); 1275 } 1276 1277 #undef __FUNCT__ 1278 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1279 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1280 { 1281 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1282 PetscErrorCode ierr; 1283 1284 PetscFunctionBegin; 1285 #if defined(PETSC_USE_LOG) 1286 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1287 #endif 1288 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1289 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1290 if (baij->A){ierr = MatDestroy(baij->A);CHKERRQ(ierr);} 1291 if (baij->B){ierr = MatDestroy(baij->B);CHKERRQ(ierr);} 1292 #if defined (PETSC_USE_CTABLE) 1293 if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);} 1294 #else 1295 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1296 #endif 1297 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1298 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1299 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1300 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1301 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1302 ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr); 1303 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1304 ierr = PetscFree(baij);CHKERRQ(ierr); 1305 1306 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1307 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1308 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1309 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1310 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1311 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1312 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1313 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr); 1314 PetscFunctionReturn(0); 1315 } 1316 1317 #undef __FUNCT__ 1318 #define __FUNCT__ "MatMult_MPIBAIJ" 1319 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1320 { 1321 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1322 PetscErrorCode ierr; 1323 PetscInt nt; 1324 1325 PetscFunctionBegin; 1326 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1327 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1328 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1329 if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1330 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1331 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1332 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1333 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1334 PetscFunctionReturn(0); 1335 } 1336 1337 #undef __FUNCT__ 1338 #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1339 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1340 { 1341 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1342 PetscErrorCode ierr; 1343 1344 PetscFunctionBegin; 1345 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1346 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1347 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1348 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1349 PetscFunctionReturn(0); 1350 } 1351 1352 #undef __FUNCT__ 1353 #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1354 PetscErrorCode 
MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1355 { 1356 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1357 PetscErrorCode ierr; 1358 PetscBool merged; 1359 1360 PetscFunctionBegin; 1361 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1362 /* do nondiagonal part */ 1363 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1364 if (!merged) { 1365 /* send it on its way */ 1366 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1367 /* do local part */ 1368 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1369 /* receive remote parts: note this assumes the values are not actually */ 1370 /* inserted in yy until the next line */ 1371 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1372 } else { 1373 /* do local part */ 1374 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1375 /* send it on its way */ 1376 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1377 /* values actually were received in the Begin() but we need to call this nop */ 1378 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1379 } 1380 PetscFunctionReturn(0); 1381 } 1382 1383 #undef __FUNCT__ 1384 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1385 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1386 { 1387 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1388 PetscErrorCode ierr; 1389 1390 PetscFunctionBegin; 1391 /* do nondiagonal part */ 1392 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1393 /* send it on its way */ 1394 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1395 /* do local part */ 1396 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1397 /* receive remote parts: note this assumes the values are not actually */ 1398 /* inserted in yy until the next line, which is true for my implementation*/ 1399 /* but is 
not perhaps always true. */ 1400 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1401 PetscFunctionReturn(0); 1402 } 1403 1404 /* 1405 This only works correctly for square matrices where the subblock A->A is the 1406 diagonal block 1407 */ 1408 #undef __FUNCT__ 1409 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1410 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1411 { 1412 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1413 PetscErrorCode ierr; 1414 1415 PetscFunctionBegin; 1416 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1417 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1418 PetscFunctionReturn(0); 1419 } 1420 1421 #undef __FUNCT__ 1422 #define __FUNCT__ "MatScale_MPIBAIJ" 1423 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1424 { 1425 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1426 PetscErrorCode ierr; 1427 1428 PetscFunctionBegin; 1429 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1430 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1431 PetscFunctionReturn(0); 1432 } 1433 1434 #undef __FUNCT__ 1435 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1436 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1437 { 1438 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1439 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1440 PetscErrorCode ierr; 1441 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1442 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1443 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1444 1445 PetscFunctionBegin; 1446 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1447 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1448 mat->getrowactive = PETSC_TRUE; 1449 1450 if (!mat->rowvalues && (idx || v)) { 1451 /* 1452 allocate enough space to hold information from the 
longest row. 1453 */ 1454 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1455 PetscInt max = 1,mbs = mat->mbs,tmp; 1456 for (i=0; i<mbs; i++) { 1457 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1458 if (max < tmp) { max = tmp; } 1459 } 1460 ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1461 } 1462 lrow = row - brstart; 1463 1464 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1465 if (!v) {pvA = 0; pvB = 0;} 1466 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1467 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1468 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1469 nztot = nzA + nzB; 1470 1471 cmap = mat->garray; 1472 if (v || idx) { 1473 if (nztot) { 1474 /* Sort by increasing column numbers, assuming A and B already sorted */ 1475 PetscInt imark = -1; 1476 if (v) { 1477 *v = v_p = mat->rowvalues; 1478 for (i=0; i<nzB; i++) { 1479 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1480 else break; 1481 } 1482 imark = i; 1483 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1484 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1485 } 1486 if (idx) { 1487 *idx = idx_p = mat->rowindices; 1488 if (imark > -1) { 1489 for (i=0; i<imark; i++) { 1490 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1491 } 1492 } else { 1493 for (i=0; i<nzB; i++) { 1494 if (cmap[cworkB[i]/bs] < cstart) 1495 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1496 else break; 1497 } 1498 imark = i; 1499 } 1500 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1501 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1502 } 1503 } else { 1504 if (idx) *idx = 0; 1505 if (v) *v = 0; 1506 } 1507 } 1508 *nz = nztot; 1509 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1510 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1511 PetscFunctionReturn(0); 1512 } 
1513 1514 #undef __FUNCT__ 1515 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1516 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1517 { 1518 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1519 1520 PetscFunctionBegin; 1521 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1522 baij->getrowactive = PETSC_FALSE; 1523 PetscFunctionReturn(0); 1524 } 1525 1526 #undef __FUNCT__ 1527 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1528 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1529 { 1530 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1531 PetscErrorCode ierr; 1532 1533 PetscFunctionBegin; 1534 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1535 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1536 PetscFunctionReturn(0); 1537 } 1538 1539 #undef __FUNCT__ 1540 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1541 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1542 { 1543 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1544 Mat A = a->A,B = a->B; 1545 PetscErrorCode ierr; 1546 PetscReal isend[5],irecv[5]; 1547 1548 PetscFunctionBegin; 1549 info->block_size = (PetscReal)matin->rmap->bs; 1550 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1551 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1552 isend[3] = info->memory; isend[4] = info->mallocs; 1553 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1554 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1555 isend[3] += info->memory; isend[4] += info->mallocs; 1556 if (flag == MAT_LOCAL) { 1557 info->nz_used = isend[0]; 1558 info->nz_allocated = isend[1]; 1559 info->nz_unneeded = isend[2]; 1560 info->memory = isend[3]; 1561 info->mallocs = isend[4]; 1562 } else if (flag == MAT_GLOBAL_MAX) { 1563 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1564 info->nz_used = irecv[0]; 1565 info->nz_allocated = 
irecv[1]; 1566 info->nz_unneeded = irecv[2]; 1567 info->memory = irecv[3]; 1568 info->mallocs = irecv[4]; 1569 } else if (flag == MAT_GLOBAL_SUM) { 1570 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1571 info->nz_used = irecv[0]; 1572 info->nz_allocated = irecv[1]; 1573 info->nz_unneeded = irecv[2]; 1574 info->memory = irecv[3]; 1575 info->mallocs = irecv[4]; 1576 } else { 1577 SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1578 } 1579 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1580 info->fill_ratio_needed = 0; 1581 info->factor_mallocs = 0; 1582 PetscFunctionReturn(0); 1583 } 1584 1585 #undef __FUNCT__ 1586 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1587 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1588 { 1589 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1590 PetscErrorCode ierr; 1591 1592 PetscFunctionBegin; 1593 switch (op) { 1594 case MAT_NEW_NONZERO_LOCATIONS: 1595 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1596 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1597 case MAT_KEEP_NONZERO_PATTERN: 1598 case MAT_NEW_NONZERO_LOCATION_ERR: 1599 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1600 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1601 break; 1602 case MAT_ROW_ORIENTED: 1603 a->roworiented = flg; 1604 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1605 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1606 break; 1607 case MAT_NEW_DIAGONALS: 1608 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1609 break; 1610 case MAT_IGNORE_OFF_PROC_ENTRIES: 1611 a->donotstash = flg; 1612 break; 1613 case MAT_USE_HASH_TABLE: 1614 a->ht_flag = flg; 1615 break; 1616 case MAT_SYMMETRIC: 1617 case MAT_STRUCTURALLY_SYMMETRIC: 1618 case MAT_HERMITIAN: 1619 case MAT_SYMMETRY_ETERNAL: 1620 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1621 break; 1622 default: 1623 SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option 
%d",op); 1624 } 1625 PetscFunctionReturn(0); 1626 } 1627 1628 #undef __FUNCT__ 1629 #define __FUNCT__ "MatTranspose_MPIBAIJ" 1630 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1631 { 1632 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1633 Mat_SeqBAIJ *Aloc; 1634 Mat B; 1635 PetscErrorCode ierr; 1636 PetscInt M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1637 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1638 MatScalar *a; 1639 1640 PetscFunctionBegin; 1641 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1642 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1643 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1644 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1645 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1646 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1647 } else { 1648 B = *matout; 1649 } 1650 1651 /* copy over the A part */ 1652 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1653 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1654 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1655 1656 for (i=0; i<mbs; i++) { 1657 rvals[0] = bs*(baij->rstartbs + i); 1658 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1659 for (j=ai[i]; j<ai[i+1]; j++) { 1660 col = (baij->cstartbs+aj[j])*bs; 1661 for (k=0; k<bs; k++) { 1662 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1663 col++; a += bs; 1664 } 1665 } 1666 } 1667 /* copy over the B part */ 1668 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1669 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1670 for (i=0; i<mbs; i++) { 1671 rvals[0] = bs*(baij->rstartbs + i); 1672 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1673 for (j=ai[i]; j<ai[i+1]; j++) { 1674 col = baij->garray[aj[j]]*bs; 1675 for (k=0; k<bs; k++) { 1676 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1677 
col++; a += bs; 1678 } 1679 } 1680 } 1681 ierr = PetscFree(rvals);CHKERRQ(ierr); 1682 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1683 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1684 1685 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1686 *matout = B; 1687 } else { 1688 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1689 } 1690 PetscFunctionReturn(0); 1691 } 1692 1693 #undef __FUNCT__ 1694 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1695 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1696 { 1697 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1698 Mat a = baij->A,b = baij->B; 1699 PetscErrorCode ierr; 1700 PetscInt s1,s2,s3; 1701 1702 PetscFunctionBegin; 1703 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1704 if (rr) { 1705 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1706 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1707 /* Overlap communication with computation. */ 1708 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1709 } 1710 if (ll) { 1711 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1712 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1713 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1714 } 1715 /* scale the diagonal block */ 1716 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1717 1718 if (rr) { 1719 /* Do a scatter end and then right scale the off-diagonal block */ 1720 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1721 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1722 } 1723 1724 PetscFunctionReturn(0); 1725 } 1726 1727 #undef __FUNCT__ 1728 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1729 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1730 { 1731 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1732 PetscErrorCode 
ierr; 1733 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1734 PetscInt i,*owners = A->rmap->range; 1735 PetscInt *nprocs,j,idx,nsends,row; 1736 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1737 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1; 1738 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap->rstart; 1739 MPI_Comm comm = ((PetscObject)A)->comm; 1740 MPI_Request *send_waits,*recv_waits; 1741 MPI_Status recv_status,*send_status; 1742 const PetscScalar *xx; 1743 PetscScalar *bb; 1744 #if defined(PETSC_DEBUG) 1745 PetscBool found = PETSC_FALSE; 1746 #endif 1747 1748 PetscFunctionBegin; 1749 /* first count number of contributors to each processor */ 1750 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1751 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1752 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1753 j = 0; 1754 for (i=0; i<N; i++) { 1755 if (lastidx > (idx = rows[i])) j = 0; 1756 lastidx = idx; 1757 for (; j<size; j++) { 1758 if (idx >= owners[j] && idx < owners[j+1]) { 1759 nprocs[2*j]++; 1760 nprocs[2*j+1] = 1; 1761 owner[i] = j; 1762 #if defined(PETSC_DEBUG) 1763 found = PETSC_TRUE; 1764 #endif 1765 break; 1766 } 1767 } 1768 #if defined(PETSC_DEBUG) 1769 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1770 found = PETSC_FALSE; 1771 #endif 1772 } 1773 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1774 1775 if (A->nooffproczerorows) { 1776 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 1777 nrecvs = nsends; 1778 nmax = N; 1779 } else { 1780 /* inform other processors of number of messages and max length*/ 1781 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1782 } 1783 1784 /* post receives: */ 1785 ierr = 
PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1786 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1787 for (i=0; i<nrecvs; i++) { 1788 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1789 } 1790 1791 /* do sends: 1792 1) starts[i] gives the starting index in svalues for stuff going to 1793 the ith processor 1794 */ 1795 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1796 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1797 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1798 starts[0] = 0; 1799 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1800 for (i=0; i<N; i++) { 1801 svalues[starts[owner[i]]++] = rows[i]; 1802 } 1803 1804 starts[0] = 0; 1805 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1806 count = 0; 1807 for (i=0; i<size; i++) { 1808 if (nprocs[2*i+1]) { 1809 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1810 } 1811 } 1812 ierr = PetscFree(starts);CHKERRQ(ierr); 1813 1814 base = owners[rank]; 1815 1816 /* wait on receives */ 1817 ierr = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr); 1818 count = nrecvs; 1819 slen = 0; 1820 while (count) { 1821 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1822 /* unpack receives into our local space */ 1823 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1824 source[imdex] = recv_status.MPI_SOURCE; 1825 lens[imdex] = n; 1826 slen += n; 1827 count--; 1828 } 1829 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1830 1831 /* move the data into the send scatter */ 1832 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1833 count = 0; 1834 for (i=0; i<nrecvs; i++) { 1835 values = rvalues + i*nmax; 1836 for (j=0; j<lens[i]; j++) { 1837 lrows[count++] = values[j] - base; 
1838 } 1839 } 1840 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1841 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1842 ierr = PetscFree(owner);CHKERRQ(ierr); 1843 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1844 1845 /* fix right hand side if needed */ 1846 if (x && b) { 1847 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1848 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1849 for (i=0; i<slen; i++) { 1850 bb[lrows[i]] = diag*xx[lrows[i]]; 1851 } 1852 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1853 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1854 } 1855 1856 /* actually zap the local rows */ 1857 /* 1858 Zero the required rows. If the "diagonal block" of the matrix 1859 is square and the user wishes to set the diagonal we use separate 1860 code so that MatSetValues() is not called for each diagonal allocating 1861 new memory, thus calling lots of mallocs and slowing things down. 1862 1863 */ 1864 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1865 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1866 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1867 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 1868 } else if (diag != 0.0) { 1869 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1870 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1871 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1872 for (i=0; i<slen; i++) { 1873 row = lrows[i] + rstart_bs; 1874 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1875 } 1876 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1877 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1878 } else { 1879 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1880 } 1881 1882 ierr = PetscFree(lrows);CHKERRQ(ierr); 1883 1884 /* wait on 
sends */ 1885 if (nsends) { 1886 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1887 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1888 ierr = PetscFree(send_status);CHKERRQ(ierr); 1889 } 1890 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1891 ierr = PetscFree(svalues);CHKERRQ(ierr); 1892 1893 PetscFunctionReturn(0); 1894 } 1895 1896 #undef __FUNCT__ 1897 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1898 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1899 { 1900 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1901 PetscErrorCode ierr; 1902 1903 PetscFunctionBegin; 1904 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1905 PetscFunctionReturn(0); 1906 } 1907 1908 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1909 1910 #undef __FUNCT__ 1911 #define __FUNCT__ "MatEqual_MPIBAIJ" 1912 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1913 { 1914 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1915 Mat a,b,c,d; 1916 PetscBool flg; 1917 PetscErrorCode ierr; 1918 1919 PetscFunctionBegin; 1920 a = matA->A; b = matA->B; 1921 c = matB->A; d = matB->B; 1922 1923 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1924 if (flg) { 1925 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1926 } 1927 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1928 PetscFunctionReturn(0); 1929 } 1930 1931 #undef __FUNCT__ 1932 #define __FUNCT__ "MatCopy_MPIBAIJ" 1933 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1934 { 1935 PetscErrorCode ierr; 1936 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1937 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1938 1939 PetscFunctionBegin; 1940 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 1941 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1942 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1943 } else { 1944 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1945 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1946 } 1947 PetscFunctionReturn(0); 1948 } 1949 1950 #undef __FUNCT__ 1951 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1952 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1953 { 1954 PetscErrorCode ierr; 1955 1956 PetscFunctionBegin; 1957 ierr = MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1958 PetscFunctionReturn(0); 1959 } 1960 1961 #undef __FUNCT__ 1962 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1963 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1964 { 1965 PetscErrorCode ierr; 1966 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1967 PetscBLASInt bnz,one=1; 1968 Mat_SeqBAIJ *x,*y; 1969 1970 PetscFunctionBegin; 1971 if (str == SAME_NONZERO_PATTERN) { 1972 PetscScalar alpha = a; 1973 x = (Mat_SeqBAIJ *)xx->A->data; 1974 y = (Mat_SeqBAIJ *)yy->A->data; 1975 bnz = PetscBLASIntCast(x->nz); 1976 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1977 x = (Mat_SeqBAIJ *)xx->B->data; 1978 y = (Mat_SeqBAIJ *)yy->B->data; 1979 bnz = PetscBLASIntCast(x->nz); 1980 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1981 } else { 1982 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1983 } 1984 PetscFunctionReturn(0); 1985 } 1986 1987 #undef __FUNCT__ 1988 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ" 1989 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs) 1990 { 1991 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1992 PetscInt rbs,cbs; 1993 PetscErrorCode ierr; 1994 1995 PetscFunctionBegin; 1996 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1997 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1998 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 1999 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 2000 if (rbs != bs) 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs); 2001 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs); 2002 PetscFunctionReturn(0); 2003 } 2004 2005 #undef __FUNCT__ 2006 #define __FUNCT__ "MatRealPart_MPIBAIJ" 2007 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 2008 { 2009 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2010 PetscErrorCode ierr; 2011 2012 PetscFunctionBegin; 2013 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2014 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2015 PetscFunctionReturn(0); 2016 } 2017 2018 #undef __FUNCT__ 2019 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 2020 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 2021 { 2022 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2023 PetscErrorCode ierr; 2024 2025 PetscFunctionBegin; 2026 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2027 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2028 PetscFunctionReturn(0); 2029 } 2030 2031 #undef __FUNCT__ 2032 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 2033 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 2034 { 2035 PetscErrorCode ierr; 2036 IS iscol_local; 2037 PetscInt csize; 2038 2039 PetscFunctionBegin; 2040 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2041 if (call == MAT_REUSE_MATRIX) { 2042 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 2043 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2044 } else { 2045 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2046 } 2047 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 2048 if (call == MAT_INITIAL_MATRIX) { 2049 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 2050 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 2051 } 2052 PetscFunctionReturn(0); 2053 
} 2054 2055 #undef __FUNCT__ 2056 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private" 2057 /* 2058 Not great since it makes two copies of the submatrix, first an SeqBAIJ 2059 in local and then by concatenating the local matrices the end result. 2060 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ() 2061 */ 2062 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2063 { 2064 PetscErrorCode ierr; 2065 PetscMPIInt rank,size; 2066 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 2067 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2068 Mat *local,M,Mreuse; 2069 MatScalar *vwork,*aa; 2070 MPI_Comm comm = ((PetscObject)mat)->comm; 2071 Mat_SeqBAIJ *aij; 2072 2073 2074 PetscFunctionBegin; 2075 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2076 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2077 2078 if (call == MAT_REUSE_MATRIX) { 2079 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2080 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2081 local = &Mreuse; 2082 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2083 } else { 2084 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2085 Mreuse = *local; 2086 ierr = PetscFree(local);CHKERRQ(ierr); 2087 } 2088 2089 /* 2090 m - number of local rows 2091 n - number of columns (same on all processors) 2092 rstart - first row in new global matrix generated 2093 */ 2094 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2095 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2096 m = m/bs; 2097 n = n/bs; 2098 2099 if (call == MAT_INITIAL_MATRIX) { 2100 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2101 ii = aij->i; 2102 jj = aij->j; 2103 2104 /* 2105 Determine the number of non-zeros in the diagonal and off-diagonal 2106 portions of the matrix in order to do correct 
preallocation 2107 */ 2108 2109 /* first get start and end of "diagonal" columns */ 2110 if (csize == PETSC_DECIDE) { 2111 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2112 if (mglobal == n*bs) { /* square matrix */ 2113 nlocal = m; 2114 } else { 2115 nlocal = n/size + ((n % size) > rank); 2116 } 2117 } else { 2118 nlocal = csize/bs; 2119 } 2120 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2121 rstart = rend - nlocal; 2122 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2123 2124 /* next, compute all the lengths */ 2125 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2126 olens = dlens + m; 2127 for (i=0; i<m; i++) { 2128 jend = ii[i+1] - ii[i]; 2129 olen = 0; 2130 dlen = 0; 2131 for (j=0; j<jend; j++) { 2132 if (*jj < rstart || *jj >= rend) olen++; 2133 else dlen++; 2134 jj++; 2135 } 2136 olens[i] = olen; 2137 dlens[i] = dlen; 2138 } 2139 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2140 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 2141 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2142 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2143 ierr = PetscFree(dlens);CHKERRQ(ierr); 2144 } else { 2145 PetscInt ml,nl; 2146 2147 M = *newmat; 2148 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2149 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2150 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2151 /* 2152 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2153 rather than the slower MatSetValues(). 
2154 */ 2155 M->was_assembled = PETSC_TRUE; 2156 M->assembled = PETSC_FALSE; 2157 } 2158 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2159 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2160 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2161 ii = aij->i; 2162 jj = aij->j; 2163 aa = aij->a; 2164 for (i=0; i<m; i++) { 2165 row = rstart/bs + i; 2166 nz = ii[i+1] - ii[i]; 2167 cwork = jj; jj += nz; 2168 vwork = aa; aa += nz; 2169 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2170 } 2171 2172 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2173 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2174 *newmat = M; 2175 2176 /* save submatrix used in processor for next request */ 2177 if (call == MAT_INITIAL_MATRIX) { 2178 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2179 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2180 } 2181 2182 PetscFunctionReturn(0); 2183 } 2184 2185 #undef __FUNCT__ 2186 #define __FUNCT__ "MatPermute_MPIBAIJ" 2187 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2188 { 2189 MPI_Comm comm,pcomm; 2190 PetscInt first,local_size,nrows; 2191 const PetscInt *rows; 2192 PetscMPIInt size; 2193 IS crowp,growp,irowp,lrowp,lcolp,icolp; 2194 PetscErrorCode ierr; 2195 2196 PetscFunctionBegin; 2197 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2198 /* make a collective version of 'rowp' */ 2199 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2200 if (pcomm==comm) { 2201 crowp = rowp; 2202 } else { 2203 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2204 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2205 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2206 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2207 } 2208 /* collect the global row permutation and invert it */ 2209 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 2210 
ierr = ISSetPermutation(growp);CHKERRQ(ierr); 2211 if (pcomm!=comm) { 2212 ierr = ISDestroy(crowp);CHKERRQ(ierr); 2213 } 2214 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 2215 /* get the local target indices */ 2216 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 2217 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 2218 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 2219 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 2220 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 2221 ierr = ISDestroy(irowp);CHKERRQ(ierr); 2222 /* the column permutation is so much easier; 2223 make a local version of 'colp' and invert it */ 2224 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2225 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2226 if (size==1) { 2227 lcolp = colp; 2228 } else { 2229 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 2230 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 2231 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr); 2232 } 2233 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2234 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 2235 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 2236 if (size>1) { 2237 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 2238 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 2239 } 2240 /* now we just get the submatrix */ 2241 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2242 /* clean up */ 2243 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 2244 ierr = ISDestroy(icolp);CHKERRQ(ierr); 2245 PetscFunctionReturn(0); 2246 } 2247 2248 #undef __FUNCT__ 2249 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2250 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2251 { 2252 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2253 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2254 2255 
PetscFunctionBegin; 2256 if (nghosts) { *nghosts = B->nbs;} 2257 if (ghosts) {*ghosts = baij->garray;} 2258 PetscFunctionReturn(0); 2259 } 2260 2261 extern PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat); 2262 2263 #undef __FUNCT__ 2264 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ" 2265 /* 2266 This routine is almost identical to MatFDColoringCreate_MPIBAIJ()! 2267 */ 2268 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 2269 { 2270 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 2271 PetscErrorCode ierr; 2272 PetscMPIInt size,*ncolsonproc,*disp,nn; 2273 PetscInt bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col; 2274 const PetscInt *is; 2275 PetscInt nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj; 2276 PetscInt *rowhit,M,cstart,cend,colb; 2277 PetscInt *columnsforrow,l; 2278 IS *isa; 2279 PetscBool done,flg; 2280 ISLocalToGlobalMapping map = mat->cbmapping; 2281 PetscInt *ltog = (map ? map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype; 2282 2283 PetscFunctionBegin; 2284 if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();"); 2285 if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock"); 2286 2287 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 2288 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2289 M = mat->rmap->n/bs; 2290 cstart = mat->cmap->rstart/bs; 2291 cend = mat->cmap->rend/bs; 2292 c->M = mat->rmap->N/bs; /* set the global rows and columns and local rows */ 2293 c->N = mat->cmap->N/bs; 2294 c->m = mat->rmap->n/bs; 2295 c->rstart = mat->rmap->rstart/bs; 2296 2297 c->ncolors = nis; 2298 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 2299 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 2300 ierr = 
PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 2301 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 2302 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 2303 ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr); 2304 2305 /* Allow access to data structures of local part of matrix */ 2306 if (!baij->colmap) { 2307 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 2308 } 2309 ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2310 ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2311 2312 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 2313 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 2314 2315 for (i=0; i<nis; i++) { 2316 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 2317 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 2318 c->ncolumns[i] = n; 2319 if (n) { 2320 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 2321 ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr); 2322 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 2323 } else { 2324 c->columns[i] = 0; 2325 } 2326 2327 if (ctype == IS_COLORING_GLOBAL){ 2328 /* Determine the total (parallel) number of columns of this color */ 2329 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 2330 ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr); 2331 2332 nn = PetscMPIIntCast(n); 2333 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2334 nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];} 2335 if (!nctot) { 2336 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 2337 } 2338 2339 disp[0] = 0; 2340 for (j=1; j<size; j++) { 2341 disp[j] = disp[j-1] + ncolsonproc[j-1]; 2342 } 2343 2344 
/* Get complete list of columns for color on each processor */ 2345 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2346 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2347 ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr); 2348 } else if (ctype == IS_COLORING_GHOSTED){ 2349 /* Determine local number of columns of this color on this process, including ghost points */ 2350 nctot = n; 2351 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2352 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 2353 } else { 2354 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 2355 } 2356 2357 /* 2358 Mark all rows affect by these columns 2359 */ 2360 /* Temporary option to allow for debugging/testing */ 2361 flg = PETSC_FALSE; 2362 ierr = PetscOptionsGetBool(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr); 2363 if (!flg) {/*-----------------------------------------------------------------------------*/ 2364 /* crude, fast version */ 2365 ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr); 2366 /* loop over columns*/ 2367 for (j=0; j<nctot; j++) { 2368 if (ctype == IS_COLORING_GHOSTED) { 2369 col = ltog[cols[j]]; 2370 } else { 2371 col = cols[j]; 2372 } 2373 if (col >= cstart && col < cend) { 2374 /* column is in diagonal block of matrix */ 2375 rows = A_cj + A_ci[col-cstart]; 2376 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2377 } else { 2378 #if defined (PETSC_USE_CTABLE) 2379 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2380 colb --; 2381 #else 2382 colb = baij->colmap[col] - 1; 2383 #endif 2384 if (colb == -1) { 2385 m = 0; 2386 } else { 2387 colb = colb/bs; 2388 rows = B_cj + B_ci[colb]; 2389 m = B_ci[colb+1] - B_ci[colb]; 2390 } 2391 } 2392 /* loop over columns marking them in rowhit */ 2393 for (k=0; k<m; k++) { 2394 rowhit[*rows++] = col + 1; 2395 } 2396 } 2397 2398 /* count the number 
of hits */ 2399 nrows = 0; 2400 for (j=0; j<M; j++) { 2401 if (rowhit[j]) nrows++; 2402 } 2403 c->nrows[i] = nrows; 2404 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2405 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2406 ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2407 nrows = 0; 2408 for (j=0; j<M; j++) { 2409 if (rowhit[j]) { 2410 c->rows[i][nrows] = j; 2411 c->columnsforrow[i][nrows] = rowhit[j] - 1; 2412 nrows++; 2413 } 2414 } 2415 } else {/*-------------------------------------------------------------------------------*/ 2416 /* slow version, using rowhit as a linked list */ 2417 PetscInt currentcol,fm,mfm; 2418 rowhit[M] = M; 2419 nrows = 0; 2420 /* loop over columns*/ 2421 for (j=0; j<nctot; j++) { 2422 if (ctype == IS_COLORING_GHOSTED) { 2423 col = ltog[cols[j]]; 2424 } else { 2425 col = cols[j]; 2426 } 2427 if (col >= cstart && col < cend) { 2428 /* column is in diagonal block of matrix */ 2429 rows = A_cj + A_ci[col-cstart]; 2430 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2431 } else { 2432 #if defined (PETSC_USE_CTABLE) 2433 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2434 colb --; 2435 #else 2436 colb = baij->colmap[col] - 1; 2437 #endif 2438 if (colb == -1) { 2439 m = 0; 2440 } else { 2441 colb = colb/bs; 2442 rows = B_cj + B_ci[colb]; 2443 m = B_ci[colb+1] - B_ci[colb]; 2444 } 2445 } 2446 2447 /* loop over columns marking them in rowhit */ 2448 fm = M; /* fm points to first entry in linked list */ 2449 for (k=0; k<m; k++) { 2450 currentcol = *rows++; 2451 /* is it already in the list? 
*/ 2452 do { 2453 mfm = fm; 2454 fm = rowhit[fm]; 2455 } while (fm < currentcol); 2456 /* not in list so add it */ 2457 if (fm != currentcol) { 2458 nrows++; 2459 columnsforrow[currentcol] = col; 2460 /* next three lines insert new entry into linked list */ 2461 rowhit[mfm] = currentcol; 2462 rowhit[currentcol] = fm; 2463 fm = currentcol; 2464 /* fm points to present position in list since we know the columns are sorted */ 2465 } else { 2466 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected"); 2467 } 2468 } 2469 } 2470 c->nrows[i] = nrows; 2471 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2472 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2473 ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2474 /* now store the linked list of rows into c->rows[i] */ 2475 nrows = 0; 2476 fm = rowhit[M]; 2477 do { 2478 c->rows[i][nrows] = fm; 2479 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 2480 fm = rowhit[fm]; 2481 } while (fm < M); 2482 } /* ---------------------------------------------------------------------------------------*/ 2483 ierr = PetscFree(cols);CHKERRQ(ierr); 2484 } 2485 2486 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 2487 /* 2488 vscale will contain the "diagonal" on processor scalings followed by the off processor 2489 */ 2490 if (ctype == IS_COLORING_GLOBAL) { 2491 PetscInt *garray; 2492 ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2493 for (i=0; i<baij->B->cmap->n/bs; i++) { 2494 for (j=0; j<bs; j++) { 2495 garray[i*bs+j] = bs*baij->garray[i]+j; 2496 } 2497 } 2498 ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr); 2499 ierr = PetscFree(garray);CHKERRQ(ierr); 2500 CHKMEMQ; 2501 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2502 for (k=0; k<c->ncolors; k++) { 2503 ierr = 
PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2504 for (l=0; l<c->nrows[k]; l++) { 2505 col = c->columnsforrow[k][l]; 2506 if (col >= cstart && col < cend) { 2507 /* column is in diagonal block of matrix */ 2508 colb = col - cstart; 2509 } else { 2510 /* column is in "off-processor" part */ 2511 #if defined (PETSC_USE_CTABLE) 2512 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2513 colb --; 2514 #else 2515 colb = baij->colmap[col] - 1; 2516 #endif 2517 colb = colb/bs; 2518 colb += cend - cstart; 2519 } 2520 c->vscaleforrow[k][l] = colb; 2521 } 2522 } 2523 } else if (ctype == IS_COLORING_GHOSTED) { 2524 /* Get gtol mapping */ 2525 PetscInt N = mat->cmap->N, *gtol; 2526 ierr = PetscMalloc((N+1)*sizeof(PetscInt),>ol);CHKERRQ(ierr); 2527 for (i=0; i<N; i++) gtol[i] = -1; 2528 for (i=0; i<map->n; i++) gtol[ltog[i]] = i; 2529 2530 c->vscale = 0; /* will be created in MatFDColoringApply() */ 2531 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2532 for (k=0; k<c->ncolors; k++) { 2533 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2534 for (l=0; l<c->nrows[k]; l++) { 2535 col = c->columnsforrow[k][l]; /* global column index */ 2536 c->vscaleforrow[k][l] = gtol[col]; /* local column index */ 2537 } 2538 } 2539 ierr = PetscFree(gtol);CHKERRQ(ierr); 2540 } 2541 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 2542 2543 ierr = PetscFree(rowhit);CHKERRQ(ierr); 2544 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 2545 ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2546 ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2547 CHKMEMQ; 2548 PetscFunctionReturn(0); 2549 } 2550 2551 #undef __FUNCT__ 2552 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ" 2553 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat) 2554 { 2555 Mat B; 
2556 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2557 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2558 Mat_SeqAIJ *b; 2559 PetscErrorCode ierr; 2560 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2561 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2562 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2563 2564 PetscFunctionBegin; 2565 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2566 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2567 2568 /* ---------------------------------------------------------------- 2569 Tell every processor the number of nonzeros per row 2570 */ 2571 ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 2572 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2573 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2574 } 2575 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2576 ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr); 2577 displs = recvcounts + size; 2578 for (i=0; i<size; i++) { 2579 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2580 displs[i] = A->rmap->range[i]/bs; 2581 } 2582 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2583 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2584 #else 2585 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2586 #endif 2587 /* --------------------------------------------------------------- 2588 Create the sequential matrix of the same type as the local block diagonal 2589 */ 2590 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2591 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2592 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 
2593 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2594 b = (Mat_SeqAIJ *)B->data; 2595 2596 /*-------------------------------------------------------------------- 2597 Copy my part of matrix column indices over 2598 */ 2599 sendcount = ad->nz + bd->nz; 2600 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2601 a_jsendbuf = ad->j; 2602 b_jsendbuf = bd->j; 2603 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2604 cnt = 0; 2605 for (i=0; i<n; i++) { 2606 2607 /* put in lower diagonal portion */ 2608 m = bd->i[i+1] - bd->i[i]; 2609 while (m > 0) { 2610 /* is it above diagonal (in bd (compressed) numbering) */ 2611 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2612 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2613 m--; 2614 } 2615 2616 /* put in diagonal portion */ 2617 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2618 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2619 } 2620 2621 /* put in upper diagonal portion */ 2622 while (m-- > 0) { 2623 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2624 } 2625 } 2626 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt); 2627 2628 /*-------------------------------------------------------------------- 2629 Gather all column indices to all processors 2630 */ 2631 for (i=0; i<size; i++) { 2632 recvcounts[i] = 0; 2633 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2634 recvcounts[i] += lens[j]; 2635 } 2636 } 2637 displs[0] = 0; 2638 for (i=1; i<size; i++) { 2639 displs[i] = displs[i-1] + recvcounts[i-1]; 2640 } 2641 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2642 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2643 #else 2644 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2645 #endif 2646 /*-------------------------------------------------------------------- 2647 Assemble the matrix into useable 
form (note numerical values not yet set) 2648 */ 2649 /* set the b->ilen (length of each row) values */ 2650 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2651 /* set the b->i indices */ 2652 b->i[0] = 0; 2653 for (i=1; i<=A->rmap->N/bs; i++) { 2654 b->i[i] = b->i[i-1] + lens[i-1]; 2655 } 2656 ierr = PetscFree(lens);CHKERRQ(ierr); 2657 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2658 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2659 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2660 2661 if (A->symmetric){ 2662 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2663 } else if (A->hermitian) { 2664 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2665 } else if (A->structurally_symmetric) { 2666 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2667 } 2668 *newmat = B; 2669 PetscFunctionReturn(0); 2670 } 2671 2672 #undef __FUNCT__ 2673 #define __FUNCT__ "MatSOR_MPIBAIJ" 2674 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2675 { 2676 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2677 PetscErrorCode ierr; 2678 Vec bb1 = 0; 2679 2680 PetscFunctionBegin; 2681 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2682 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2683 } 2684 2685 if (flag == SOR_APPLY_UPPER) { 2686 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2687 PetscFunctionReturn(0); 2688 } 2689 2690 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2691 if (flag & SOR_ZERO_INITIAL_GUESS) { 2692 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2693 its--; 2694 } 2695 2696 while (its--) { 2697 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2698 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2699 2700 /* 
update rhs: bb1 = bb - B*x */ 2701 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2702 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2703 2704 /* local sweep */ 2705 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2706 } 2707 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 2708 if (flag & SOR_ZERO_INITIAL_GUESS) { 2709 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2710 its--; 2711 } 2712 while (its--) { 2713 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2714 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2715 2716 /* update rhs: bb1 = bb - B*x */ 2717 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2718 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2719 2720 /* local sweep */ 2721 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2722 } 2723 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 2724 if (flag & SOR_ZERO_INITIAL_GUESS) { 2725 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2726 its--; 2727 } 2728 while (its--) { 2729 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2730 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2731 2732 /* update rhs: bb1 = bb - B*x */ 2733 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2734 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2735 2736 /* local sweep */ 2737 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2738 } 2739 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2740 2741 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 2742 PetscFunctionReturn(0); 2743 } 2744 2745 extern PetscErrorCode 
MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2746 2747 2748 /* -------------------------------------------------------------------*/ 2749 static struct _MatOps MatOps_Values = { 2750 MatSetValues_MPIBAIJ, 2751 MatGetRow_MPIBAIJ, 2752 MatRestoreRow_MPIBAIJ, 2753 MatMult_MPIBAIJ, 2754 /* 4*/ MatMultAdd_MPIBAIJ, 2755 MatMultTranspose_MPIBAIJ, 2756 MatMultTransposeAdd_MPIBAIJ, 2757 0, 2758 0, 2759 0, 2760 /*10*/ 0, 2761 0, 2762 0, 2763 MatSOR_MPIBAIJ, 2764 MatTranspose_MPIBAIJ, 2765 /*15*/ MatGetInfo_MPIBAIJ, 2766 MatEqual_MPIBAIJ, 2767 MatGetDiagonal_MPIBAIJ, 2768 MatDiagonalScale_MPIBAIJ, 2769 MatNorm_MPIBAIJ, 2770 /*20*/ MatAssemblyBegin_MPIBAIJ, 2771 MatAssemblyEnd_MPIBAIJ, 2772 MatSetOption_MPIBAIJ, 2773 MatZeroEntries_MPIBAIJ, 2774 /*24*/ MatZeroRows_MPIBAIJ, 2775 0, 2776 0, 2777 0, 2778 0, 2779 /*29*/ MatSetUpPreallocation_MPIBAIJ, 2780 0, 2781 0, 2782 0, 2783 0, 2784 /*34*/ MatDuplicate_MPIBAIJ, 2785 0, 2786 0, 2787 0, 2788 0, 2789 /*39*/ MatAXPY_MPIBAIJ, 2790 MatGetSubMatrices_MPIBAIJ, 2791 MatIncreaseOverlap_MPIBAIJ, 2792 MatGetValues_MPIBAIJ, 2793 MatCopy_MPIBAIJ, 2794 /*44*/ 0, 2795 MatScale_MPIBAIJ, 2796 0, 2797 0, 2798 0, 2799 /*49*/ MatSetBlockSize_MPIBAIJ, 2800 0, 2801 0, 2802 0, 2803 0, 2804 /*54*/ MatFDColoringCreate_MPIBAIJ, 2805 0, 2806 MatSetUnfactored_MPIBAIJ, 2807 MatPermute_MPIBAIJ, 2808 MatSetValuesBlocked_MPIBAIJ, 2809 /*59*/ MatGetSubMatrix_MPIBAIJ, 2810 MatDestroy_MPIBAIJ, 2811 MatView_MPIBAIJ, 2812 0, 2813 0, 2814 /*64*/ 0, 2815 0, 2816 0, 2817 0, 2818 0, 2819 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2820 0, 2821 0, 2822 0, 2823 0, 2824 /*74*/ 0, 2825 MatFDColoringApply_BAIJ, 2826 0, 2827 0, 2828 0, 2829 /*79*/ 0, 2830 0, 2831 0, 2832 0, 2833 MatLoad_MPIBAIJ, 2834 /*84*/ 0, 2835 0, 2836 0, 2837 0, 2838 0, 2839 /*89*/ 0, 2840 0, 2841 0, 2842 0, 2843 0, 2844 /*94*/ 0, 2845 0, 2846 0, 2847 0, 2848 0, 2849 /*99*/ 0, 2850 0, 2851 0, 2852 0, 2853 0, 2854 /*104*/0, 2855 MatRealPart_MPIBAIJ, 2856 MatImaginaryPart_MPIBAIJ, 2857 0, 2858 
0, 2859 /*109*/0, 2860 0, 2861 0, 2862 0, 2863 0, 2864 /*114*/MatGetSeqNonzerostructure_MPIBAIJ, 2865 0, 2866 MatGetGhosts_MPIBAIJ, 2867 0, 2868 0, 2869 /*119*/0, 2870 0, 2871 0, 2872 0 2873 }; 2874 2875 EXTERN_C_BEGIN 2876 #undef __FUNCT__ 2877 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2878 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscBool *iscopy,MatReuse reuse,Mat *a) 2879 { 2880 PetscFunctionBegin; 2881 *a = ((Mat_MPIBAIJ *)A->data)->A; 2882 *iscopy = PETSC_FALSE; 2883 PetscFunctionReturn(0); 2884 } 2885 EXTERN_C_END 2886 2887 EXTERN_C_BEGIN 2888 extern PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2889 EXTERN_C_END 2890 2891 EXTERN_C_BEGIN 2892 #undef __FUNCT__ 2893 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2894 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2895 { 2896 PetscInt m,rstart,cstart,cend; 2897 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2898 const PetscInt *JJ=0; 2899 PetscScalar *values=0; 2900 PetscErrorCode ierr; 2901 2902 PetscFunctionBegin; 2903 2904 if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2905 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2906 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2907 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2908 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2909 m = B->rmap->n/bs; 2910 rstart = B->rmap->rstart/bs; 2911 cstart = B->cmap->rstart/bs; 2912 cend = B->cmap->rend/bs; 2913 2914 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2915 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 2916 for (i=0; i<m; i++) { 2917 nz = ii[i+1] - ii[i]; 2918 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2919 
nz_max = PetscMax(nz_max,nz); 2920 JJ = jj + ii[i]; 2921 for (j=0; j<nz; j++) { 2922 if (*JJ >= cstart) break; 2923 JJ++; 2924 } 2925 d = 0; 2926 for (; j<nz; j++) { 2927 if (*JJ++ >= cend) break; 2928 d++; 2929 } 2930 d_nnz[i] = d; 2931 o_nnz[i] = nz - d; 2932 } 2933 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2934 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2935 2936 values = (PetscScalar*)V; 2937 if (!values) { 2938 ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2939 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2940 } 2941 for (i=0; i<m; i++) { 2942 PetscInt row = i + rstart; 2943 PetscInt ncols = ii[i+1] - ii[i]; 2944 const PetscInt *icols = jj + ii[i]; 2945 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2946 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2947 } 2948 2949 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2950 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2951 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2952 2953 PetscFunctionReturn(0); 2954 } 2955 EXTERN_C_END 2956 2957 #undef __FUNCT__ 2958 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2959 /*@C 2960 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2961 (the default parallel PETSc format). 2962 2963 Collective on MPI_Comm 2964 2965 Input Parameters: 2966 + A - the matrix 2967 . bs - the block size 2968 . i - the indices into j for the start of each local row (starts with zero) 2969 . 
j - the column indices for each local row (starts with zero) these must be sorted for each row 2970 - v - optional values in the matrix 2971 2972 Level: developer 2973 2974 .keywords: matrix, aij, compressed row, sparse, parallel 2975 2976 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2977 @*/ 2978 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2979 { 2980 PetscErrorCode ierr; 2981 2982 PetscFunctionBegin; 2983 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2984 PetscFunctionReturn(0); 2985 } 2986 2987 EXTERN_C_BEGIN 2988 #undef __FUNCT__ 2989 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2990 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2991 { 2992 Mat_MPIBAIJ *b; 2993 PetscErrorCode ierr; 2994 PetscInt i, newbs = PetscAbs(bs); 2995 2996 PetscFunctionBegin; 2997 if (bs < 0) { 2998 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr); 2999 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 3000 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3001 bs = PetscAbs(bs); 3002 } 3003 if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz"); 3004 bs = newbs; 3005 3006 3007 if (bs < 1) SETERRQ(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 3008 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3009 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3010 if (d_nz < 0) 
SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3011 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3012 3013 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3014 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3015 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3016 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3017 3018 if (d_nnz) { 3019 for (i=0; i<B->rmap->n/bs; i++) { 3020 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 3021 } 3022 } 3023 if (o_nnz) { 3024 for (i=0; i<B->rmap->n/bs; i++) { 3025 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 3026 } 3027 } 3028 3029 b = (Mat_MPIBAIJ*)B->data; 3030 b->bs2 = bs*bs; 3031 b->mbs = B->rmap->n/bs; 3032 b->nbs = B->cmap->n/bs; 3033 b->Mbs = B->rmap->N/bs; 3034 b->Nbs = B->cmap->N/bs; 3035 3036 for (i=0; i<=b->size; i++) { 3037 b->rangebs[i] = B->rmap->range[i]/bs; 3038 } 3039 b->rstartbs = B->rmap->rstart/bs; 3040 b->rendbs = B->rmap->rend/bs; 3041 b->cstartbs = B->cmap->rstart/bs; 3042 b->cendbs = B->cmap->rend/bs; 3043 3044 if (!B->preallocated) { 3045 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3046 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3047 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 3048 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3049 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3050 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3051 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 3052 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3053 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 3054 } 3055 3056 ierr = 
MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3057 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 3058 B->preallocated = PETSC_TRUE; 3059 PetscFunctionReturn(0); 3060 } 3061 EXTERN_C_END 3062 3063 EXTERN_C_BEGIN 3064 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 3065 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 3066 EXTERN_C_END 3067 3068 3069 EXTERN_C_BEGIN 3070 #undef __FUNCT__ 3071 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 3072 PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj) 3073 { 3074 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 3075 PetscErrorCode ierr; 3076 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 3077 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 3078 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 3079 3080 PetscFunctionBegin; 3081 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr); 3082 ii[0] = 0; 3083 CHKMEMQ; 3084 for (i=0; i<M; i++) { 3085 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 3086 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 3087 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 3088 /* remove one from count of matrix has diagonal */ 3089 for (j=id[i]; j<id[i+1]; j++) { 3090 if (jd[j] == i) {ii[i+1]--;break;} 3091 } 3092 CHKMEMQ; 3093 } 3094 ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3095 cnt = 0; 3096 for (i=0; i<M; i++) { 3097 for (j=io[i]; j<io[i+1]; j++) { 3098 if (garray[jo[j]] > rstart) break; 3099 jj[cnt++] = garray[jo[j]]; 3100 CHKMEMQ; 3101 } 3102 for (k=id[i]; k<id[i+1]; k++) { 3103 if (jd[k] != i) { 3104 jj[cnt++] = rstart + jd[k]; 3105 CHKMEMQ; 3106 } 3107 } 3108 for (;j<io[i+1]; j++) { 3109 jj[cnt++] = garray[jo[j]]; 3110 CHKMEMQ; 3111 } 
3112 } 3113 ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr); 3114 PetscFunctionReturn(0); 3115 } 3116 EXTERN_C_END 3117 3118 #include <../src/mat/impls/aij/mpi/mpiaij.h> 3119 EXTERN_C_BEGIN 3120 PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*); 3121 EXTERN_C_END 3122 3123 EXTERN_C_BEGIN 3124 #undef __FUNCT__ 3125 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ" 3126 PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,const MatType newtype,MatReuse reuse,Mat *newmat) 3127 { 3128 PetscErrorCode ierr; 3129 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3130 Mat B; 3131 Mat_MPIAIJ *b; 3132 3133 PetscFunctionBegin; 3134 if (!A->assembled) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Matrix must be assembled"); 3135 3136 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 3137 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3138 ierr = MatSetType(B,newtype);CHKERRQ(ierr); 3139 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 3140 b = (Mat_MPIAIJ*) B->data; 3141 3142 ierr = MatDestroy(b->A);CHKERRQ(ierr); 3143 ierr = MatDestroy(b->B);CHKERRQ(ierr); 3144 ierr = DisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 3145 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 3146 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 3147 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3148 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3149 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3150 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3151 if (reuse == MAT_REUSE_MATRIX) { 3152 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 3153 } else { 3154 *newmat = B; 3155 } 3156 PetscFunctionReturn(0); 3157 } 3158 EXTERN_C_END 3159 3160 EXTERN_C_BEGIN 3161 #if defined(PETSC_HAVE_MUMPS) 3162 extern PetscErrorCode 
MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3163 #endif 3164 EXTERN_C_END 3165 3166 /*MC 3167 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 3168 3169 Options Database Keys: 3170 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 3171 . -mat_block_size <bs> - set the blocksize used to store the matrix 3172 - -mat_use_hash_table <fact> 3173 3174 Level: beginner 3175 3176 .seealso: MatCreateMPIBAIJ 3177 M*/ 3178 3179 EXTERN_C_BEGIN 3180 #undef __FUNCT__ 3181 #define __FUNCT__ "MatCreate_MPIBAIJ" 3182 PetscErrorCode MatCreate_MPIBAIJ(Mat B) 3183 { 3184 Mat_MPIBAIJ *b; 3185 PetscErrorCode ierr; 3186 PetscBool flg; 3187 3188 PetscFunctionBegin; 3189 ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr); 3190 B->data = (void*)b; 3191 3192 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3193 B->assembled = PETSC_FALSE; 3194 3195 B->insertmode = NOT_SET_VALUES; 3196 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 3197 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 3198 3199 /* build local table of row and column ownerships */ 3200 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 3201 3202 /* build cache for off array entries formed */ 3203 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 3204 b->donotstash = PETSC_FALSE; 3205 b->colmap = PETSC_NULL; 3206 b->garray = PETSC_NULL; 3207 b->roworiented = PETSC_TRUE; 3208 3209 /* stuff used in block assembly */ 3210 b->barray = 0; 3211 3212 /* stuff used for matrix vector multiply */ 3213 b->lvec = 0; 3214 b->Mvctx = 0; 3215 3216 /* stuff for MatGetRow() */ 3217 b->rowindices = 0; 3218 b->rowvalues = 0; 3219 b->getrowactive = PETSC_FALSE; 3220 3221 /* hash table stuff */ 3222 b->ht = 0; 3223 b->hd = 0; 3224 b->ht_size = 0; 3225 b->ht_flag = PETSC_FALSE; 3226 b->ht_fact = 0; 3227 b->ht_total_ct = 0; 
3228 b->ht_insert_ct = 0; 3229 3230 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 3231 b->ijonly = PETSC_FALSE; 3232 3233 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 3234 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 3235 if (flg) { 3236 PetscReal fact = 1.39; 3237 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 3238 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 3239 if (fact <= 1.0) fact = 1.39; 3240 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 3241 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 3242 } 3243 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3244 3245 #if defined(PETSC_HAVE_MUMPS) 3246 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr); 3247 #endif 3248 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C", 3249 "MatConvert_MPIBAIJ_MPIAdj", 3250 MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3251 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiaij_C", 3252 "MatConvert_MPIBAIJ_MPIAIJ", 3253 MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 3254 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3255 "MatStoreValues_MPIBAIJ", 3256 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3257 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3258 "MatRetrieveValues_MPIBAIJ", 3259 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3260 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3261 "MatGetDiagonalBlock_MPIBAIJ", 3262 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3263 ierr = 
PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 3264 "MatMPIBAIJSetPreallocation_MPIBAIJ", 3265 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3266 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 3267 "MatMPIBAIJSetPreallocationCSR_MPIBAIJ", 3268 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3269 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3270 "MatDiagonalScaleLocal_MPIBAIJ", 3271 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3272 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 3273 "MatSetHashTableFactor_MPIBAIJ", 3274 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3275 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3276 PetscFunctionReturn(0); 3277 } 3278 EXTERN_C_END 3279 3280 /*MC 3281 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3282 3283 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3284 and MATMPIBAIJ otherwise. 3285 3286 Options Database Keys: 3287 . 
-mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3288 3289 Level: beginner 3290 3291 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3292 M*/ 3293 3294 EXTERN_C_BEGIN 3295 #undef __FUNCT__ 3296 #define __FUNCT__ "MatCreate_BAIJ" 3297 PetscErrorCode MatCreate_BAIJ(Mat A) 3298 { 3299 PetscErrorCode ierr; 3300 PetscMPIInt size; 3301 3302 PetscFunctionBegin; 3303 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 3304 if (size == 1) { 3305 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 3306 } else { 3307 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 3308 } 3309 PetscFunctionReturn(0); 3310 } 3311 EXTERN_C_END 3312 3313 #undef __FUNCT__ 3314 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 3315 /*@C 3316 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3317 (block compressed row). For good matrix assembly performance 3318 the user should preallocate the matrix storage by setting the parameters 3319 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3320 performance can be increased by more than a factor of 50. 3321 3322 Collective on Mat 3323 3324 Input Parameters: 3325 + A - the matrix 3326 . bs - size of blockk 3327 . d_nz - number of block nonzeros per block row in diagonal portion of local 3328 submatrix (same for all local rows) 3329 . d_nnz - array containing the number of block nonzeros in the various block rows 3330 of the in diagonal portion of the local (possibly different for each block 3331 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 3332 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 3333 submatrix (same for all local rows). 
3334 - o_nnz - array containing the number of nonzeros in the various block rows of the 3335 off-diagonal portion of the local submatrix (possibly different for 3336 each block row) or PETSC_NULL. 3337 3338 If the *_nnz parameter is given then the *_nz parameter is ignored 3339 3340 Options Database Keys: 3341 + -mat_block_size - size of the blocks to use 3342 - -mat_use_hash_table <fact> 3343 3344 Notes: 3345 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3346 than it must be used on all processors that share the object for that argument. 3347 3348 Storage Information: 3349 For a square global matrix we define each processor's diagonal portion 3350 to be its local rows and the corresponding columns (a square submatrix); 3351 each processor's off-diagonal portion encompasses the remainder of the 3352 local matrix (a rectangular submatrix). 3353 3354 The user can specify preallocated storage for the diagonal part of 3355 the local submatrix with either d_nz or d_nnz (not both). Set 3356 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3357 memory allocation. Likewise, specify preallocated storage for the 3358 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3359 3360 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3361 the figure below we depict these three local rows and all columns (0-11). 3362 3363 .vb 3364 0 1 2 3 4 5 6 7 8 9 10 11 3365 ------------------- 3366 row 3 | o o o d d d o o o o o o 3367 row 4 | o o o d d d o o o o o o 3368 row 5 | o o o d d d o o o o o o 3369 ------------------- 3370 .ve 3371 3372 Thus, any entries in the d locations are stored in the d (diagonal) 3373 submatrix, and any entries in the o locations are stored in the 3374 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3375 stored simply in the MATSEQBAIJ format for compressed row storage. 
3376 3377 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3378 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3379 In general, for PDE problems in which most nonzeros are near the diagonal, 3380 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3381 or you will get TERRIBLE performance; see the users' manual chapter on 3382 matrices. 3383 3384 You can call MatGetInfo() to get information on how effective the preallocation was; 3385 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3386 You can also run with the option -info and look for messages with the string 3387 malloc in them to see if additional memory allocation was needed. 3388 3389 Level: intermediate 3390 3391 .keywords: matrix, block, aij, compressed row, sparse, parallel 3392 3393 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR() 3394 @*/ 3395 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3396 { 3397 PetscErrorCode ierr; 3398 3399 PetscFunctionBegin; 3400 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3401 PetscFunctionReturn(0); 3402 } 3403 3404 #undef __FUNCT__ 3405 #define __FUNCT__ "MatCreateMPIBAIJ" 3406 /*@C 3407 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 3408 (block compressed row). For good matrix assembly performance 3409 the user should preallocate the matrix storage by setting the parameters 3410 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3411 performance can be increased by more than a factor of 50. 3412 3413 Collective on MPI_Comm 3414 3415 Input Parameters: 3416 + comm - MPI communicator 3417 . bs - size of blockk 3418 . 
m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3419 This value should be the same as the local size used in creating the 3420 y vector for the matrix-vector product y = Ax. 3421 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 3422 This value should be the same as the local size used in creating the 3423 x vector for the matrix-vector product y = Ax. 3424 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3425 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3426 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3427 submatrix (same for all local rows) 3428 . d_nnz - array containing the number of nonzero blocks in the various block rows 3429 of the in diagonal portion of the local (possibly different for each block 3430 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 3431 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3432 submatrix (same for all local rows). 3433 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3434 off-diagonal portion of the local submatrix (possibly different for 3435 each block row) or PETSC_NULL. 3436 3437 Output Parameter: 3438 . A - the matrix 3439 3440 Options Database Keys: 3441 + -mat_block_size - size of the blocks to use 3442 - -mat_use_hash_table <fact> 3443 3444 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3445 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3446 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3447 3448 Notes: 3449 If the *_nnz parameter is given then the *_nz parameter is ignored 3450 3451 A nonzero block is any block that as 1 or more nonzeros in it 3452 3453 The user MUST specify either the local or global matrix dimensions 3454 (possibly both). 
3455 3456 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3457 than it must be used on all processors that share the object for that argument. 3458 3459 Storage Information: 3460 For a square global matrix we define each processor's diagonal portion 3461 to be its local rows and the corresponding columns (a square submatrix); 3462 each processor's off-diagonal portion encompasses the remainder of the 3463 local matrix (a rectangular submatrix). 3464 3465 The user can specify preallocated storage for the diagonal part of 3466 the local submatrix with either d_nz or d_nnz (not both). Set 3467 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3468 memory allocation. Likewise, specify preallocated storage for the 3469 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3470 3471 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3472 the figure below we depict these three local rows and all columns (0-11). 3473 3474 .vb 3475 0 1 2 3 4 5 6 7 8 9 10 11 3476 ------------------- 3477 row 3 | o o o d d d o o o o o o 3478 row 4 | o o o d d d o o o o o o 3479 row 5 | o o o d d d o o o o o o 3480 ------------------- 3481 .ve 3482 3483 Thus, any entries in the d locations are stored in the d (diagonal) 3484 submatrix, and any entries in the o locations are stored in the 3485 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3486 stored simply in the MATSEQBAIJ format for compressed row storage. 3487 3488 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3489 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3490 In general, for PDE problems in which most nonzeros are near the diagonal, 3491 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3492 or you will get TERRIBLE performance; see the users' manual chapter on 3493 matrices. 
3494 3495 Level: intermediate 3496 3497 .keywords: matrix, block, aij, compressed row, sparse, parallel 3498 3499 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3500 @*/ 3501 PetscErrorCode MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3502 { 3503 PetscErrorCode ierr; 3504 PetscMPIInt size; 3505 3506 PetscFunctionBegin; 3507 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3508 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3509 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3510 if (size > 1) { 3511 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3512 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3513 } else { 3514 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3515 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3516 } 3517 PetscFunctionReturn(0); 3518 } 3519 3520 #undef __FUNCT__ 3521 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3522 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3523 { 3524 Mat mat; 3525 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3526 PetscErrorCode ierr; 3527 PetscInt len=0; 3528 3529 PetscFunctionBegin; 3530 *newmat = 0; 3531 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3532 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3533 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3534 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3535 3536 mat->factortype = matin->factortype; 3537 mat->preallocated = PETSC_TRUE; 3538 mat->assembled = PETSC_TRUE; 3539 mat->insertmode = NOT_SET_VALUES; 3540 3541 a = (Mat_MPIBAIJ*)mat->data; 3542 mat->rmap->bs = matin->rmap->bs; 3543 a->bs2 = oldmat->bs2; 3544 a->mbs = oldmat->mbs; 3545 
a->nbs = oldmat->nbs; 3546 a->Mbs = oldmat->Mbs; 3547 a->Nbs = oldmat->Nbs; 3548 3549 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3550 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3551 3552 a->size = oldmat->size; 3553 a->rank = oldmat->rank; 3554 a->donotstash = oldmat->donotstash; 3555 a->roworiented = oldmat->roworiented; 3556 a->rowindices = 0; 3557 a->rowvalues = 0; 3558 a->getrowactive = PETSC_FALSE; 3559 a->barray = 0; 3560 a->rstartbs = oldmat->rstartbs; 3561 a->rendbs = oldmat->rendbs; 3562 a->cstartbs = oldmat->cstartbs; 3563 a->cendbs = oldmat->cendbs; 3564 3565 /* hash table stuff */ 3566 a->ht = 0; 3567 a->hd = 0; 3568 a->ht_size = 0; 3569 a->ht_flag = oldmat->ht_flag; 3570 a->ht_fact = oldmat->ht_fact; 3571 a->ht_total_ct = 0; 3572 a->ht_insert_ct = 0; 3573 3574 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3575 if (oldmat->colmap) { 3576 #if defined (PETSC_USE_CTABLE) 3577 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3578 #else 3579 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3580 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3581 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3582 #endif 3583 } else a->colmap = 0; 3584 3585 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3586 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3587 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3588 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3589 } else a->garray = 0; 3590 3591 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3592 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3593 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3594 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3595 
ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3596 3597 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3598 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3599 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3600 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3601 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3602 *newmat = mat; 3603 3604 PetscFunctionReturn(0); 3605 } 3606 3607 #undef __FUNCT__ 3608 #define __FUNCT__ "MatLoad_MPIBAIJ" 3609 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer) 3610 { 3611 PetscErrorCode ierr; 3612 int fd; 3613 PetscInt i,nz,j,rstart,rend; 3614 PetscScalar *vals,*buf; 3615 MPI_Comm comm = ((PetscObject)viewer)->comm; 3616 MPI_Status status; 3617 PetscMPIInt rank,size,maxnz; 3618 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3619 PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 3620 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 3621 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3622 PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 3623 PetscInt dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols; 3624 3625 PetscFunctionBegin; 3626 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3627 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3628 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3629 3630 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3631 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3632 if (!rank) { 3633 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3634 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3635 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 
3636 } 3637 3638 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 3639 3640 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3641 M = header[1]; N = header[2]; 3642 3643 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3644 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 3645 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 3646 3647 /* If global sizes are set, check if they are consistent with that given in the file */ 3648 if (sizesset) { 3649 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 3650 } 3651 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3652 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3653 3654 if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices"); 3655 3656 /* 3657 This code adds extra rows to make sure the number of rows is 3658 divisible by the blocksize 3659 */ 3660 Mbs = M/bs; 3661 extra_rows = bs - M + bs*Mbs; 3662 if (extra_rows == bs) extra_rows = 0; 3663 else Mbs++; 3664 if (extra_rows && !rank) { 3665 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3666 } 3667 3668 /* determine ownership of all rows */ 3669 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 3670 mbs = Mbs/size + ((Mbs % size) > rank); 3671 m = mbs*bs; 3672 } else { /* User set */ 3673 m = newmat->rmap->n; 3674 mbs = m/bs; 3675 } 3676 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 3677 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3678 3679 /* process 0 needs enough room for process with most rows */ 3680 if (!rank) { 3681 mmax = 
rowners[1]; 3682 for (i=2; i<size; i++) { 3683 mmax = PetscMax(mmax,rowners[i]); 3684 } 3685 mmax*=bs; 3686 } else mmax = m; 3687 3688 rowners[0] = 0; 3689 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3690 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3691 rstart = rowners[rank]; 3692 rend = rowners[rank+1]; 3693 3694 /* distribute row lengths to all processors */ 3695 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 3696 if (!rank) { 3697 mend = m; 3698 if (size == 1) mend = mend - extra_rows; 3699 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3700 for (j=mend; j<m; j++) locrowlens[j] = 1; 3701 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3702 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3703 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3704 for (j=0; j<m; j++) { 3705 procsnz[0] += locrowlens[j]; 3706 } 3707 for (i=1; i<size; i++) { 3708 mend = browners[i+1] - browners[i]; 3709 if (i == size-1) mend = mend - extra_rows; 3710 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3711 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3712 /* calculate the number of nonzeros on each processor */ 3713 for (j=0; j<browners[i+1]-browners[i]; j++) { 3714 procsnz[i] += rowlengths[j]; 3715 } 3716 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3717 } 3718 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3719 } else { 3720 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3721 } 3722 3723 if (!rank) { 3724 /* determine max buffer needed and allocate it */ 3725 maxnz = procsnz[0]; 3726 for (i=1; i<size; i++) { 3727 maxnz = PetscMax(maxnz,procsnz[i]); 3728 } 3729 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3730 3731 /* read in my part of the matrix column indices */ 3732 nz = procsnz[0]; 3733 ierr = 
PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3734 mycols = ibuf; 3735 if (size == 1) nz -= extra_rows; 3736 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3737 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 3738 3739 /* read in every ones (except the last) and ship off */ 3740 for (i=1; i<size-1; i++) { 3741 nz = procsnz[i]; 3742 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3743 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3744 } 3745 /* read in the stuff for the last proc */ 3746 if (size != 1) { 3747 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 3748 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3749 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3750 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3751 } 3752 ierr = PetscFree(cols);CHKERRQ(ierr); 3753 } else { 3754 /* determine buffer space needed for message */ 3755 nz = 0; 3756 for (i=0; i<m; i++) { 3757 nz += locrowlens[i]; 3758 } 3759 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3760 mycols = ibuf; 3761 /* receive message of column indices*/ 3762 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3763 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3764 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3765 } 3766 3767 /* loop over local rows, determining number of off diagonal entries */ 3768 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 3769 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 3770 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3771 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3772 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3773 rowcount = 0; nzcount = 0; 3774 for (i=0; i<mbs; i++) { 3775 
dcount = 0; 3776 odcount = 0; 3777 for (j=0; j<bs; j++) { 3778 kmax = locrowlens[rowcount]; 3779 for (k=0; k<kmax; k++) { 3780 tmp = mycols[nzcount++]/bs; 3781 if (!mask[tmp]) { 3782 mask[tmp] = 1; 3783 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3784 else masked1[dcount++] = tmp; 3785 } 3786 } 3787 rowcount++; 3788 } 3789 3790 dlens[i] = dcount; 3791 odlens[i] = odcount; 3792 3793 /* zero out the mask elements we set */ 3794 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3795 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 3796 } 3797 3798 3799 if (!sizesset) { 3800 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3801 } 3802 ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3803 3804 if (!rank) { 3805 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3806 /* read in my part of the matrix numerical values */ 3807 nz = procsnz[0]; 3808 vals = buf; 3809 mycols = ibuf; 3810 if (size == 1) nz -= extra_rows; 3811 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3812 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 3813 3814 /* insert into matrix */ 3815 jj = rstart*bs; 3816 for (i=0; i<m; i++) { 3817 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3818 mycols += locrowlens[i]; 3819 vals += locrowlens[i]; 3820 jj++; 3821 } 3822 /* read in other processors (except the last one) and ship out */ 3823 for (i=1; i<size-1; i++) { 3824 nz = procsnz[i]; 3825 vals = buf; 3826 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3827 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3828 } 3829 /* the last proc */ 3830 if (size != 1){ 3831 nz = procsnz[i] - extra_rows; 3832 vals = buf; 3833 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3834 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3835 ierr = 
MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3836 } 3837 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3838 } else { 3839 /* receive numeric values */ 3840 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3841 3842 /* receive message of values*/ 3843 vals = buf; 3844 mycols = ibuf; 3845 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 3846 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 3847 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3848 3849 /* insert into matrix */ 3850 jj = rstart*bs; 3851 for (i=0; i<m; i++) { 3852 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3853 mycols += locrowlens[i]; 3854 vals += locrowlens[i]; 3855 jj++; 3856 } 3857 } 3858 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3859 ierr = PetscFree(buf);CHKERRQ(ierr); 3860 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3861 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 3862 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 3863 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 3864 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3865 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3866 3867 PetscFunctionReturn(0); 3868 } 3869 3870 #undef __FUNCT__ 3871 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 3872 /*@ 3873 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3874 3875 Input Parameters: 3876 . mat - the matrix 3877 . 
fact - factor 3878 3879 Not Collective, each process can use a different factor 3880 3881 Level: advanced 3882 3883 Notes: 3884 This can also be set by the command line option: -mat_use_hash_table <fact> 3885 3886 .keywords: matrix, hashtable, factor, HT 3887 3888 .seealso: MatSetOption() 3889 @*/ 3890 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3891 { 3892 PetscErrorCode ierr; 3893 3894 PetscFunctionBegin; 3895 ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr); 3896 PetscFunctionReturn(0); 3897 } 3898 3899 EXTERN_C_BEGIN 3900 #undef __FUNCT__ 3901 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 3902 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3903 { 3904 Mat_MPIBAIJ *baij; 3905 3906 PetscFunctionBegin; 3907 baij = (Mat_MPIBAIJ*)mat->data; 3908 baij->ht_fact = fact; 3909 PetscFunctionReturn(0); 3910 } 3911 EXTERN_C_END 3912 3913 #undef __FUNCT__ 3914 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 3915 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3916 { 3917 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3918 PetscFunctionBegin; 3919 *Ad = a->A; 3920 *Ao = a->B; 3921 *colmap = a->garray; 3922 PetscFunctionReturn(0); 3923 } 3924 3925 /* 3926 Special version for direct calls from Fortran (to eliminate two function call overheads 3927 */ 3928 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3929 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3930 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3931 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3932 #endif 3933 3934 #undef __FUNCT__ 3935 #define __FUNCT__ "matmpibiajsetvaluesblocked" 3936 /*@C 3937 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 3938 3939 Collective on Mat 3940 3941 Input Parameters: 3942 + mat - the matrix 3943 . min - number of input rows 3944 . im - input rows 3945 . nin - number of input columns 3946 . 
 in - input columns
. v - numerical values input
- addvin - INSERT_VALUES or ADD_VALUES

  Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.

  Level: advanced

.seealso: MatSetValuesBlocked()
@*/
PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
{
  /* convert input arguments to C version */
  Mat             mat = *matin;
  PetscInt        m = *min, n = *nin;
  InsertMode      addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;                  /* bs*bs scratch block, lazily allocated */
  PetscBool       roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) {
    mat->insertmode = addv;
  }
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
#endif
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);


  if(!barray) {
    /* allocate the per-matrix scratch block used to repack one bs x bs block */
    ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* stride (in scalars) between consecutive rows of one block inside v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;  /* negative row index means "skip this row" */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* block row owned locally: insert into diagonal (A) or off-diagonal (B) part */
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          /* gather the bs x bs block into contiguous storage */
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -=bs2;  /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend){
          /* block column in the diagonal portion */
          col = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;  /* negative column index means "skip" */
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          /* block column in the off-diagonal portion: translate the global block
             column to B's local numbering via colmap */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* column not yet present in B: fall back to the slow dynamic path */
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      /* block row owned by another process: stash for communication at assembly */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIBAIJWithArrays"
/*@
     MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard
         CSR format the local rows.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4110 4111 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4112 4113 .keywords: matrix, aij, compressed row, sparse, parallel 4114 4115 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4116 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 4117 @*/ 4118 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4119 { 4120 PetscErrorCode ierr; 4121 4122 4123 PetscFunctionBegin; 4124 if (i[0]) { 4125 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4126 } 4127 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4128 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4129 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4130 ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 4131 ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 4132 PetscFunctionReturn(0); 4133 } 4134