#include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
#include <petscblaslapack.h>

extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
extern PetscErrorCode DisAssemble_MPIBAIJ(Mat);
extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
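/*
   The following two macros insert a single scalar entry into the local SeqBAIJ
   storage of the diagonal (A) and off-diagonal (B) blocks respectively.  Each
   one locates the block column within the block row by a short binary/linear
   search, adds or overwrites the value and, when the row is full and new
   nonzeros are allowed, grows the storage with MatSeqXAIJReallocateAIJ before
   shifting the later entries of the row up.
*/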
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
a_noinsert:; \
    ailen[brow] = nrow; \
}

#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    CHKMEMQ;\
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
b_noinsert:; \
    bilen[brow] = nrow; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat         A     = baij->A;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   =b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray=baij->barray;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) {
              barray[jj] = value[jj];
            }
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,ii,jj,row,col;
  PetscInt       rstart=baij->rstartbs;
  PetscInt       rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row = im[i];
    v_t = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the hash table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      v = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
      sum = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++){
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

/*
  Creates and fills the hash table used by the _HT MatSetValues variants.
  The table is created only once; if new entries need to be added to the
  matrix, the hash table has to be destroyed and recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt       ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
  ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++){
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++){
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) {j++;}
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
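/*
  MatAssemblyEnd_MPIBAIJ drains both the point stash and the block stash that were
  filled by off-process MatSetValues()/MatSetValuesBlocked() calls, inserts the
  received values locally, assembles the diagonal (A) and off-diagonal (B) blocks,
  and, when MAT_USE_HASH_TABLE is enabled, builds the hash table used by the _HT
  setvalues routines at the final assembly.
*/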
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash.  Since the values are stashed column-oriented,
       set the roworiented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
             rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII() */
      PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_Binary"
static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
  PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
  int            fd;
  PetscScalar    *column_values;
  FILE           *file;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = bs2*(A->nz + B->nz);
  rlen = mat->rmap->n;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }

  ierr = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr);
  /* compute lengths of each row */
  for (i=0; i<a->mbs; i++) {
    crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }
  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr);
    rlen = (range[1] - range[0])/bs;
    for (i=0; i<rlen; i++) {
      for (j=0; j<bs; j++) {
        row_lens[i*bs+j] = bs*crow_lens[i];
      }
    }
    ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = (range[i+1] - range[i])/bs;
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      for (k=0; k<rlen; k++) {
        for (j=0; j<bs; j++) {
          row_lens[k*bs+j] = bs*crow_lens[k];
        }
      }
      ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
    ierr = PetscFree(row_lens);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(crow_lens);CHKERRQ(ierr);

  /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the
     information needed to make it for each row from a block row. This does require more communication but still not more than
     the communication needed for the nonzero values */
  nzmax = nz; /* space the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<a->mbs; i++) {
    pcnt = cnt;
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*col+l;
      }
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
      }
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*garray[B->j[j]]+l;
      }
    }
    len = cnt - pcnt;
    for (k=1; k<bs; k++) {
      ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
      cnt += len;
    }
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the columns to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the numerical values */
  ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<a->mbs; i++) {
    rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    cnt += (bs-1)*rlen;
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) {
    fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  if (baij->A){ierr = MatDestroy(baij->A);CHKERRQ(ierr);}
  if (baij->B){ierr = MatDestroy(baij->B);CHKERRQ(ierr);}
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

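/*
  The matrix-vector products below overlap communication with computation:
  the scatter of the ghost values of xx into a->lvec is started, the local
  (diagonal block) product is performed, the scatter is completed, and the
  off-diagonal block then multiplies the received ghost values.
*/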
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation */
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIBAIJ"
PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIBAIJ"
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
1453 */ 1454 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1455 PetscInt max = 1,mbs = mat->mbs,tmp; 1456 for (i=0; i<mbs; i++) { 1457 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1458 if (max < tmp) { max = tmp; } 1459 } 1460 ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1461 } 1462 lrow = row - brstart; 1463 1464 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1465 if (!v) {pvA = 0; pvB = 0;} 1466 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1467 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1468 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1469 nztot = nzA + nzB; 1470 1471 cmap = mat->garray; 1472 if (v || idx) { 1473 if (nztot) { 1474 /* Sort by increasing column numbers, assuming A and B already sorted */ 1475 PetscInt imark = -1; 1476 if (v) { 1477 *v = v_p = mat->rowvalues; 1478 for (i=0; i<nzB; i++) { 1479 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1480 else break; 1481 } 1482 imark = i; 1483 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1484 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1485 } 1486 if (idx) { 1487 *idx = idx_p = mat->rowindices; 1488 if (imark > -1) { 1489 for (i=0; i<imark; i++) { 1490 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1491 } 1492 } else { 1493 for (i=0; i<nzB; i++) { 1494 if (cmap[cworkB[i]/bs] < cstart) 1495 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1496 else break; 1497 } 1498 imark = i; 1499 } 1500 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1501 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1502 } 1503 } else { 1504 if (idx) *idx = 0; 1505 if (v) *v = 0; 1506 } 1507 } 1508 *nz = nztot; 1509 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1510 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1511 PetscFunctionReturn(0); 1512 } 1513 1514 #undef __FUNCT__ 1515 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1516 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1517 { 1518 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1519 1520 PetscFunctionBegin; 1521 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1522 baij->getrowactive = PETSC_FALSE; 1523 PetscFunctionReturn(0); 1524 } 1525 1526 #undef __FUNCT__ 1527 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1528 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1529 { 1530 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1531 PetscErrorCode ierr; 1532 1533 PetscFunctionBegin; 1534 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1535 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1536 PetscFunctionReturn(0); 1537 } 1538 1539 #undef __FUNCT__ 1540 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1541 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1542 { 1543 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1544 Mat A = a->A,B = a->B; 1545 PetscErrorCode ierr; 1546 PetscReal isend[5],irecv[5]; 1547 1548 PetscFunctionBegin; 1549 info->block_size = (PetscReal)matin->rmap->bs; 1550 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1551 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1552 isend[3] = info->memory; isend[4] = info->mallocs; 1553 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1554 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1555 isend[3] += 
info->memory; isend[4] += info->mallocs; 1556 if (flag == MAT_LOCAL) { 1557 info->nz_used = isend[0]; 1558 info->nz_allocated = isend[1]; 1559 info->nz_unneeded = isend[2]; 1560 info->memory = isend[3]; 1561 info->mallocs = isend[4]; 1562 } else if (flag == MAT_GLOBAL_MAX) { 1563 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1564 info->nz_used = irecv[0]; 1565 info->nz_allocated = irecv[1]; 1566 info->nz_unneeded = irecv[2]; 1567 info->memory = irecv[3]; 1568 info->mallocs = irecv[4]; 1569 } else if (flag == MAT_GLOBAL_SUM) { 1570 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1571 info->nz_used = irecv[0]; 1572 info->nz_allocated = irecv[1]; 1573 info->nz_unneeded = irecv[2]; 1574 info->memory = irecv[3]; 1575 info->mallocs = irecv[4]; 1576 } else { 1577 SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1578 } 1579 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1580 info->fill_ratio_needed = 0; 1581 info->factor_mallocs = 0; 1582 PetscFunctionReturn(0); 1583 } 1584 1585 #undef __FUNCT__ 1586 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1587 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1588 { 1589 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1590 PetscErrorCode ierr; 1591 1592 PetscFunctionBegin; 1593 switch (op) { 1594 case MAT_NEW_NONZERO_LOCATIONS: 1595 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1596 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1597 case MAT_KEEP_NONZERO_PATTERN: 1598 case MAT_NEW_NONZERO_LOCATION_ERR: 1599 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1600 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1601 break; 1602 case MAT_ROW_ORIENTED: 1603 a->roworiented = flg; 1604 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1605 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1606 break; 1607 case MAT_NEW_DIAGONALS: 1608 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1609 break; 1610 case MAT_IGNORE_OFF_PROC_ENTRIES: 1611 a->donotstash = flg; 1612 break; 1613 case MAT_USE_HASH_TABLE: 1614 a->ht_flag = flg; 1615 break; 1616 case MAT_SYMMETRIC: 1617 case MAT_STRUCTURALLY_SYMMETRIC: 1618 case MAT_HERMITIAN: 1619 case MAT_SYMMETRY_ETERNAL: 1620 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1621 break; 1622 default: 1623 SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op); 1624 } 1625 PetscFunctionReturn(0); 1626 } 1627 1628 #undef __FUNCT__ 1629 #define __FUNCT__ "MatTranspose_MPIBAIJ" 1630 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1631 { 1632 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1633 Mat_SeqBAIJ *Aloc; 1634 Mat B; 1635 PetscErrorCode ierr; 1636 PetscInt M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1637 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1638 MatScalar *a; 1639 1640 PetscFunctionBegin; 1641 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1642 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1643 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1644 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1645 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1646 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1647 } else { 1648 B = *matout; 1649 } 1650 1651 /* copy over the A part */ 1652 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1653 ai = Aloc->i; aj = 
Aloc->j; a = Aloc->a; 1654 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1655 1656 for (i=0; i<mbs; i++) { 1657 rvals[0] = bs*(baij->rstartbs + i); 1658 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1659 for (j=ai[i]; j<ai[i+1]; j++) { 1660 col = (baij->cstartbs+aj[j])*bs; 1661 for (k=0; k<bs; k++) { 1662 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1663 col++; a += bs; 1664 } 1665 } 1666 } 1667 /* copy over the B part */ 1668 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1669 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1670 for (i=0; i<mbs; i++) { 1671 rvals[0] = bs*(baij->rstartbs + i); 1672 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1673 for (j=ai[i]; j<ai[i+1]; j++) { 1674 col = baij->garray[aj[j]]*bs; 1675 for (k=0; k<bs; k++) { 1676 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1677 col++; a += bs; 1678 } 1679 } 1680 } 1681 ierr = PetscFree(rvals);CHKERRQ(ierr); 1682 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1683 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1684 1685 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1686 *matout = B; 1687 } else { 1688 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1689 } 1690 PetscFunctionReturn(0); 1691 } 1692 1693 #undef __FUNCT__ 1694 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1695 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1696 { 1697 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1698 Mat a = baij->A,b = baij->B; 1699 PetscErrorCode ierr; 1700 PetscInt s1,s2,s3; 1701 1702 PetscFunctionBegin; 1703 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1704 if (rr) { 1705 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1706 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1707 /* Overlap communication with computation. 
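       The scatter of rr into the ghosted work vector lvec starts here; the left scaling of the
       off-diagonal block and the full scaling of the diagonal block proceed while those messages
       are in flight, and the scatter is completed only when the ghosted values of rr are needed
       to right-scale the off-diagonal block.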
*/ 1708 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1709 } 1710 if (ll) { 1711 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1712 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1713 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1714 } 1715 /* scale the diagonal block */ 1716 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1717 1718 if (rr) { 1719 /* Do a scatter end and then right scale the off-diagonal block */ 1720 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1721 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1722 } 1723 1724 PetscFunctionReturn(0); 1725 } 1726 1727 #undef __FUNCT__ 1728 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1729 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1730 { 1731 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1732 PetscErrorCode ierr; 1733 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1734 PetscInt i,*owners = A->rmap->range; 1735 PetscInt *nprocs,j,idx,nsends,row; 1736 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1737 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1; 1738 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap->rstart; 1739 MPI_Comm comm = ((PetscObject)A)->comm; 1740 MPI_Request *send_waits,*recv_waits; 1741 MPI_Status recv_status,*send_status; 1742 const PetscScalar *xx; 1743 PetscScalar *bb; 1744 #if defined(PETSC_DEBUG) 1745 PetscBool found = PETSC_FALSE; 1746 #endif 1747 1748 PetscFunctionBegin; 1749 /* first count number of contributors to each processor */ 1750 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1751 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1752 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1753 j = 0; 1754 for (i=0; i<N; i++) { 1755 if (lastidx > (idx = rows[i])) j = 0; 1756 lastidx = idx; 1757 for (; j<size; j++) { 1758 if (idx >= owners[j] && idx < owners[j+1]) { 1759 nprocs[2*j]++; 1760 nprocs[2*j+1] = 1; 1761 owner[i] = j; 1762 #if defined(PETSC_DEBUG) 1763 found = PETSC_TRUE; 1764 #endif 1765 break; 1766 } 1767 } 1768 #if defined(PETSC_DEBUG) 1769 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1770 found = PETSC_FALSE; 1771 #endif 1772 } 1773 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1774 1775 if (A->nooffproczerorows) { 1776 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 1777 nrecvs = nsends; 1778 nmax = N; 1779 } else { 1780 /* inform other processors of number of messages and max length*/ 1781 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1782 } 1783 1784 /* post receives: */ 1785 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1786 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1787 for (i=0; i<nrecvs; i++) { 1788 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1789 } 1790 1791 /* do sends: 1792 1) starts[i] gives the starting index in svalues for stuff going to 1793 the ith processor 1794 */ 1795 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1796 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1797 ierr = 
PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1798 starts[0] = 0; 1799 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1800 for (i=0; i<N; i++) { 1801 svalues[starts[owner[i]]++] = rows[i]; 1802 } 1803 1804 starts[0] = 0; 1805 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1806 count = 0; 1807 for (i=0; i<size; i++) { 1808 if (nprocs[2*i+1]) { 1809 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1810 } 1811 } 1812 ierr = PetscFree(starts);CHKERRQ(ierr); 1813 1814 base = owners[rank]; 1815 1816 /* wait on receives */ 1817 ierr = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr); 1818 count = nrecvs; 1819 slen = 0; 1820 while (count) { 1821 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1822 /* unpack receives into our local space */ 1823 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1824 source[imdex] = recv_status.MPI_SOURCE; 1825 lens[imdex] = n; 1826 slen += n; 1827 count--; 1828 } 1829 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1830 1831 /* move the data into the send scatter */ 1832 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1833 count = 0; 1834 for (i=0; i<nrecvs; i++) { 1835 values = rvalues + i*nmax; 1836 for (j=0; j<lens[i]; j++) { 1837 lrows[count++] = values[j] - base; 1838 } 1839 } 1840 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1841 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1842 ierr = PetscFree(owner);CHKERRQ(ierr); 1843 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1844 1845 /* fix right hand side if needed */ 1846 if (x && b) { 1847 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1848 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1849 for (i=0; i<slen; i++) { 1850 bb[lrows[i]] = diag*xx[lrows[i]]; 1851 } 1852 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1853 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1854 } 1855 1856 /* actually zap the local rows */ 1857 /* 1858 Zero the required rows. If the "diagonal block" of the matrix 1859 is square and the user wishes to set the diagonal we use separate 1860 code so that MatSetValues() is not called for each diagonal allocating 1861 new memory, thus calling lots of mallocs and slowing things down. 
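       Three cases are handled below: if diag is nonzero and the diagonal block is square,
       MatZeroRows_SeqBAIJ() places diag directly; if diag is nonzero but the diagonal block is
       rectangular, the rows are zeroed first and the diagonal entries are then inserted with
       MatSetValues() followed by a reassembly (which fails if new nonzero locations are
       disallowed); if diag is zero, the rows are simply zeroed.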
1862 1863 */ 1864 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1865 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1866 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1867 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 1868 } else if (diag != 0.0) { 1869 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1870 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1871 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1872 for (i=0; i<slen; i++) { 1873 row = lrows[i] + rstart_bs; 1874 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1875 } 1876 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1877 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1878 } else { 1879 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1880 } 1881 1882 ierr = PetscFree(lrows);CHKERRQ(ierr); 1883 1884 /* wait on sends */ 1885 if (nsends) { 1886 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1887 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1888 ierr = PetscFree(send_status);CHKERRQ(ierr); 1889 } 1890 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1891 ierr = PetscFree(svalues);CHKERRQ(ierr); 1892 1893 PetscFunctionReturn(0); 1894 } 1895 1896 #undef __FUNCT__ 1897 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1898 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1899 { 1900 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1901 PetscErrorCode ierr; 1902 1903 PetscFunctionBegin; 1904 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1905 PetscFunctionReturn(0); 1906 } 1907 1908 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1909 1910 #undef __FUNCT__ 1911 #define __FUNCT__ "MatEqual_MPIBAIJ" 1912 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1913 { 1914 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1915 Mat a,b,c,d; 1916 PetscBool flg; 1917 PetscErrorCode ierr; 1918 1919 PetscFunctionBegin; 1920 a = matA->A; b = matA->B; 1921 c = matB->A; d = matB->B; 1922 1923 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1924 if (flg) { 1925 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1926 } 1927 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1928 PetscFunctionReturn(0); 1929 } 1930 1931 #undef __FUNCT__ 1932 #define __FUNCT__ "MatCopy_MPIBAIJ" 1933 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1934 { 1935 PetscErrorCode ierr; 1936 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1937 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1938 1939 PetscFunctionBegin; 1940 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
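     When the nonzero patterns match and both matrices use this copy routine, the copy reduces to
     two sequential MatCopy() calls on the diagonal and off-diagonal blocks; otherwise the general
     MatCopy_Basic() path is used.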
*/ 1941 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1942 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1943 } else { 1944 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1945 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1946 } 1947 PetscFunctionReturn(0); 1948 } 1949 1950 #undef __FUNCT__ 1951 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1952 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1953 { 1954 PetscErrorCode ierr; 1955 1956 PetscFunctionBegin; 1957 ierr = MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1958 PetscFunctionReturn(0); 1959 } 1960 1961 #undef __FUNCT__ 1962 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1963 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1964 { 1965 PetscErrorCode ierr; 1966 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1967 PetscBLASInt bnz,one=1; 1968 Mat_SeqBAIJ *x,*y; 1969 1970 PetscFunctionBegin; 1971 if (str == SAME_NONZERO_PATTERN) { 1972 PetscScalar alpha = a; 1973 x = (Mat_SeqBAIJ *)xx->A->data; 1974 y = (Mat_SeqBAIJ *)yy->A->data; 1975 bnz = PetscBLASIntCast(x->nz); 1976 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1977 x = (Mat_SeqBAIJ *)xx->B->data; 1978 y = (Mat_SeqBAIJ *)yy->B->data; 1979 bnz = PetscBLASIntCast(x->nz); 1980 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1981 } else { 1982 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1983 } 1984 PetscFunctionReturn(0); 1985 } 1986 1987 #undef __FUNCT__ 1988 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ" 1989 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs) 1990 { 1991 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1992 PetscInt rbs,cbs; 1993 PetscErrorCode ierr; 1994 1995 PetscFunctionBegin; 1996 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1997 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1998 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 1999 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 2000 if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs); 2001 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs); 2002 PetscFunctionReturn(0); 2003 } 2004 2005 #undef __FUNCT__ 2006 #define __FUNCT__ "MatRealPart_MPIBAIJ" 2007 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 2008 { 2009 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2010 PetscErrorCode ierr; 2011 2012 PetscFunctionBegin; 2013 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2014 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2015 PetscFunctionReturn(0); 2016 } 2017 2018 #undef __FUNCT__ 2019 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 2020 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 2021 { 2022 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2023 PetscErrorCode ierr; 2024 2025 PetscFunctionBegin; 2026 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2027 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2028 PetscFunctionReturn(0); 2029 } 2030 2031 #undef __FUNCT__ 2032 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 2033 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 2034 { 2035 PetscErrorCode ierr; 2036 IS iscol_local; 2037 PetscInt csize; 2038 2039 PetscFunctionBegin; 2040 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2041 if (call == MAT_REUSE_MATRIX) { 2042 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 2043 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used 
before, cannot reuse"); 2044 } else { 2045 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2046 } 2047 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 2048 if (call == MAT_INITIAL_MATRIX) { 2049 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 2050 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 2051 } 2052 PetscFunctionReturn(0); 2053 } 2054 2055 #undef __FUNCT__ 2056 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private" 2057 /* 2058 Not great since it makes two copies of the submatrix, first an SeqBAIJ 2059 in local and then by concatenating the local matrices the end result. 2060 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ() 2061 */ 2062 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2063 { 2064 PetscErrorCode ierr; 2065 PetscMPIInt rank,size; 2066 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 2067 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2068 Mat *local,M,Mreuse; 2069 MatScalar *vwork,*aa; 2070 MPI_Comm comm = ((PetscObject)mat)->comm; 2071 Mat_SeqBAIJ *aij; 2072 2073 2074 PetscFunctionBegin; 2075 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2076 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2077 2078 if (call == MAT_REUSE_MATRIX) { 2079 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2080 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2081 local = &Mreuse; 2082 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2083 } else { 2084 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2085 Mreuse = *local; 2086 ierr = PetscFree(local);CHKERRQ(ierr); 2087 } 2088 2089 /* 2090 m - number of local rows 2091 n - number of columns (same on all processors) 2092 rstart - first row in new global matrix generated 2093 */ 2094 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2095 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2096 m = m/bs; 2097 n = n/bs; 2098 2099 if (call == MAT_INITIAL_MATRIX) { 2100 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2101 ii = aij->i; 2102 jj = aij->j; 2103 2104 /* 2105 Determine the number of non-zeros in the diagonal and off-diagonal 2106 portions of the matrix in order to do correct preallocation 2107 */ 2108 2109 /* first get start and end of "diagonal" columns */ 2110 if (csize == PETSC_DECIDE) { 2111 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2112 if (mglobal == n*bs) { /* square matrix */ 2113 nlocal = m; 2114 } else { 2115 nlocal = n/size + ((n % size) > rank); 2116 } 2117 } else { 2118 nlocal = csize/bs; 2119 } 2120 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2121 rstart = rend - nlocal; 2122 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2123 2124 /* next, compute all the lengths */ 2125 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2126 olens = dlens + m; 2127 for (i=0; i<m; i++) { 2128 jend = ii[i+1] - ii[i]; 2129 olen = 0; 2130 dlen = 0; 2131 for (j=0; j<jend; j++) { 2132 if (*jj < rstart || *jj >= rend) olen++; 2133 else dlen++; 2134 jj++; 2135 } 2136 olens[i] = olen; 2137 dlens[i] = dlen; 2138 } 2139 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2140 ierr = 
MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 2141 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2142 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2143 ierr = PetscFree(dlens);CHKERRQ(ierr); 2144 } else { 2145 PetscInt ml,nl; 2146 2147 M = *newmat; 2148 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2149 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2150 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2151 /* 2152 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2153 rather than the slower MatSetValues(). 2154 */ 2155 M->was_assembled = PETSC_TRUE; 2156 M->assembled = PETSC_FALSE; 2157 } 2158 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2159 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2160 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2161 ii = aij->i; 2162 jj = aij->j; 2163 aa = aij->a; 2164 for (i=0; i<m; i++) { 2165 row = rstart/bs + i; 2166 nz = ii[i+1] - ii[i]; 2167 cwork = jj; jj += nz; 2168 vwork = aa; aa += nz; 2169 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2170 } 2171 2172 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2173 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2174 *newmat = M; 2175 2176 /* save submatrix used in processor for next request */ 2177 if (call == MAT_INITIAL_MATRIX) { 2178 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2179 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2180 } 2181 2182 PetscFunctionReturn(0); 2183 } 2184 2185 #undef __FUNCT__ 2186 #define __FUNCT__ "MatPermute_MPIBAIJ" 2187 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2188 { 2189 MPI_Comm comm,pcomm; 2190 PetscInt first,local_size,nrows; 2191 const PetscInt *rows; 2192 PetscMPIInt size; 2193 IS crowp,growp,irowp,lrowp,lcolp,icolp; 2194 PetscErrorCode ierr; 2195 2196 PetscFunctionBegin; 2197 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2198 /* make a collective version of 'rowp' */ 2199 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2200 if (pcomm==comm) { 2201 crowp = rowp; 2202 } else { 2203 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2204 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2205 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2206 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2207 } 2208 /* collect the global row permutation and invert it */ 2209 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 2210 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 2211 if (pcomm!=comm) { 2212 ierr = ISDestroy(crowp);CHKERRQ(ierr); 2213 } 2214 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 2215 /* get the local target indices */ 2216 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 2217 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 2218 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 2219 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 2220 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 2221 ierr = ISDestroy(irowp);CHKERRQ(ierr); 2222 /* the column permutation is so much easier; 2223 make a local version of 'colp' and invert it */ 2224 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2225 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2226 if (size==1) { 2227 lcolp 
= colp; 2228 } else { 2229 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 2230 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 2231 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr); 2232 } 2233 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2234 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 2235 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 2236 if (size>1) { 2237 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 2238 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 2239 } 2240 /* now we just get the submatrix */ 2241 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2242 /* clean up */ 2243 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 2244 ierr = ISDestroy(icolp);CHKERRQ(ierr); 2245 PetscFunctionReturn(0); 2246 } 2247 2248 #undef __FUNCT__ 2249 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2250 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2251 { 2252 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2253 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2254 2255 PetscFunctionBegin; 2256 if (nghosts) { *nghosts = B->nbs;} 2257 if (ghosts) {*ghosts = baij->garray;} 2258 PetscFunctionReturn(0); 2259 } 2260 2261 extern PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat); 2262 2263 #undef __FUNCT__ 2264 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ" 2265 /* 2266 This routine is almost identical to MatFDColoringCreate_MPIBAIJ()! 2267 */ 2268 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 2269 { 2270 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 2271 PetscErrorCode ierr; 2272 PetscMPIInt size,*ncolsonproc,*disp,nn; 2273 PetscInt bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col; 2274 const PetscInt *is; 2275 PetscInt nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj; 2276 PetscInt *rowhit,M,cstart,cend,colb; 2277 PetscInt *columnsforrow,l; 2278 IS *isa; 2279 PetscBool done,flg; 2280 ISLocalToGlobalMapping map = mat->cbmapping; 2281 PetscInt *ltog = (map ? 
map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype; 2282 2283 PetscFunctionBegin; 2284 if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();"); 2285 if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock"); 2286 2287 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 2288 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2289 M = mat->rmap->n/bs; 2290 cstart = mat->cmap->rstart/bs; 2291 cend = mat->cmap->rend/bs; 2292 c->M = mat->rmap->N/bs; /* set the global rows and columns and local rows */ 2293 c->N = mat->cmap->N/bs; 2294 c->m = mat->rmap->n/bs; 2295 c->rstart = mat->rmap->rstart/bs; 2296 2297 c->ncolors = nis; 2298 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 2299 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 2300 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 2301 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 2302 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 2303 ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr); 2304 2305 /* Allow access to data structures of local part of matrix */ 2306 if (!baij->colmap) { 2307 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 2308 } 2309 ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2310 ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2311 2312 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 2313 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 2314 2315 for (i=0; i<nis; i++) { 2316 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 2317 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 2318 c->ncolumns[i] = n; 2319 if (n) { 2320 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 2321 ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr); 2322 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 2323 } else { 2324 c->columns[i] = 0; 2325 } 2326 2327 if (ctype == IS_COLORING_GLOBAL){ 2328 /* Determine the total (parallel) number of columns of this color */ 2329 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 2330 ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr); 2331 2332 nn = PetscMPIIntCast(n); 2333 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2334 nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];} 2335 if (!nctot) { 2336 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 2337 } 2338 2339 disp[0] = 0; 2340 for (j=1; j<size; j++) { 2341 disp[j] = disp[j-1] + ncolsonproc[j-1]; 2342 } 2343 2344 /* Get complete list of columns for color on each processor */ 2345 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2346 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2347 ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr); 2348 } else if (ctype == IS_COLORING_GHOSTED){ 2349 /* Determine local number of columns of this color on this process, including ghost points */ 2350 nctot = n; 2351 ierr = 
PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2352 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 2353 } else { 2354 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 2355 } 2356 2357 /* 2358 Mark all rows affect by these columns 2359 */ 2360 /* Temporary option to allow for debugging/testing */ 2361 flg = PETSC_FALSE; 2362 ierr = PetscOptionsGetBool(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr); 2363 if (!flg) {/*-----------------------------------------------------------------------------*/ 2364 /* crude, fast version */ 2365 ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr); 2366 /* loop over columns*/ 2367 for (j=0; j<nctot; j++) { 2368 if (ctype == IS_COLORING_GHOSTED) { 2369 col = ltog[cols[j]]; 2370 } else { 2371 col = cols[j]; 2372 } 2373 if (col >= cstart && col < cend) { 2374 /* column is in diagonal block of matrix */ 2375 rows = A_cj + A_ci[col-cstart]; 2376 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2377 } else { 2378 #if defined (PETSC_USE_CTABLE) 2379 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2380 colb --; 2381 #else 2382 colb = baij->colmap[col] - 1; 2383 #endif 2384 if (colb == -1) { 2385 m = 0; 2386 } else { 2387 colb = colb/bs; 2388 rows = B_cj + B_ci[colb]; 2389 m = B_ci[colb+1] - B_ci[colb]; 2390 } 2391 } 2392 /* loop over columns marking them in rowhit */ 2393 for (k=0; k<m; k++) { 2394 rowhit[*rows++] = col + 1; 2395 } 2396 } 2397 2398 /* count the number of hits */ 2399 nrows = 0; 2400 for (j=0; j<M; j++) { 2401 if (rowhit[j]) nrows++; 2402 } 2403 c->nrows[i] = nrows; 2404 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2405 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2406 ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2407 nrows = 0; 2408 for (j=0; j<M; j++) { 2409 if (rowhit[j]) { 2410 c->rows[i][nrows] = j; 2411 c->columnsforrow[i][nrows] = rowhit[j] - 1; 2412 nrows++; 2413 } 2414 } 2415 } else {/*-------------------------------------------------------------------------------*/ 2416 /* slow version, using rowhit as a linked list */ 2417 PetscInt currentcol,fm,mfm; 2418 rowhit[M] = M; 2419 nrows = 0; 2420 /* loop over columns*/ 2421 for (j=0; j<nctot; j++) { 2422 if (ctype == IS_COLORING_GHOSTED) { 2423 col = ltog[cols[j]]; 2424 } else { 2425 col = cols[j]; 2426 } 2427 if (col >= cstart && col < cend) { 2428 /* column is in diagonal block of matrix */ 2429 rows = A_cj + A_ci[col-cstart]; 2430 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2431 } else { 2432 #if defined (PETSC_USE_CTABLE) 2433 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2434 colb --; 2435 #else 2436 colb = baij->colmap[col] - 1; 2437 #endif 2438 if (colb == -1) { 2439 m = 0; 2440 } else { 2441 colb = colb/bs; 2442 rows = B_cj + B_ci[colb]; 2443 m = B_ci[colb+1] - B_ci[colb]; 2444 } 2445 } 2446 2447 /* loop over columns marking them in rowhit */ 2448 fm = M; /* fm points to first entry in linked list */ 2449 for (k=0; k<m; k++) { 2450 currentcol = *rows++; 2451 /* is it already in the list? 
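         rowhit[] doubles here as a sorted singly linked list of the block rows already hit, with
         M used as both the list head and the end sentinel; the loop below walks the list until it
         reaches an entry >= currentcol so the new row can be spliced in in sorted order, while an
         exact match means the same row was hit by two columns of this color, i.e. an invalid
         coloring.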
*/ 2452 do { 2453 mfm = fm; 2454 fm = rowhit[fm]; 2455 } while (fm < currentcol); 2456 /* not in list so add it */ 2457 if (fm != currentcol) { 2458 nrows++; 2459 columnsforrow[currentcol] = col; 2460 /* next three lines insert new entry into linked list */ 2461 rowhit[mfm] = currentcol; 2462 rowhit[currentcol] = fm; 2463 fm = currentcol; 2464 /* fm points to present position in list since we know the columns are sorted */ 2465 } else { 2466 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected"); 2467 } 2468 } 2469 } 2470 c->nrows[i] = nrows; 2471 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2472 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2473 ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2474 /* now store the linked list of rows into c->rows[i] */ 2475 nrows = 0; 2476 fm = rowhit[M]; 2477 do { 2478 c->rows[i][nrows] = fm; 2479 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 2480 fm = rowhit[fm]; 2481 } while (fm < M); 2482 } /* ---------------------------------------------------------------------------------------*/ 2483 ierr = PetscFree(cols);CHKERRQ(ierr); 2484 } 2485 2486 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 2487 /* 2488 vscale will contain the "diagonal" on processor scalings followed by the off processor 2489 */ 2490 if (ctype == IS_COLORING_GLOBAL) { 2491 PetscInt *garray; 2492 ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2493 for (i=0; i<baij->B->cmap->n/bs; i++) { 2494 for (j=0; j<bs; j++) { 2495 garray[i*bs+j] = bs*baij->garray[i]+j; 2496 } 2497 } 2498 ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr); 2499 ierr = PetscFree(garray);CHKERRQ(ierr); 2500 CHKMEMQ; 2501 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2502 for (k=0; k<c->ncolors; k++) { 2503 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2504 for (l=0; l<c->nrows[k]; l++) { 2505 col = c->columnsforrow[k][l]; 2506 if (col >= cstart && col < cend) { 2507 /* column is in diagonal block of matrix */ 2508 colb = col - cstart; 2509 } else { 2510 /* column is in "off-processor" part */ 2511 #if defined (PETSC_USE_CTABLE) 2512 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2513 colb --; 2514 #else 2515 colb = baij->colmap[col] - 1; 2516 #endif 2517 colb = colb/bs; 2518 colb += cend - cstart; 2519 } 2520 c->vscaleforrow[k][l] = colb; 2521 } 2522 } 2523 } else if (ctype == IS_COLORING_GHOSTED) { 2524 /* Get gtol mapping */ 2525 PetscInt N = mat->cmap->N, *gtol; 2526 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr); 2527 for (i=0; i<N; i++) gtol[i] = -1; 2528 for (i=0; i<map->n; i++) gtol[ltog[i]] = i; 2529 2530 c->vscale = 0; /* will be created in MatFDColoringApply() */ 2531 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2532 for (k=0; k<c->ncolors; k++) { 2533 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2534 for (l=0; l<c->nrows[k]; l++) { 2535 col = c->columnsforrow[k][l]; /* global column index */ 2536 c->vscaleforrow[k][l] = gtol[col]; /* local column index */ 2537 } 2538 } 2539 ierr = PetscFree(gtol);CHKERRQ(ierr); 2540 } 2541 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 2542 2543 ierr = PetscFree(rowhit);CHKERRQ(ierr); 2544 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 2545
ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2546 ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2547 CHKMEMQ; 2548 PetscFunctionReturn(0); 2549 } 2550 2551 #undef __FUNCT__ 2552 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ" 2553 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat) 2554 { 2555 Mat B; 2556 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2557 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2558 Mat_SeqAIJ *b; 2559 PetscErrorCode ierr; 2560 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2561 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2562 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2563 2564 PetscFunctionBegin; 2565 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2566 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2567 2568 /* ---------------------------------------------------------------- 2569 Tell every processor the number of nonzeros per row 2570 */ 2571 ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 2572 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2573 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2574 } 2575 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2576 ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr); 2577 displs = recvcounts + size; 2578 for (i=0; i<size; i++) { 2579 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2580 displs[i] = A->rmap->range[i]/bs; 2581 } 2582 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2583 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2584 #else 2585 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2586 #endif 2587 /* --------------------------------------------------------------- 2588 Create the sequential matrix of the same type as the local block diagonal 2589 */ 2590 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2591 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2592 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2593 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2594 b = (Mat_SeqAIJ *)B->data; 2595 2596 /*-------------------------------------------------------------------- 2597 Copy my part of matrix column indices over 2598 */ 2599 sendcount = ad->nz + bd->nz; 2600 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2601 a_jsendbuf = ad->j; 2602 b_jsendbuf = bd->j; 2603 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2604 cnt = 0; 2605 for (i=0; i<n; i++) { 2606 2607 /* put in lower diagonal portion */ 2608 m = bd->i[i+1] - bd->i[i]; 2609 while (m > 0) { 2610 /* is it above diagonal (in bd (compressed) numbering) */ 2611 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2612 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2613 m--; 2614 } 2615 2616 /* put in diagonal portion */ 2617 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2618 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2619 } 2620 2621 /* put in upper diagonal portion */ 2622 while (m-- > 0) { 2623 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2624 } 2625 } 2626 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D 
actual nz %D",sendcount,cnt); 2627 2628 /*-------------------------------------------------------------------- 2629 Gather all column indices to all processors 2630 */ 2631 for (i=0; i<size; i++) { 2632 recvcounts[i] = 0; 2633 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2634 recvcounts[i] += lens[j]; 2635 } 2636 } 2637 displs[0] = 0; 2638 for (i=1; i<size; i++) { 2639 displs[i] = displs[i-1] + recvcounts[i-1]; 2640 } 2641 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2642 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2643 #else 2644 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2645 #endif 2646 /*-------------------------------------------------------------------- 2647 Assemble the matrix into useable form (note numerical values not yet set) 2648 */ 2649 /* set the b->ilen (length of each row) values */ 2650 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2651 /* set the b->i indices */ 2652 b->i[0] = 0; 2653 for (i=1; i<=A->rmap->N/bs; i++) { 2654 b->i[i] = b->i[i-1] + lens[i-1]; 2655 } 2656 ierr = PetscFree(lens);CHKERRQ(ierr); 2657 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2658 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2659 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2660 2661 if (A->symmetric){ 2662 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2663 } else if (A->hermitian) { 2664 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2665 } else if (A->structurally_symmetric) { 2666 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2667 } 2668 *newmat = B; 2669 PetscFunctionReturn(0); 2670 } 2671 2672 #undef __FUNCT__ 2673 #define __FUNCT__ "MatSOR_MPIBAIJ" 2674 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2675 { 2676 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2677 PetscErrorCode ierr; 2678 Vec bb1 = 0; 2679 2680 PetscFunctionBegin; 2681 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2682 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2683 } 2684 2685 if (flag == SOR_APPLY_UPPER) { 2686 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2687 PetscFunctionReturn(0); 2688 } 2689 2690 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2691 if (flag & SOR_ZERO_INITIAL_GUESS) { 2692 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2693 its--; 2694 } 2695 2696 while (its--) { 2697 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2698 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2699 2700 /* update rhs: bb1 = bb - B*x */ 2701 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2702 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2703 2704 /* local sweep */ 2705 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2706 } 2707 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 2708 if (flag & SOR_ZERO_INITIAL_GUESS) { 2709 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2710 its--; 2711 } 2712 while (its--) { 2713 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2714 ierr = 
VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2715 2716 /* update rhs: bb1 = bb - B*x */ 2717 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2718 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2719 2720 /* local sweep */ 2721 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2722 } 2723 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 2724 if (flag & SOR_ZERO_INITIAL_GUESS) { 2725 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2726 its--; 2727 } 2728 while (its--) { 2729 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2730 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2731 2732 /* update rhs: bb1 = bb - B*x */ 2733 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2734 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2735 2736 /* local sweep */ 2737 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2738 } 2739 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2740 2741 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 2742 PetscFunctionReturn(0); 2743 } 2744 2745 extern PetscErrorCode MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2746 2747 2748 /* -------------------------------------------------------------------*/ 2749 static struct _MatOps MatOps_Values = { 2750 MatSetValues_MPIBAIJ, 2751 MatGetRow_MPIBAIJ, 2752 MatRestoreRow_MPIBAIJ, 2753 MatMult_MPIBAIJ, 2754 /* 4*/ MatMultAdd_MPIBAIJ, 2755 MatMultTranspose_MPIBAIJ, 2756 MatMultTransposeAdd_MPIBAIJ, 2757 0, 2758 0, 2759 0, 2760 /*10*/ 0, 2761 0, 2762 0, 2763 MatSOR_MPIBAIJ, 2764 MatTranspose_MPIBAIJ, 2765 /*15*/ MatGetInfo_MPIBAIJ, 2766 MatEqual_MPIBAIJ, 2767 MatGetDiagonal_MPIBAIJ, 2768 MatDiagonalScale_MPIBAIJ, 2769 MatNorm_MPIBAIJ, 2770 /*20*/ MatAssemblyBegin_MPIBAIJ, 2771 MatAssemblyEnd_MPIBAIJ, 2772 MatSetOption_MPIBAIJ, 2773 MatZeroEntries_MPIBAIJ, 2774 /*24*/ MatZeroRows_MPIBAIJ, 2775 0, 2776 0, 2777 0, 2778 0, 2779 /*29*/ MatSetUpPreallocation_MPIBAIJ, 2780 0, 2781 0, 2782 0, 2783 0, 2784 /*34*/ MatDuplicate_MPIBAIJ, 2785 0, 2786 0, 2787 0, 2788 0, 2789 /*39*/ MatAXPY_MPIBAIJ, 2790 MatGetSubMatrices_MPIBAIJ, 2791 MatIncreaseOverlap_MPIBAIJ, 2792 MatGetValues_MPIBAIJ, 2793 MatCopy_MPIBAIJ, 2794 /*44*/ 0, 2795 MatScale_MPIBAIJ, 2796 0, 2797 0, 2798 0, 2799 /*49*/ MatSetBlockSize_MPIBAIJ, 2800 0, 2801 0, 2802 0, 2803 0, 2804 /*54*/ MatFDColoringCreate_MPIBAIJ, 2805 0, 2806 MatSetUnfactored_MPIBAIJ, 2807 MatPermute_MPIBAIJ, 2808 MatSetValuesBlocked_MPIBAIJ, 2809 /*59*/ MatGetSubMatrix_MPIBAIJ, 2810 MatDestroy_MPIBAIJ, 2811 MatView_MPIBAIJ, 2812 0, 2813 0, 2814 /*64*/ 0, 2815 0, 2816 0, 2817 0, 2818 0, 2819 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2820 0, 2821 0, 2822 0, 2823 0, 2824 /*74*/ 0, 2825 MatFDColoringApply_BAIJ, 2826 0, 2827 0, 2828 0, 2829 /*79*/ 0, 2830 0, 2831 0, 2832 0, 2833 MatLoad_MPIBAIJ, 2834 /*84*/ 0, 2835 0, 2836 0, 2837 0, 2838 0, 2839 /*89*/ 0, 2840 0, 2841 0, 2842 0, 2843 0, 2844 /*94*/ 0, 2845 0, 2846 0, 2847 0, 2848 0, 2849 /*99*/ 0, 2850 0, 2851 0, 2852 0, 2853 0, 2854 /*104*/0, 2855 MatRealPart_MPIBAIJ, 2856 MatImaginaryPart_MPIBAIJ, 2857 0, 2858 0, 2859 /*109*/0, 2860 0, 2861 0, 2862 0, 2863 0, 2864 /*114*/MatGetSeqNonzerostructure_MPIBAIJ, 2865 0, 2866 MatGetGhosts_MPIBAIJ, 2867 0, 2868 0, 2869 /*119*/0, 2870 0, 2871 0, 2872 0 2873 }; 2874 2875 
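/*
    Example: creating and assembling a matrix of the type implemented in this file.
    This is an illustrative sketch only; the block size, local dimensions, and
    preallocation counts below are made-up values, and the PetscErrorCode returned by
    each call (checked with CHKERRQ() throughout this file) is ignored for brevity.

       Mat      A;
       PetscInt bs = 2, mbs = 10;                          10 block rows per process
       MatCreate(PETSC_COMM_WORLD,&A);
       MatSetSizes(A,bs*mbs,bs*mbs,PETSC_DETERMINE,PETSC_DETERMINE);
       MatSetType(A,MATMPIBAIJ);
       MatMPIBAIJSetPreallocation(A,bs,5,PETSC_NULL,2,PETSC_NULL);
       ... insert entries with MatSetValuesBlocked() or MatSetValues() ...
       MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
       MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);

    The per-row arguments d_nnz/o_nnz of MatMPIBAIJSetPreallocation() (see its man page
    further below) give more precise preallocation than the scalar d_nz/o_nz used here.
*/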
EXTERN_C_BEGIN 2876 #undef __FUNCT__ 2877 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2878 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscBool *iscopy,MatReuse reuse,Mat *a) 2879 { 2880 PetscFunctionBegin; 2881 *a = ((Mat_MPIBAIJ *)A->data)->A; 2882 *iscopy = PETSC_FALSE; 2883 PetscFunctionReturn(0); 2884 } 2885 EXTERN_C_END 2886 2887 EXTERN_C_BEGIN 2888 extern PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2889 EXTERN_C_END 2890 2891 EXTERN_C_BEGIN 2892 #undef __FUNCT__ 2893 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2894 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2895 { 2896 PetscInt m,rstart,cstart,cend; 2897 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2898 const PetscInt *JJ=0; 2899 PetscScalar *values=0; 2900 PetscErrorCode ierr; 2901 2902 PetscFunctionBegin; 2903 2904 if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2905 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2906 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2907 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2908 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2909 m = B->rmap->n/bs; 2910 rstart = B->rmap->rstart/bs; 2911 cstart = B->cmap->rstart/bs; 2912 cend = B->cmap->rend/bs; 2913 2914 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2915 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 2916 for (i=0; i<m; i++) { 2917 nz = ii[i+1] - ii[i]; 2918 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2919 nz_max = PetscMax(nz_max,nz); 2920 JJ = jj + ii[i]; 2921 for (j=0; j<nz; j++) { 2922 if (*JJ >= cstart) break; 2923 JJ++; 2924 } 2925 d = 0; 2926 for (; j<nz; j++) { 2927 if (*JJ++ >= cend) break; 2928 d++; 2929 } 2930 d_nnz[i] = d; 2931 o_nnz[i] = nz - d; 2932 } 2933 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2934 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2935 2936 values = (PetscScalar*)V; 2937 if (!values) { 2938 ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2939 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2940 } 2941 for (i=0; i<m; i++) { 2942 PetscInt row = i + rstart; 2943 PetscInt ncols = ii[i+1] - ii[i]; 2944 const PetscInt *icols = jj + ii[i]; 2945 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2946 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2947 } 2948 2949 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2950 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2951 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2952 2953 PetscFunctionReturn(0); 2954 } 2955 EXTERN_C_END 2956 2957 #undef __FUNCT__ 2958 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2959 /*@C 2960 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2961 (the default parallel PETSc format). 2962 2963 Collective on MPI_Comm 2964 2965 Input Parameters: 2966 + A - the matrix 2967 . bs - the block size 2968 . i - the indices into j for the start of each local row (starts with zero) 2969 . 
j - the column indices for each local row (starts with zero) these must be sorted for each row 2970 - v - optional values in the matrix 2971 2972 Level: developer 2973 2974 .keywords: matrix, aij, compressed row, sparse, parallel 2975 2976 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2977 @*/ 2978 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2979 { 2980 PetscErrorCode ierr; 2981 2982 PetscFunctionBegin; 2983 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2984 PetscFunctionReturn(0); 2985 } 2986 2987 EXTERN_C_BEGIN 2988 #undef __FUNCT__ 2989 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2990 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2991 { 2992 Mat_MPIBAIJ *b; 2993 PetscErrorCode ierr; 2994 PetscInt i, newbs = PetscAbs(bs); 2995 2996 PetscFunctionBegin; 2997 if (bs < 0) { 2998 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr); 2999 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 3000 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3001 bs = PetscAbs(bs); 3002 } 3003 if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz"); 3004 bs = newbs; 3005 3006 3007 if (bs < 1) SETERRQ(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 3008 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3009 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3010 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3011 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3012 3013 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3014 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3015 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3016 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3017 3018 if (d_nnz) { 3019 for (i=0; i<B->rmap->n/bs; i++) { 3020 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 3021 } 3022 } 3023 if (o_nnz) { 3024 for (i=0; i<B->rmap->n/bs; i++) { 3025 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 3026 } 3027 } 3028 3029 b = (Mat_MPIBAIJ*)B->data; 3030 b->bs2 = bs*bs; 3031 b->mbs = B->rmap->n/bs; 3032 b->nbs = B->cmap->n/bs; 3033 b->Mbs = B->rmap->N/bs; 3034 b->Nbs = B->cmap->N/bs; 3035 3036 for (i=0; i<=b->size; i++) { 3037 b->rangebs[i] = B->rmap->range[i]/bs; 3038 } 3039 b->rstartbs = B->rmap->rstart/bs; 3040 b->rendbs = B->rmap->rend/bs; 3041 b->cstartbs = B->cmap->rstart/bs; 3042 b->cendbs = B->cmap->rend/bs; 3043 3044 if (!B->preallocated) { 3045 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3046 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3047 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 3048 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3049 ierr = 
MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3050 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3051 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 3052 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3053 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 3054 } 3055 3056 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3057 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 3058 B->preallocated = PETSC_TRUE; 3059 PetscFunctionReturn(0); 3060 } 3061 EXTERN_C_END 3062 3063 EXTERN_C_BEGIN 3064 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 3065 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 3066 EXTERN_C_END 3067 3068 3069 EXTERN_C_BEGIN 3070 #undef __FUNCT__ 3071 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 3072 PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj) 3073 { 3074 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 3075 PetscErrorCode ierr; 3076 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 3077 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 3078 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 3079 3080 PetscFunctionBegin; 3081 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr); 3082 ii[0] = 0; 3083 CHKMEMQ; 3084 for (i=0; i<M; i++) { 3085 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 3086 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 3087 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 3088 /* remove one from count of matrix has diagonal */ 3089 for (j=id[i]; j<id[i+1]; j++) { 3090 if (jd[j] == i) {ii[i+1]--;break;} 3091 } 3092 CHKMEMQ; 3093 } 3094 ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3095 cnt = 0; 3096 for (i=0; i<M; i++) { 3097 for (j=io[i]; j<io[i+1]; j++) { 3098 if (garray[jo[j]] > rstart) break; 3099 jj[cnt++] = garray[jo[j]]; 3100 CHKMEMQ; 3101 } 3102 for (k=id[i]; k<id[i+1]; k++) { 3103 if (jd[k] != i) { 3104 jj[cnt++] = rstart + jd[k]; 3105 CHKMEMQ; 3106 } 3107 } 3108 for (;j<io[i+1]; j++) { 3109 jj[cnt++] = garray[jo[j]]; 3110 CHKMEMQ; 3111 } 3112 } 3113 ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr); 3114 PetscFunctionReturn(0); 3115 } 3116 EXTERN_C_END 3117 3118 #include <../src/mat/impls/aij/mpi/mpiaij.h> 3119 EXTERN_C_BEGIN 3120 PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*); 3121 EXTERN_C_END 3122 3123 EXTERN_C_BEGIN 3124 #undef __FUNCT__ 3125 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ" 3126 PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,const MatType newtype,MatReuse reuse,Mat *newmat) 3127 { 3128 PetscErrorCode ierr; 3129 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3130 Mat B; 3131 Mat_MPIAIJ *b; 3132 3133 PetscFunctionBegin; 3134 if (!A->assembled) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Matrix must be assembled"); 3135 3136 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 3137 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3138 ierr = MatSetType(B,newtype);CHKERRQ(ierr); 3139 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 3140 b = (Mat_MPIAIJ*) B->data; 3141 3142 ierr = MatDestroy(b->A);CHKERRQ(ierr); 3143 ierr = MatDestroy(b->B);CHKERRQ(ierr); 
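  /* The sequential blocks just destroyed were the empty ones created by MatMPIAIJSetPreallocation()
     above; they are rebuilt below by disassembling this matrix and converting its SeqBAIJ blocks to
     SeqAIJ, after which both matrices are reassembled. */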
3144 ierr = DisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 3145 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 3146 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 3147 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3148 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3149 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3150 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3151 if (reuse == MAT_REUSE_MATRIX) { 3152 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 3153 } else { 3154 *newmat = B; 3155 } 3156 PetscFunctionReturn(0); 3157 } 3158 EXTERN_C_END 3159 3160 EXTERN_C_BEGIN 3161 #if defined(PETSC_HAVE_MUMPS) 3162 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3163 #endif 3164 EXTERN_C_END 3165 3166 /*MC 3167 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 3168 3169 Options Database Keys: 3170 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 3171 . -mat_block_size <bs> - set the blocksize used to store the matrix 3172 - -mat_use_hash_table <fact> 3173 3174 Level: beginner 3175 3176 .seealso: MatCreateMPIBAIJ 3177 M*/ 3178 3179 EXTERN_C_BEGIN 3180 #undef __FUNCT__ 3181 #define __FUNCT__ "MatCreate_MPIBAIJ" 3182 PetscErrorCode MatCreate_MPIBAIJ(Mat B) 3183 { 3184 Mat_MPIBAIJ *b; 3185 PetscErrorCode ierr; 3186 PetscBool flg; 3187 3188 PetscFunctionBegin; 3189 ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr); 3190 B->data = (void*)b; 3191 3192 3193 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3194 B->assembled = PETSC_FALSE; 3195 3196 B->insertmode = NOT_SET_VALUES; 3197 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 3198 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 3199 3200 /* build local table of row and column ownerships */ 3201 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 3202 3203 /* build cache for off array entries formed */ 3204 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 3205 b->donotstash = PETSC_FALSE; 3206 b->colmap = PETSC_NULL; 3207 b->garray = PETSC_NULL; 3208 b->roworiented = PETSC_TRUE; 3209 3210 /* stuff used in block assembly */ 3211 b->barray = 0; 3212 3213 /* stuff used for matrix vector multiply */ 3214 b->lvec = 0; 3215 b->Mvctx = 0; 3216 3217 /* stuff for MatGetRow() */ 3218 b->rowindices = 0; 3219 b->rowvalues = 0; 3220 b->getrowactive = PETSC_FALSE; 3221 3222 /* hash table stuff */ 3223 b->ht = 0; 3224 b->hd = 0; 3225 b->ht_size = 0; 3226 b->ht_flag = PETSC_FALSE; 3227 b->ht_fact = 0; 3228 b->ht_total_ct = 0; 3229 b->ht_insert_ct = 0; 3230 3231 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 3232 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 3233 if (flg) { 3234 PetscReal fact = 1.39; 3235 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 3236 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 3237 if (fact <= 1.0) fact = 1.39; 3238 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 3239 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 3240 } 3241 ierr = 
PetscOptionsEnd();CHKERRQ(ierr);

#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C","MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C","MatConvert_MPIBAIJ_MPIAdj",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiaij_C","MatConvert_MPIBAIJ_MPIAIJ",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C","MatStoreValues_MPIBAIJ",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C","MatRetrieveValues_MPIBAIJ",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C","MatGetDiagonalBlock_MPIBAIJ",MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C","MatMPIBAIJSetPreallocation_MPIBAIJ",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C","MatMPIBAIJSetPreallocationCSR_MPIBAIJ",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C","MatDiagonalScaleLocal_MPIBAIJ",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C","MatSetHashTableFactor_MPIBAIJ",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.

   This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
   and MATMPIBAIJ otherwise.

   Options Database Keys:
. -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIBAIJ(), MATSEQBAIJ, MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_BAIJ"
PetscErrorCode MatCreate_BAIJ(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocation"
/*@C
   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  A - the matrix
.  bs - size of block
.  d_nz  - number of block nonzeros per block row in the diagonal portion of the local
           submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of the local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of block nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact>

   Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  |  o o o d d d o o o o o o
   row 4  |  o o o d d d o o o o o o
   row 5  |  o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example, the fields mallocs, nz_allocated, nz_used, nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.
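
   Example usage (a minimal sketch; the communicator comm, the sizes m, n, M, N, and the
   block-nonzero counts used below are illustrative assumptions, not values required by
   this routine):

.vb
   Mat A;
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
   /* bs = 2; guess 3 diagonal and 1 off-diagonal block nonzeros per block row */
   ierr = MatMPIBAIJSetPreallocation(A,2,3,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
   /* ... set entries with MatSetValuesBlocked(), then MatAssemblyBegin()/MatAssemblyEnd() ... */
.ve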

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
@*/
PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIBAIJ"
/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzero blocks per block row in the diagonal portion of the local
           submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of the local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact>

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored.

   A nonzero block is any block that has 1 or more nonzeros in it.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
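
   Example (a minimal sketch; the block size, local row count m, and nonzero estimates below
   are illustrative assumptions):

.vb
   Mat A;
   ierr = MatCreateMPIBAIJ(PETSC_COMM_WORLD,2,m,m,PETSC_DETERMINE,PETSC_DETERMINE,
                           5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
.ve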
3456 3457 Storage Information: 3458 For a square global matrix we define each processor's diagonal portion 3459 to be its local rows and the corresponding columns (a square submatrix); 3460 each processor's off-diagonal portion encompasses the remainder of the 3461 local matrix (a rectangular submatrix). 3462 3463 The user can specify preallocated storage for the diagonal part of 3464 the local submatrix with either d_nz or d_nnz (not both). Set 3465 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3466 memory allocation. Likewise, specify preallocated storage for the 3467 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3468 3469 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3470 the figure below we depict these three local rows and all columns (0-11). 3471 3472 .vb 3473 0 1 2 3 4 5 6 7 8 9 10 11 3474 ------------------- 3475 row 3 | o o o d d d o o o o o o 3476 row 4 | o o o d d d o o o o o o 3477 row 5 | o o o d d d o o o o o o 3478 ------------------- 3479 .ve 3480 3481 Thus, any entries in the d locations are stored in the d (diagonal) 3482 submatrix, and any entries in the o locations are stored in the 3483 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3484 stored simply in the MATSEQBAIJ format for compressed row storage. 3485 3486 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3487 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3488 In general, for PDE problems in which most nonzeros are near the diagonal, 3489 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3490 or you will get TERRIBLE performance; see the users' manual chapter on 3491 matrices. 3492 3493 Level: intermediate 3494 3495 .keywords: matrix, block, aij, compressed row, sparse, parallel 3496 3497 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3498 @*/ 3499 PetscErrorCode MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3500 { 3501 PetscErrorCode ierr; 3502 PetscMPIInt size; 3503 3504 PetscFunctionBegin; 3505 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3506 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3507 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3508 if (size > 1) { 3509 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3510 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3511 } else { 3512 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3513 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3514 } 3515 PetscFunctionReturn(0); 3516 } 3517 3518 #undef __FUNCT__ 3519 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3520 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3521 { 3522 Mat mat; 3523 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3524 PetscErrorCode ierr; 3525 PetscInt len=0; 3526 3527 PetscFunctionBegin; 3528 *newmat = 0; 3529 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3530 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3531 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3532 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3533 3534 mat->factortype = matin->factortype; 3535 mat->preallocated = PETSC_TRUE; 
3536 mat->assembled = PETSC_TRUE; 3537 mat->insertmode = NOT_SET_VALUES; 3538 3539 a = (Mat_MPIBAIJ*)mat->data; 3540 mat->rmap->bs = matin->rmap->bs; 3541 a->bs2 = oldmat->bs2; 3542 a->mbs = oldmat->mbs; 3543 a->nbs = oldmat->nbs; 3544 a->Mbs = oldmat->Mbs; 3545 a->Nbs = oldmat->Nbs; 3546 3547 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3548 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3549 3550 a->size = oldmat->size; 3551 a->rank = oldmat->rank; 3552 a->donotstash = oldmat->donotstash; 3553 a->roworiented = oldmat->roworiented; 3554 a->rowindices = 0; 3555 a->rowvalues = 0; 3556 a->getrowactive = PETSC_FALSE; 3557 a->barray = 0; 3558 a->rstartbs = oldmat->rstartbs; 3559 a->rendbs = oldmat->rendbs; 3560 a->cstartbs = oldmat->cstartbs; 3561 a->cendbs = oldmat->cendbs; 3562 3563 /* hash table stuff */ 3564 a->ht = 0; 3565 a->hd = 0; 3566 a->ht_size = 0; 3567 a->ht_flag = oldmat->ht_flag; 3568 a->ht_fact = oldmat->ht_fact; 3569 a->ht_total_ct = 0; 3570 a->ht_insert_ct = 0; 3571 3572 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3573 if (oldmat->colmap) { 3574 #if defined (PETSC_USE_CTABLE) 3575 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3576 #else 3577 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3578 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3579 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3580 #endif 3581 } else a->colmap = 0; 3582 3583 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3584 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3585 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3586 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3587 } else a->garray = 0; 3588 3589 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3590 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3591 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3592 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3593 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3594 3595 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3596 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3597 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3598 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3599 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3600 *newmat = mat; 3601 3602 PetscFunctionReturn(0); 3603 } 3604 3605 #undef __FUNCT__ 3606 #define __FUNCT__ "MatLoad_MPIBAIJ" 3607 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer) 3608 { 3609 PetscErrorCode ierr; 3610 int fd; 3611 PetscInt i,nz,j,rstart,rend; 3612 PetscScalar *vals,*buf; 3613 MPI_Comm comm = ((PetscObject)viewer)->comm; 3614 MPI_Status status; 3615 PetscMPIInt rank,size,maxnz; 3616 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3617 PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 3618 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 3619 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3620 PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 3621 PetscInt dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols; 3622 3623 PetscFunctionBegin; 3624 ierr = 
PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3625 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3626 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3627 3628 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3629 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3630 if (!rank) { 3631 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3632 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3633 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3634 } 3635 3636 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 3637 3638 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3639 M = header[1]; N = header[2]; 3640 3641 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3642 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 3643 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 3644 3645 /* If global sizes are set, check if they are consistent with that given in the file */ 3646 if (sizesset) { 3647 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 3648 } 3649 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3650 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3651 3652 if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices"); 3653 3654 /* 3655 This code adds extra rows to make sure the number of rows is 3656 divisible by the blocksize 3657 */ 3658 Mbs = M/bs; 3659 extra_rows = bs - M + bs*Mbs; 3660 if (extra_rows == bs) extra_rows = 0; 3661 else Mbs++; 3662 if (extra_rows && !rank) { 3663 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3664 } 3665 3666 /* determine ownership of all rows */ 3667 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 3668 mbs = Mbs/size + ((Mbs % size) > rank); 3669 m = mbs*bs; 3670 } else { /* User set */ 3671 m = newmat->rmap->n; 3672 mbs = m/bs; 3673 } 3674 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 3675 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3676 3677 /* process 0 needs enough room for process with most rows */ 3678 if (!rank) { 3679 mmax = rowners[1]; 3680 for (i=2; i<size; i++) { 3681 mmax = PetscMax(mmax,rowners[i]); 3682 } 3683 mmax*=bs; 3684 } else mmax = m; 3685 3686 rowners[0] = 0; 3687 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3688 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3689 rstart = rowners[rank]; 3690 rend = rowners[rank+1]; 3691 3692 /* distribute row lengths to all processors */ 3693 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 3694 if (!rank) { 3695 mend = m; 3696 if (size == 1) mend = mend - extra_rows; 3697 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3698 for (j=mend; j<m; j++) locrowlens[j] = 1; 3699 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3700 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3701 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3702 for (j=0; j<m; j++) { 
3703 procsnz[0] += locrowlens[j]; 3704 } 3705 for (i=1; i<size; i++) { 3706 mend = browners[i+1] - browners[i]; 3707 if (i == size-1) mend = mend - extra_rows; 3708 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3709 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3710 /* calculate the number of nonzeros on each processor */ 3711 for (j=0; j<browners[i+1]-browners[i]; j++) { 3712 procsnz[i] += rowlengths[j]; 3713 } 3714 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3715 } 3716 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3717 } else { 3718 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3719 } 3720 3721 if (!rank) { 3722 /* determine max buffer needed and allocate it */ 3723 maxnz = procsnz[0]; 3724 for (i=1; i<size; i++) { 3725 maxnz = PetscMax(maxnz,procsnz[i]); 3726 } 3727 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3728 3729 /* read in my part of the matrix column indices */ 3730 nz = procsnz[0]; 3731 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3732 mycols = ibuf; 3733 if (size == 1) nz -= extra_rows; 3734 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3735 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 3736 3737 /* read in every ones (except the last) and ship off */ 3738 for (i=1; i<size-1; i++) { 3739 nz = procsnz[i]; 3740 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3741 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3742 } 3743 /* read in the stuff for the last proc */ 3744 if (size != 1) { 3745 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 3746 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3747 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3748 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3749 } 3750 ierr = PetscFree(cols);CHKERRQ(ierr); 3751 } else { 3752 /* determine buffer space needed for message */ 3753 nz = 0; 3754 for (i=0; i<m; i++) { 3755 nz += locrowlens[i]; 3756 } 3757 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3758 mycols = ibuf; 3759 /* receive message of column indices*/ 3760 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3761 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3762 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3763 } 3764 3765 /* loop over local rows, determining number of off diagonal entries */ 3766 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 3767 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 3768 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3769 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3770 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3771 rowcount = 0; nzcount = 0; 3772 for (i=0; i<mbs; i++) { 3773 dcount = 0; 3774 odcount = 0; 3775 for (j=0; j<bs; j++) { 3776 kmax = locrowlens[rowcount]; 3777 for (k=0; k<kmax; k++) { 3778 tmp = mycols[nzcount++]/bs; 3779 if (!mask[tmp]) { 3780 mask[tmp] = 1; 3781 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3782 else masked1[dcount++] = tmp; 3783 } 3784 } 3785 rowcount++; 3786 } 3787 3788 dlens[i] = dcount; 3789 odlens[i] = odcount; 3790 3791 /* zero out the mask elements we set */ 3792 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3793 for 
(j=0; j<odcount; j++) mask[masked2[j]] = 0; 3794 } 3795 3796 3797 if (!sizesset) { 3798 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3799 } 3800 ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3801 3802 if (!rank) { 3803 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3804 /* read in my part of the matrix numerical values */ 3805 nz = procsnz[0]; 3806 vals = buf; 3807 mycols = ibuf; 3808 if (size == 1) nz -= extra_rows; 3809 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3810 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 3811 3812 /* insert into matrix */ 3813 jj = rstart*bs; 3814 for (i=0; i<m; i++) { 3815 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3816 mycols += locrowlens[i]; 3817 vals += locrowlens[i]; 3818 jj++; 3819 } 3820 /* read in other processors (except the last one) and ship out */ 3821 for (i=1; i<size-1; i++) { 3822 nz = procsnz[i]; 3823 vals = buf; 3824 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3825 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3826 } 3827 /* the last proc */ 3828 if (size != 1){ 3829 nz = procsnz[i] - extra_rows; 3830 vals = buf; 3831 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3832 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3833 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3834 } 3835 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3836 } else { 3837 /* receive numeric values */ 3838 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3839 3840 /* receive message of values*/ 3841 vals = buf; 3842 mycols = ibuf; 3843 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 3844 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 3845 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3846 3847 /* insert into matrix */ 3848 jj = rstart*bs; 3849 for (i=0; i<m; i++) { 3850 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3851 mycols += locrowlens[i]; 3852 vals += locrowlens[i]; 3853 jj++; 3854 } 3855 } 3856 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3857 ierr = PetscFree(buf);CHKERRQ(ierr); 3858 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3859 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 3860 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 3861 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 3862 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3863 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3864 3865 PetscFunctionReturn(0); 3866 } 3867 3868 #undef __FUNCT__ 3869 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 3870 /*@ 3871 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3872 3873 Input Parameters: 3874 . mat - the matrix 3875 . 
fact - factor

   Not Collective, each process can use a different factor

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table <fact>

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@*/
PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
{
  Mat_MPIBAIJ *baij;

  PetscFunctionBegin;
  baij          = (Mat_MPIBAIJ*)mat->data;
  baij->ht_fact = fact;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran (to eliminate two function call overheads)
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
#endif

#undef __FUNCT__
#define __FUNCT__ "matmpibaijsetvaluesblocked"
/*@C
  MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace a call to MatSetValuesBlocked()

  Collective on Mat

  Input Parameters:
+ mat - the matrix
. min - number of input rows
. im - input rows
. nin - number of input columns
. in - input columns
. v - numerical values input
- addvin - INSERT_VALUES or ADD_VALUES

  Notes: This contains a complete copy of MatSetValuesBlocked_MPIBAIJ(), which duplicates that code rather than reusing it.
3949 3950 Level: advanced 3951 3952 .seealso: MatSetValuesBlocked() 3953 @*/ 3954 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3955 { 3956 /* convert input arguments to C version */ 3957 Mat mat = *matin; 3958 PetscInt m = *min, n = *nin; 3959 InsertMode addv = *addvin; 3960 3961 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3962 const MatScalar *value; 3963 MatScalar *barray=baij->barray; 3964 PetscBool roworiented = baij->roworiented; 3965 PetscErrorCode ierr; 3966 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3967 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3968 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3969 3970 PetscFunctionBegin; 3971 /* tasks normally handled by MatSetValuesBlocked() */ 3972 if (mat->insertmode == NOT_SET_VALUES) { 3973 mat->insertmode = addv; 3974 } 3975 #if defined(PETSC_USE_DEBUG) 3976 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3977 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3978 #endif 3979 if (mat->assembled) { 3980 mat->was_assembled = PETSC_TRUE; 3981 mat->assembled = PETSC_FALSE; 3982 } 3983 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3984 3985 3986 if(!barray) { 3987 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 3988 baij->barray = barray; 3989 } 3990 3991 if (roworiented) { 3992 stepval = (n-1)*bs; 3993 } else { 3994 stepval = (m-1)*bs; 3995 } 3996 for (i=0; i<m; i++) { 3997 if (im[i] < 0) continue; 3998 #if defined(PETSC_USE_DEBUG) 3999 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 4000 #endif 4001 if (im[i] >= rstart && im[i] < rend) { 4002 row = im[i] - rstart; 4003 for (j=0; j<n; j++) { 4004 /* If NumCol = 1 then a copy is not required */ 4005 if ((roworiented) && (n == 1)) { 4006 barray = (MatScalar*)v + i*bs2; 4007 } else if((!roworiented) && (m == 1)) { 4008 barray = (MatScalar*)v + j*bs2; 4009 } else { /* Here a copy is required */ 4010 if (roworiented) { 4011 value = v + i*(stepval+bs)*bs + j*bs; 4012 } else { 4013 value = v + j*(stepval+bs)*bs + i*bs; 4014 } 4015 for (ii=0; ii<bs; ii++,value+=stepval) { 4016 for (jj=0; jj<bs; jj++) { 4017 *barray++ = *value++; 4018 } 4019 } 4020 barray -=bs2; 4021 } 4022 4023 if (in[j] >= cstart && in[j] < cend){ 4024 col = in[j] - cstart; 4025 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 4026 } 4027 else if (in[j] < 0) continue; 4028 #if defined(PETSC_USE_DEBUG) 4029 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 4030 #endif 4031 else { 4032 if (mat->was_assembled) { 4033 if (!baij->colmap) { 4034 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 4035 } 4036 4037 #if defined(PETSC_USE_DEBUG) 4038 #if defined (PETSC_USE_CTABLE) 4039 { PetscInt data; 4040 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 4041 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4042 } 4043 #else 4044 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4045 #endif 4046 #endif 4047 #if defined (PETSC_USE_CTABLE) 4048 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 4049 col = (col - 1)/bs; 4050 #else 
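          /* colmap entries are stored 1-based as bs*(local block column)+1 (see
             CreateColmap_MPIBAIJ_Private()), so recover the local block column below */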
          col = (baij->colmap[in[j]] - 1)/bs;
#endif
          if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
            ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
            col = in[j];
          }
        } else col = in[j];
        ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
      }
    }
  } else {
    if (!baij->donotstash) {
      if (roworiented) {
        ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
      }
    }
  }
  }

  /* task normally handled by MatSetValuesBlocked() */
  ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIBAIJWithArrays"
/*@
   MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the
   local rows in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based; the i array holds the row offsets into the local j array.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  }
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
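
/*
   Example usage of MatCreateMPIBAIJWithArrays() (a hedged sketch, not part of the library):
   the CSR arrays below are illustrative assumptions for a process that owns two rows with
   bs = 1, where i[] holds 0-based row offsets into j[], j[] holds global column indices,
   and a[] holds the matching values.  In real code these arrays come from the application.

     PetscInt    i[] = {0,2,3};
     PetscInt    j[] = {0,1,1};
     PetscScalar a[] = {2.0,-1.0,4.0};
     Mat         C;
     ierr = MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&C);CHKERRQ(ierr);
*/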