1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <petscblaslapack.h> 4 5 extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat); 6 extern PetscErrorCode DisAssemble_MPIBAIJ(Mat); 7 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []); 8 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 9 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 10 extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 11 extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 12 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec); 13 14 #undef __FUNCT__ 15 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ" 16 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[]) 17 { 18 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 19 PetscErrorCode ierr; 20 PetscInt i,*idxb = 0; 21 PetscScalar *va,*vb; 22 Vec vtmp; 23 24 PetscFunctionBegin; 25 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 26 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 27 if (idx) { 28 for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;} 29 } 30 31 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 32 if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);} 33 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 34 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 35 36 for (i=0; i<A->rmap->n; i++){ 37 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);} 38 } 39 40 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 41 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 42 ierr = PetscFree(idxb);CHKERRQ(ierr); 43 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 44 PetscFunctionReturn(0); 45 } 46 47 EXTERN_C_BEGIN 48 #undef __FUNCT__ 49 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 50 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 51 { 52 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 53 PetscErrorCode ierr; 54 55 PetscFunctionBegin; 56 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 57 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 58 PetscFunctionReturn(0); 59 } 60 EXTERN_C_END 61 62 EXTERN_C_BEGIN 63 #undef __FUNCT__ 64 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 65 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 66 { 67 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 68 PetscErrorCode ierr; 69 70 PetscFunctionBegin; 71 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 72 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 73 PetscFunctionReturn(0); 74 } 75 EXTERN_C_END 76 77 /* 78 Local utility routine that creates a mapping from the global column 79 number to the local number in the off-diagonal part of the local 80 storage of the matrix. This is done in a non scalable way since the 81 length of colmap equals the global matrix length. 82 */ 83 #undef __FUNCT__ 84 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 85 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat) 86 { 87 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 88 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 89 PetscErrorCode ierr; 90 PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 91 92 PetscFunctionBegin; 93 #if defined (PETSC_USE_CTABLE) 94 ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr); 95 for (i=0; i<nbs; i++){ 96 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr); 97 } 98 #else 99 ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr); 100 ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 101 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 102 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 103 #endif 104 PetscFunctionReturn(0); 105 } 106 107 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 108 { \ 109 \ 110 brow = row/bs; \ 111 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 112 rmax = aimax[brow]; nrow = ailen[brow]; \ 113 bcol = col/bs; \ 114 ridx = row % bs; cidx = col % bs; \ 115 low = 0; high = nrow; \ 116 while (high-low > 3) { \ 117 t = (low+high)/2; \ 118 if (rp[t] > bcol) high = t; \ 119 else low = t; \ 120 } \ 121 for (_i=low; _i<high; _i++) { \ 122 if (rp[_i] > bcol) break; \ 123 if (rp[_i] == bcol) { \ 124 bap = ap + bs2*_i + bs*cidx + ridx; \ 125 if (addv == ADD_VALUES) *bap += value; \ 126 else *bap = value; \ 127 goto a_noinsert; \ 128 } \ 129 } \ 130 if (a->nonew == 1) goto a_noinsert; \ 131 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 132 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 133 N = nrow++ - 1; \ 134 /* shift up all the later entries in this row */ \ 135 for (ii=N; ii>=_i; ii--) { \ 136 rp[ii+1] = rp[ii]; \ 137 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 138 } \ 139 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 140 rp[_i] = bcol; \ 141 ap[bs2*_i + bs*cidx + ridx] = value; \ 142 a_noinsert:; \ 143 ailen[brow] = nrow; \ 144 } 145 146 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 147 { \ 148 brow = row/bs; \ 149 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 150 rmax = bimax[brow]; nrow = bilen[brow]; \ 151 bcol = col/bs; \ 152 ridx = row % bs; cidx = col % bs; \ 153 low = 0; high = nrow; \ 154 while (high-low > 3) { \ 155 t = (low+high)/2; \ 156 if (rp[t] > bcol) high = t; \ 157 else low = t; \ 158 } \ 159 for (_i=low; _i<high; _i++) { \ 160 if (rp[_i] > bcol) break; \ 161 if (rp[_i] == bcol) { \ 162 bap = ap + bs2*_i + bs*cidx + ridx; \ 163 if (addv == ADD_VALUES) *bap += value; \ 164 else *bap = value; \ 165 goto b_noinsert; \ 166 } \ 167 } \ 168 if (b->nonew == 1) goto b_noinsert; \ 169 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 170 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 171 CHKMEMQ;\ 172 N = nrow++ - 1; \ 173 /* shift up all the later entries in this row */ \ 174 for (ii=N; ii>=_i; ii--) { \ 175 rp[ii+1] = rp[ii]; \ 176 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 177 } \ 178 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 179 rp[_i] = bcol; \ 180 ap[bs2*_i + bs*cidx + ridx] = value; \ 181 b_noinsert:; \ 182 bilen[brow] = nrow; \ 183 } 184 185 #undef __FUNCT__ 186 #define __FUNCT__ "MatSetValues_MPIBAIJ" 187 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 188 { 189 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 190 MatScalar value; 191 PetscBool roworiented = baij->roworiented; 192 PetscErrorCode ierr; 193 PetscInt i,j,row,col; 194 PetscInt rstart_orig=mat->rmap->rstart; 195 PetscInt rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart; 196 PetscInt cend_orig=mat->cmap->rend,bs=mat->rmap->bs; 197 198 /* Some Variables required in the macro */ 199 Mat A = baij->A; 200 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 201 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 202 MatScalar *aa=a->a; 203 204 Mat B = baij->B; 205 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 206 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 207 MatScalar *ba=b->a; 208 209 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 210 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 211 MatScalar *ap,*bap; 212 213 PetscFunctionBegin; 214 if (v) PetscValidScalarPointer(v,6); 215 for (i=0; i<m; i++) { 216 if (im[i] < 0) continue; 217 #if defined(PETSC_USE_DEBUG) 218 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 219 #endif 220 if (im[i] >= rstart_orig && im[i] < rend_orig) { 221 row = im[i] - rstart_orig; 222 for (j=0; j<n; j++) { 223 if (in[j] >= cstart_orig && in[j] < cend_orig){ 224 col = in[j] - cstart_orig; 225 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 226 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 227 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 228 } else if (in[j] < 0) continue; 229 #if defined(PETSC_USE_DEBUG) 230 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 231 #endif 232 else { 233 if (mat->was_assembled) { 234 if (!baij->colmap) { 235 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 236 } 237 #if defined (PETSC_USE_CTABLE) 238 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 239 col = col - 1; 240 #else 241 col = baij->colmap[in[j]/bs] - 1; 242 #endif 243 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 244 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 245 col = in[j]; 246 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 247 B = baij->B; 248 b = (Mat_SeqBAIJ*)(B)->data; 249 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 250 ba=b->a; 251 } else col += in[j]%bs; 252 } else col = in[j]; 253 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 254 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 255 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 256 } 257 } 258 } else { 259 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 260 if (!baij->donotstash) { 261 if (roworiented) { 262 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 263 } else { 264 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 265 } 266 } 267 } 268 } 269 PetscFunctionReturn(0); 270 } 271 272 #undef __FUNCT__ 273 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 274 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 275 { 276 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 277 const PetscScalar *value; 278 MatScalar *barray=baij->barray; 279 PetscBool roworiented = baij->roworiented; 280 PetscErrorCode ierr; 281 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 282 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 283 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 284 285 PetscFunctionBegin; 286 if(!barray) { 287 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 288 baij->barray = barray; 289 } 290 291 if (roworiented) { 292 stepval = (n-1)*bs; 293 } else { 294 stepval = (m-1)*bs; 295 } 296 for (i=0; i<m; i++) { 297 if (im[i] < 0) continue; 298 #if defined(PETSC_USE_DEBUG) 299 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 300 #endif 301 if (im[i] >= rstart && im[i] < rend) { 302 row = im[i] - rstart; 303 for (j=0; j<n; j++) { 304 /* If NumCol = 1 then a copy is not required */ 305 if ((roworiented) && (n == 1)) { 306 barray = (MatScalar*)v + i*bs2; 307 } else if((!roworiented) && (m == 1)) { 308 barray = (MatScalar*)v + j*bs2; 309 } else { /* Here a copy is required */ 310 if (roworiented) { 311 value = v + (i*(stepval+bs) + j)*bs; 312 } else { 313 value = v + (j*(stepval+bs) + i)*bs; 314 } 315 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 316 for (jj=0; jj<bs; jj++) { 317 barray[jj] = value[jj]; 318 } 319 barray += bs; 320 } 321 barray -= bs2; 322 } 323 324 if (in[j] >= cstart && in[j] < cend){ 325 col = in[j] - cstart; 326 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 327 } 328 else if (in[j] < 0) continue; 329 #if defined(PETSC_USE_DEBUG) 330 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 331 #endif 332 else { 333 if (mat->was_assembled) { 334 if (!baij->colmap) { 335 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 336 } 337 338 #if defined(PETSC_USE_DEBUG) 339 #if defined (PETSC_USE_CTABLE) 340 { PetscInt data; 341 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 342 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 343 } 344 #else 345 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 346 #endif 347 #endif 348 #if defined (PETSC_USE_CTABLE) 349 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 350 col = (col - 1)/bs; 351 #else 352 col = (baij->colmap[in[j]] - 1)/bs; 353 #endif 354 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 355 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 356 col = in[j]; 357 } 358 } 359 else col = in[j]; 360 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 361 } 362 } 363 } else { 364 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 365 if (!baij->donotstash) { 366 if (roworiented) { 367 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 368 } else { 369 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 370 } 371 } 372 } 373 } 374 PetscFunctionReturn(0); 375 } 376 377 #define HASH_KEY 0.6180339887 378 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 379 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 380 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 381 #undef __FUNCT__ 382 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 383 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 384 { 385 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 386 PetscBool roworiented = baij->roworiented; 387 PetscErrorCode ierr; 388 PetscInt i,j,row,col; 389 PetscInt rstart_orig=mat->rmap->rstart; 390 PetscInt rend_orig=mat->rmap->rend,Nbs=baij->Nbs; 391 PetscInt h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx; 392 PetscReal tmp; 393 MatScalar **HD = baij->hd,value; 394 #if defined(PETSC_USE_DEBUG) 395 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 396 #endif 397 398 PetscFunctionBegin; 399 if (v) PetscValidScalarPointer(v,6); 400 for (i=0; i<m; i++) { 401 #if defined(PETSC_USE_DEBUG) 402 if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 403 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 404 #endif 405 row = im[i]; 406 if (row >= rstart_orig && row < rend_orig) { 407 for (j=0; j<n; j++) { 408 col = in[j]; 409 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 410 /* Look up PetscInto the Hash Table */ 411 key = (row/bs)*Nbs+(col/bs)+1; 412 h1 = HASH(size,key,tmp); 413 414 415 idx = h1; 416 #if defined(PETSC_USE_DEBUG) 417 insert_ct++; 418 total_ct++; 419 if (HT[idx] != key) { 420 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 421 if (idx == size) { 422 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 423 if (idx == h1) { 424 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 425 } 426 } 427 } 428 #else 429 if (HT[idx] != key) { 430 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 431 if (idx == size) { 432 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 433 if (idx == h1) { 434 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 435 } 436 } 437 } 438 #endif 439 /* A HASH table entry is found, so insert the values at the correct address */ 440 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 441 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 442 } 443 } else { 444 if (!baij->donotstash) { 445 if (roworiented) { 446 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 447 } else { 448 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 449 } 450 } 451 } 452 } 453 #if defined(PETSC_USE_DEBUG) 454 baij->ht_total_ct = total_ct; 455 baij->ht_insert_ct = insert_ct; 456 #endif 457 PetscFunctionReturn(0); 458 } 459 460 #undef __FUNCT__ 461 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 462 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 463 { 464 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 465 PetscBool roworiented = baij->roworiented; 466 PetscErrorCode ierr; 467 PetscInt i,j,ii,jj,row,col; 468 PetscInt rstart=baij->rstartbs; 469 PetscInt rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 470 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 471 PetscReal tmp; 472 MatScalar **HD = baij->hd,*baij_a; 473 const PetscScalar *v_t,*value; 474 #if defined(PETSC_USE_DEBUG) 475 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 476 #endif 477 478 PetscFunctionBegin; 479 480 if (roworiented) { 481 stepval = (n-1)*bs; 482 } else { 483 stepval = (m-1)*bs; 484 } 485 for (i=0; i<m; i++) { 486 #if defined(PETSC_USE_DEBUG) 487 if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 488 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 489 #endif 490 row = im[i]; 491 v_t = v + i*nbs2; 492 if (row >= rstart && row < rend) { 493 for (j=0; j<n; j++) { 494 col = in[j]; 495 496 /* Look up into the Hash Table */ 497 key = row*Nbs+col+1; 498 h1 = HASH(size,key,tmp); 499 500 idx = h1; 501 #if defined(PETSC_USE_DEBUG) 502 total_ct++; 503 insert_ct++; 504 if (HT[idx] != key) { 505 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 506 if (idx == size) { 507 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 508 if (idx == h1) { 509 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 510 } 511 } 512 } 513 #else 514 if (HT[idx] != key) { 515 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 516 if (idx == size) { 517 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 518 if (idx == h1) { 519 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 520 } 521 } 522 } 523 #endif 524 baij_a = HD[idx]; 525 if (roworiented) { 526 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 527 /* value = v + (i*(stepval+bs)+j)*bs; */ 528 value = v_t; 529 v_t += bs; 530 if (addv == ADD_VALUES) { 531 for (ii=0; ii<bs; ii++,value+=stepval) { 532 for (jj=ii; jj<bs2; jj+=bs) { 533 baij_a[jj] += *value++; 534 } 535 } 536 } else { 537 for (ii=0; ii<bs; ii++,value+=stepval) { 538 for (jj=ii; jj<bs2; jj+=bs) { 539 baij_a[jj] = *value++; 540 } 541 } 542 } 543 } else { 544 value = v + j*(stepval+bs)*bs + i*bs; 545 if (addv == ADD_VALUES) { 546 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 547 for (jj=0; jj<bs; jj++) { 548 baij_a[jj] += *value++; 549 } 550 } 551 } else { 552 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 553 for (jj=0; jj<bs; jj++) { 554 baij_a[jj] = *value++; 555 } 556 } 557 } 558 } 559 } 560 } else { 561 if (!baij->donotstash) { 562 if (roworiented) { 563 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 564 } else { 565 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 566 } 567 } 568 } 569 } 570 #if defined(PETSC_USE_DEBUG) 571 baij->ht_total_ct = total_ct; 572 baij->ht_insert_ct = insert_ct; 573 #endif 574 PetscFunctionReturn(0); 575 } 576 577 #undef __FUNCT__ 578 #define __FUNCT__ "MatGetValues_MPIBAIJ" 579 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 580 { 581 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 582 PetscErrorCode ierr; 583 PetscInt bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 584 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 585 586 PetscFunctionBegin; 587 for (i=0; i<m; i++) { 588 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 589 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 590 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 591 row = idxm[i] - bsrstart; 592 for (j=0; j<n; j++) { 593 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 594 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 595 if (idxn[j] >= bscstart && idxn[j] < bscend){ 596 col = idxn[j] - bscstart; 597 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 598 } else { 599 if (!baij->colmap) { 600 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 601 } 602 #if defined (PETSC_USE_CTABLE) 603 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 604 data --; 605 #else 606 data = baij->colmap[idxn[j]/bs]-1; 607 #endif 608 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 609 else { 610 col = data + idxn[j]%bs; 611 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 612 } 613 } 614 } 615 } else { 616 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 617 } 618 } 619 PetscFunctionReturn(0); 620 } 621 622 #undef __FUNCT__ 623 #define __FUNCT__ "MatNorm_MPIBAIJ" 624 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 625 { 626 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 627 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 628 PetscErrorCode ierr; 629 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 630 PetscReal sum = 0.0; 631 MatScalar *v; 632 633 PetscFunctionBegin; 634 if (baij->size == 1) { 635 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 636 } else { 637 if (type == NORM_FROBENIUS) { 638 v = amat->a; 639 nz = amat->nz*bs2; 640 for (i=0; i<nz; i++) { 641 #if defined(PETSC_USE_COMPLEX) 642 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 643 #else 644 sum += (*v)*(*v); v++; 645 #endif 646 } 647 v = bmat->a; 648 nz = bmat->nz*bs2; 649 for (i=0; i<nz; i++) { 650 #if defined(PETSC_USE_COMPLEX) 651 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 652 #else 653 sum += (*v)*(*v); v++; 654 #endif 655 } 656 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 657 *nrm = PetscSqrtReal(*nrm); 658 } else if (type == NORM_1) { /* max column sum */ 659 PetscReal *tmp,*tmp2; 660 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 661 ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr); 662 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 663 v = amat->a; jj = amat->j; 664 for (i=0; i<amat->nz; i++) { 665 for (j=0; j<bs; j++){ 666 col = bs*(cstart + *jj) + j; /* column index */ 667 for (row=0; row<bs; row++){ 668 tmp[col] += PetscAbsScalar(*v); v++; 669 } 670 } 671 jj++; 672 } 673 v = bmat->a; jj = bmat->j; 674 for (i=0; i<bmat->nz; i++) { 675 for (j=0; j<bs; j++){ 676 col = bs*garray[*jj] + j; 677 for (row=0; row<bs; row++){ 678 tmp[col] += PetscAbsScalar(*v); v++; 679 } 680 } 681 jj++; 682 } 683 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 684 *nrm = 0.0; 685 for (j=0; j<mat->cmap->N; j++) { 686 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 687 } 688 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 689 } else if (type == NORM_INFINITY) { /* max row sum */ 690 PetscReal *sums; 691 ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr); 692 sum = 0.0; 693 for (j=0; j<amat->mbs; j++) { 694 for (row=0; row<bs; row++) sums[row] = 0.0; 695 v = amat->a + bs2*amat->i[j]; 696 nz = amat->i[j+1]-amat->i[j]; 697 for (i=0; i<nz; i++) { 698 for (col=0; col<bs; col++){ 699 for (row=0; row<bs; row++){ 700 sums[row] += PetscAbsScalar(*v); v++; 701 } 702 } 703 } 704 v = bmat->a + bs2*bmat->i[j]; 705 nz = bmat->i[j+1]-bmat->i[j]; 706 for (i=0; i<nz; i++) { 707 for (col=0; col<bs; col++){ 708 for (row=0; row<bs; row++){ 709 sums[row] += PetscAbsScalar(*v); v++; 710 } 711 } 712 } 713 for (row=0; row<bs; row++){ 714 if (sums[row] > sum) sum = sums[row]; 715 } 716 } 717 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 718 ierr = PetscFree(sums);CHKERRQ(ierr); 719 } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet"); 720 } 721 PetscFunctionReturn(0); 722 } 723 724 /* 725 Creates the hash table, and sets the table 726 This table is created only once. 727 If new entried need to be added to the matrix 728 then the hash table has to be destroyed and 729 recreated. 730 */ 731 #undef __FUNCT__ 732 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 733 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 734 { 735 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 736 Mat A = baij->A,B=baij->B; 737 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 738 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 739 PetscErrorCode ierr; 740 PetscInt ht_size,bs2=baij->bs2,rstart=baij->rstartbs; 741 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 742 PetscInt *HT,key; 743 MatScalar **HD; 744 PetscReal tmp; 745 #if defined(PETSC_USE_INFO) 746 PetscInt ct=0,max=0; 747 #endif 748 749 PetscFunctionBegin; 750 if (baij->ht) PetscFunctionReturn(0); 751 752 baij->ht_size = (PetscInt)(factor*nz); 753 ht_size = baij->ht_size; 754 755 /* Allocate Memory for Hash Table */ 756 ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr); 757 ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr); 758 ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr); 759 HD = baij->hd; 760 HT = baij->ht; 761 762 /* Loop Over A */ 763 for (i=0; i<a->mbs; i++) { 764 for (j=ai[i]; j<ai[i+1]; j++) { 765 row = i+rstart; 766 col = aj[j]+cstart; 767 768 key = row*Nbs + col + 1; 769 h1 = HASH(ht_size,key,tmp); 770 for (k=0; k<ht_size; k++){ 771 if (!HT[(h1+k)%ht_size]) { 772 HT[(h1+k)%ht_size] = key; 773 HD[(h1+k)%ht_size] = a->a + j*bs2; 774 break; 775 #if defined(PETSC_USE_INFO) 776 } else { 777 ct++; 778 #endif 779 } 780 } 781 #if defined(PETSC_USE_INFO) 782 if (k> max) max = k; 783 #endif 784 } 785 } 786 /* Loop Over B */ 787 for (i=0; i<b->mbs; i++) { 788 for (j=bi[i]; j<bi[i+1]; j++) { 789 row = i+rstart; 790 col = garray[bj[j]]; 791 key = row*Nbs + col + 1; 792 h1 = HASH(ht_size,key,tmp); 793 for (k=0; k<ht_size; k++){ 794 if (!HT[(h1+k)%ht_size]) { 795 HT[(h1+k)%ht_size] = key; 796 HD[(h1+k)%ht_size] = b->a + j*bs2; 797 break; 798 #if defined(PETSC_USE_INFO) 799 } else { 800 ct++; 801 #endif 802 } 803 } 804 #if defined(PETSC_USE_INFO) 805 if (k> max) max = k; 806 #endif 807 } 808 } 809 810 /* Print Summary */ 811 #if defined(PETSC_USE_INFO) 812 for (i=0,j=0; i<ht_size; i++) { 813 if (HT[i]) {j++;} 814 } 815 ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 816 #endif 817 PetscFunctionReturn(0); 818 } 819 820 #undef __FUNCT__ 821 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 822 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 823 { 824 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 825 PetscErrorCode ierr; 826 PetscInt nstash,reallocs; 827 InsertMode addv; 828 829 PetscFunctionBegin; 830 if (baij->donotstash || mat->nooffprocentries) { 831 PetscFunctionReturn(0); 832 } 833 834 /* make sure all processors are either in INSERTMODE or ADDMODE */ 835 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 836 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 837 mat->insertmode = addv; /* in case this processor had no cache */ 838 839 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 840 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 841 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 842 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 843 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 844 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 845 PetscFunctionReturn(0); 846 } 847 848 #undef __FUNCT__ 849 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 850 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 851 { 852 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 853 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data; 854 PetscErrorCode ierr; 855 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 856 PetscInt *row,*col; 857 PetscBool r1,r2,r3,other_disassembled; 858 MatScalar *val; 859 InsertMode addv = mat->insertmode; 860 PetscMPIInt n; 861 862 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 863 PetscFunctionBegin; 864 if (!baij->donotstash && !mat->nooffprocentries) { 865 while (1) { 866 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 867 if (!flg) break; 868 869 for (i=0; i<n;) { 870 /* Now identify the consecutive vals belonging to the same row */ 871 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 872 if (j < n) ncols = j-i; 873 else ncols = n-i; 874 /* Now assemble all these values with a single function call */ 875 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 876 i = j; 877 } 878 } 879 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 880 /* Now process the block-stash. Since the values are stashed column-oriented, 881 set the roworiented flag to column oriented, and after MatSetValues() 882 restore the original flags */ 883 r1 = baij->roworiented; 884 r2 = a->roworiented; 885 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 886 baij->roworiented = PETSC_FALSE; 887 a->roworiented = PETSC_FALSE; 888 (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 889 while (1) { 890 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 891 if (!flg) break; 892 893 for (i=0; i<n;) { 894 /* Now identify the consecutive vals belonging to the same row */ 895 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 896 if (j < n) ncols = j-i; 897 else ncols = n-i; 898 ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 899 i = j; 900 } 901 } 902 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 903 baij->roworiented = r1; 904 a->roworiented = r2; 905 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 906 } 907 908 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 909 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 910 911 /* determine if any processor has disassembled, if so we must 912 also disassemble ourselfs, in order that we may reassemble. */ 913 /* 914 if nonzero structure of submatrix B cannot change then we know that 915 no processor disassembled thus we can skip this stuff 916 */ 917 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 918 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 919 if (mat->was_assembled && !other_disassembled) { 920 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 921 } 922 } 923 924 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 925 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 926 } 927 ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 928 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 929 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 930 931 #if defined(PETSC_USE_INFO) 932 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 933 ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr); 934 baij->ht_total_ct = 0; 935 baij->ht_insert_ct = 0; 936 } 937 #endif 938 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 939 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 940 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 941 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 942 } 943 944 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 945 baij->rowvalues = 0; 946 PetscFunctionReturn(0); 947 } 948 949 #undef __FUNCT__ 950 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 951 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 952 { 953 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 954 PetscErrorCode ierr; 955 PetscMPIInt size = baij->size,rank = baij->rank; 956 PetscInt bs = mat->rmap->bs; 957 PetscBool iascii,isdraw; 958 PetscViewer sviewer; 959 PetscViewerFormat format; 960 961 PetscFunctionBegin; 962 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 963 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 964 if (iascii) { 965 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 966 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 967 MatInfo info; 968 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 969 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 970 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 971 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 972 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 973 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 974 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 975 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 976 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 977 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 978 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 979 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 980 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 981 PetscFunctionReturn(0); 982 } else if (format == PETSC_VIEWER_ASCII_INFO) { 983 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 984 PetscFunctionReturn(0); 985 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 986 PetscFunctionReturn(0); 987 } 988 } 989 990 if (isdraw) { 991 PetscDraw draw; 992 PetscBool isnull; 993 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 994 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 995 } 996 997 if (size == 1) { 998 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 999 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1000 } else { 1001 /* assemble the entire matrix onto first processor. */ 1002 Mat A; 1003 Mat_SeqBAIJ *Aloc; 1004 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1005 MatScalar *a; 1006 1007 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1008 /* Perhaps this should be the type of mat? */ 1009 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1010 if (!rank) { 1011 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1012 } else { 1013 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1014 } 1015 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1016 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1017 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1018 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1019 1020 /* copy over the A part */ 1021 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1022 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1023 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1024 1025 for (i=0; i<mbs; i++) { 1026 rvals[0] = bs*(baij->rstartbs + i); 1027 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1028 for (j=ai[i]; j<ai[i+1]; j++) { 1029 col = (baij->cstartbs+aj[j])*bs; 1030 for (k=0; k<bs; k++) { 1031 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1032 col++; a += bs; 1033 } 1034 } 1035 } 1036 /* copy over the B part */ 1037 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1038 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1039 for (i=0; i<mbs; i++) { 1040 rvals[0] = bs*(baij->rstartbs + i); 1041 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1042 for (j=ai[i]; j<ai[i+1]; j++) { 1043 col = baij->garray[aj[j]]*bs; 1044 for (k=0; k<bs; k++) { 1045 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1046 col++; a += bs; 1047 } 1048 } 1049 } 1050 ierr = PetscFree(rvals);CHKERRQ(ierr); 1051 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1052 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1053 /* 1054 Everyone has to call to draw the matrix since the graphics waits are 1055 synchronized across all processors that share the PetscDraw object 1056 */ 1057 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1058 if (!rank) { 1059 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1060 /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/ 1061 PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ); 1062 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1063 } 1064 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1065 ierr = MatDestroy(&A);CHKERRQ(ierr); 1066 } 1067 PetscFunctionReturn(0); 1068 } 1069 1070 #undef __FUNCT__ 1071 #define __FUNCT__ "MatView_MPIBAIJ_Binary" 1072 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 1073 { 1074 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)mat->data; 1075 Mat_SeqBAIJ* A = (Mat_SeqBAIJ*)a->A->data; 1076 Mat_SeqBAIJ* B = (Mat_SeqBAIJ*)a->B->data; 1077 PetscErrorCode ierr; 1078 PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen; 1079 PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll; 1080 int fd; 1081 PetscScalar *column_values; 1082 FILE *file; 1083 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1084 PetscInt message_count,flowcontrolcount; 1085 1086 PetscFunctionBegin; 1087 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1088 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1089 nz = bs2*(A->nz + B->nz); 1090 rlen = mat->rmap->n; 1091 if (!rank) { 1092 header[0] = MAT_FILE_CLASSID; 1093 header[1] = mat->rmap->N; 1094 header[2] = mat->cmap->N; 1095 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1096 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1097 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1098 /* get largest number of rows any processor has */ 1099 range = mat->rmap->range; 1100 for (i=1; i<size; i++) { 1101 rlen = PetscMax(rlen,range[i+1] - range[i]); 1102 } 1103 } else { 1104 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1105 } 1106 1107 ierr = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr); 1108 /* compute lengths of each row */ 1109 for (i=0; i<a->mbs; i++) { 1110 crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1111 } 1112 /* store the row lengths to the file */ 1113 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1114 if (!rank) { 1115 MPI_Status status; 1116 ierr = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr); 1117 rlen = (range[1] - range[0])/bs; 1118 for (i=0; i<rlen; i++) { 1119 for (j=0; j<bs; j++) { 1120 row_lens[i*bs+j] = bs*crow_lens[i]; 1121 } 1122 } 1123 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1124 for (i=1; i<size; i++) { 1125 rlen = (range[i+1] - range[i])/bs; 1126 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1127 ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1128 for (k=0; k<rlen; k++) { 1129 for (j=0; j<bs; j++) { 1130 row_lens[k*bs+j] = bs*crow_lens[k]; 1131 } 1132 } 1133 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1134 } 1135 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1136 ierr = PetscFree(row_lens);CHKERRQ(ierr); 1137 } else { 1138 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1139 ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1140 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1141 } 1142 ierr = PetscFree(crow_lens);CHKERRQ(ierr); 1143 1144 /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the 1145 information needed to make it for each row from a block row. This does require more communication but still not more than 1146 the communication needed for the nonzero values */ 1147 nzmax = nz; /* space a largest processor needs */ 1148 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1149 ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1150 cnt = 0; 1151 for (i=0; i<a->mbs; i++) { 1152 pcnt = cnt; 1153 for (j=B->i[i]; j<B->i[i+1]; j++) { 1154 if ( (col = garray[B->j[j]]) > cstart) break; 1155 for (l=0; l<bs; l++) { 1156 column_indices[cnt++] = bs*col+l; 1157 } 1158 } 1159 for (k=A->i[i]; k<A->i[i+1]; k++) { 1160 for (l=0; l<bs; l++) { 1161 column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 1162 } 1163 } 1164 for (; j<B->i[i+1]; j++) { 1165 for (l=0; l<bs; l++) { 1166 column_indices[cnt++] = bs*garray[B->j[j]]+l; 1167 } 1168 } 1169 len = cnt - pcnt; 1170 for (k=1; k<bs; k++) { 1171 ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr); 1172 cnt += len; 1173 } 1174 } 1175 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1176 1177 /* store the columns to the file */ 1178 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1179 if (!rank) { 1180 MPI_Status status; 1181 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1182 for (i=1; i<size; i++) { 1183 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1184 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1185 ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1186 ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1187 } 1188 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1189 } else { 1190 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1191 ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1192 ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1193 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1194 } 1195 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1196 1197 /* load up the numerical values */ 1198 ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1199 cnt = 0; 1200 for (i=0; i<a->mbs; i++) { 1201 rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1202 for (j=B->i[i]; j<B->i[i+1]; j++) { 1203 if ( garray[B->j[j]] > cstart) break; 1204 for (l=0; l<bs; l++) { 1205 for (ll=0; ll<bs; ll++) { 1206 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1207 } 1208 } 1209 cnt += bs; 1210 } 1211 for (k=A->i[i]; k<A->i[i+1]; k++) { 1212 for (l=0; l<bs; l++) { 1213 for (ll=0; ll<bs; ll++) { 1214 column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1215 } 1216 } 1217 cnt += bs; 1218 } 1219 for (; j<B->i[i+1]; j++) { 1220 for (l=0; l<bs; l++) { 1221 for (ll=0; ll<bs; ll++) { 1222 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1223 } 1224 } 1225 cnt += bs; 1226 } 1227 cnt += (bs-1)*rlen; 1228 } 1229 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1230 1231 /* store the column values to the file */ 1232 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1233 if (!rank) { 1234 MPI_Status status; 1235 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1236 for (i=1; i<size; i++) { 1237 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1238 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1239 ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1240 ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1241 } 1242 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1243 } else { 1244 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1245 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1246 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1247 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1248 } 1249 ierr = PetscFree(column_values);CHKERRQ(ierr); 1250 1251 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1252 if (file) { 1253 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1254 } 1255 PetscFunctionReturn(0); 1256 } 1257 1258 #undef __FUNCT__ 1259 #define __FUNCT__ "MatView_MPIBAIJ" 1260 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1261 { 1262 PetscErrorCode ierr; 1263 PetscBool iascii,isdraw,issocket,isbinary; 1264 1265 PetscFunctionBegin; 1266 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1267 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1268 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1269 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1270 if (iascii || isdraw || issocket) { 1271 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1272 } else if (isbinary) { 1273 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1274 } else { 1275 SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1276 } 1277 PetscFunctionReturn(0); 1278 } 1279 1280 #undef __FUNCT__ 1281 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1282 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1283 { 1284 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1285 PetscErrorCode ierr; 1286 1287 PetscFunctionBegin; 1288 #if defined(PETSC_USE_LOG) 1289 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1290 #endif 1291 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1292 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1293 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 1294 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1295 #if defined (PETSC_USE_CTABLE) 1296 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 1297 #else 1298 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1299 #endif 1300 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1301 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 1302 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1303 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1304 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1305 ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr); 1306 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1307 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1308 1309 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1310 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1311 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1312 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1313 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1314 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1315 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1316 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr); 1317 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1318 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C","",PETSC_NULL);CHKERRQ(ierr); 1319 PetscFunctionReturn(0); 1320 } 1321 1322 #undef __FUNCT__ 1323 #define __FUNCT__ "MatMult_MPIBAIJ" 1324 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1325 { 1326 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1327 PetscErrorCode ierr; 1328 PetscInt nt; 1329 1330 PetscFunctionBegin; 1331 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1332 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1333 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1334 if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1335 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1336 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1337 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1338 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1339 PetscFunctionReturn(0); 1340 } 1341 1342 #undef __FUNCT__ 1343 #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1344 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1345 { 1346 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1347 PetscErrorCode ierr; 1348 1349 PetscFunctionBegin; 1350 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1351 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1352 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1353 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1354 PetscFunctionReturn(0); 1355 } 1356 1357 #undef __FUNCT__ 1358 #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1359 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1360 { 1361 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1362 PetscErrorCode ierr; 1363 PetscBool merged; 1364 1365 PetscFunctionBegin; 1366 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1367 /* do nondiagonal part */ 1368 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1369 if (!merged) { 1370 /* send it on its way */ 1371 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1372 /* do local part */ 1373 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1374 /* receive remote parts: note this assumes the values are not actually */ 1375 /* inserted in yy until the next line */ 1376 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1377 } else { 1378 /* do local part */ 1379 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1380 /* send it on its way */ 1381 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1382 /* values actually were received in the Begin() but we need to call this nop */ 1383 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1384 } 1385 PetscFunctionReturn(0); 1386 } 1387 1388 #undef __FUNCT__ 1389 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1390 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1391 { 1392 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1393 PetscErrorCode ierr; 1394 1395 PetscFunctionBegin; 1396 /* do nondiagonal part */ 1397 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1398 /* send it on its way */ 1399 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1400 /* do local part */ 1401 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1402 /* receive remote parts: note this assumes the values are not actually */ 1403 /* inserted in yy until the next line, which is true for my implementation*/ 1404 /* but is not perhaps always true. */ 1405 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1406 PetscFunctionReturn(0); 1407 } 1408 1409 /* 1410 This only works correctly for square matrices where the subblock A->A is the 1411 diagonal block 1412 */ 1413 #undef __FUNCT__ 1414 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1415 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1416 { 1417 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1418 PetscErrorCode ierr; 1419 1420 PetscFunctionBegin; 1421 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1422 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1423 PetscFunctionReturn(0); 1424 } 1425 1426 #undef __FUNCT__ 1427 #define __FUNCT__ "MatScale_MPIBAIJ" 1428 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1429 { 1430 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1431 PetscErrorCode ierr; 1432 1433 PetscFunctionBegin; 1434 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1435 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1436 PetscFunctionReturn(0); 1437 } 1438 1439 #undef __FUNCT__ 1440 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1441 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1442 { 1443 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1444 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1445 PetscErrorCode ierr; 1446 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1447 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1448 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1449 1450 PetscFunctionBegin; 1451 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1452 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1453 mat->getrowactive = PETSC_TRUE; 1454 1455 if (!mat->rowvalues && (idx || v)) { 1456 /* 1457 allocate enough space to hold information from the longest row. 1458 */ 1459 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1460 PetscInt max = 1,mbs = mat->mbs,tmp; 1461 for (i=0; i<mbs; i++) { 1462 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1463 if (max < tmp) { max = tmp; } 1464 } 1465 ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1466 } 1467 lrow = row - brstart; 1468 1469 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1470 if (!v) {pvA = 0; pvB = 0;} 1471 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1472 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1473 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1474 nztot = nzA + nzB; 1475 1476 cmap = mat->garray; 1477 if (v || idx) { 1478 if (nztot) { 1479 /* Sort by increasing column numbers, assuming A and B already sorted */ 1480 PetscInt imark = -1; 1481 if (v) { 1482 *v = v_p = mat->rowvalues; 1483 for (i=0; i<nzB; i++) { 1484 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1485 else break; 1486 } 1487 imark = i; 1488 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1489 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1490 } 1491 if (idx) { 1492 *idx = idx_p = mat->rowindices; 1493 if (imark > -1) { 1494 for (i=0; i<imark; i++) { 1495 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1496 } 1497 } else { 1498 for (i=0; i<nzB; i++) { 1499 if (cmap[cworkB[i]/bs] < cstart) 1500 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1501 else break; 1502 } 1503 imark = i; 1504 } 1505 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1506 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1507 } 1508 } else { 1509 if (idx) *idx = 0; 1510 if (v) *v = 0; 1511 } 1512 } 1513 *nz = nztot; 1514 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1515 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1516 PetscFunctionReturn(0); 1517 } 1518 1519 #undef __FUNCT__ 1520 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1521 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1522 { 1523 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1524 1525 PetscFunctionBegin; 1526 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1527 baij->getrowactive = PETSC_FALSE; 1528 PetscFunctionReturn(0); 1529 } 1530 1531 #undef __FUNCT__ 1532 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1533 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1534 { 1535 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1536 PetscErrorCode ierr; 1537 1538 PetscFunctionBegin; 1539 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1540 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1541 PetscFunctionReturn(0); 1542 } 1543 1544 #undef __FUNCT__ 1545 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1546 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1547 { 1548 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1549 Mat A = a->A,B = a->B; 1550 PetscErrorCode ierr; 1551 PetscReal isend[5],irecv[5]; 1552 1553 PetscFunctionBegin; 1554 info->block_size = (PetscReal)matin->rmap->bs; 1555 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1556 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1557 isend[3] = info->memory; isend[4] = info->mallocs; 1558 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1559 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1560 isend[3] += info->memory; isend[4] += info->mallocs; 1561 if (flag == MAT_LOCAL) { 1562 info->nz_used = isend[0]; 1563 info->nz_allocated = isend[1]; 1564 info->nz_unneeded = isend[2]; 1565 info->memory = isend[3]; 1566 info->mallocs = isend[4]; 1567 } else if (flag == MAT_GLOBAL_MAX) { 1568 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1569 info->nz_used = irecv[0]; 1570 info->nz_allocated = irecv[1]; 1571 info->nz_unneeded = irecv[2]; 1572 info->memory = irecv[3]; 1573 info->mallocs = irecv[4]; 1574 } else if (flag == MAT_GLOBAL_SUM) { 1575 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1576 info->nz_used = irecv[0]; 1577 info->nz_allocated = irecv[1]; 1578 info->nz_unneeded = irecv[2]; 1579 info->memory = irecv[3]; 1580 info->mallocs = irecv[4]; 1581 } else { 1582 SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1583 } 1584 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1585 info->fill_ratio_needed = 0; 1586 info->factor_mallocs = 0; 1587 PetscFunctionReturn(0); 1588 } 1589 1590 #undef __FUNCT__ 1591 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1592 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1593 { 1594 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1595 PetscErrorCode ierr; 1596 1597 PetscFunctionBegin; 1598 switch (op) { 1599 case MAT_NEW_NONZERO_LOCATIONS: 1600 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1601 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1602 case MAT_KEEP_NONZERO_PATTERN: 1603 case MAT_NEW_NONZERO_LOCATION_ERR: 1604 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1605 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1606 break; 1607 case MAT_ROW_ORIENTED: 1608 a->roworiented = flg; 1609 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1610 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1611 break; 1612 case MAT_NEW_DIAGONALS: 1613 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1614 break; 1615 case MAT_IGNORE_OFF_PROC_ENTRIES: 1616 a->donotstash = flg; 1617 break; 1618 case MAT_USE_HASH_TABLE: 1619 a->ht_flag = flg; 1620 break; 1621 case MAT_SYMMETRIC: 1622 case MAT_STRUCTURALLY_SYMMETRIC: 1623 case MAT_HERMITIAN: 1624 case MAT_SYMMETRY_ETERNAL: 1625 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1626 break; 1627 default: 1628 SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op); 1629 } 1630 PetscFunctionReturn(0); 1631 } 1632 1633 #undef __FUNCT__ 1634 #define __FUNCT__ "MatTranspose_MPIBAIJ" 1635 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1636 { 1637 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1638 Mat_SeqBAIJ *Aloc; 1639 Mat B; 1640 PetscErrorCode ierr; 1641 PetscInt M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1642 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1643 MatScalar *a; 1644 1645 PetscFunctionBegin; 1646 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1647 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1648 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1649 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1650 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1651 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1652 } else { 1653 B = *matout; 1654 } 1655 1656 /* copy over the A part */ 1657 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1658 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1659 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1660 1661 for (i=0; i<mbs; i++) { 1662 rvals[0] = bs*(baij->rstartbs + i); 1663 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1664 for (j=ai[i]; j<ai[i+1]; j++) { 1665 col = (baij->cstartbs+aj[j])*bs; 1666 for (k=0; k<bs; k++) { 1667 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1668 col++; a += bs; 1669 } 1670 } 1671 } 1672 /* copy over the B part */ 1673 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1674 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1675 for (i=0; i<mbs; i++) { 1676 rvals[0] = bs*(baij->rstartbs + i); 1677 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1678 for (j=ai[i]; j<ai[i+1]; j++) { 1679 col = baij->garray[aj[j]]*bs; 1680 for (k=0; k<bs; k++) { 1681 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1682 col++; a += bs; 1683 } 1684 } 1685 } 1686 ierr = PetscFree(rvals);CHKERRQ(ierr); 1687 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1688 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1689 1690 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1691 *matout = B; 1692 } else { 1693 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1694 } 1695 PetscFunctionReturn(0); 1696 } 1697 1698 #undef __FUNCT__ 1699 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1700 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1701 { 1702 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1703 Mat a = baij->A,b = baij->B; 1704 PetscErrorCode ierr; 1705 PetscInt s1,s2,s3; 1706 1707 PetscFunctionBegin; 1708 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1709 if (rr) { 1710 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1711 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1712 /* Overlap communication with computation. */ 1713 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1714 } 1715 if (ll) { 1716 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1717 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1718 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1719 } 1720 /* scale the diagonal block */ 1721 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1722 1723 if (rr) { 1724 /* Do a scatter end and then right scale the off-diagonal block */ 1725 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1726 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1727 } 1728 1729 PetscFunctionReturn(0); 1730 } 1731 1732 #undef __FUNCT__ 1733 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1734 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1735 { 1736 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1737 PetscErrorCode ierr; 1738 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1739 PetscInt i,*owners = A->rmap->range; 1740 PetscInt *nprocs,j,idx,nsends,row; 1741 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1742 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1; 1743 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap->rstart; 1744 MPI_Comm comm = ((PetscObject)A)->comm; 1745 MPI_Request *send_waits,*recv_waits; 1746 MPI_Status recv_status,*send_status; 1747 const PetscScalar *xx; 1748 PetscScalar *bb; 1749 #if defined(PETSC_DEBUG) 1750 PetscBool found = PETSC_FALSE; 1751 #endif 1752 1753 PetscFunctionBegin; 1754 /* first count number of contributors to each processor */ 1755 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1756 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1757 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1758 j = 0; 1759 for (i=0; i<N; i++) { 1760 if (lastidx > (idx = rows[i])) j = 0; 1761 lastidx = idx; 1762 for (; j<size; j++) { 1763 if (idx >= owners[j] && idx < owners[j+1]) { 1764 nprocs[2*j]++; 1765 nprocs[2*j+1] = 1; 1766 owner[i] = j; 1767 #if defined(PETSC_DEBUG) 1768 found = PETSC_TRUE; 1769 #endif 1770 break; 1771 } 1772 } 1773 #if defined(PETSC_DEBUG) 1774 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1775 found = PETSC_FALSE; 1776 #endif 1777 } 1778 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1779 1780 if (A->nooffproczerorows) { 1781 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 1782 nrecvs = nsends; 1783 nmax = N; 1784 } else { 1785 /* inform other processors of number of messages and max length*/ 1786 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1787 } 1788 1789 /* post receives: */ 1790 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1791 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1792 for (i=0; i<nrecvs; i++) { 1793 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1794 } 1795 1796 /* do sends: 1797 1) starts[i] gives the starting index in svalues for stuff going to 1798 the ith processor 1799 */ 1800 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1801 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1802 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1803 starts[0] = 0; 1804 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1805 for (i=0; i<N; i++) { 1806 svalues[starts[owner[i]]++] = rows[i]; 1807 } 1808 1809 starts[0] = 0; 1810 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1811 count = 0; 1812 for (i=0; i<size; i++) { 1813 if (nprocs[2*i+1]) { 1814 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1815 } 1816 } 1817 ierr = PetscFree(starts);CHKERRQ(ierr); 1818 1819 base = owners[rank]; 1820 1821 /* wait on receives */ 1822 ierr = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr); 1823 count = nrecvs; 1824 slen = 0; 1825 while (count) { 1826 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1827 /* unpack receives into our local space */ 1828 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1829 source[imdex] = recv_status.MPI_SOURCE; 1830 lens[imdex] = n; 1831 slen += n; 1832 count--; 1833 } 1834 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1835 1836 /* move the data into the send scatter */ 1837 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1838 count = 0; 1839 for (i=0; i<nrecvs; i++) { 1840 values = rvalues + i*nmax; 1841 for (j=0; j<lens[i]; j++) { 1842 lrows[count++] = values[j] - base; 1843 } 1844 } 1845 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1846 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1847 ierr = PetscFree(owner);CHKERRQ(ierr); 1848 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1849 1850 /* fix right hand side if needed */ 1851 if (x && b) { 1852 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1853 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1854 for (i=0; i<slen; i++) { 1855 bb[lrows[i]] = diag*xx[lrows[i]]; 1856 } 1857 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1858 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1859 } 1860 1861 /* actually zap the local rows */ 1862 /* 1863 Zero the required rows. If the "diagonal block" of the matrix 1864 is square and the user wishes to set the diagonal we use separate 1865 code so that MatSetValues() is not called for each diagonal allocating 1866 new memory, thus calling lots of mallocs and slowing things down. 1867 1868 */ 1869 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1870 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1871 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1872 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 1873 } else if (diag != 0.0) { 1874 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1875 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1876 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1877 for (i=0; i<slen; i++) { 1878 row = lrows[i] + rstart_bs; 1879 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1880 } 1881 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1882 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1883 } else { 1884 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1885 } 1886 1887 ierr = PetscFree(lrows);CHKERRQ(ierr); 1888 1889 /* wait on sends */ 1890 if (nsends) { 1891 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1892 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1893 ierr = PetscFree(send_status);CHKERRQ(ierr); 1894 } 1895 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1896 ierr = PetscFree(svalues);CHKERRQ(ierr); 1897 1898 PetscFunctionReturn(0); 1899 } 1900 1901 #undef __FUNCT__ 1902 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1903 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1904 { 1905 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1906 PetscErrorCode ierr; 1907 1908 PetscFunctionBegin; 1909 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1910 PetscFunctionReturn(0); 1911 } 1912 1913 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1914 1915 #undef __FUNCT__ 1916 #define __FUNCT__ "MatEqual_MPIBAIJ" 1917 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1918 { 1919 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1920 Mat a,b,c,d; 1921 PetscBool flg; 1922 PetscErrorCode ierr; 1923 1924 PetscFunctionBegin; 1925 a = matA->A; b = matA->B; 1926 c = matB->A; d = matB->B; 1927 1928 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1929 if (flg) { 1930 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1931 } 1932 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1933 PetscFunctionReturn(0); 1934 } 1935 1936 #undef __FUNCT__ 1937 #define __FUNCT__ "MatCopy_MPIBAIJ" 1938 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1939 { 1940 PetscErrorCode ierr; 1941 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1942 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1943 1944 PetscFunctionBegin; 1945 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1946 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1947 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1948 } else { 1949 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1950 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1951 } 1952 PetscFunctionReturn(0); 1953 } 1954 1955 #undef __FUNCT__ 1956 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1957 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1958 { 1959 PetscErrorCode ierr; 1960 1961 PetscFunctionBegin; 1962 ierr = MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1963 PetscFunctionReturn(0); 1964 } 1965 1966 #undef __FUNCT__ 1967 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1968 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1969 { 1970 PetscErrorCode ierr; 1971 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1972 PetscBLASInt bnz,one=1; 1973 Mat_SeqBAIJ *x,*y; 1974 1975 PetscFunctionBegin; 1976 if (str == SAME_NONZERO_PATTERN) { 1977 PetscScalar alpha = a; 1978 x = (Mat_SeqBAIJ *)xx->A->data; 1979 y = (Mat_SeqBAIJ *)yy->A->data; 1980 bnz = PetscBLASIntCast(x->nz); 1981 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1982 x = (Mat_SeqBAIJ *)xx->B->data; 1983 y = (Mat_SeqBAIJ *)yy->B->data; 1984 bnz = PetscBLASIntCast(x->nz); 1985 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1986 } else { 1987 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1988 } 1989 PetscFunctionReturn(0); 1990 } 1991 1992 #undef __FUNCT__ 1993 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ" 1994 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs) 1995 { 1996 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1997 PetscInt rbs,cbs; 1998 PetscErrorCode ierr; 1999 2000 PetscFunctionBegin; 2001 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 2002 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 2003 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 2004 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 2005 if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs); 2006 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs); 2007 PetscFunctionReturn(0); 2008 } 2009 2010 #undef __FUNCT__ 2011 #define __FUNCT__ "MatRealPart_MPIBAIJ" 2012 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 2013 { 2014 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2015 PetscErrorCode ierr; 2016 2017 PetscFunctionBegin; 2018 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2019 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2020 PetscFunctionReturn(0); 2021 } 2022 2023 #undef __FUNCT__ 2024 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 2025 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 2026 { 2027 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2028 PetscErrorCode ierr; 2029 2030 PetscFunctionBegin; 2031 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2032 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2033 PetscFunctionReturn(0); 2034 } 2035 2036 #undef __FUNCT__ 2037 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 2038 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 2039 { 2040 PetscErrorCode ierr; 2041 IS iscol_local; 2042 PetscInt csize; 2043 2044 PetscFunctionBegin; 2045 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2046 if (call == MAT_REUSE_MATRIX) { 2047 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 2048 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2049 } else { 2050 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2051 } 2052 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 2053 if (call == MAT_INITIAL_MATRIX) { 2054 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 2055 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 2056 } 2057 PetscFunctionReturn(0); 2058 } 2059 2060 #undef __FUNCT__ 2061 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private" 2062 /* 2063 Not great since it makes two copies of the submatrix, first an SeqBAIJ 2064 in local and then by concatenating the local matrices the end result. 2065 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ() 2066 */ 2067 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2068 { 2069 PetscErrorCode ierr; 2070 PetscMPIInt rank,size; 2071 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 2072 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2073 Mat *local,M,Mreuse; 2074 MatScalar *vwork,*aa; 2075 MPI_Comm comm = ((PetscObject)mat)->comm; 2076 Mat_SeqBAIJ *aij; 2077 2078 2079 PetscFunctionBegin; 2080 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2081 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2082 2083 if (call == MAT_REUSE_MATRIX) { 2084 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2085 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2086 local = &Mreuse; 2087 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2088 } else { 2089 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2090 Mreuse = *local; 2091 ierr = PetscFree(local);CHKERRQ(ierr); 2092 } 2093 2094 /* 2095 m - number of local rows 2096 n - number of columns (same on all processors) 2097 rstart - first row in new global matrix generated 2098 */ 2099 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2100 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2101 m = m/bs; 2102 n = n/bs; 2103 2104 if (call == MAT_INITIAL_MATRIX) { 2105 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2106 ii = aij->i; 2107 jj = aij->j; 2108 2109 /* 2110 Determine the number of non-zeros in the diagonal and off-diagonal 2111 portions of the matrix in order to do correct preallocation 2112 */ 2113 2114 /* first get start and end of "diagonal" columns */ 2115 if (csize == PETSC_DECIDE) { 2116 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2117 if (mglobal == n*bs) { /* square matrix */ 2118 nlocal = m; 2119 } else { 2120 nlocal = n/size + ((n % size) > rank); 2121 } 2122 } else { 2123 nlocal = csize/bs; 2124 } 2125 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2126 rstart = rend - nlocal; 2127 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2128 2129 /* next, compute all the lengths */ 2130 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2131 olens = dlens + m; 2132 for (i=0; i<m; i++) { 2133 jend = ii[i+1] - ii[i]; 2134 olen = 0; 2135 dlen = 0; 2136 for (j=0; j<jend; j++) { 2137 if (*jj < rstart || *jj >= rend) olen++; 2138 else dlen++; 2139 jj++; 2140 } 2141 olens[i] = olen; 2142 dlens[i] = dlen; 2143 } 2144 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2145 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 2146 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2147 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2148 ierr = PetscFree(dlens);CHKERRQ(ierr); 2149 } else { 2150 PetscInt ml,nl; 2151 2152 M = *newmat; 2153 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2154 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2155 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2156 /* 2157 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2158 rather than the slower MatSetValues(). 2159 */ 2160 M->was_assembled = PETSC_TRUE; 2161 M->assembled = PETSC_FALSE; 2162 } 2163 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2164 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2165 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2166 ii = aij->i; 2167 jj = aij->j; 2168 aa = aij->a; 2169 for (i=0; i<m; i++) { 2170 row = rstart/bs + i; 2171 nz = ii[i+1] - ii[i]; 2172 cwork = jj; jj += nz; 2173 vwork = aa; aa += nz; 2174 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2175 } 2176 2177 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2178 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2179 *newmat = M; 2180 2181 /* save submatrix used in processor for next request */ 2182 if (call == MAT_INITIAL_MATRIX) { 2183 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2184 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2185 } 2186 2187 PetscFunctionReturn(0); 2188 } 2189 2190 #undef __FUNCT__ 2191 #define __FUNCT__ "MatPermute_MPIBAIJ" 2192 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2193 { 2194 MPI_Comm comm,pcomm; 2195 PetscInt first,local_size,nrows; 2196 const PetscInt *rows; 2197 PetscMPIInt size; 2198 IS crowp,growp,irowp,lrowp,lcolp,icolp; 2199 PetscErrorCode ierr; 2200 2201 PetscFunctionBegin; 2202 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2203 /* make a collective version of 'rowp' */ 2204 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2205 if (pcomm==comm) { 2206 crowp = rowp; 2207 } else { 2208 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2209 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2210 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2211 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2212 } 2213 /* collect the global row permutation and invert it */ 2214 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 2215 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 2216 if (pcomm!=comm) { 2217 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 2218 } 2219 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 2220 /* get the local target indices */ 2221 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 2222 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 2223 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 2224 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 2225 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 2226 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 2227 /* the column permutation is so much easier; 2228 make a local version of 'colp' and invert it */ 2229 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2230 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2231 if (size==1) { 2232 lcolp = colp; 2233 } else { 2234 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 2235 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 2236 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr); 2237 } 2238 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2239 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 2240 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 2241 if (size>1) { 2242 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 2243 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 2244 } 2245 /* now we just get the submatrix */ 2246 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2247 /* clean up */ 2248 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 2249 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 2250 PetscFunctionReturn(0); 2251 } 2252 2253 #undef __FUNCT__ 2254 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2255 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2256 { 2257 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2258 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2259 2260 PetscFunctionBegin; 2261 if (nghosts) { *nghosts = B->nbs;} 2262 if (ghosts) {*ghosts = baij->garray;} 2263 PetscFunctionReturn(0); 2264 } 2265 2266 extern PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat); 2267 2268 #undef __FUNCT__ 2269 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ" 2270 /* 2271 This routine is almost identical to MatFDColoringCreate_MPIBAIJ()! 2272 */ 2273 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 2274 { 2275 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 2276 PetscErrorCode ierr; 2277 PetscMPIInt size,*ncolsonproc,*disp,nn; 2278 PetscInt bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col; 2279 const PetscInt *is; 2280 PetscInt nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj; 2281 PetscInt *rowhit,M,cstart,cend,colb; 2282 PetscInt *columnsforrow,l; 2283 IS *isa; 2284 PetscBool done,flg; 2285 ISLocalToGlobalMapping map = mat->cmap->bmapping; 2286 PetscInt *ltog = (map ? map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype; 2287 2288 PetscFunctionBegin; 2289 if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();"); 2290 if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock"); 2291 2292 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 2293 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2294 M = mat->rmap->n/bs; 2295 cstart = mat->cmap->rstart/bs; 2296 cend = mat->cmap->rend/bs; 2297 c->M = mat->rmap->N/bs; /* set the global rows and columns and local rows */ 2298 c->N = mat->cmap->N/bs; 2299 c->m = mat->rmap->n/bs; 2300 c->rstart = mat->rmap->rstart/bs; 2301 2302 c->ncolors = nis; 2303 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 2304 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 2305 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 2306 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 2307 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 2308 ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr); 2309 2310 /* Allow access to data structures of local part of matrix */ 2311 if (!baij->colmap) { 2312 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 2313 } 2314 ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2315 ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2316 2317 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 2318 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 2319 2320 for (i=0; i<nis; i++) { 2321 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 2322 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 2323 c->ncolumns[i] = n; 2324 if (n) { 2325 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 2326 ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr); 2327 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 2328 } else { 2329 c->columns[i] = 0; 2330 } 2331 2332 if (ctype == IS_COLORING_GLOBAL){ 2333 /* Determine the total (parallel) number of columns of this color */ 2334 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 2335 ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr); 2336 2337 nn = PetscMPIIntCast(n); 2338 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2339 nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];} 2340 if (!nctot) { 2341 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 2342 } 2343 2344 disp[0] = 0; 2345 for (j=1; j<size; j++) { 2346 disp[j] = disp[j-1] + ncolsonproc[j-1]; 2347 } 2348 2349 /* Get complete list of columns for color on each processor */ 2350 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2351 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2352 ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr); 2353 } else if (ctype == IS_COLORING_GHOSTED){ 2354 /* Determine local number of columns of this color on this process, including ghost points */ 2355 nctot = n; 2356 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2357 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 2358 } else { 2359 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 2360 } 2361 2362 /* 2363 Mark all rows affect by these columns 2364 */ 2365 /* Temporary option to allow for debugging/testing */ 2366 flg = PETSC_FALSE; 2367 ierr = PetscOptionsGetBool(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr); 2368 if (!flg) {/*-----------------------------------------------------------------------------*/ 2369 /* crude, fast version */ 2370 ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr); 2371 /* loop over columns*/ 2372 for (j=0; j<nctot; j++) { 2373 if (ctype == IS_COLORING_GHOSTED) { 2374 col = ltog[cols[j]]; 2375 } else { 2376 col = cols[j]; 2377 } 2378 if (col >= cstart && col < cend) { 2379 /* column is in diagonal block of matrix */ 2380 rows = A_cj + A_ci[col-cstart]; 2381 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2382 } else { 2383 #if defined (PETSC_USE_CTABLE) 2384 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2385 colb --; 2386 #else 2387 colb = baij->colmap[col] - 1; 2388 #endif 2389 if (colb == -1) { 2390 m = 0; 2391 } else { 2392 colb = colb/bs; 2393 rows = B_cj + B_ci[colb]; 2394 m = B_ci[colb+1] - B_ci[colb]; 2395 } 2396 } 2397 /* loop over columns marking them in rowhit */ 2398 for (k=0; k<m; k++) { 2399 rowhit[*rows++] = col + 1; 2400 } 2401 } 2402 2403 /* count the number of hits */ 2404 nrows = 0; 2405 for (j=0; j<M; j++) { 2406 if (rowhit[j]) nrows++; 2407 } 2408 c->nrows[i] = nrows; 2409 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2410 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2411 ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2412 nrows = 0; 2413 for (j=0; j<M; j++) { 2414 if (rowhit[j]) { 2415 c->rows[i][nrows] = j; 2416 c->columnsforrow[i][nrows] = rowhit[j] - 1; 2417 nrows++; 2418 } 2419 } 2420 } else {/*-------------------------------------------------------------------------------*/ 2421 /* slow version, using rowhit as a linked list */ 2422 PetscInt currentcol,fm,mfm; 2423 rowhit[M] = M; 2424 nrows = 0; 2425 /* loop over columns*/ 2426 for (j=0; j<nctot; j++) { 2427 if (ctype == IS_COLORING_GHOSTED) { 2428 col = ltog[cols[j]]; 2429 } else { 2430 col = cols[j]; 2431 } 2432 if (col >= cstart && col < cend) { 2433 /* column is in diagonal block of matrix */ 2434 rows = A_cj + A_ci[col-cstart]; 2435 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2436 } else { 2437 #if defined (PETSC_USE_CTABLE) 2438 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2439 colb --; 2440 #else 2441 colb = baij->colmap[col] - 1; 2442 #endif 2443 if (colb == -1) { 2444 m = 0; 2445 } else { 2446 colb = colb/bs; 2447 rows = B_cj + B_ci[colb]; 2448 m = B_ci[colb+1] - B_ci[colb]; 2449 } 2450 } 2451 2452 /* loop over columns marking them in rowhit */ 2453 fm = M; /* fm points to first entry in linked list */ 2454 for (k=0; k<m; k++) { 2455 currentcol = *rows++; 2456 /* is it already in the list? */ 2457 do { 2458 mfm = fm; 2459 fm = rowhit[fm]; 2460 } while (fm < currentcol); 2461 /* not in list so add it */ 2462 if (fm != currentcol) { 2463 nrows++; 2464 columnsforrow[currentcol] = col; 2465 /* next three lines insert new entry into linked list */ 2466 rowhit[mfm] = currentcol; 2467 rowhit[currentcol] = fm; 2468 fm = currentcol; 2469 /* fm points to present position in list since we know the columns are sorted */ 2470 } else { 2471 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected"); 2472 } 2473 } 2474 } 2475 c->nrows[i] = nrows; 2476 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2477 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2478 ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2479 /* now store the linked list of rows into c->rows[i] */ 2480 nrows = 0; 2481 fm = rowhit[M]; 2482 do { 2483 c->rows[i][nrows] = fm; 2484 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 2485 fm = rowhit[fm]; 2486 } while (fm < M); 2487 } /* ---------------------------------------------------------------------------------------*/ 2488 ierr = PetscFree(cols);CHKERRQ(ierr); 2489 } 2490 2491 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 2492 /* 2493 vscale will contain the "diagonal" on processor scalings followed by the off processor 2494 */ 2495 if (ctype == IS_COLORING_GLOBAL) { 2496 PetscInt *garray; 2497 ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2498 for (i=0; i<baij->B->cmap->n/bs; i++) { 2499 for (j=0; j<bs; j++) { 2500 garray[i*bs+j] = bs*baij->garray[i]+j; 2501 } 2502 } 2503 ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr); 2504 ierr = PetscFree(garray);CHKERRQ(ierr); 2505 CHKMEMQ; 2506 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2507 for (k=0; k<c->ncolors; k++) { 2508 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2509 for (l=0; l<c->nrows[k]; l++) { 2510 col = c->columnsforrow[k][l]; 2511 if (col >= cstart && col < cend) { 2512 /* column is in diagonal block of matrix */ 2513 colb = col - cstart; 2514 } else { 2515 /* column is in "off-processor" part */ 2516 #if defined (PETSC_USE_CTABLE) 2517 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2518 colb --; 2519 #else 2520 colb = baij->colmap[col] - 1; 2521 #endif 2522 colb = colb/bs; 2523 colb += cend - cstart; 2524 } 2525 c->vscaleforrow[k][l] = colb; 2526 } 2527 } 2528 } else if (ctype == IS_COLORING_GHOSTED) { 2529 /* Get gtol mapping */ 2530 PetscInt N = mat->cmap->N, *gtol; 2531 ierr = PetscMalloc((N+1)*sizeof(PetscInt),>ol);CHKERRQ(ierr); 2532 for (i=0; i<N; i++) gtol[i] = -1; 2533 for (i=0; i<map->n; i++) gtol[ltog[i]] = i; 2534 2535 c->vscale = 0; /* will be created in MatFDColoringApply() */ 2536 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2537 for (k=0; k<c->ncolors; k++) { 2538 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2539 for (l=0; l<c->nrows[k]; l++) { 2540 col = c->columnsforrow[k][l]; /* global column index */ 2541 c->vscaleforrow[k][l] = gtol[col]; /* local column index */ 2542 } 2543 } 2544 ierr = PetscFree(gtol);CHKERRQ(ierr); 2545 } 2546 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 2547 2548 ierr = PetscFree(rowhit);CHKERRQ(ierr); 2549 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 2550 ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2551 ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2552 CHKMEMQ; 2553 PetscFunctionReturn(0); 2554 } 2555 2556 #undef __FUNCT__ 2557 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ" 2558 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat) 2559 { 2560 Mat B; 2561 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2562 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2563 Mat_SeqAIJ *b; 2564 PetscErrorCode ierr; 2565 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2566 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2567 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2568 2569 PetscFunctionBegin; 2570 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2571 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2572 2573 /* ---------------------------------------------------------------- 2574 Tell every processor the number of nonzeros per row 2575 */ 2576 ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 2577 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2578 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2579 } 2580 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2581 ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr); 2582 displs = recvcounts + size; 2583 for (i=0; i<size; i++) { 2584 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2585 displs[i] = A->rmap->range[i]/bs; 2586 } 2587 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2588 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2589 #else 2590 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2591 #endif 2592 /* --------------------------------------------------------------- 2593 Create the sequential matrix of the same type as the local block diagonal 2594 */ 2595 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2596 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2597 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2598 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2599 b = (Mat_SeqAIJ *)B->data; 2600 2601 /*-------------------------------------------------------------------- 2602 Copy my part of matrix column indices over 2603 */ 2604 sendcount = ad->nz + bd->nz; 2605 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2606 a_jsendbuf = ad->j; 2607 b_jsendbuf = bd->j; 2608 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2609 cnt = 0; 2610 for (i=0; i<n; i++) { 2611 2612 /* put in lower diagonal portion */ 2613 m = bd->i[i+1] - bd->i[i]; 2614 while (m > 0) { 2615 /* is it above diagonal (in bd (compressed) numbering) */ 2616 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2617 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2618 m--; 2619 } 2620 2621 /* put in diagonal portion */ 2622 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2623 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2624 } 2625 2626 /* put in upper diagonal portion */ 2627 while (m-- > 0) { 2628 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2629 } 2630 } 2631 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt); 2632 2633 /*-------------------------------------------------------------------- 2634 Gather all column indices to all processors 2635 */ 2636 for (i=0; i<size; i++) { 2637 recvcounts[i] = 0; 2638 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2639 recvcounts[i] += lens[j]; 2640 } 2641 } 2642 displs[0] = 0; 2643 for (i=1; i<size; i++) { 2644 displs[i] = displs[i-1] + recvcounts[i-1]; 2645 } 2646 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2647 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2648 #else 2649 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2650 #endif 2651 /*-------------------------------------------------------------------- 2652 Assemble the matrix into useable form (note numerical values not yet set) 2653 */ 2654 /* set the b->ilen (length of each row) values */ 2655 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2656 /* set the b->i indices */ 2657 b->i[0] = 0; 2658 for (i=1; i<=A->rmap->N/bs; i++) { 2659 b->i[i] = b->i[i-1] + lens[i-1]; 2660 } 2661 ierr = PetscFree(lens);CHKERRQ(ierr); 2662 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2663 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2664 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2665 2666 if (A->symmetric){ 2667 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2668 } else if (A->hermitian) { 2669 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2670 } else if (A->structurally_symmetric) { 2671 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2672 } 2673 *newmat = B; 2674 PetscFunctionReturn(0); 2675 } 2676 2677 #undef __FUNCT__ 2678 #define __FUNCT__ "MatSOR_MPIBAIJ" 2679 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2680 { 2681 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2682 PetscErrorCode ierr; 2683 Vec bb1 = 0; 2684 2685 PetscFunctionBegin; 2686 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2687 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2688 } 2689 2690 if (flag == SOR_APPLY_UPPER) { 2691 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2692 PetscFunctionReturn(0); 2693 } 2694 2695 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2696 if (flag & SOR_ZERO_INITIAL_GUESS) { 2697 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2698 its--; 2699 } 2700 2701 while (its--) { 2702 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2703 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2704 2705 /* update rhs: bb1 = bb - B*x */ 2706 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2707 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2708 2709 /* local sweep */ 2710 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2711 } 2712 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 2713 if (flag & SOR_ZERO_INITIAL_GUESS) { 2714 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2715 its--; 2716 } 2717 while (its--) { 2718 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2719 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2720 2721 /* update rhs: bb1 = bb - B*x */ 2722 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2723 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2724 2725 /* local sweep */ 2726 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2727 } 2728 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 2729 if (flag & SOR_ZERO_INITIAL_GUESS) { 2730 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2731 its--; 2732 } 2733 while (its--) { 2734 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2735 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2736 2737 /* update rhs: bb1 = bb - B*x */ 2738 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2739 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2740 2741 /* local sweep */ 2742 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2743 } 2744 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2745 2746 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2747 PetscFunctionReturn(0); 2748 } 2749 2750 extern PetscErrorCode MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2751 2752 #undef __FUNCT__ 2753 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ" 2754 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,PetscScalar **values) 2755 { 2756 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 2757 PetscErrorCode ierr; 2758 2759 PetscFunctionBegin; 2760 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2761 PetscFunctionReturn(0); 2762 } 2763 2764 2765 /* -------------------------------------------------------------------*/ 2766 static struct _MatOps MatOps_Values = { 2767 MatSetValues_MPIBAIJ, 2768 MatGetRow_MPIBAIJ, 2769 MatRestoreRow_MPIBAIJ, 2770 MatMult_MPIBAIJ, 2771 /* 4*/ MatMultAdd_MPIBAIJ, 2772 MatMultTranspose_MPIBAIJ, 2773 MatMultTransposeAdd_MPIBAIJ, 2774 0, 2775 0, 2776 0, 2777 /*10*/ 0, 2778 0, 2779 0, 2780 MatSOR_MPIBAIJ, 2781 MatTranspose_MPIBAIJ, 2782 /*15*/ MatGetInfo_MPIBAIJ, 2783 MatEqual_MPIBAIJ, 2784 MatGetDiagonal_MPIBAIJ, 2785 MatDiagonalScale_MPIBAIJ, 2786 MatNorm_MPIBAIJ, 2787 /*20*/ MatAssemblyBegin_MPIBAIJ, 2788 MatAssemblyEnd_MPIBAIJ, 2789 MatSetOption_MPIBAIJ, 2790 MatZeroEntries_MPIBAIJ, 2791 /*24*/ MatZeroRows_MPIBAIJ, 2792 0, 2793 0, 2794 0, 2795 0, 2796 /*29*/ MatSetUpPreallocation_MPIBAIJ, 2797 0, 2798 0, 2799 0, 2800 0, 2801 /*34*/ MatDuplicate_MPIBAIJ, 2802 0, 2803 0, 2804 0, 2805 0, 2806 /*39*/ MatAXPY_MPIBAIJ, 2807 MatGetSubMatrices_MPIBAIJ, 2808 MatIncreaseOverlap_MPIBAIJ, 2809 MatGetValues_MPIBAIJ, 2810 MatCopy_MPIBAIJ, 2811 /*44*/ 0, 2812 MatScale_MPIBAIJ, 2813 0, 2814 0, 2815 0, 2816 /*49*/ MatSetBlockSize_MPIBAIJ, 2817 0, 2818 0, 2819 0, 2820 0, 2821 /*54*/ MatFDColoringCreate_MPIBAIJ, 2822 0, 2823 MatSetUnfactored_MPIBAIJ, 2824 MatPermute_MPIBAIJ, 2825 MatSetValuesBlocked_MPIBAIJ, 2826 /*59*/ MatGetSubMatrix_MPIBAIJ, 2827 MatDestroy_MPIBAIJ, 2828 MatView_MPIBAIJ, 2829 0, 2830 0, 2831 /*64*/ 0, 2832 0, 2833 0, 2834 0, 2835 0, 2836 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2837 0, 2838 0, 2839 0, 2840 0, 2841 /*74*/ 0, 2842 MatFDColoringApply_BAIJ, 2843 0, 2844 0, 2845 0, 2846 /*79*/ 0, 2847 0, 2848 0, 2849 0, 2850 MatLoad_MPIBAIJ, 2851 /*84*/ 0, 2852 0, 2853 0, 2854 0, 2855 0, 2856 /*89*/ 0, 2857 0, 2858 0, 2859 0, 2860 0, 2861 /*94*/ 0, 2862 0, 2863 0, 2864 0, 2865 0, 2866 /*99*/ 0, 2867 0, 2868 0, 2869 0, 2870 0, 2871 /*104*/0, 2872 MatRealPart_MPIBAIJ, 2873 MatImaginaryPart_MPIBAIJ, 2874 0, 2875 0, 2876 /*109*/0, 2877 0, 2878 0, 2879 0, 2880 0, 2881 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ, 2882 0, 2883 MatGetGhosts_MPIBAIJ, 2884 0, 2885 0, 2886 /*119*/0, 2887 0, 2888 0, 2889 0, 2890 0, 2891 /*124*/0, 2892 0, 2893 MatInvertBlockDiagonal_MPIBAIJ 2894 }; 2895 2896 EXTERN_C_BEGIN 2897 #undef __FUNCT__ 2898 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2899 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 2900 { 2901 PetscFunctionBegin; 2902 *a = ((Mat_MPIBAIJ *)A->data)->A; 2903 PetscFunctionReturn(0); 2904 } 2905 EXTERN_C_END 2906 2907 EXTERN_C_BEGIN 2908 extern PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2909 EXTERN_C_END 2910 2911 EXTERN_C_BEGIN 2912 #undef __FUNCT__ 2913 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2914 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2915 { 2916 PetscInt m,rstart,cstart,cend; 2917 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2918 const PetscInt *JJ=0; 2919 PetscScalar *values=0; 2920 PetscErrorCode ierr; 2921 2922 PetscFunctionBegin; 2923 2924 if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2925 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2926 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2927 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2928 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2929 m = B->rmap->n/bs; 2930 rstart = B->rmap->rstart/bs; 2931 cstart = B->cmap->rstart/bs; 2932 cend = B->cmap->rend/bs; 2933 2934 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2935 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 2936 for (i=0; i<m; i++) { 2937 nz = ii[i+1] - ii[i]; 2938 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2939 nz_max = PetscMax(nz_max,nz); 2940 JJ = jj + ii[i]; 2941 for (j=0; j<nz; j++) { 2942 if (*JJ >= cstart) break; 2943 JJ++; 2944 } 2945 d = 0; 2946 for (; j<nz; j++) { 2947 if (*JJ++ >= cend) break; 2948 d++; 2949 } 2950 d_nnz[i] = d; 2951 o_nnz[i] = nz - d; 2952 } 2953 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2954 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2955 2956 values = (PetscScalar*)V; 2957 if (!values) { 2958 ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2959 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2960 } 2961 for (i=0; i<m; i++) { 2962 PetscInt row = i + rstart; 2963 PetscInt ncols = ii[i+1] - ii[i]; 2964 const PetscInt *icols = jj + ii[i]; 2965 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2966 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2967 } 2968 2969 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2970 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2971 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2972 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2973 PetscFunctionReturn(0); 2974 } 2975 EXTERN_C_END 2976 2977 #undef __FUNCT__ 2978 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2979 /*@C 2980 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2981 (the default parallel PETSc format). 2982 2983 Collective on MPI_Comm 2984 2985 Input Parameters: 2986 + A - the matrix 2987 . bs - the block size 2988 . i - the indices into j for the start of each local row (starts with zero) 2989 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2990 - v - optional values in the matrix 2991 2992 Level: developer 2993 2994 .keywords: matrix, aij, compressed row, sparse, parallel 2995 2996 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2997 @*/ 2998 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2999 { 3000 PetscErrorCode ierr; 3001 3002 PetscFunctionBegin; 3003 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3004 PetscValidType(B,1); 3005 PetscValidLogicalCollectiveInt(B,bs,2); 3006 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3007 PetscFunctionReturn(0); 3008 } 3009 3010 EXTERN_C_BEGIN 3011 #undef __FUNCT__ 3012 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 3013 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 3014 { 3015 Mat_MPIBAIJ *b; 3016 PetscErrorCode ierr; 3017 PetscInt i, newbs = PetscAbs(bs); 3018 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3019 3020 PetscFunctionBegin; 3021 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3022 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3023 if (bs < 0) { 3024 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr); 3025 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 3026 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3027 bs = PetscAbs(bs); 3028 } 3029 if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz"); 3030 bs = newbs; 3031 3032 3033 if (bs < 1) SETERRQ(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 3034 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3035 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3036 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3037 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3038 3039 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3040 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3041 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3042 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3043 3044 if (d_nnz) { 3045 for (i=0; i<B->rmap->n/bs; i++) { 3046 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 3047 } 3048 } 3049 if (o_nnz) { 3050 for (i=0; i<B->rmap->n/bs; i++) { 3051 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 3052 } 3053 } 3054 3055 b = (Mat_MPIBAIJ*)B->data; 3056 b->bs2 = bs*bs; 3057 b->mbs = B->rmap->n/bs; 3058 b->nbs = B->cmap->n/bs; 3059 b->Mbs = B->rmap->N/bs; 3060 b->Nbs = B->cmap->N/bs; 3061 3062 for (i=0; i<=b->size; i++) { 3063 b->rangebs[i] = B->rmap->range[i]/bs; 3064 } 3065 b->rstartbs = B->rmap->rstart/bs; 3066 b->rendbs = B->rmap->rend/bs; 3067 b->cstartbs = B->cmap->rstart/bs; 3068 b->cendbs = B->cmap->rend/bs; 3069 3070 if (!B->preallocated) { 3071 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3072 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3073 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 3074 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3075 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3076 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3077 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 3078 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3079 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 3080 } 3081 3082 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3083 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 3084 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */ 3085 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3086 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3087 B->preallocated = PETSC_TRUE; 3088 PetscFunctionReturn(0); 3089 } 3090 EXTERN_C_END 3091 3092 EXTERN_C_BEGIN 3093 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 3094 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 3095 EXTERN_C_END 3096 3097 3098 EXTERN_C_BEGIN 3099 #undef __FUNCT__ 3100 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 3101 PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj) 3102 { 3103 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 3104 PetscErrorCode ierr; 3105 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 3106 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 3107 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 3108 3109 PetscFunctionBegin; 3110 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr); 3111 ii[0] = 0; 3112 CHKMEMQ; 3113 for (i=0; i<M; i++) { 3114 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 3115 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 3116 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 3117 /* remove one from count of matrix has diagonal */ 3118 for (j=id[i]; j<id[i+1]; j++) { 3119 if (jd[j] == i) {ii[i+1]--;break;} 3120 } 3121 CHKMEMQ; 3122 } 3123 ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3124 cnt = 0; 3125 for (i=0; i<M; i++) { 3126 for (j=io[i]; j<io[i+1]; j++) { 3127 if (garray[jo[j]] > rstart) break; 3128 jj[cnt++] = garray[jo[j]]; 3129 CHKMEMQ; 3130 } 3131 for (k=id[i]; k<id[i+1]; k++) { 3132 if (jd[k] != i) { 3133 jj[cnt++] = rstart + jd[k]; 3134 CHKMEMQ; 3135 } 3136 } 3137 for (;j<io[i+1]; j++) { 3138 jj[cnt++] = garray[jo[j]]; 3139 CHKMEMQ; 3140 } 3141 } 3142 ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr); 3143 PetscFunctionReturn(0); 3144 } 3145 EXTERN_C_END 3146 3147 #include <../src/mat/impls/aij/mpi/mpiaij.h> 3148 EXTERN_C_BEGIN 3149 PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*); 3150 EXTERN_C_END 3151 3152 EXTERN_C_BEGIN 3153 #undef __FUNCT__ 3154 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ" 3155 PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,const MatType newtype,MatReuse reuse,Mat *newmat) 3156 { 3157 PetscErrorCode ierr; 3158 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3159 Mat B; 3160 Mat_MPIAIJ *b; 3161 3162 PetscFunctionBegin; 3163 if (!A->assembled) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Matrix must be assembled"); 3164 3165 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 3166 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3167 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 3168 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3169 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 3170 b = (Mat_MPIAIJ*) B->data; 3171 3172 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 3173 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 3174 ierr = DisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 3175 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 3176 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 3177 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3178 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3179 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3180 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3181 if (reuse == MAT_REUSE_MATRIX) { 3182 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 3183 } else { 3184 *newmat = B; 3185 } 3186 PetscFunctionReturn(0); 3187 } 3188 EXTERN_C_END 3189 3190 EXTERN_C_BEGIN 3191 #if defined(PETSC_HAVE_MUMPS) 3192 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3193 #endif 3194 EXTERN_C_END 3195 3196 /*MC 3197 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 3198 3199 Options Database Keys: 3200 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 3201 . -mat_block_size <bs> - set the blocksize used to store the matrix 3202 - -mat_use_hash_table <fact> 3203 3204 Level: beginner 3205 3206 .seealso: MatCreateMPIBAIJ 3207 M*/ 3208 3209 EXTERN_C_BEGIN 3210 extern PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,const MatType,MatReuse,Mat*); 3211 EXTERN_C_END 3212 3213 EXTERN_C_BEGIN 3214 #undef __FUNCT__ 3215 #define __FUNCT__ "MatCreate_MPIBAIJ" 3216 PetscErrorCode MatCreate_MPIBAIJ(Mat B) 3217 { 3218 Mat_MPIBAIJ *b; 3219 PetscErrorCode ierr; 3220 PetscBool flg; 3221 3222 PetscFunctionBegin; 3223 ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr); 3224 B->data = (void*)b; 3225 3226 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3227 B->assembled = PETSC_FALSE; 3228 3229 B->insertmode = NOT_SET_VALUES; 3230 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 3231 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 3232 3233 /* build local table of row and column ownerships */ 3234 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 3235 3236 /* build cache for off array entries formed */ 3237 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 3238 b->donotstash = PETSC_FALSE; 3239 b->colmap = PETSC_NULL; 3240 b->garray = PETSC_NULL; 3241 b->roworiented = PETSC_TRUE; 3242 3243 /* stuff used in block assembly */ 3244 b->barray = 0; 3245 3246 /* stuff used for matrix vector multiply */ 3247 b->lvec = 0; 3248 b->Mvctx = 0; 3249 3250 /* stuff for MatGetRow() */ 3251 b->rowindices = 0; 3252 b->rowvalues = 0; 3253 b->getrowactive = PETSC_FALSE; 3254 3255 /* hash table stuff */ 3256 b->ht = 0; 3257 b->hd = 0; 3258 b->ht_size = 0; 3259 b->ht_flag = PETSC_FALSE; 3260 b->ht_fact = 0; 3261 b->ht_total_ct = 0; 3262 b->ht_insert_ct = 0; 3263 3264 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 3265 b->ijonly = PETSC_FALSE; 3266 3267 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 3268 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 3269 if (flg) { 3270 PetscReal fact = 1.39; 3271 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 3272 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 3273 if (fact <= 1.0) fact = 1.39; 3274 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 3275 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 3276 } 3277 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3278 3279 #if defined(PETSC_HAVE_MUMPS) 3280 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr); 3281 #endif 3282 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C", 3283 "MatConvert_MPIBAIJ_MPIAdj", 3284 MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3285 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiaij_C", 3286 "MatConvert_MPIBAIJ_MPIAIJ", 3287 MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 3288 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C", 3289 "MatConvert_MPIBAIJ_MPISBAIJ", 3290 MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr); 3291 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3292 "MatStoreValues_MPIBAIJ", 3293 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3294 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3295 "MatRetrieveValues_MPIBAIJ", 3296 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3297 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3298 "MatGetDiagonalBlock_MPIBAIJ", 3299 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3300 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 3301 "MatMPIBAIJSetPreallocation_MPIBAIJ", 3302 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3303 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 3304 "MatMPIBAIJSetPreallocationCSR_MPIBAIJ", 3305 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3306 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3307 "MatDiagonalScaleLocal_MPIBAIJ", 3308 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3309 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 3310 "MatSetHashTableFactor_MPIBAIJ", 3311 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3312 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C", 3313 "MatConvert_MPIBAIJ_MPIBSTRM", 3314 MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr); 3315 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3316 PetscFunctionReturn(0); 3317 } 3318 EXTERN_C_END 3319 3320 /*MC 3321 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3322 3323 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3324 and MATMPIBAIJ otherwise. 3325 3326 Options Database Keys: 3327 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3328 3329 Level: beginner 3330 3331 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3332 M*/ 3333 3334 #undef __FUNCT__ 3335 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 3336 /*@C 3337 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3338 (block compressed row). For good matrix assembly performance 3339 the user should preallocate the matrix storage by setting the parameters 3340 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3341 performance can be increased by more than a factor of 50. 3342 3343 Collective on Mat 3344 3345 Input Parameters: 3346 + A - the matrix 3347 . bs - size of blockk 3348 . d_nz - number of block nonzeros per block row in diagonal portion of local 3349 submatrix (same for all local rows) 3350 . d_nnz - array containing the number of block nonzeros in the various block rows 3351 of the in diagonal portion of the local (possibly different for each block 3352 row) or PETSC_NULL. If you plan to factor the matrix you must leave room for the diagonal entry and 3353 set it even if it is zero. 3354 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 3355 submatrix (same for all local rows). 3356 - o_nnz - array containing the number of nonzeros in the various block rows of the 3357 off-diagonal portion of the local submatrix (possibly different for 3358 each block row) or PETSC_NULL. 3359 3360 If the *_nnz parameter is given then the *_nz parameter is ignored 3361 3362 Options Database Keys: 3363 + -mat_block_size - size of the blocks to use 3364 - -mat_use_hash_table <fact> 3365 3366 Notes: 3367 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3368 than it must be used on all processors that share the object for that argument. 3369 3370 Storage Information: 3371 For a square global matrix we define each processor's diagonal portion 3372 to be its local rows and the corresponding columns (a square submatrix); 3373 each processor's off-diagonal portion encompasses the remainder of the 3374 local matrix (a rectangular submatrix). 3375 3376 The user can specify preallocated storage for the diagonal part of 3377 the local submatrix with either d_nz or d_nnz (not both). Set 3378 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3379 memory allocation. Likewise, specify preallocated storage for the 3380 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3381 3382 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3383 the figure below we depict these three local rows and all columns (0-11). 3384 3385 .vb 3386 0 1 2 3 4 5 6 7 8 9 10 11 3387 ------------------- 3388 row 3 | o o o d d d o o o o o o 3389 row 4 | o o o d d d o o o o o o 3390 row 5 | o o o d d d o o o o o o 3391 ------------------- 3392 .ve 3393 3394 Thus, any entries in the d locations are stored in the d (diagonal) 3395 submatrix, and any entries in the o locations are stored in the 3396 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3397 stored simply in the MATSEQBAIJ format for compressed row storage. 3398 3399 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3400 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3401 In general, for PDE problems in which most nonzeros are near the diagonal, 3402 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3403 or you will get TERRIBLE performance; see the users' manual chapter on 3404 matrices. 3405 3406 You can call MatGetInfo() to get information on how effective the preallocation was; 3407 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3408 You can also run with the option -info and look for messages with the string 3409 malloc in them to see if additional memory allocation was needed. 3410 3411 Level: intermediate 3412 3413 .keywords: matrix, block, aij, compressed row, sparse, parallel 3414 3415 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR() 3416 @*/ 3417 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3418 { 3419 PetscErrorCode ierr; 3420 3421 PetscFunctionBegin; 3422 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3423 PetscValidType(B,1); 3424 PetscValidLogicalCollectiveInt(B,bs,2); 3425 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3426 PetscFunctionReturn(0); 3427 } 3428 3429 #undef __FUNCT__ 3430 #define __FUNCT__ "MatCreateMPIBAIJ" 3431 /*@C 3432 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 3433 (block compressed row). For good matrix assembly performance 3434 the user should preallocate the matrix storage by setting the parameters 3435 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3436 performance can be increased by more than a factor of 50. 3437 3438 Collective on MPI_Comm 3439 3440 Input Parameters: 3441 + comm - MPI communicator 3442 . bs - size of blockk 3443 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3444 This value should be the same as the local size used in creating the 3445 y vector for the matrix-vector product y = Ax. 3446 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 3447 This value should be the same as the local size used in creating the 3448 x vector for the matrix-vector product y = Ax. 3449 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3450 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3451 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3452 submatrix (same for all local rows) 3453 . d_nnz - array containing the number of nonzero blocks in the various block rows 3454 of the in diagonal portion of the local (possibly different for each block 3455 row) or PETSC_NULL. If you plan to factor the matrix you must leave room for the diagonal entry 3456 and set it even if it is zero. 3457 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3458 submatrix (same for all local rows). 3459 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3460 off-diagonal portion of the local submatrix (possibly different for 3461 each block row) or PETSC_NULL. 3462 3463 Output Parameter: 3464 . A - the matrix 3465 3466 Options Database Keys: 3467 + -mat_block_size - size of the blocks to use 3468 - -mat_use_hash_table <fact> 3469 3470 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3471 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3472 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3473 3474 Notes: 3475 If the *_nnz parameter is given then the *_nz parameter is ignored 3476 3477 A nonzero block is any block that as 1 or more nonzeros in it 3478 3479 The user MUST specify either the local or global matrix dimensions 3480 (possibly both). 3481 3482 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3483 than it must be used on all processors that share the object for that argument. 3484 3485 Storage Information: 3486 For a square global matrix we define each processor's diagonal portion 3487 to be its local rows and the corresponding columns (a square submatrix); 3488 each processor's off-diagonal portion encompasses the remainder of the 3489 local matrix (a rectangular submatrix). 3490 3491 The user can specify preallocated storage for the diagonal part of 3492 the local submatrix with either d_nz or d_nnz (not both). Set 3493 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3494 memory allocation. Likewise, specify preallocated storage for the 3495 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3496 3497 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3498 the figure below we depict these three local rows and all columns (0-11). 3499 3500 .vb 3501 0 1 2 3 4 5 6 7 8 9 10 11 3502 ------------------- 3503 row 3 | o o o d d d o o o o o o 3504 row 4 | o o o d d d o o o o o o 3505 row 5 | o o o d d d o o o o o o 3506 ------------------- 3507 .ve 3508 3509 Thus, any entries in the d locations are stored in the d (diagonal) 3510 submatrix, and any entries in the o locations are stored in the 3511 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3512 stored simply in the MATSEQBAIJ format for compressed row storage. 3513 3514 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3515 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3516 In general, for PDE problems in which most nonzeros are near the diagonal, 3517 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3518 or you will get TERRIBLE performance; see the users' manual chapter on 3519 matrices. 3520 3521 Level: intermediate 3522 3523 .keywords: matrix, block, aij, compressed row, sparse, parallel 3524 3525 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3526 @*/ 3527 PetscErrorCode MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3528 { 3529 PetscErrorCode ierr; 3530 PetscMPIInt size; 3531 3532 PetscFunctionBegin; 3533 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3534 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3535 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3536 if (size > 1) { 3537 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3538 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3539 } else { 3540 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3541 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3542 } 3543 PetscFunctionReturn(0); 3544 } 3545 3546 #undef __FUNCT__ 3547 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3548 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3549 { 3550 Mat mat; 3551 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3552 PetscErrorCode ierr; 3553 PetscInt len=0; 3554 3555 PetscFunctionBegin; 3556 *newmat = 0; 3557 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3558 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3559 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3560 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3561 3562 mat->factortype = matin->factortype; 3563 mat->preallocated = PETSC_TRUE; 3564 mat->assembled = PETSC_TRUE; 3565 mat->insertmode = NOT_SET_VALUES; 3566 3567 a = (Mat_MPIBAIJ*)mat->data; 3568 mat->rmap->bs = matin->rmap->bs; 3569 a->bs2 = oldmat->bs2; 3570 a->mbs = oldmat->mbs; 3571 a->nbs = oldmat->nbs; 3572 a->Mbs = oldmat->Mbs; 3573 a->Nbs = oldmat->Nbs; 3574 3575 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3576 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3577 3578 a->size = oldmat->size; 3579 a->rank = oldmat->rank; 3580 a->donotstash = oldmat->donotstash; 3581 a->roworiented = oldmat->roworiented; 3582 a->rowindices = 0; 3583 a->rowvalues = 0; 3584 a->getrowactive = PETSC_FALSE; 3585 a->barray = 0; 3586 a->rstartbs = oldmat->rstartbs; 3587 a->rendbs = oldmat->rendbs; 3588 a->cstartbs = oldmat->cstartbs; 3589 a->cendbs = oldmat->cendbs; 3590 3591 /* hash table stuff */ 3592 a->ht = 0; 3593 a->hd = 0; 3594 a->ht_size = 0; 3595 a->ht_flag = oldmat->ht_flag; 3596 a->ht_fact = oldmat->ht_fact; 3597 a->ht_total_ct = 0; 3598 a->ht_insert_ct = 0; 3599 3600 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3601 if (oldmat->colmap) { 3602 #if defined (PETSC_USE_CTABLE) 3603 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3604 #else 3605 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3606 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3607 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3608 #endif 3609 } else a->colmap = 0; 3610 3611 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3612 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3613 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3614 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3615 } else a->garray = 0; 3616 3617 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3618 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3619 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3620 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3621 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3622 3623 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3624 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3625 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3626 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3627 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3628 *newmat = mat; 3629 3630 PetscFunctionReturn(0); 3631 } 3632 3633 #undef __FUNCT__ 3634 #define __FUNCT__ "MatLoad_MPIBAIJ" 3635 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer) 3636 { 3637 PetscErrorCode ierr; 3638 int fd; 3639 PetscInt i,nz,j,rstart,rend; 3640 PetscScalar *vals,*buf; 3641 MPI_Comm comm = ((PetscObject)viewer)->comm; 3642 MPI_Status status; 3643 PetscMPIInt rank,size,maxnz; 3644 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3645 PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 3646 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 3647 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3648 PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 3649 PetscInt dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols; 3650 3651 PetscFunctionBegin; 3652 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3653 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3654 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3655 3656 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3657 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3658 if (!rank) { 3659 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3660 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3661 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3662 } 3663 3664 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 3665 3666 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3667 M = header[1]; N = header[2]; 3668 3669 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3670 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 3671 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 3672 3673 /* If global sizes are set, check if they are consistent with that given in the file */ 3674 if (sizesset) { 3675 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 3676 } 3677 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3678 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3679 3680 if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices"); 3681 3682 /* 3683 This code adds extra rows to make sure the number of rows is 3684 divisible by the blocksize 3685 */ 3686 Mbs = M/bs; 3687 extra_rows = bs - M + bs*Mbs; 3688 if (extra_rows == bs) extra_rows = 0; 3689 else Mbs++; 3690 if (extra_rows && !rank) { 3691 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3692 } 3693 3694 /* determine ownership of all rows */ 3695 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 3696 mbs = Mbs/size + ((Mbs % size) > rank); 3697 m = mbs*bs; 3698 } else { /* User set */ 3699 m = newmat->rmap->n; 3700 mbs = m/bs; 3701 } 3702 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 3703 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3704 3705 /* process 0 needs enough room for process with most rows */ 3706 if (!rank) { 3707 mmax = rowners[1]; 3708 for (i=2; i<size; i++) { 3709 mmax = PetscMax(mmax,rowners[i]); 3710 } 3711 mmax*=bs; 3712 } else mmax = m; 3713 3714 rowners[0] = 0; 3715 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3716 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3717 rstart = rowners[rank]; 3718 rend = rowners[rank+1]; 3719 3720 /* distribute row lengths to all processors */ 3721 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 3722 if (!rank) { 3723 mend = m; 3724 if (size == 1) mend = mend - extra_rows; 3725 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3726 for (j=mend; j<m; j++) locrowlens[j] = 1; 3727 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3728 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3729 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3730 for (j=0; j<m; j++) { 3731 procsnz[0] += locrowlens[j]; 3732 } 3733 for (i=1; i<size; i++) { 3734 mend = browners[i+1] - browners[i]; 3735 if (i == size-1) mend = mend - extra_rows; 3736 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3737 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3738 /* calculate the number of nonzeros on each processor */ 3739 for (j=0; j<browners[i+1]-browners[i]; j++) { 3740 procsnz[i] += rowlengths[j]; 3741 } 3742 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3743 } 3744 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3745 } else { 3746 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3747 } 3748 3749 if (!rank) { 3750 /* determine max buffer needed and allocate it */ 3751 maxnz = procsnz[0]; 3752 for (i=1; i<size; i++) { 3753 maxnz = PetscMax(maxnz,procsnz[i]); 3754 } 3755 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3756 3757 /* read in my part of the matrix column indices */ 3758 nz = procsnz[0]; 3759 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3760 mycols = ibuf; 3761 if (size == 1) nz -= extra_rows; 3762 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3763 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 3764 3765 /* read in every ones (except the last) and ship off */ 3766 for (i=1; i<size-1; i++) { 3767 nz = procsnz[i]; 3768 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3769 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3770 } 3771 /* read in the stuff for the last proc */ 3772 if (size != 1) { 3773 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 3774 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3775 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3776 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3777 } 3778 ierr = PetscFree(cols);CHKERRQ(ierr); 3779 } else { 3780 /* determine buffer space needed for message */ 3781 nz = 0; 3782 for (i=0; i<m; i++) { 3783 nz += locrowlens[i]; 3784 } 3785 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3786 mycols = ibuf; 3787 /* receive message of column indices*/ 3788 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3789 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3790 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3791 } 3792 3793 /* loop over local rows, determining number of off diagonal entries */ 3794 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 3795 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 3796 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3797 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3798 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3799 rowcount = 0; nzcount = 0; 3800 for (i=0; i<mbs; i++) { 3801 dcount = 0; 3802 odcount = 0; 3803 for (j=0; j<bs; j++) { 3804 kmax = locrowlens[rowcount]; 3805 for (k=0; k<kmax; k++) { 3806 tmp = mycols[nzcount++]/bs; 3807 if (!mask[tmp]) { 3808 mask[tmp] = 1; 3809 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3810 else masked1[dcount++] = tmp; 3811 } 3812 } 3813 rowcount++; 3814 } 3815 3816 dlens[i] = dcount; 3817 odlens[i] = odcount; 3818 3819 /* zero out the mask elements we set */ 3820 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3821 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 3822 } 3823 3824 3825 if (!sizesset) { 3826 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3827 } 3828 ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3829 3830 if (!rank) { 3831 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3832 /* read in my part of the matrix numerical values */ 3833 nz = procsnz[0]; 3834 vals = buf; 3835 mycols = ibuf; 3836 if (size == 1) nz -= extra_rows; 3837 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3838 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 3839 3840 /* insert into matrix */ 3841 jj = rstart*bs; 3842 for (i=0; i<m; i++) { 3843 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3844 mycols += locrowlens[i]; 3845 vals += locrowlens[i]; 3846 jj++; 3847 } 3848 /* read in other processors (except the last one) and ship out */ 3849 for (i=1; i<size-1; i++) { 3850 nz = procsnz[i]; 3851 vals = buf; 3852 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3853 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3854 } 3855 /* the last proc */ 3856 if (size != 1){ 3857 nz = procsnz[i] - extra_rows; 3858 vals = buf; 3859 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3860 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3861 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3862 } 3863 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3864 } else { 3865 /* receive numeric values */ 3866 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3867 3868 /* receive message of values*/ 3869 vals = buf; 3870 mycols = ibuf; 3871 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 3872 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 3873 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3874 3875 /* insert into matrix */ 3876 jj = rstart*bs; 3877 for (i=0; i<m; i++) { 3878 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3879 mycols += locrowlens[i]; 3880 vals += locrowlens[i]; 3881 jj++; 3882 } 3883 } 3884 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3885 ierr = PetscFree(buf);CHKERRQ(ierr); 3886 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3887 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 3888 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 3889 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 3890 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3891 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3892 3893 PetscFunctionReturn(0); 3894 } 3895 3896 #undef __FUNCT__ 3897 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 3898 /*@ 3899 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3900 3901 Input Parameters: 3902 . mat - the matrix 3903 . fact - factor 3904 3905 Not Collective, each process can use a different factor 3906 3907 Level: advanced 3908 3909 Notes: 3910 This can also be set by the command line option: -mat_use_hash_table <fact> 3911 3912 .keywords: matrix, hashtable, factor, HT 3913 3914 .seealso: MatSetOption() 3915 @*/ 3916 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3917 { 3918 PetscErrorCode ierr; 3919 3920 PetscFunctionBegin; 3921 ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr); 3922 PetscFunctionReturn(0); 3923 } 3924 3925 EXTERN_C_BEGIN 3926 #undef __FUNCT__ 3927 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 3928 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3929 { 3930 Mat_MPIBAIJ *baij; 3931 3932 PetscFunctionBegin; 3933 baij = (Mat_MPIBAIJ*)mat->data; 3934 baij->ht_fact = fact; 3935 PetscFunctionReturn(0); 3936 } 3937 EXTERN_C_END 3938 3939 #undef __FUNCT__ 3940 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 3941 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3942 { 3943 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3944 PetscFunctionBegin; 3945 *Ad = a->A; 3946 *Ao = a->B; 3947 *colmap = a->garray; 3948 PetscFunctionReturn(0); 3949 } 3950 3951 /* 3952 Special version for direct calls from Fortran (to eliminate two function call overheads 3953 */ 3954 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3955 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3956 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3957 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3958 #endif 3959 3960 #undef __FUNCT__ 3961 #define __FUNCT__ "matmpibiajsetvaluesblocked" 3962 /*@C 3963 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 3964 3965 Collective on Mat 3966 3967 Input Parameters: 3968 + mat - the matrix 3969 . min - number of input rows 3970 . im - input rows 3971 . nin - number of input columns 3972 . in - input columns 3973 . v - numerical values input 3974 - addvin - INSERT_VALUES or ADD_VALUES 3975 3976 Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse. 3977 3978 Level: advanced 3979 3980 .seealso: MatSetValuesBlocked() 3981 @*/ 3982 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3983 { 3984 /* convert input arguments to C version */ 3985 Mat mat = *matin; 3986 PetscInt m = *min, n = *nin; 3987 InsertMode addv = *addvin; 3988 3989 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3990 const MatScalar *value; 3991 MatScalar *barray=baij->barray; 3992 PetscBool roworiented = baij->roworiented; 3993 PetscErrorCode ierr; 3994 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3995 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3996 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3997 3998 PetscFunctionBegin; 3999 /* tasks normally handled by MatSetValuesBlocked() */ 4000 if (mat->insertmode == NOT_SET_VALUES) { 4001 mat->insertmode = addv; 4002 } 4003 #if defined(PETSC_USE_DEBUG) 4004 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 4005 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 4006 #endif 4007 if (mat->assembled) { 4008 mat->was_assembled = PETSC_TRUE; 4009 mat->assembled = PETSC_FALSE; 4010 } 4011 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 4012 4013 4014 if(!barray) { 4015 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 4016 baij->barray = barray; 4017 } 4018 4019 if (roworiented) { 4020 stepval = (n-1)*bs; 4021 } else { 4022 stepval = (m-1)*bs; 4023 } 4024 for (i=0; i<m; i++) { 4025 if (im[i] < 0) continue; 4026 #if defined(PETSC_USE_DEBUG) 4027 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 4028 #endif 4029 if (im[i] >= rstart && im[i] < rend) { 4030 row = im[i] - rstart; 4031 for (j=0; j<n; j++) { 4032 /* If NumCol = 1 then a copy is not required */ 4033 if ((roworiented) && (n == 1)) { 4034 barray = (MatScalar*)v + i*bs2; 4035 } else if((!roworiented) && (m == 1)) { 4036 barray = (MatScalar*)v + j*bs2; 4037 } else { /* Here a copy is required */ 4038 if (roworiented) { 4039 value = v + i*(stepval+bs)*bs + j*bs; 4040 } else { 4041 value = v + j*(stepval+bs)*bs + i*bs; 4042 } 4043 for (ii=0; ii<bs; ii++,value+=stepval) { 4044 for (jj=0; jj<bs; jj++) { 4045 *barray++ = *value++; 4046 } 4047 } 4048 barray -=bs2; 4049 } 4050 4051 if (in[j] >= cstart && in[j] < cend){ 4052 col = in[j] - cstart; 4053 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 4054 } 4055 else if (in[j] < 0) continue; 4056 #if defined(PETSC_USE_DEBUG) 4057 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 4058 #endif 4059 else { 4060 if (mat->was_assembled) { 4061 if (!baij->colmap) { 4062 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 4063 } 4064 4065 #if defined(PETSC_USE_DEBUG) 4066 #if defined (PETSC_USE_CTABLE) 4067 { PetscInt data; 4068 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 4069 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4070 } 4071 #else 4072 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4073 #endif 4074 #endif 4075 #if defined (PETSC_USE_CTABLE) 4076 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 4077 col = (col - 1)/bs; 4078 #else 4079 col = (baij->colmap[in[j]] - 1)/bs; 4080 #endif 4081 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 4082 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 4083 col = in[j]; 4084 } 4085 } 4086 else col = in[j]; 4087 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 4088 } 4089 } 4090 } else { 4091 if (!baij->donotstash) { 4092 if (roworiented) { 4093 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 4094 } else { 4095 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 4096 } 4097 } 4098 } 4099 } 4100 4101 /* task normally handled by MatSetValuesBlocked() */ 4102 ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 4103 PetscFunctionReturn(0); 4104 } 4105 4106 #undef __FUNCT__ 4107 #define __FUNCT__ "MatCreateMPIBAIJWithArrays" 4108 /*@ 4109 MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard 4110 CSR format the local rows. 4111 4112 Collective on MPI_Comm 4113 4114 Input Parameters: 4115 + comm - MPI communicator 4116 . bs - the block size, only a block size of 1 is supported 4117 . m - number of local rows (Cannot be PETSC_DECIDE) 4118 . n - This value should be the same as the local size used in creating the 4119 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4120 calculated if N is given) For square matrices n is almost always m. 4121 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4122 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4123 . i - row indices 4124 . j - column indices 4125 - a - matrix values 4126 4127 Output Parameter: 4128 . mat - the matrix 4129 4130 Level: intermediate 4131 4132 Notes: 4133 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4134 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4135 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4136 4137 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4138 4139 .keywords: matrix, aij, compressed row, sparse, parallel 4140 4141 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4142 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 4143 @*/ 4144 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4145 { 4146 PetscErrorCode ierr; 4147 4148 4149 PetscFunctionBegin; 4150 if (i[0]) { 4151 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4152 } 4153 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4154 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4155 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4156 ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 4157 ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 4158 PetscFunctionReturn(0); 4159 } 4160