1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <petscblaslapack.h> 4 5 extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat); 6 extern PetscErrorCode DisAssemble_MPIBAIJ(Mat); 7 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []); 8 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 9 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 10 extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 11 extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 12 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec); 13 14 #undef __FUNCT__ 15 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ" 16 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[]) 17 { 18 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 19 PetscErrorCode ierr; 20 PetscInt i,*idxb = 0; 21 PetscScalar *va,*vb; 22 Vec vtmp; 23 24 PetscFunctionBegin; 25 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 26 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 27 if (idx) { 28 for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;} 29 } 30 31 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 32 if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);} 33 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 34 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 35 36 for (i=0; i<A->rmap->n; i++){ 37 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);} 38 } 39 40 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 41 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 42 ierr = PetscFree(idxb);CHKERRQ(ierr); 43 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 44 PetscFunctionReturn(0); 45 } 46 47 EXTERN_C_BEGIN 48 #undef __FUNCT__ 49 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 50 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 51 { 52 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 53 PetscErrorCode ierr; 54 55 PetscFunctionBegin; 56 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 57 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 58 PetscFunctionReturn(0); 59 } 60 EXTERN_C_END 61 62 EXTERN_C_BEGIN 63 #undef __FUNCT__ 64 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 65 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 66 { 67 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 68 PetscErrorCode ierr; 69 70 PetscFunctionBegin; 71 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 72 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 73 PetscFunctionReturn(0); 74 } 75 EXTERN_C_END 76 77 /* 78 Local utility routine that creates a mapping from the global column 79 number to the local number in the off-diagonal part of the local 80 storage of the matrix. This is done in a non scalable way since the 81 length of colmap equals the global matrix length. 
82 */ 83 #undef __FUNCT__ 84 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 85 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat) 86 { 87 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 88 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 89 PetscErrorCode ierr; 90 PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 91 92 PetscFunctionBegin; 93 #if defined (PETSC_USE_CTABLE) 94 ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr); 95 for (i=0; i<nbs; i++){ 96 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr); 97 } 98 #else 99 ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr); 100 ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 101 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 102 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 103 #endif 104 PetscFunctionReturn(0); 105 } 106 107 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 108 { \ 109 \ 110 brow = row/bs; \ 111 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 112 rmax = aimax[brow]; nrow = ailen[brow]; \ 113 bcol = col/bs; \ 114 ridx = row % bs; cidx = col % bs; \ 115 low = 0; high = nrow; \ 116 while (high-low > 3) { \ 117 t = (low+high)/2; \ 118 if (rp[t] > bcol) high = t; \ 119 else low = t; \ 120 } \ 121 for (_i=low; _i<high; _i++) { \ 122 if (rp[_i] > bcol) break; \ 123 if (rp[_i] == bcol) { \ 124 bap = ap + bs2*_i + bs*cidx + ridx; \ 125 if (addv == ADD_VALUES) *bap += value; \ 126 else *bap = value; \ 127 goto a_noinsert; \ 128 } \ 129 } \ 130 if (a->nonew == 1) goto a_noinsert; \ 131 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 132 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 133 N = nrow++ - 1; \ 134 /* shift up all the later entries in this row */ \ 135 for (ii=N; ii>=_i; ii--) { \ 136 rp[ii+1] = rp[ii]; \ 137 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 138 } \ 139 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 140 rp[_i] = bcol; \ 141 ap[bs2*_i + bs*cidx + ridx] = value; \ 142 a_noinsert:; \ 143 ailen[brow] = nrow; \ 144 } 145 146 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 147 { \ 148 brow = row/bs; \ 149 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 150 rmax = bimax[brow]; nrow = bilen[brow]; \ 151 bcol = col/bs; \ 152 ridx = row % bs; cidx = col % bs; \ 153 low = 0; high = nrow; \ 154 while (high-low > 3) { \ 155 t = (low+high)/2; \ 156 if (rp[t] > bcol) high = t; \ 157 else low = t; \ 158 } \ 159 for (_i=low; _i<high; _i++) { \ 160 if (rp[_i] > bcol) break; \ 161 if (rp[_i] == bcol) { \ 162 bap = ap + bs2*_i + bs*cidx + ridx; \ 163 if (addv == ADD_VALUES) *bap += value; \ 164 else *bap = value; \ 165 goto b_noinsert; \ 166 } \ 167 } \ 168 if (b->nonew == 1) goto b_noinsert; \ 169 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 170 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 171 CHKMEMQ;\ 172 N = nrow++ - 1; \ 173 /* shift up all the later entries in this row */ \ 174 for (ii=N; ii>=_i; ii--) { \ 175 rp[ii+1] = rp[ii]; \ 176 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 177 } \ 178 if (N>=_i) { ierr = 
PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 179 rp[_i] = bcol; \ 180 ap[bs2*_i + bs*cidx + ridx] = value; \ 181 b_noinsert:; \ 182 bilen[brow] = nrow; \ 183 } 184 185 #undef __FUNCT__ 186 #define __FUNCT__ "MatSetValues_MPIBAIJ" 187 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 188 { 189 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 190 MatScalar value; 191 PetscBool roworiented = baij->roworiented; 192 PetscErrorCode ierr; 193 PetscInt i,j,row,col; 194 PetscInt rstart_orig=mat->rmap->rstart; 195 PetscInt rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart; 196 PetscInt cend_orig=mat->cmap->rend,bs=mat->rmap->bs; 197 198 /* Some Variables required in the macro */ 199 Mat A = baij->A; 200 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 201 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 202 MatScalar *aa=a->a; 203 204 Mat B = baij->B; 205 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 206 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 207 MatScalar *ba=b->a; 208 209 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 210 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 211 MatScalar *ap,*bap; 212 213 PetscFunctionBegin; 214 if (v) PetscValidScalarPointer(v,6); 215 for (i=0; i<m; i++) { 216 if (im[i] < 0) continue; 217 #if defined(PETSC_USE_DEBUG) 218 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 219 #endif 220 if (im[i] >= rstart_orig && im[i] < rend_orig) { 221 row = im[i] - rstart_orig; 222 for (j=0; j<n; j++) { 223 if (in[j] >= cstart_orig && in[j] < cend_orig){ 224 col = in[j] - cstart_orig; 225 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 226 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 227 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 228 } else if (in[j] < 0) continue; 229 #if defined(PETSC_USE_DEBUG) 230 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 231 #endif 232 else { 233 if (mat->was_assembled) { 234 if (!baij->colmap) { 235 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 236 } 237 #if defined (PETSC_USE_CTABLE) 238 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 239 col = col - 1; 240 #else 241 col = baij->colmap[in[j]/bs] - 1; 242 #endif 243 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 244 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 245 col = in[j]; 246 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 247 B = baij->B; 248 b = (Mat_SeqBAIJ*)(B)->data; 249 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 250 ba=b->a; 251 } else col += in[j]%bs; 252 } else col = in[j]; 253 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 254 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 255 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 256 } 257 } 258 } else { 259 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 260 if (!baij->donotstash) { 261 mat->assembled = PETSC_FALSE; 262 if (roworiented) { 263 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 264 } else { 265 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 266 } 267 
} 268 } 269 } 270 PetscFunctionReturn(0); 271 } 272 273 #undef __FUNCT__ 274 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 275 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 276 { 277 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 278 const PetscScalar *value; 279 MatScalar *barray=baij->barray; 280 PetscBool roworiented = baij->roworiented; 281 PetscErrorCode ierr; 282 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 283 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 284 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 285 286 PetscFunctionBegin; 287 if(!barray) { 288 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 289 baij->barray = barray; 290 } 291 292 if (roworiented) { 293 stepval = (n-1)*bs; 294 } else { 295 stepval = (m-1)*bs; 296 } 297 for (i=0; i<m; i++) { 298 if (im[i] < 0) continue; 299 #if defined(PETSC_USE_DEBUG) 300 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 301 #endif 302 if (im[i] >= rstart && im[i] < rend) { 303 row = im[i] - rstart; 304 for (j=0; j<n; j++) { 305 /* If NumCol = 1 then a copy is not required */ 306 if ((roworiented) && (n == 1)) { 307 barray = (MatScalar*)v + i*bs2; 308 } else if((!roworiented) && (m == 1)) { 309 barray = (MatScalar*)v + j*bs2; 310 } else { /* Here a copy is required */ 311 if (roworiented) { 312 value = v + (i*(stepval+bs) + j)*bs; 313 } else { 314 value = v + (j*(stepval+bs) + i)*bs; 315 } 316 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 317 for (jj=0; jj<bs; jj++) { 318 barray[jj] = value[jj]; 319 } 320 barray += bs; 321 } 322 barray -= bs2; 323 } 324 325 if (in[j] >= cstart && in[j] < cend){ 326 col = in[j] - cstart; 327 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 328 } 329 else if (in[j] < 0) continue; 330 #if defined(PETSC_USE_DEBUG) 331 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 332 #endif 333 else { 334 if (mat->was_assembled) { 335 if (!baij->colmap) { 336 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 337 } 338 339 #if defined(PETSC_USE_DEBUG) 340 #if defined (PETSC_USE_CTABLE) 341 { PetscInt data; 342 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 343 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 344 } 345 #else 346 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 347 #endif 348 #endif 349 #if defined (PETSC_USE_CTABLE) 350 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 351 col = (col - 1)/bs; 352 #else 353 col = (baij->colmap[in[j]] - 1)/bs; 354 #endif 355 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 356 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 357 col = in[j]; 358 } 359 } 360 else col = in[j]; 361 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 362 } 363 } 364 } else { 365 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 366 if (!baij->donotstash) { 367 if (roworiented) { 368 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 369 } else { 370 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 371 } 
372 } 373 } 374 } 375 PetscFunctionReturn(0); 376 } 377 378 #define HASH_KEY 0.6180339887 379 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 380 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 381 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 382 #undef __FUNCT__ 383 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 384 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 385 { 386 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 387 PetscBool roworiented = baij->roworiented; 388 PetscErrorCode ierr; 389 PetscInt i,j,row,col; 390 PetscInt rstart_orig=mat->rmap->rstart; 391 PetscInt rend_orig=mat->rmap->rend,Nbs=baij->Nbs; 392 PetscInt h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx; 393 PetscReal tmp; 394 MatScalar **HD = baij->hd,value; 395 #if defined(PETSC_USE_DEBUG) 396 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 397 #endif 398 399 PetscFunctionBegin; 400 if (v) PetscValidScalarPointer(v,6); 401 for (i=0; i<m; i++) { 402 #if defined(PETSC_USE_DEBUG) 403 if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 404 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 405 #endif 406 row = im[i]; 407 if (row >= rstart_orig && row < rend_orig) { 408 for (j=0; j<n; j++) { 409 col = in[j]; 410 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 411 /* Look up PetscInto the Hash Table */ 412 key = (row/bs)*Nbs+(col/bs)+1; 413 h1 = HASH(size,key,tmp); 414 415 416 idx = h1; 417 #if defined(PETSC_USE_DEBUG) 418 insert_ct++; 419 total_ct++; 420 if (HT[idx] != key) { 421 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 422 if (idx == size) { 423 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 424 if (idx == h1) { 425 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 426 } 427 } 428 } 429 #else 430 if (HT[idx] != key) { 431 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 432 if (idx == size) { 433 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 434 if (idx == h1) { 435 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 436 } 437 } 438 } 439 #endif 440 /* A HASH table entry is found, so insert the values at the correct address */ 441 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 442 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 443 } 444 } else { 445 if (!baij->donotstash) { 446 if (roworiented) { 447 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 448 } else { 449 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 450 } 451 } 452 } 453 } 454 #if defined(PETSC_USE_DEBUG) 455 baij->ht_total_ct = total_ct; 456 baij->ht_insert_ct = insert_ct; 457 #endif 458 PetscFunctionReturn(0); 459 } 460 461 #undef __FUNCT__ 462 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 463 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 464 { 465 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 466 PetscBool roworiented = baij->roworiented; 467 PetscErrorCode ierr; 468 PetscInt i,j,ii,jj,row,col; 469 PetscInt rstart=baij->rstartbs; 470 PetscInt 
rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 471 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 472 PetscReal tmp; 473 MatScalar **HD = baij->hd,*baij_a; 474 const PetscScalar *v_t,*value; 475 #if defined(PETSC_USE_DEBUG) 476 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 477 #endif 478 479 PetscFunctionBegin; 480 481 if (roworiented) { 482 stepval = (n-1)*bs; 483 } else { 484 stepval = (m-1)*bs; 485 } 486 for (i=0; i<m; i++) { 487 #if defined(PETSC_USE_DEBUG) 488 if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 489 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 490 #endif 491 row = im[i]; 492 v_t = v + i*nbs2; 493 if (row >= rstart && row < rend) { 494 for (j=0; j<n; j++) { 495 col = in[j]; 496 497 /* Look up into the Hash Table */ 498 key = row*Nbs+col+1; 499 h1 = HASH(size,key,tmp); 500 501 idx = h1; 502 #if defined(PETSC_USE_DEBUG) 503 total_ct++; 504 insert_ct++; 505 if (HT[idx] != key) { 506 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 507 if (idx == size) { 508 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 509 if (idx == h1) { 510 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 511 } 512 } 513 } 514 #else 515 if (HT[idx] != key) { 516 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 517 if (idx == size) { 518 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 519 if (idx == h1) { 520 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 521 } 522 } 523 } 524 #endif 525 baij_a = HD[idx]; 526 if (roworiented) { 527 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 528 /* value = v + (i*(stepval+bs)+j)*bs; */ 529 value = v_t; 530 v_t += bs; 531 if (addv == ADD_VALUES) { 532 for (ii=0; ii<bs; ii++,value+=stepval) { 533 for (jj=ii; jj<bs2; jj+=bs) { 534 baij_a[jj] += *value++; 535 } 536 } 537 } else { 538 for (ii=0; ii<bs; ii++,value+=stepval) { 539 for (jj=ii; jj<bs2; jj+=bs) { 540 baij_a[jj] = *value++; 541 } 542 } 543 } 544 } else { 545 value = v + j*(stepval+bs)*bs + i*bs; 546 if (addv == ADD_VALUES) { 547 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 548 for (jj=0; jj<bs; jj++) { 549 baij_a[jj] += *value++; 550 } 551 } 552 } else { 553 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 554 for (jj=0; jj<bs; jj++) { 555 baij_a[jj] = *value++; 556 } 557 } 558 } 559 } 560 } 561 } else { 562 if (!baij->donotstash) { 563 if (roworiented) { 564 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 565 } else { 566 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 567 } 568 } 569 } 570 } 571 #if defined(PETSC_USE_DEBUG) 572 baij->ht_total_ct = total_ct; 573 baij->ht_insert_ct = insert_ct; 574 #endif 575 PetscFunctionReturn(0); 576 } 577 578 #undef __FUNCT__ 579 #define __FUNCT__ "MatGetValues_MPIBAIJ" 580 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 581 { 582 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 583 PetscErrorCode ierr; 584 PetscInt bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 585 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 586 587 PetscFunctionBegin; 588 for (i=0; i<m; i++) { 589 if (idxm[i] < 0) continue; /* 
SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 590 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 591 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 592 row = idxm[i] - bsrstart; 593 for (j=0; j<n; j++) { 594 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 595 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 596 if (idxn[j] >= bscstart && idxn[j] < bscend){ 597 col = idxn[j] - bscstart; 598 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 599 } else { 600 if (!baij->colmap) { 601 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 602 } 603 #if defined (PETSC_USE_CTABLE) 604 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 605 data --; 606 #else 607 data = baij->colmap[idxn[j]/bs]-1; 608 #endif 609 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 610 else { 611 col = data + idxn[j]%bs; 612 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 613 } 614 } 615 } 616 } else { 617 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 618 } 619 } 620 PetscFunctionReturn(0); 621 } 622 623 #undef __FUNCT__ 624 #define __FUNCT__ "MatNorm_MPIBAIJ" 625 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 626 { 627 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 628 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 629 PetscErrorCode ierr; 630 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 631 PetscReal sum = 0.0; 632 MatScalar *v; 633 634 PetscFunctionBegin; 635 if (baij->size == 1) { 636 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 637 } else { 638 if (type == NORM_FROBENIUS) { 639 v = amat->a; 640 nz = amat->nz*bs2; 641 for (i=0; i<nz; i++) { 642 #if defined(PETSC_USE_COMPLEX) 643 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 644 #else 645 sum += (*v)*(*v); v++; 646 #endif 647 } 648 v = bmat->a; 649 nz = bmat->nz*bs2; 650 for (i=0; i<nz; i++) { 651 #if defined(PETSC_USE_COMPLEX) 652 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 653 #else 654 sum += (*v)*(*v); v++; 655 #endif 656 } 657 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 658 *nrm = PetscSqrtReal(*nrm); 659 } else if (type == NORM_1) { /* max column sum */ 660 PetscReal *tmp,*tmp2; 661 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 662 ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr); 663 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 664 v = amat->a; jj = amat->j; 665 for (i=0; i<amat->nz; i++) { 666 for (j=0; j<bs; j++){ 667 col = bs*(cstart + *jj) + j; /* column index */ 668 for (row=0; row<bs; row++){ 669 tmp[col] += PetscAbsScalar(*v); v++; 670 } 671 } 672 jj++; 673 } 674 v = bmat->a; jj = bmat->j; 675 for (i=0; i<bmat->nz; i++) { 676 for (j=0; j<bs; j++){ 677 col = bs*garray[*jj] + j; 678 for (row=0; row<bs; row++){ 679 tmp[col] += PetscAbsScalar(*v); v++; 680 } 681 } 682 jj++; 683 } 684 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 685 *nrm = 0.0; 686 for (j=0; j<mat->cmap->N; j++) { 687 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 688 } 689 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 690 } else if (type == 
NORM_INFINITY) { /* max row sum */ 691 PetscReal *sums; 692 ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr); 693 sum = 0.0; 694 for (j=0; j<amat->mbs; j++) { 695 for (row=0; row<bs; row++) sums[row] = 0.0; 696 v = amat->a + bs2*amat->i[j]; 697 nz = amat->i[j+1]-amat->i[j]; 698 for (i=0; i<nz; i++) { 699 for (col=0; col<bs; col++){ 700 for (row=0; row<bs; row++){ 701 sums[row] += PetscAbsScalar(*v); v++; 702 } 703 } 704 } 705 v = bmat->a + bs2*bmat->i[j]; 706 nz = bmat->i[j+1]-bmat->i[j]; 707 for (i=0; i<nz; i++) { 708 for (col=0; col<bs; col++){ 709 for (row=0; row<bs; row++){ 710 sums[row] += PetscAbsScalar(*v); v++; 711 } 712 } 713 } 714 for (row=0; row<bs; row++){ 715 if (sums[row] > sum) sum = sums[row]; 716 } 717 } 718 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 719 ierr = PetscFree(sums);CHKERRQ(ierr); 720 } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet"); 721 } 722 PetscFunctionReturn(0); 723 } 724 725 /* 726 Creates the hash table, and sets the table 727 This table is created only once. 728 If new entried need to be added to the matrix 729 then the hash table has to be destroyed and 730 recreated. 731 */ 732 #undef __FUNCT__ 733 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 734 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 735 { 736 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 737 Mat A = baij->A,B=baij->B; 738 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 739 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 740 PetscErrorCode ierr; 741 PetscInt ht_size,bs2=baij->bs2,rstart=baij->rstartbs; 742 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 743 PetscInt *HT,key; 744 MatScalar **HD; 745 PetscReal tmp; 746 #if defined(PETSC_USE_INFO) 747 PetscInt ct=0,max=0; 748 #endif 749 750 PetscFunctionBegin; 751 if (baij->ht) PetscFunctionReturn(0); 752 753 baij->ht_size = (PetscInt)(factor*nz); 754 ht_size = baij->ht_size; 755 756 /* Allocate Memory for Hash Table */ 757 ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr); 758 ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr); 759 ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr); 760 HD = baij->hd; 761 HT = baij->ht; 762 763 /* Loop Over A */ 764 for (i=0; i<a->mbs; i++) { 765 for (j=ai[i]; j<ai[i+1]; j++) { 766 row = i+rstart; 767 col = aj[j]+cstart; 768 769 key = row*Nbs + col + 1; 770 h1 = HASH(ht_size,key,tmp); 771 for (k=0; k<ht_size; k++){ 772 if (!HT[(h1+k)%ht_size]) { 773 HT[(h1+k)%ht_size] = key; 774 HD[(h1+k)%ht_size] = a->a + j*bs2; 775 break; 776 #if defined(PETSC_USE_INFO) 777 } else { 778 ct++; 779 #endif 780 } 781 } 782 #if defined(PETSC_USE_INFO) 783 if (k> max) max = k; 784 #endif 785 } 786 } 787 /* Loop Over B */ 788 for (i=0; i<b->mbs; i++) { 789 for (j=bi[i]; j<bi[i+1]; j++) { 790 row = i+rstart; 791 col = garray[bj[j]]; 792 key = row*Nbs + col + 1; 793 h1 = HASH(ht_size,key,tmp); 794 for (k=0; k<ht_size; k++){ 795 if (!HT[(h1+k)%ht_size]) { 796 HT[(h1+k)%ht_size] = key; 797 HD[(h1+k)%ht_size] = b->a + j*bs2; 798 break; 799 #if defined(PETSC_USE_INFO) 800 } else { 801 ct++; 802 #endif 803 } 804 } 805 #if defined(PETSC_USE_INFO) 806 if (k> max) max = k; 807 #endif 808 } 809 } 810 811 /* Print Summary */ 812 #if defined(PETSC_USE_INFO) 813 for (i=0,j=0; i<ht_size; i++) { 814 if (HT[i]) {j++;} 815 } 816 ierr = PetscInfo2(mat,"Average Search = 
%5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 817 #endif 818 PetscFunctionReturn(0); 819 } 820 821 #undef __FUNCT__ 822 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 823 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 824 { 825 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 826 PetscErrorCode ierr; 827 PetscInt nstash,reallocs; 828 InsertMode addv; 829 830 PetscFunctionBegin; 831 if (baij->donotstash || mat->nooffprocentries) { 832 PetscFunctionReturn(0); 833 } 834 835 /* make sure all processors are either in INSERTMODE or ADDMODE */ 836 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 837 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 838 mat->insertmode = addv; /* in case this processor had no cache */ 839 840 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 841 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 842 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 843 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 844 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 845 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 846 PetscFunctionReturn(0); 847 } 848 849 #undef __FUNCT__ 850 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 851 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 852 { 853 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 854 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data; 855 PetscErrorCode ierr; 856 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 857 PetscInt *row,*col; 858 PetscBool r1,r2,r3,other_disassembled; 859 MatScalar *val; 860 InsertMode addv = mat->insertmode; 861 PetscMPIInt n; 862 863 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 864 PetscFunctionBegin; 865 if (!baij->donotstash && !mat->nooffprocentries) { 866 while (1) { 867 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 868 if (!flg) break; 869 870 for (i=0; i<n;) { 871 /* Now identify the consecutive vals belonging to the same row */ 872 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 873 if (j < n) ncols = j-i; 874 else ncols = n-i; 875 /* Now assemble all these values with a single function call */ 876 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 877 i = j; 878 } 879 } 880 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 881 /* Now process the block-stash. 
Since the values are stashed column-oriented, 882 set the roworiented flag to column oriented, and after MatSetValues() 883 restore the original flags */ 884 r1 = baij->roworiented; 885 r2 = a->roworiented; 886 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 887 baij->roworiented = PETSC_FALSE; 888 a->roworiented = PETSC_FALSE; 889 (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 890 while (1) { 891 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 892 if (!flg) break; 893 894 for (i=0; i<n;) { 895 /* Now identify the consecutive vals belonging to the same row */ 896 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 897 if (j < n) ncols = j-i; 898 else ncols = n-i; 899 ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 900 i = j; 901 } 902 } 903 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 904 baij->roworiented = r1; 905 a->roworiented = r2; 906 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 907 } 908 909 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 910 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 911 912 /* determine if any processor has disassembled, if so we must 913 also disassemble ourselfs, in order that we may reassemble. */ 914 /* 915 if nonzero structure of submatrix B cannot change then we know that 916 no processor disassembled thus we can skip this stuff 917 */ 918 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 919 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 920 if (mat->was_assembled && !other_disassembled) { 921 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 922 } 923 } 924 925 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 926 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 927 } 928 ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 929 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 930 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 931 932 #if defined(PETSC_USE_INFO) 933 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 934 ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr); 935 baij->ht_total_ct = 0; 936 baij->ht_insert_ct = 0; 937 } 938 #endif 939 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 940 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 941 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 942 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 943 } 944 945 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 946 baij->rowvalues = 0; 947 PetscFunctionReturn(0); 948 } 949 950 #undef __FUNCT__ 951 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 952 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 953 { 954 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 955 PetscErrorCode ierr; 956 PetscMPIInt size = baij->size,rank = baij->rank; 957 PetscInt bs = mat->rmap->bs; 958 PetscBool iascii,isdraw; 959 PetscViewer sviewer; 960 PetscViewerFormat format; 961 962 PetscFunctionBegin; 963 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 964 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 965 if (iascii) { 966 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 967 if (format == 
PETSC_VIEWER_ASCII_INFO_DETAIL) { 968 MatInfo info; 969 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 970 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 971 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 972 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 973 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 974 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 975 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 976 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 977 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 978 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 979 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 980 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 981 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 982 PetscFunctionReturn(0); 983 } else if (format == PETSC_VIEWER_ASCII_INFO) { 984 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 985 PetscFunctionReturn(0); 986 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 987 PetscFunctionReturn(0); 988 } 989 } 990 991 if (isdraw) { 992 PetscDraw draw; 993 PetscBool isnull; 994 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 995 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 996 } 997 998 if (size == 1) { 999 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1000 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1001 } else { 1002 /* assemble the entire matrix onto first processor. */ 1003 Mat A; 1004 Mat_SeqBAIJ *Aloc; 1005 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1006 MatScalar *a; 1007 1008 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1009 /* Perhaps this should be the type of mat? 
*/ 1010 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1011 if (!rank) { 1012 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1013 } else { 1014 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1015 } 1016 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1017 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1018 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1019 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1020 1021 /* copy over the A part */ 1022 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1023 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1024 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1025 1026 for (i=0; i<mbs; i++) { 1027 rvals[0] = bs*(baij->rstartbs + i); 1028 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1029 for (j=ai[i]; j<ai[i+1]; j++) { 1030 col = (baij->cstartbs+aj[j])*bs; 1031 for (k=0; k<bs; k++) { 1032 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1033 col++; a += bs; 1034 } 1035 } 1036 } 1037 /* copy over the B part */ 1038 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1039 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1040 for (i=0; i<mbs; i++) { 1041 rvals[0] = bs*(baij->rstartbs + i); 1042 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1043 for (j=ai[i]; j<ai[i+1]; j++) { 1044 col = baij->garray[aj[j]]*bs; 1045 for (k=0; k<bs; k++) { 1046 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1047 col++; a += bs; 1048 } 1049 } 1050 } 1051 ierr = PetscFree(rvals);CHKERRQ(ierr); 1052 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1053 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1054 /* 1055 Everyone has to call to draw the matrix since the graphics waits are 1056 synchronized across all processors that share the PetscDraw object 1057 */ 1058 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1059 if (!rank) { 1060 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1061 /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/ 1062 PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ); 1063 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1064 } 1065 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1066 ierr = MatDestroy(&A);CHKERRQ(ierr); 1067 } 1068 PetscFunctionReturn(0); 1069 } 1070 1071 #undef __FUNCT__ 1072 #define __FUNCT__ "MatView_MPIBAIJ_Binary" 1073 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 1074 { 1075 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)mat->data; 1076 Mat_SeqBAIJ* A = (Mat_SeqBAIJ*)a->A->data; 1077 Mat_SeqBAIJ* B = (Mat_SeqBAIJ*)a->B->data; 1078 PetscErrorCode ierr; 1079 PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen; 1080 PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll; 1081 int fd; 1082 PetscScalar *column_values; 1083 FILE *file; 1084 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1085 PetscInt message_count,flowcontrolcount; 1086 1087 PetscFunctionBegin; 1088 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1089 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1090 nz = bs2*(A->nz + B->nz); 1091 rlen = mat->rmap->n; 1092 if (!rank) { 1093 header[0] = MAT_FILE_CLASSID; 1094 header[1] = mat->rmap->N; 
1095 header[2] = mat->cmap->N; 1096 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1097 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1098 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1099 /* get largest number of rows any processor has */ 1100 range = mat->rmap->range; 1101 for (i=1; i<size; i++) { 1102 rlen = PetscMax(rlen,range[i+1] - range[i]); 1103 } 1104 } else { 1105 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1106 } 1107 1108 ierr = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr); 1109 /* compute lengths of each row */ 1110 for (i=0; i<a->mbs; i++) { 1111 crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1112 } 1113 /* store the row lengths to the file */ 1114 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1115 if (!rank) { 1116 MPI_Status status; 1117 ierr = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr); 1118 rlen = (range[1] - range[0])/bs; 1119 for (i=0; i<rlen; i++) { 1120 for (j=0; j<bs; j++) { 1121 row_lens[i*bs+j] = bs*crow_lens[i]; 1122 } 1123 } 1124 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1125 for (i=1; i<size; i++) { 1126 rlen = (range[i+1] - range[i])/bs; 1127 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1128 ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1129 for (k=0; k<rlen; k++) { 1130 for (j=0; j<bs; j++) { 1131 row_lens[k*bs+j] = bs*crow_lens[k]; 1132 } 1133 } 1134 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1135 } 1136 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1137 ierr = PetscFree(row_lens);CHKERRQ(ierr); 1138 } else { 1139 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1140 ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1141 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1142 } 1143 ierr = PetscFree(crow_lens);CHKERRQ(ierr); 1144 1145 /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the 1146 information needed to make it for each row from a block row. 
This does require more communication but still not more than 1147 the communication needed for the nonzero values */ 1148 nzmax = nz; /* space a largest processor needs */ 1149 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1150 ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1151 cnt = 0; 1152 for (i=0; i<a->mbs; i++) { 1153 pcnt = cnt; 1154 for (j=B->i[i]; j<B->i[i+1]; j++) { 1155 if ( (col = garray[B->j[j]]) > cstart) break; 1156 for (l=0; l<bs; l++) { 1157 column_indices[cnt++] = bs*col+l; 1158 } 1159 } 1160 for (k=A->i[i]; k<A->i[i+1]; k++) { 1161 for (l=0; l<bs; l++) { 1162 column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 1163 } 1164 } 1165 for (; j<B->i[i+1]; j++) { 1166 for (l=0; l<bs; l++) { 1167 column_indices[cnt++] = bs*garray[B->j[j]]+l; 1168 } 1169 } 1170 len = cnt - pcnt; 1171 for (k=1; k<bs; k++) { 1172 ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr); 1173 cnt += len; 1174 } 1175 } 1176 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1177 1178 /* store the columns to the file */ 1179 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1180 if (!rank) { 1181 MPI_Status status; 1182 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1183 for (i=1; i<size; i++) { 1184 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1185 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1186 ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1187 ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1188 } 1189 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1190 } else { 1191 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1192 ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1193 ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1194 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1195 } 1196 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1197 1198 /* load up the numerical values */ 1199 ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1200 cnt = 0; 1201 for (i=0; i<a->mbs; i++) { 1202 rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1203 for (j=B->i[i]; j<B->i[i+1]; j++) { 1204 if ( garray[B->j[j]] > cstart) break; 1205 for (l=0; l<bs; l++) { 1206 for (ll=0; ll<bs; ll++) { 1207 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1208 } 1209 } 1210 cnt += bs; 1211 } 1212 for (k=A->i[i]; k<A->i[i+1]; k++) { 1213 for (l=0; l<bs; l++) { 1214 for (ll=0; ll<bs; ll++) { 1215 column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1216 } 1217 } 1218 cnt += bs; 1219 } 1220 for (; j<B->i[i+1]; j++) { 1221 for (l=0; l<bs; l++) { 1222 for (ll=0; ll<bs; ll++) { 1223 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1224 } 1225 } 1226 cnt += bs; 1227 } 1228 cnt += (bs-1)*rlen; 1229 } 1230 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1231 1232 /* store the column values to the file */ 1233 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1234 if (!rank) { 1235 MPI_Status status; 1236 ierr = 
PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1237 for (i=1; i<size; i++) { 1238 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1239 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1240 ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1241 ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1242 } 1243 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1244 } else { 1245 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1246 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1247 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1248 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1249 } 1250 ierr = PetscFree(column_values);CHKERRQ(ierr); 1251 1252 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1253 if (file) { 1254 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1255 } 1256 PetscFunctionReturn(0); 1257 } 1258 1259 #undef __FUNCT__ 1260 #define __FUNCT__ "MatView_MPIBAIJ" 1261 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1262 { 1263 PetscErrorCode ierr; 1264 PetscBool iascii,isdraw,issocket,isbinary; 1265 1266 PetscFunctionBegin; 1267 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1268 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1269 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1270 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1271 if (iascii || isdraw || issocket) { 1272 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1273 } else if (isbinary) { 1274 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1275 } else { 1276 SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1277 } 1278 PetscFunctionReturn(0); 1279 } 1280 1281 #undef __FUNCT__ 1282 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1283 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1284 { 1285 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1286 PetscErrorCode ierr; 1287 1288 PetscFunctionBegin; 1289 #if defined(PETSC_USE_LOG) 1290 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1291 #endif 1292 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1293 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1294 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 1295 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1296 #if defined (PETSC_USE_CTABLE) 1297 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 1298 #else 1299 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1300 #endif 1301 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1302 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 1303 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1304 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1305 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1306 ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr); 1307 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1308 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1309 1310 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1311 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1312 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1313 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1314 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1315 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1316 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1317 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr); 1318 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1319 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C","",PETSC_NULL);CHKERRQ(ierr); 1320 PetscFunctionReturn(0); 1321 } 1322 1323 #undef __FUNCT__ 1324 #define __FUNCT__ "MatMult_MPIBAIJ" 1325 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1326 { 1327 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1328 PetscErrorCode ierr; 1329 PetscInt nt; 1330 1331 PetscFunctionBegin; 1332 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1333 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1334 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1335 if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1336 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1337 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1338 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1339 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1340 PetscFunctionReturn(0); 1341 } 1342 1343 #undef __FUNCT__ 1344 #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1345 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1346 { 1347 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1348 PetscErrorCode ierr; 1349 1350 PetscFunctionBegin; 1351 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1352 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1353 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1354 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1355 PetscFunctionReturn(0); 1356 } 1357 1358 #undef __FUNCT__ 1359 #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1360 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1361 { 1362 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1363 PetscErrorCode ierr; 1364 PetscBool merged; 1365 1366 PetscFunctionBegin; 1367 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1368 /* do nondiagonal part */ 1369 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1370 if (!merged) { 1371 /* send it on its way */ 1372 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1373 /* do local part */ 1374 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1375 /* receive remote parts: note this assumes the values are not actually */ 1376 /* inserted in yy until the next line */ 1377 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1378 } else { 1379 /* do local part */ 1380 ierr = 
(*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1381 /* send it on its way */ 1382 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1383 /* values actually were received in the Begin() but we need to call this nop */ 1384 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1385 } 1386 PetscFunctionReturn(0); 1387 } 1388 1389 #undef __FUNCT__ 1390 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1391 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1392 { 1393 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1394 PetscErrorCode ierr; 1395 1396 PetscFunctionBegin; 1397 /* do nondiagonal part */ 1398 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1399 /* send it on its way */ 1400 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1401 /* do local part */ 1402 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1403 /* receive remote parts: note this assumes the values are not actually */ 1404 /* inserted in yy until the next line, which is true for my implementation*/ 1405 /* but is not perhaps always true. */ 1406 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1407 PetscFunctionReturn(0); 1408 } 1409 1410 /* 1411 This only works correctly for square matrices where the subblock A->A is the 1412 diagonal block 1413 */ 1414 #undef __FUNCT__ 1415 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1416 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1417 { 1418 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1419 PetscErrorCode ierr; 1420 1421 PetscFunctionBegin; 1422 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1423 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1424 PetscFunctionReturn(0); 1425 } 1426 1427 #undef __FUNCT__ 1428 #define __FUNCT__ "MatScale_MPIBAIJ" 1429 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1430 { 1431 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1432 PetscErrorCode ierr; 1433 1434 PetscFunctionBegin; 1435 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1436 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1437 PetscFunctionReturn(0); 1438 } 1439 1440 #undef __FUNCT__ 1441 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1442 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1443 { 1444 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1445 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1446 PetscErrorCode ierr; 1447 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1448 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1449 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1450 1451 PetscFunctionBegin; 1452 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1453 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1454 mat->getrowactive = PETSC_TRUE; 1455 1456 if (!mat->rowvalues && (idx || v)) { 1457 /* 1458 allocate enough space to hold information from the longest row. 
1459 */ 1460 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1461 PetscInt max = 1,mbs = mat->mbs,tmp; 1462 for (i=0; i<mbs; i++) { 1463 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1464 if (max < tmp) { max = tmp; } 1465 } 1466 ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1467 } 1468 lrow = row - brstart; 1469 1470 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1471 if (!v) {pvA = 0; pvB = 0;} 1472 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1473 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1474 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1475 nztot = nzA + nzB; 1476 1477 cmap = mat->garray; 1478 if (v || idx) { 1479 if (nztot) { 1480 /* Sort by increasing column numbers, assuming A and B already sorted */ 1481 PetscInt imark = -1; 1482 if (v) { 1483 *v = v_p = mat->rowvalues; 1484 for (i=0; i<nzB; i++) { 1485 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1486 else break; 1487 } 1488 imark = i; 1489 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1490 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1491 } 1492 if (idx) { 1493 *idx = idx_p = mat->rowindices; 1494 if (imark > -1) { 1495 for (i=0; i<imark; i++) { 1496 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1497 } 1498 } else { 1499 for (i=0; i<nzB; i++) { 1500 if (cmap[cworkB[i]/bs] < cstart) 1501 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1502 else break; 1503 } 1504 imark = i; 1505 } 1506 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1507 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1508 } 1509 } else { 1510 if (idx) *idx = 0; 1511 if (v) *v = 0; 1512 } 1513 } 1514 *nz = nztot; 1515 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1516 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1517 PetscFunctionReturn(0); 1518 } 1519 1520 #undef __FUNCT__ 1521 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1522 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1523 { 1524 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1525 1526 PetscFunctionBegin; 1527 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1528 baij->getrowactive = PETSC_FALSE; 1529 PetscFunctionReturn(0); 1530 } 1531 1532 #undef __FUNCT__ 1533 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1534 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1535 { 1536 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1537 PetscErrorCode ierr; 1538 1539 PetscFunctionBegin; 1540 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1541 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1542 PetscFunctionReturn(0); 1543 } 1544 1545 #undef __FUNCT__ 1546 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1547 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1548 { 1549 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1550 Mat A = a->A,B = a->B; 1551 PetscErrorCode ierr; 1552 PetscReal isend[5],irecv[5]; 1553 1554 PetscFunctionBegin; 1555 info->block_size = (PetscReal)matin->rmap->bs; 1556 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1557 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1558 isend[3] = info->memory; isend[4] = info->mallocs; 1559 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1560 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1561 isend[3] += 
info->memory; isend[4] += info->mallocs; 1562 if (flag == MAT_LOCAL) { 1563 info->nz_used = isend[0]; 1564 info->nz_allocated = isend[1]; 1565 info->nz_unneeded = isend[2]; 1566 info->memory = isend[3]; 1567 info->mallocs = isend[4]; 1568 } else if (flag == MAT_GLOBAL_MAX) { 1569 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1570 info->nz_used = irecv[0]; 1571 info->nz_allocated = irecv[1]; 1572 info->nz_unneeded = irecv[2]; 1573 info->memory = irecv[3]; 1574 info->mallocs = irecv[4]; 1575 } else if (flag == MAT_GLOBAL_SUM) { 1576 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1577 info->nz_used = irecv[0]; 1578 info->nz_allocated = irecv[1]; 1579 info->nz_unneeded = irecv[2]; 1580 info->memory = irecv[3]; 1581 info->mallocs = irecv[4]; 1582 } else { 1583 SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1584 } 1585 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1586 info->fill_ratio_needed = 0; 1587 info->factor_mallocs = 0; 1588 PetscFunctionReturn(0); 1589 } 1590 1591 #undef __FUNCT__ 1592 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1593 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1594 { 1595 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1596 PetscErrorCode ierr; 1597 1598 PetscFunctionBegin; 1599 switch (op) { 1600 case MAT_NEW_NONZERO_LOCATIONS: 1601 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1602 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1603 case MAT_KEEP_NONZERO_PATTERN: 1604 case MAT_NEW_NONZERO_LOCATION_ERR: 1605 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1606 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1607 break; 1608 case MAT_ROW_ORIENTED: 1609 a->roworiented = flg; 1610 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1611 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1612 break; 1613 case MAT_NEW_DIAGONALS: 1614 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1615 break; 1616 case MAT_IGNORE_OFF_PROC_ENTRIES: 1617 a->donotstash = flg; 1618 break; 1619 case MAT_USE_HASH_TABLE: 1620 a->ht_flag = flg; 1621 break; 1622 case MAT_SYMMETRIC: 1623 case MAT_STRUCTURALLY_SYMMETRIC: 1624 case MAT_HERMITIAN: 1625 case MAT_SYMMETRY_ETERNAL: 1626 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1627 break; 1628 default: 1629 SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op); 1630 } 1631 PetscFunctionReturn(0); 1632 } 1633 1634 #undef __FUNCT__ 1635 #define __FUNCT__ "MatTranspose_MPIBAIJ" 1636 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1637 { 1638 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1639 Mat_SeqBAIJ *Aloc; 1640 Mat B; 1641 PetscErrorCode ierr; 1642 PetscInt M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1643 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1644 MatScalar *a; 1645 1646 PetscFunctionBegin; 1647 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1648 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1649 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1650 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1651 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1652 /* Do not know preallocation information, but must set block size */ 1653 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,PETSC_NULL,PETSC_DECIDE,PETSC_NULL);CHKERRQ(ierr); 1654 } else { 1655 B = *matout; 1656 } 1657 1658 
/* copy over the A part */ 1659 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1660 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1661 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1662 1663 for (i=0; i<mbs; i++) { 1664 rvals[0] = bs*(baij->rstartbs + i); 1665 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1666 for (j=ai[i]; j<ai[i+1]; j++) { 1667 col = (baij->cstartbs+aj[j])*bs; 1668 for (k=0; k<bs; k++) { 1669 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1670 col++; a += bs; 1671 } 1672 } 1673 } 1674 /* copy over the B part */ 1675 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1676 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1677 for (i=0; i<mbs; i++) { 1678 rvals[0] = bs*(baij->rstartbs + i); 1679 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1680 for (j=ai[i]; j<ai[i+1]; j++) { 1681 col = baij->garray[aj[j]]*bs; 1682 for (k=0; k<bs; k++) { 1683 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1684 col++; a += bs; 1685 } 1686 } 1687 } 1688 ierr = PetscFree(rvals);CHKERRQ(ierr); 1689 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1690 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1691 1692 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1693 *matout = B; 1694 } else { 1695 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1696 } 1697 PetscFunctionReturn(0); 1698 } 1699 1700 #undef __FUNCT__ 1701 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1702 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1703 { 1704 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1705 Mat a = baij->A,b = baij->B; 1706 PetscErrorCode ierr; 1707 PetscInt s1,s2,s3; 1708 1709 PetscFunctionBegin; 1710 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1711 if (rr) { 1712 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1713 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1714 /* Overlap communication with computation. 
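       The forward scatter of rr into baij->lvec is only begun here; it is completed after the
       diagonal block has been scaled below, so the off-process entries of the right scaling
       arrive while the local computation proceeds.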
*/ 1715 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1716 } 1717 if (ll) { 1718 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1719 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1720 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1721 } 1722 /* scale the diagonal block */ 1723 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1724 1725 if (rr) { 1726 /* Do a scatter end and then right scale the off-diagonal block */ 1727 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1728 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1729 } 1730 1731 PetscFunctionReturn(0); 1732 } 1733 1734 #undef __FUNCT__ 1735 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1736 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1737 { 1738 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1739 PetscErrorCode ierr; 1740 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1741 PetscInt i,*owners = A->rmap->range; 1742 PetscInt *nprocs,j,idx,nsends,row; 1743 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1744 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1; 1745 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap->rstart; 1746 MPI_Comm comm = ((PetscObject)A)->comm; 1747 MPI_Request *send_waits,*recv_waits; 1748 MPI_Status recv_status,*send_status; 1749 const PetscScalar *xx; 1750 PetscScalar *bb; 1751 #if defined(PETSC_DEBUG) 1752 PetscBool found = PETSC_FALSE; 1753 #endif 1754 1755 PetscFunctionBegin; 1756 /* first count number of contributors to each processor */ 1757 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1758 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1759 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1760 j = 0; 1761 for (i=0; i<N; i++) { 1762 if (lastidx > (idx = rows[i])) j = 0; 1763 lastidx = idx; 1764 for (; j<size; j++) { 1765 if (idx >= owners[j] && idx < owners[j+1]) { 1766 nprocs[2*j]++; 1767 nprocs[2*j+1] = 1; 1768 owner[i] = j; 1769 #if defined(PETSC_DEBUG) 1770 found = PETSC_TRUE; 1771 #endif 1772 break; 1773 } 1774 } 1775 #if defined(PETSC_DEBUG) 1776 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1777 found = PETSC_FALSE; 1778 #endif 1779 } 1780 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1781 1782 if (A->nooffproczerorows) { 1783 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 1784 nrecvs = nsends; 1785 nmax = N; 1786 } else { 1787 /* inform other processors of number of messages and max length*/ 1788 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1789 } 1790 1791 /* post receives: */ 1792 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1793 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1794 for (i=0; i<nrecvs; i++) { 1795 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1796 } 1797 1798 /* do sends: 1799 1) starts[i] gives the starting index in svalues for stuff going to 1800 the ith processor 1801 */ 1802 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1803 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1804 ierr = 
PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1805 starts[0] = 0; 1806 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1807 for (i=0; i<N; i++) { 1808 svalues[starts[owner[i]]++] = rows[i]; 1809 } 1810 1811 starts[0] = 0; 1812 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1813 count = 0; 1814 for (i=0; i<size; i++) { 1815 if (nprocs[2*i+1]) { 1816 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1817 } 1818 } 1819 ierr = PetscFree(starts);CHKERRQ(ierr); 1820 1821 base = owners[rank]; 1822 1823 /* wait on receives */ 1824 ierr = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr); 1825 count = nrecvs; 1826 slen = 0; 1827 while (count) { 1828 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1829 /* unpack receives into our local space */ 1830 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1831 source[imdex] = recv_status.MPI_SOURCE; 1832 lens[imdex] = n; 1833 slen += n; 1834 count--; 1835 } 1836 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1837 1838 /* move the data into the send scatter */ 1839 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1840 count = 0; 1841 for (i=0; i<nrecvs; i++) { 1842 values = rvalues + i*nmax; 1843 for (j=0; j<lens[i]; j++) { 1844 lrows[count++] = values[j] - base; 1845 } 1846 } 1847 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1848 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1849 ierr = PetscFree(owner);CHKERRQ(ierr); 1850 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1851 1852 /* fix right hand side if needed */ 1853 if (x && b) { 1854 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1855 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1856 for (i=0; i<slen; i++) { 1857 bb[lrows[i]] = diag*xx[lrows[i]]; 1858 } 1859 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1860 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1861 } 1862 1863 /* actually zap the local rows */ 1864 /* 1865 Zero the required rows. If the "diagonal block" of the matrix 1866 is square and the user wishes to set the diagonal we use separate 1867 code so that MatSetValues() is not called for each diagonal allocating 1868 new memory, thus calling lots of mallocs and slowing things down. 
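     Three cases are handled below: a nonzero diag with a square diagonal block, where
     MatZeroRows_SeqBAIJ() places diag on the diagonal itself; a nonzero diag with a rectangular
     diagonal block, where the rows are zeroed first and the diagonal entries are then inserted
     with one MatSetValues() call per row (new nonzero locations must be allowed for this); and
     diag == 0, where the rows are simply zeroed.  A typical call from user code is (sketch only,
     rows[] holding global row numbers owned by this process)
        ierr = MatZeroRows(A,nlocal,rows,1.0,x,b);CHKERRQ(ierr);
     which zeroes those rows, puts 1.0 on their diagonal and, because b is reset to diag*x in the
     zeroed rows, keeps the corresponding entries of the solution at the values given in x.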
1869 1870 */ 1871 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1872 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1873 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1874 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 1875 } else if (diag != 0.0) { 1876 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1877 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1878 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1879 for (i=0; i<slen; i++) { 1880 row = lrows[i] + rstart_bs; 1881 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1882 } 1883 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1884 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1885 } else { 1886 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1887 } 1888 1889 ierr = PetscFree(lrows);CHKERRQ(ierr); 1890 1891 /* wait on sends */ 1892 if (nsends) { 1893 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1894 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1895 ierr = PetscFree(send_status);CHKERRQ(ierr); 1896 } 1897 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1898 ierr = PetscFree(svalues);CHKERRQ(ierr); 1899 1900 PetscFunctionReturn(0); 1901 } 1902 1903 #undef __FUNCT__ 1904 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1905 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1906 { 1907 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1908 PetscErrorCode ierr; 1909 1910 PetscFunctionBegin; 1911 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1912 PetscFunctionReturn(0); 1913 } 1914 1915 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1916 1917 #undef __FUNCT__ 1918 #define __FUNCT__ "MatEqual_MPIBAIJ" 1919 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1920 { 1921 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1922 Mat a,b,c,d; 1923 PetscBool flg; 1924 PetscErrorCode ierr; 1925 1926 PetscFunctionBegin; 1927 a = matA->A; b = matA->B; 1928 c = matB->A; d = matB->B; 1929 1930 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1931 if (flg) { 1932 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1933 } 1934 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1935 PetscFunctionReturn(0); 1936 } 1937 1938 #undef __FUNCT__ 1939 #define __FUNCT__ "MatCopy_MPIBAIJ" 1940 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1941 { 1942 PetscErrorCode ierr; 1943 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1944 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1945 1946 PetscFunctionBegin; 1947 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
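       In that case, or when the nonzero pattern is not known to be identical, the generic
       MatCopy_Basic() path is taken; otherwise the diagonal and off-diagonal blocks are copied
       directly with MatCopy() on the sequential sub-matrices.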
*/ 1948 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1949 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1950 } else { 1951 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1952 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1953 } 1954 PetscFunctionReturn(0); 1955 } 1956 1957 #undef __FUNCT__ 1958 #define __FUNCT__ "MatSetUp_MPIBAIJ" 1959 PetscErrorCode MatSetUp_MPIBAIJ(Mat A) 1960 { 1961 PetscErrorCode ierr; 1962 1963 PetscFunctionBegin; 1964 ierr = MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1965 PetscFunctionReturn(0); 1966 } 1967 1968 #undef __FUNCT__ 1969 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1970 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1971 { 1972 PetscErrorCode ierr; 1973 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1974 PetscBLASInt bnz,one=1; 1975 Mat_SeqBAIJ *x,*y; 1976 1977 PetscFunctionBegin; 1978 if (str == SAME_NONZERO_PATTERN) { 1979 PetscScalar alpha = a; 1980 x = (Mat_SeqBAIJ *)xx->A->data; 1981 y = (Mat_SeqBAIJ *)yy->A->data; 1982 bnz = PetscBLASIntCast(x->nz); 1983 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1984 x = (Mat_SeqBAIJ *)xx->B->data; 1985 y = (Mat_SeqBAIJ *)yy->B->data; 1986 bnz = PetscBLASIntCast(x->nz); 1987 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1988 } else { 1989 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1990 } 1991 PetscFunctionReturn(0); 1992 } 1993 1994 #undef __FUNCT__ 1995 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ" 1996 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs) 1997 { 1998 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1999 PetscInt rbs,cbs; 2000 PetscErrorCode ierr; 2001 2002 PetscFunctionBegin; 2003 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 2004 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 2005 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 2006 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 2007 if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs); 2008 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs); 2009 PetscFunctionReturn(0); 2010 } 2011 2012 #undef __FUNCT__ 2013 #define __FUNCT__ "MatRealPart_MPIBAIJ" 2014 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 2015 { 2016 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2017 PetscErrorCode ierr; 2018 2019 PetscFunctionBegin; 2020 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2021 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2022 PetscFunctionReturn(0); 2023 } 2024 2025 #undef __FUNCT__ 2026 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 2027 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 2028 { 2029 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2030 PetscErrorCode ierr; 2031 2032 PetscFunctionBegin; 2033 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2034 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2035 PetscFunctionReturn(0); 2036 } 2037 2038 #undef __FUNCT__ 2039 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 2040 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 2041 { 2042 PetscErrorCode ierr; 2043 IS iscol_local; 2044 PetscInt csize; 2045 2046 PetscFunctionBegin; 2047 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2048 if (call == MAT_REUSE_MATRIX) { 2049 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 2050 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2051 
} else { 2052 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2053 } 2054 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 2055 if (call == MAT_INITIAL_MATRIX) { 2056 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 2057 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 2058 } 2059 PetscFunctionReturn(0); 2060 } 2061 2062 #undef __FUNCT__ 2063 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private" 2064 /* 2065 Not great since it makes two copies of the submatrix, first an SeqBAIJ 2066 in local and then by concatenating the local matrices the end result. 2067 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ() 2068 */ 2069 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2070 { 2071 PetscErrorCode ierr; 2072 PetscMPIInt rank,size; 2073 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 2074 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2075 Mat *local,M,Mreuse; 2076 MatScalar *vwork,*aa; 2077 MPI_Comm comm = ((PetscObject)mat)->comm; 2078 Mat_SeqBAIJ *aij; 2079 2080 2081 PetscFunctionBegin; 2082 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2083 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2084 2085 if (call == MAT_REUSE_MATRIX) { 2086 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2087 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2088 local = &Mreuse; 2089 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2090 } else { 2091 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2092 Mreuse = *local; 2093 ierr = PetscFree(local);CHKERRQ(ierr); 2094 } 2095 2096 /* 2097 m - number of local rows 2098 n - number of columns (same on all processors) 2099 rstart - first row in new global matrix generated 2100 */ 2101 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2102 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2103 m = m/bs; 2104 n = n/bs; 2105 2106 if (call == MAT_INITIAL_MATRIX) { 2107 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2108 ii = aij->i; 2109 jj = aij->j; 2110 2111 /* 2112 Determine the number of non-zeros in the diagonal and off-diagonal 2113 portions of the matrix in order to do correct preallocation 2114 */ 2115 2116 /* first get start and end of "diagonal" columns */ 2117 if (csize == PETSC_DECIDE) { 2118 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2119 if (mglobal == n*bs) { /* square matrix */ 2120 nlocal = m; 2121 } else { 2122 nlocal = n/size + ((n % size) > rank); 2123 } 2124 } else { 2125 nlocal = csize/bs; 2126 } 2127 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2128 rstart = rend - nlocal; 2129 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2130 2131 /* next, compute all the lengths */ 2132 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2133 olens = dlens + m; 2134 for (i=0; i<m; i++) { 2135 jend = ii[i+1] - ii[i]; 2136 olen = 0; 2137 dlen = 0; 2138 for (j=0; j<jend; j++) { 2139 if (*jj < rstart || *jj >= rend) olen++; 2140 else dlen++; 2141 jj++; 2142 } 2143 olens[i] = olen; 2144 dlens[i] = dlen; 2145 } 2146 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2147 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 2148 ierr = 
MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2149 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2150 ierr = PetscFree(dlens);CHKERRQ(ierr); 2151 } else { 2152 PetscInt ml,nl; 2153 2154 M = *newmat; 2155 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2156 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2157 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2158 /* 2159 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2160 rather than the slower MatSetValues(). 2161 */ 2162 M->was_assembled = PETSC_TRUE; 2163 M->assembled = PETSC_FALSE; 2164 } 2165 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2166 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2167 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2168 ii = aij->i; 2169 jj = aij->j; 2170 aa = aij->a; 2171 for (i=0; i<m; i++) { 2172 row = rstart/bs + i; 2173 nz = ii[i+1] - ii[i]; 2174 cwork = jj; jj += nz; 2175 vwork = aa; aa += nz; 2176 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2177 } 2178 2179 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2180 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2181 *newmat = M; 2182 2183 /* save submatrix used in processor for next request */ 2184 if (call == MAT_INITIAL_MATRIX) { 2185 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2186 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2187 } 2188 2189 PetscFunctionReturn(0); 2190 } 2191 2192 #undef __FUNCT__ 2193 #define __FUNCT__ "MatPermute_MPIBAIJ" 2194 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2195 { 2196 MPI_Comm comm,pcomm; 2197 PetscInt first,local_size,nrows; 2198 const PetscInt *rows; 2199 PetscMPIInt size; 2200 IS crowp,growp,irowp,lrowp,lcolp,icolp; 2201 PetscErrorCode ierr; 2202 2203 PetscFunctionBegin; 2204 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2205 /* make a collective version of 'rowp' */ 2206 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2207 if (pcomm==comm) { 2208 crowp = rowp; 2209 } else { 2210 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2211 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2212 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2213 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2214 } 2215 /* collect the global row permutation and invert it */ 2216 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 2217 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 2218 if (pcomm!=comm) { 2219 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 2220 } 2221 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 2222 /* get the local target indices */ 2223 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 2224 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 2225 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 2226 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 2227 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 2228 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 2229 /* the column permutation is so much easier; 2230 make a local version of 'colp' and invert it */ 2231 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2232 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2233 if (size==1) { 2234 lcolp = colp; 2235 } else { 2236 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 
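    /* colp lives on a parallel communicator: its indices are copied into a sequential IS below so
       that the inversion can be done locally.  Illustrative example (not executed): inverting
       p = {2,0,3,1} with ISInvertPermutation() yields {1,3,0,2}, since entry p[i] of the inverse
       holds i. */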
2237 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 2238 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr); 2239 } 2240 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2241 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 2242 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 2243 if (size>1) { 2244 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 2245 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 2246 } 2247 /* now we just get the submatrix */ 2248 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2249 /* clean up */ 2250 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 2251 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 2252 PetscFunctionReturn(0); 2253 } 2254 2255 #undef __FUNCT__ 2256 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2257 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2258 { 2259 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2260 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2261 2262 PetscFunctionBegin; 2263 if (nghosts) { *nghosts = B->nbs;} 2264 if (ghosts) {*ghosts = baij->garray;} 2265 PetscFunctionReturn(0); 2266 } 2267 2268 extern PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat); 2269 2270 #undef __FUNCT__ 2271 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ" 2272 /* 2273 This routine is almost identical to MatFDColoringCreate_MPIBAIJ()! 2274 */ 2275 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 2276 { 2277 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 2278 PetscErrorCode ierr; 2279 PetscMPIInt size,*ncolsonproc,*disp,nn; 2280 PetscInt bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col; 2281 const PetscInt *is; 2282 PetscInt nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj; 2283 PetscInt *rowhit,M,cstart,cend,colb; 2284 PetscInt *columnsforrow,l; 2285 IS *isa; 2286 PetscBool done,flg; 2287 ISLocalToGlobalMapping map = mat->cmap->bmapping; 2288 PetscInt *ltog = (map ? 
map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype; 2289 2290 PetscFunctionBegin; 2291 if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();"); 2292 if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock"); 2293 2294 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 2295 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2296 M = mat->rmap->n/bs; 2297 cstart = mat->cmap->rstart/bs; 2298 cend = mat->cmap->rend/bs; 2299 c->M = mat->rmap->N/bs; /* set the global rows and columns and local rows */ 2300 c->N = mat->cmap->N/bs; 2301 c->m = mat->rmap->n/bs; 2302 c->rstart = mat->rmap->rstart/bs; 2303 2304 c->ncolors = nis; 2305 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 2306 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 2307 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 2308 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 2309 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 2310 ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr); 2311 2312 /* Allow access to data structures of local part of matrix */ 2313 if (!baij->colmap) { 2314 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 2315 } 2316 ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2317 ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2318 2319 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 2320 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 2321 2322 for (i=0; i<nis; i++) { 2323 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 2324 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 2325 c->ncolumns[i] = n; 2326 if (n) { 2327 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 2328 ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr); 2329 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 2330 } else { 2331 c->columns[i] = 0; 2332 } 2333 2334 if (ctype == IS_COLORING_GLOBAL){ 2335 /* Determine the total (parallel) number of columns of this color */ 2336 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 2337 ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr); 2338 2339 nn = PetscMPIIntCast(n); 2340 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2341 nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];} 2342 if (!nctot) { 2343 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 2344 } 2345 2346 disp[0] = 0; 2347 for (j=1; j<size; j++) { 2348 disp[j] = disp[j-1] + ncolsonproc[j-1]; 2349 } 2350 2351 /* Get complete list of columns for color on each processor */ 2352 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2353 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2354 ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr); 2355 } else if (ctype == IS_COLORING_GHOSTED){ 2356 /* Determine local number of columns of this color on this process, including ghost points */ 2357 nctot = n; 2358 ierr = 
PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2359 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 2360 } else { 2361 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 2362 } 2363 2364 /* 2365 Mark all rows affect by these columns 2366 */ 2367 /* Temporary option to allow for debugging/testing */ 2368 flg = PETSC_FALSE; 2369 ierr = PetscOptionsGetBool(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr); 2370 if (!flg) {/*-----------------------------------------------------------------------------*/ 2371 /* crude, fast version */ 2372 ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr); 2373 /* loop over columns*/ 2374 for (j=0; j<nctot; j++) { 2375 if (ctype == IS_COLORING_GHOSTED) { 2376 col = ltog[cols[j]]; 2377 } else { 2378 col = cols[j]; 2379 } 2380 if (col >= cstart && col < cend) { 2381 /* column is in diagonal block of matrix */ 2382 rows = A_cj + A_ci[col-cstart]; 2383 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2384 } else { 2385 #if defined (PETSC_USE_CTABLE) 2386 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2387 colb --; 2388 #else 2389 colb = baij->colmap[col] - 1; 2390 #endif 2391 if (colb == -1) { 2392 m = 0; 2393 } else { 2394 colb = colb/bs; 2395 rows = B_cj + B_ci[colb]; 2396 m = B_ci[colb+1] - B_ci[colb]; 2397 } 2398 } 2399 /* loop over columns marking them in rowhit */ 2400 for (k=0; k<m; k++) { 2401 rowhit[*rows++] = col + 1; 2402 } 2403 } 2404 2405 /* count the number of hits */ 2406 nrows = 0; 2407 for (j=0; j<M; j++) { 2408 if (rowhit[j]) nrows++; 2409 } 2410 c->nrows[i] = nrows; 2411 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2412 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2413 ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2414 nrows = 0; 2415 for (j=0; j<M; j++) { 2416 if (rowhit[j]) { 2417 c->rows[i][nrows] = j; 2418 c->columnsforrow[i][nrows] = rowhit[j] - 1; 2419 nrows++; 2420 } 2421 } 2422 } else {/*-------------------------------------------------------------------------------*/ 2423 /* slow version, using rowhit as a linked list */ 2424 PetscInt currentcol,fm,mfm; 2425 rowhit[M] = M; 2426 nrows = 0; 2427 /* loop over columns*/ 2428 for (j=0; j<nctot; j++) { 2429 if (ctype == IS_COLORING_GHOSTED) { 2430 col = ltog[cols[j]]; 2431 } else { 2432 col = cols[j]; 2433 } 2434 if (col >= cstart && col < cend) { 2435 /* column is in diagonal block of matrix */ 2436 rows = A_cj + A_ci[col-cstart]; 2437 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2438 } else { 2439 #if defined (PETSC_USE_CTABLE) 2440 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2441 colb --; 2442 #else 2443 colb = baij->colmap[col] - 1; 2444 #endif 2445 if (colb == -1) { 2446 m = 0; 2447 } else { 2448 colb = colb/bs; 2449 rows = B_cj + B_ci[colb]; 2450 m = B_ci[colb+1] - B_ci[colb]; 2451 } 2452 } 2453 2454 /* loop over columns marking them in rowhit */ 2455 fm = M; /* fm points to first entry in linked list */ 2456 for (k=0; k<m; k++) { 2457 currentcol = *rows++; 2458 /* is it already in the list? 
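            rowhit[] is used as a sorted singly linked list threaded through the block-row indices,
            with rowhit[M] = M as the end sentinel, so the test below walks the list until it
            reaches an entry that is not smaller than currentcol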
*/ 2459 do { 2460 mfm = fm; 2461 fm = rowhit[fm]; 2462 } while (fm < currentcol); 2463 /* not in list so add it */ 2464 if (fm != currentcol) { 2465 nrows++; 2466 columnsforrow[currentcol] = col; 2467 /* next three lines insert new entry into linked list */ 2468 rowhit[mfm] = currentcol; 2469 rowhit[currentcol] = fm; 2470 fm = currentcol; 2471 /* fm points to present position in list since we know the columns are sorted */ 2472 } else { 2473 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected"); 2474 } 2475 } 2476 } 2477 c->nrows[i] = nrows; 2478 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2479 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2480 ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2481 /* now store the linked list of rows into c->rows[i] */ 2482 nrows = 0; 2483 fm = rowhit[M]; 2484 do { 2485 c->rows[i][nrows] = fm; 2486 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 2487 fm = rowhit[fm]; 2488 } while (fm < M); 2489 } /* ---------------------------------------------------------------------------------------*/ 2490 ierr = PetscFree(cols);CHKERRQ(ierr); 2491 } 2492 2493 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 2494 /* 2495 vscale will contain the "diagonal" on processor scalings followed by the off processor 2496 */ 2497 if (ctype == IS_COLORING_GLOBAL) { 2498 PetscInt *garray; 2499 ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2500 for (i=0; i<baij->B->cmap->n/bs; i++) { 2501 for (j=0; j<bs; j++) { 2502 garray[i*bs+j] = bs*baij->garray[i]+j; 2503 } 2504 } 2505 ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr); 2506 ierr = PetscFree(garray);CHKERRQ(ierr); 2507 CHKMEMQ; 2508 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2509 for (k=0; k<c->ncolors; k++) { 2510 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2511 for (l=0; l<c->nrows[k]; l++) { 2512 col = c->columnsforrow[k][l]; 2513 if (col >= cstart && col < cend) { 2514 /* column is in diagonal block of matrix */ 2515 colb = col - cstart; 2516 } else { 2517 /* column is in "off-processor" part */ 2518 #if defined (PETSC_USE_CTABLE) 2519 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2520 colb --; 2521 #else 2522 colb = baij->colmap[col] - 1; 2523 #endif 2524 colb = colb/bs; 2525 colb += cend - cstart; 2526 } 2527 c->vscaleforrow[k][l] = colb; 2528 } 2529 } 2530 } else if (ctype == IS_COLORING_GHOSTED) { 2531 /* Get gtol mapping */ 2532 PetscInt N = mat->cmap->N, *gtol; 2533 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr); 2534 for (i=0; i<N; i++) gtol[i] = -1; 2535 for (i=0; i<map->n; i++) gtol[ltog[i]] = i; 2536 2537 c->vscale = 0; /* will be created in MatFDColoringApply() */ 2538 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2539 for (k=0; k<c->ncolors; k++) { 2540 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2541 for (l=0; l<c->nrows[k]; l++) { 2542 col = c->columnsforrow[k][l]; /* global column index */ 2543 c->vscaleforrow[k][l] = gtol[col]; /* local column index */ 2544 } 2545 } 2546 ierr = PetscFree(gtol);CHKERRQ(ierr); 2547 } 2548 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 2549 2550 ierr = PetscFree(rowhit);CHKERRQ(ierr); 2551 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 2552
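  /* give back the compressed column structure obtained with MatGetColumnIJ() near the start of this routine */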
ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2553 ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2554 CHKMEMQ; 2555 PetscFunctionReturn(0); 2556 } 2557 2558 #undef __FUNCT__ 2559 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ" 2560 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat) 2561 { 2562 Mat B; 2563 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2564 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2565 Mat_SeqAIJ *b; 2566 PetscErrorCode ierr; 2567 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2568 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2569 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2570 2571 PetscFunctionBegin; 2572 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2573 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2574 2575 /* ---------------------------------------------------------------- 2576 Tell every processor the number of nonzeros per row 2577 */ 2578 ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 2579 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2580 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2581 } 2582 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2583 ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr); 2584 displs = recvcounts + size; 2585 for (i=0; i<size; i++) { 2586 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2587 displs[i] = A->rmap->range[i]/bs; 2588 } 2589 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2590 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2591 #else 2592 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2593 #endif 2594 /* --------------------------------------------------------------- 2595 Create the sequential matrix of the same type as the local block diagonal 2596 */ 2597 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2598 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2599 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2600 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2601 b = (Mat_SeqAIJ *)B->data; 2602 2603 /*-------------------------------------------------------------------- 2604 Copy my part of matrix column indices over 2605 */ 2606 sendcount = ad->nz + bd->nz; 2607 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2608 a_jsendbuf = ad->j; 2609 b_jsendbuf = bd->j; 2610 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2611 cnt = 0; 2612 for (i=0; i<n; i++) { 2613 2614 /* put in lower diagonal portion */ 2615 m = bd->i[i+1] - bd->i[i]; 2616 while (m > 0) { 2617 /* is it above diagonal (in bd (compressed) numbering) */ 2618 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2619 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2620 m--; 2621 } 2622 2623 /* put in diagonal portion */ 2624 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2625 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2626 } 2627 2628 /* put in upper diagonal portion */ 2629 while (m-- > 0) { 2630 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2631 } 2632 } 2633 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D 
actual nz %D",sendcount,cnt); 2634 2635 /*-------------------------------------------------------------------- 2636 Gather all column indices to all processors 2637 */ 2638 for (i=0; i<size; i++) { 2639 recvcounts[i] = 0; 2640 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2641 recvcounts[i] += lens[j]; 2642 } 2643 } 2644 displs[0] = 0; 2645 for (i=1; i<size; i++) { 2646 displs[i] = displs[i-1] + recvcounts[i-1]; 2647 } 2648 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2649 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2650 #else 2651 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2652 #endif 2653 /*-------------------------------------------------------------------- 2654 Assemble the matrix into useable form (note numerical values not yet set) 2655 */ 2656 /* set the b->ilen (length of each row) values */ 2657 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2658 /* set the b->i indices */ 2659 b->i[0] = 0; 2660 for (i=1; i<=A->rmap->N/bs; i++) { 2661 b->i[i] = b->i[i-1] + lens[i-1]; 2662 } 2663 ierr = PetscFree(lens);CHKERRQ(ierr); 2664 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2665 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2666 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2667 2668 if (A->symmetric){ 2669 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2670 } else if (A->hermitian) { 2671 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2672 } else if (A->structurally_symmetric) { 2673 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2674 } 2675 *newmat = B; 2676 PetscFunctionReturn(0); 2677 } 2678 2679 #undef __FUNCT__ 2680 #define __FUNCT__ "MatSOR_MPIBAIJ" 2681 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2682 { 2683 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2684 PetscErrorCode ierr; 2685 Vec bb1 = 0; 2686 2687 PetscFunctionBegin; 2688 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2689 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2690 } 2691 2692 if (flag == SOR_APPLY_UPPER) { 2693 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2694 PetscFunctionReturn(0); 2695 } 2696 2697 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2698 if (flag & SOR_ZERO_INITIAL_GUESS) { 2699 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2700 its--; 2701 } 2702 2703 while (its--) { 2704 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2705 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2706 2707 /* update rhs: bb1 = bb - B*x */ 2708 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2709 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2710 2711 /* local sweep */ 2712 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2713 } 2714 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 2715 if (flag & SOR_ZERO_INITIAL_GUESS) { 2716 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2717 its--; 2718 } 2719 while (its--) { 2720 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2721 ierr = 
VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2722 2723 /* update rhs: bb1 = bb - B*x */ 2724 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2725 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2726 2727 /* local sweep */ 2728 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2729 } 2730 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 2731 if (flag & SOR_ZERO_INITIAL_GUESS) { 2732 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2733 its--; 2734 } 2735 while (its--) { 2736 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2737 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2738 2739 /* update rhs: bb1 = bb - B*x */ 2740 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2741 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2742 2743 /* local sweep */ 2744 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2745 } 2746 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2747 2748 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2749 PetscFunctionReturn(0); 2750 } 2751 2752 extern PetscErrorCode MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2753 2754 #undef __FUNCT__ 2755 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ" 2756 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,PetscScalar **values) 2757 { 2758 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 2759 PetscErrorCode ierr; 2760 2761 PetscFunctionBegin; 2762 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2763 PetscFunctionReturn(0); 2764 } 2765 2766 2767 /* -------------------------------------------------------------------*/ 2768 static struct _MatOps MatOps_Values = { 2769 MatSetValues_MPIBAIJ, 2770 MatGetRow_MPIBAIJ, 2771 MatRestoreRow_MPIBAIJ, 2772 MatMult_MPIBAIJ, 2773 /* 4*/ MatMultAdd_MPIBAIJ, 2774 MatMultTranspose_MPIBAIJ, 2775 MatMultTransposeAdd_MPIBAIJ, 2776 0, 2777 0, 2778 0, 2779 /*10*/ 0, 2780 0, 2781 0, 2782 MatSOR_MPIBAIJ, 2783 MatTranspose_MPIBAIJ, 2784 /*15*/ MatGetInfo_MPIBAIJ, 2785 MatEqual_MPIBAIJ, 2786 MatGetDiagonal_MPIBAIJ, 2787 MatDiagonalScale_MPIBAIJ, 2788 MatNorm_MPIBAIJ, 2789 /*20*/ MatAssemblyBegin_MPIBAIJ, 2790 MatAssemblyEnd_MPIBAIJ, 2791 MatSetOption_MPIBAIJ, 2792 MatZeroEntries_MPIBAIJ, 2793 /*24*/ MatZeroRows_MPIBAIJ, 2794 0, 2795 0, 2796 0, 2797 0, 2798 /*29*/ MatSetUp_MPIBAIJ, 2799 0, 2800 0, 2801 0, 2802 0, 2803 /*34*/ MatDuplicate_MPIBAIJ, 2804 0, 2805 0, 2806 0, 2807 0, 2808 /*39*/ MatAXPY_MPIBAIJ, 2809 MatGetSubMatrices_MPIBAIJ, 2810 MatIncreaseOverlap_MPIBAIJ, 2811 MatGetValues_MPIBAIJ, 2812 MatCopy_MPIBAIJ, 2813 /*44*/ 0, 2814 MatScale_MPIBAIJ, 2815 0, 2816 0, 2817 0, 2818 /*49*/ MatSetBlockSize_MPIBAIJ, 2819 0, 2820 0, 2821 0, 2822 0, 2823 /*54*/ MatFDColoringCreate_MPIBAIJ, 2824 0, 2825 MatSetUnfactored_MPIBAIJ, 2826 MatPermute_MPIBAIJ, 2827 MatSetValuesBlocked_MPIBAIJ, 2828 /*59*/ MatGetSubMatrix_MPIBAIJ, 2829 MatDestroy_MPIBAIJ, 2830 MatView_MPIBAIJ, 2831 0, 2832 0, 2833 /*64*/ 0, 2834 0, 2835 0, 2836 0, 2837 0, 2838 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2839 0, 2840 0, 2841 0, 2842 0, 2843 /*74*/ 0, 2844 MatFDColoringApply_BAIJ, 2845 0, 2846 0, 2847 0, 2848 /*79*/ 0, 2849 0, 2850 0, 2851 0, 2852 MatLoad_MPIBAIJ, 2853 /*84*/ 0, 2854 0, 2855 0, 2856 0, 2857 0, 2858 /*89*/ 0, 2859 0, 2860 0, 2861 0, 2862 0, 2863 /*94*/ 0, 2864 0, 2865 0, 2866 
0, 2867 0, 2868 /*99*/ 0, 2869 0, 2870 0, 2871 0, 2872 0, 2873 /*104*/0, 2874 MatRealPart_MPIBAIJ, 2875 MatImaginaryPart_MPIBAIJ, 2876 0, 2877 0, 2878 /*109*/0, 2879 0, 2880 0, 2881 0, 2882 0, 2883 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ, 2884 0, 2885 MatGetGhosts_MPIBAIJ, 2886 0, 2887 0, 2888 /*119*/0, 2889 0, 2890 0, 2891 0, 2892 0, 2893 /*124*/0, 2894 0, 2895 MatInvertBlockDiagonal_MPIBAIJ 2896 }; 2897 2898 EXTERN_C_BEGIN 2899 #undef __FUNCT__ 2900 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2901 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 2902 { 2903 PetscFunctionBegin; 2904 *a = ((Mat_MPIBAIJ *)A->data)->A; 2905 PetscFunctionReturn(0); 2906 } 2907 EXTERN_C_END 2908 2909 EXTERN_C_BEGIN 2910 extern PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2911 EXTERN_C_END 2912 2913 EXTERN_C_BEGIN 2914 #undef __FUNCT__ 2915 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2916 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2917 { 2918 PetscInt m,rstart,cstart,cend; 2919 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2920 const PetscInt *JJ=0; 2921 PetscScalar *values=0; 2922 PetscErrorCode ierr; 2923 2924 PetscFunctionBegin; 2925 2926 if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2927 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2928 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2929 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2930 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2931 m = B->rmap->n/bs; 2932 rstart = B->rmap->rstart/bs; 2933 cstart = B->cmap->rstart/bs; 2934 cend = B->cmap->rend/bs; 2935 2936 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2937 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 2938 for (i=0; i<m; i++) { 2939 nz = ii[i+1] - ii[i]; 2940 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2941 nz_max = PetscMax(nz_max,nz); 2942 JJ = jj + ii[i]; 2943 for (j=0; j<nz; j++) { 2944 if (*JJ >= cstart) break; 2945 JJ++; 2946 } 2947 d = 0; 2948 for (; j<nz; j++) { 2949 if (*JJ++ >= cend) break; 2950 d++; 2951 } 2952 d_nnz[i] = d; 2953 o_nnz[i] = nz - d; 2954 } 2955 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2956 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2957 2958 values = (PetscScalar*)V; 2959 if (!values) { 2960 ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2961 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2962 } 2963 for (i=0; i<m; i++) { 2964 PetscInt row = i + rstart; 2965 PetscInt ncols = ii[i+1] - ii[i]; 2966 const PetscInt *icols = jj + ii[i]; 2967 const PetscScalar *svals = values + (V ? 
(bs*bs*ii[i]) : 0); 2968 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2969 } 2970 2971 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2972 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2973 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2974 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2975 PetscFunctionReturn(0); 2976 } 2977 EXTERN_C_END 2978 2979 #undef __FUNCT__ 2980 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2981 /*@C 2982 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2983 (the default parallel PETSc format). 2984 2985 Collective on MPI_Comm 2986 2987 Input Parameters: 2988 + A - the matrix 2989 . bs - the block size 2990 . i - the indices into j for the start of each local row (starts with zero) 2991 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2992 - v - optional values in the matrix 2993 2994 Level: developer 2995 2996 .keywords: matrix, aij, compressed row, sparse, parallel 2997 2998 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2999 @*/ 3000 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3001 { 3002 PetscErrorCode ierr; 3003 3004 PetscFunctionBegin; 3005 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3006 PetscValidType(B,1); 3007 PetscValidLogicalCollectiveInt(B,bs,2); 3008 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3009 PetscFunctionReturn(0); 3010 } 3011 3012 EXTERN_C_BEGIN 3013 #undef __FUNCT__ 3014 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 3015 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 3016 { 3017 Mat_MPIBAIJ *b; 3018 PetscErrorCode ierr; 3019 PetscInt i, newbs = PetscAbs(bs); 3020 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3021 3022 PetscFunctionBegin; 3023 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3024 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3025 if (bs < 0) { 3026 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr); 3027 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 3028 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3029 bs = PetscAbs(bs); 3030 } 3031 if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz"); 3032 bs = newbs; 3033 3034 3035 if (bs < 1) SETERRQ(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 3036 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3037 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3038 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3039 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3040 3041 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3042 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3043 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3044 ierr = 
PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3045 3046 if (d_nnz) { 3047 for (i=0; i<B->rmap->n/bs; i++) { 3048 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 3049 } 3050 } 3051 if (o_nnz) { 3052 for (i=0; i<B->rmap->n/bs; i++) { 3053 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 3054 } 3055 } 3056 3057 b = (Mat_MPIBAIJ*)B->data; 3058 b->bs2 = bs*bs; 3059 b->mbs = B->rmap->n/bs; 3060 b->nbs = B->cmap->n/bs; 3061 b->Mbs = B->rmap->N/bs; 3062 b->Nbs = B->cmap->N/bs; 3063 3064 for (i=0; i<=b->size; i++) { 3065 b->rangebs[i] = B->rmap->range[i]/bs; 3066 } 3067 b->rstartbs = B->rmap->rstart/bs; 3068 b->rendbs = B->rmap->rend/bs; 3069 b->cstartbs = B->cmap->rstart/bs; 3070 b->cendbs = B->cmap->rend/bs; 3071 3072 if (!B->preallocated) { 3073 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3074 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3075 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 3076 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3077 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3078 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3079 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 3080 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3081 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 3082 } 3083 3084 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3085 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 3086 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). 
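     Only when no sizes were supplied for a block (neither d_nz/d_nnz nor o_nz/o_nnz) is
     MAT_NEW_NONZERO_ALLOCATION_ERR switched off for that block, so the default preallocation
     chosen above may be exceeded without generating an error.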
*/ 3087 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3088 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3089 B->preallocated = PETSC_TRUE; 3090 PetscFunctionReturn(0); 3091 } 3092 EXTERN_C_END 3093 3094 EXTERN_C_BEGIN 3095 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 3096 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 3097 EXTERN_C_END 3098 3099 3100 EXTERN_C_BEGIN 3101 #undef __FUNCT__ 3102 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 3103 PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj) 3104 { 3105 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 3106 PetscErrorCode ierr; 3107 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 3108 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 3109 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 3110 3111 PetscFunctionBegin; 3112 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr); 3113 ii[0] = 0; 3114 CHKMEMQ; 3115 for (i=0; i<M; i++) { 3116 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 3117 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 3118 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 3119 /* remove one from count of matrix has diagonal */ 3120 for (j=id[i]; j<id[i+1]; j++) { 3121 if (jd[j] == i) {ii[i+1]--;break;} 3122 } 3123 CHKMEMQ; 3124 } 3125 ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3126 cnt = 0; 3127 for (i=0; i<M; i++) { 3128 for (j=io[i]; j<io[i+1]; j++) { 3129 if (garray[jo[j]] > rstart) break; 3130 jj[cnt++] = garray[jo[j]]; 3131 CHKMEMQ; 3132 } 3133 for (k=id[i]; k<id[i+1]; k++) { 3134 if (jd[k] != i) { 3135 jj[cnt++] = rstart + jd[k]; 3136 CHKMEMQ; 3137 } 3138 } 3139 for (;j<io[i+1]; j++) { 3140 jj[cnt++] = garray[jo[j]]; 3141 CHKMEMQ; 3142 } 3143 } 3144 ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr); 3145 PetscFunctionReturn(0); 3146 } 3147 EXTERN_C_END 3148 3149 #include <../src/mat/impls/aij/mpi/mpiaij.h> 3150 EXTERN_C_BEGIN 3151 PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*); 3152 EXTERN_C_END 3153 3154 EXTERN_C_BEGIN 3155 #undef __FUNCT__ 3156 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ" 3157 PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,const MatType newtype,MatReuse reuse,Mat *newmat) 3158 { 3159 PetscErrorCode ierr; 3160 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3161 Mat B; 3162 Mat_MPIAIJ *b; 3163 3164 PetscFunctionBegin; 3165 if (!A->assembled) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Matrix must be assembled"); 3166 3167 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 3168 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3169 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 3170 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3171 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 3172 b = (Mat_MPIAIJ*) B->data; 3173 3174 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 3175 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 3176 ierr = DisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 3177 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 3178 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, 
MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 3179 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3180 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3181 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3182 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3183 if (reuse == MAT_REUSE_MATRIX) { 3184 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 3185 } else { 3186 *newmat = B; 3187 } 3188 PetscFunctionReturn(0); 3189 } 3190 EXTERN_C_END 3191 3192 EXTERN_C_BEGIN 3193 #if defined(PETSC_HAVE_MUMPS) 3194 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3195 #endif 3196 EXTERN_C_END 3197 3198 /*MC 3199 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 3200 3201 Options Database Keys: 3202 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 3203 . -mat_block_size <bs> - set the blocksize used to store the matrix 3204 - -mat_use_hash_table <fact> 3205 3206 Level: beginner 3207 3208 .seealso: MatCreateMPIBAIJ 3209 M*/ 3210 3211 EXTERN_C_BEGIN 3212 extern PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,const MatType,MatReuse,Mat*); 3213 EXTERN_C_END 3214 3215 EXTERN_C_BEGIN 3216 #undef __FUNCT__ 3217 #define __FUNCT__ "MatCreate_MPIBAIJ" 3218 PetscErrorCode MatCreate_MPIBAIJ(Mat B) 3219 { 3220 Mat_MPIBAIJ *b; 3221 PetscErrorCode ierr; 3222 PetscBool flg; 3223 3224 PetscFunctionBegin; 3225 ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr); 3226 B->data = (void*)b; 3227 3228 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3229 B->assembled = PETSC_FALSE; 3230 3231 B->insertmode = NOT_SET_VALUES; 3232 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 3233 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 3234 3235 /* build local table of row and column ownerships */ 3236 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 3237 3238 /* build cache for off array entries formed */ 3239 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 3240 b->donotstash = PETSC_FALSE; 3241 b->colmap = PETSC_NULL; 3242 b->garray = PETSC_NULL; 3243 b->roworiented = PETSC_TRUE; 3244 3245 /* stuff used in block assembly */ 3246 b->barray = 0; 3247 3248 /* stuff used for matrix vector multiply */ 3249 b->lvec = 0; 3250 b->Mvctx = 0; 3251 3252 /* stuff for MatGetRow() */ 3253 b->rowindices = 0; 3254 b->rowvalues = 0; 3255 b->getrowactive = PETSC_FALSE; 3256 3257 /* hash table stuff */ 3258 b->ht = 0; 3259 b->hd = 0; 3260 b->ht_size = 0; 3261 b->ht_flag = PETSC_FALSE; 3262 b->ht_fact = 0; 3263 b->ht_total_ct = 0; 3264 b->ht_insert_ct = 0; 3265 3266 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 3267 b->ijonly = PETSC_FALSE; 3268 3269 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 3270 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 3271 if (flg) { 3272 PetscReal fact = 1.39; 3273 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 3274 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 3275 if (fact <= 1.0) fact = 1.39; 3276 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 3277 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 3278 
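    /*
       Example (an illustrative sketch, not part of the constructor logic; A denotes a
       user's MATMPIBAIJ matrix): this hash-table assembly path is enabled either with
       the command line option "-mat_use_hash_table <fact>" handled just above, or
       programmatically with

          ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
          ierr = MatMPIBAIJSetHashTableFactor(A,1.39);CHKERRQ(ierr);

       In the option handling above, a factor <= 1.0 is replaced by the default 1.39.
    */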
} 3279 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3280 3281 #if defined(PETSC_HAVE_MUMPS) 3282 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr); 3283 #endif 3284 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C", 3285 "MatConvert_MPIBAIJ_MPIAdj", 3286 MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3287 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiaij_C", 3288 "MatConvert_MPIBAIJ_MPIAIJ", 3289 MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 3290 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C", 3291 "MatConvert_MPIBAIJ_MPISBAIJ", 3292 MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr); 3293 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3294 "MatStoreValues_MPIBAIJ", 3295 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3296 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3297 "MatRetrieveValues_MPIBAIJ", 3298 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3299 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3300 "MatGetDiagonalBlock_MPIBAIJ", 3301 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3302 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 3303 "MatMPIBAIJSetPreallocation_MPIBAIJ", 3304 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3305 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 3306 "MatMPIBAIJSetPreallocationCSR_MPIBAIJ", 3307 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3308 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3309 "MatDiagonalScaleLocal_MPIBAIJ", 3310 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3311 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 3312 "MatSetHashTableFactor_MPIBAIJ", 3313 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3314 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C", 3315 "MatConvert_MPIBAIJ_MPIBSTRM", 3316 MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr); 3317 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3318 PetscFunctionReturn(0); 3319 } 3320 EXTERN_C_END 3321 3322 /*MC 3323 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3324 3325 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3326 and MATMPIBAIJ otherwise. 3327 3328 Options Database Keys: 3329 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3330 3331 Level: beginner 3332 3333 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3334 M*/ 3335 3336 #undef __FUNCT__ 3337 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 3338 /*@C 3339 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3340 (block compressed row). For good matrix assembly performance 3341 the user should preallocate the matrix storage by setting the parameters 3342 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3343 performance can be increased by more than a factor of 50. 3344 3345 Collective on Mat 3346 3347 Input Parameters: 3348 + A - the matrix 3349 . bs - size of blockk 3350 . 
d_nz  - number of block nonzeros per block row in the diagonal portion of the local
           submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row), or PETSC_NULL. If you plan to factor the matrix you must leave room for the diagonal entry and
           set it even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of the local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of block nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row), or PETSC_NULL.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   Options Database Keys:
+  -mat_block_size - size of the blocks to use
-  -mat_use_hash_table <fact> - use a hash table during assembly (fact is the hash table factor)

   Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
            0 1 2 3 4 5 6 7 8 9 10 11
           -------------------
   row 3  | o o o d d d o o o o o o
   row 4  | o o o d d d o o o o o o
   row 5  | o o o d d d o o o o o o
           -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.
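
   Example usage (an illustrative sketch; comm and the sizes m, n, M, N are assumed to be
   set by the caller):

.vb
   Mat      A;
   PetscInt bs = 3, d_nz = 2, o_nz = 1;

   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
.ve

   Pass d_nnz and o_nnz arrays (of length equal to the number of local block rows) instead
   of d_nz and o_nz when the number of nonzero blocks varies from block row to block row.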
   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
@*/
PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIBAIJ"
/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzero blocks per block row in the diagonal portion of the local
           submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row), or PETSC_NULL. If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
.  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of the local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row), or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+  -mat_block_size - size of the blocks to use
-  -mat_use_hash_table <fact> - use a hash table during assembly (fact is the hash table factor)

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored.

   A nonzero block is any block that has 1 or more nonzeros in it.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
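
   For example, an illustrative sketch (comm, bs, m, n, d_nz and o_nz are assumed to be set
   by the caller; the recommended MatCreate()/MatSetType()/MatMPIBAIJSetPreallocation()
   sequence mentioned above is equivalent):

.vb
   Mat A;
   ierr = MatCreateMPIBAIJ(comm,bs,m,n,PETSC_DETERMINE,PETSC_DETERMINE,d_nz,PETSC_NULL,o_nz,PETSC_NULL,&A);CHKERRQ(ierr);
.ve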
3486 3487 Storage Information: 3488 For a square global matrix we define each processor's diagonal portion 3489 to be its local rows and the corresponding columns (a square submatrix); 3490 each processor's off-diagonal portion encompasses the remainder of the 3491 local matrix (a rectangular submatrix). 3492 3493 The user can specify preallocated storage for the diagonal part of 3494 the local submatrix with either d_nz or d_nnz (not both). Set 3495 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3496 memory allocation. Likewise, specify preallocated storage for the 3497 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3498 3499 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3500 the figure below we depict these three local rows and all columns (0-11). 3501 3502 .vb 3503 0 1 2 3 4 5 6 7 8 9 10 11 3504 ------------------- 3505 row 3 | o o o d d d o o o o o o 3506 row 4 | o o o d d d o o o o o o 3507 row 5 | o o o d d d o o o o o o 3508 ------------------- 3509 .ve 3510 3511 Thus, any entries in the d locations are stored in the d (diagonal) 3512 submatrix, and any entries in the o locations are stored in the 3513 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3514 stored simply in the MATSEQBAIJ format for compressed row storage. 3515 3516 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3517 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3518 In general, for PDE problems in which most nonzeros are near the diagonal, 3519 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3520 or you will get TERRIBLE performance; see the users' manual chapter on 3521 matrices. 3522 3523 Level: intermediate 3524 3525 .keywords: matrix, block, aij, compressed row, sparse, parallel 3526 3527 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3528 @*/ 3529 PetscErrorCode MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3530 { 3531 PetscErrorCode ierr; 3532 PetscMPIInt size; 3533 3534 PetscFunctionBegin; 3535 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3536 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3537 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3538 if (size > 1) { 3539 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3540 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3541 } else { 3542 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3543 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3544 } 3545 PetscFunctionReturn(0); 3546 } 3547 3548 #undef __FUNCT__ 3549 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3550 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3551 { 3552 Mat mat; 3553 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3554 PetscErrorCode ierr; 3555 PetscInt len=0; 3556 3557 PetscFunctionBegin; 3558 *newmat = 0; 3559 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3560 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3561 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3562 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3563 3564 mat->factortype = matin->factortype; 3565 mat->preallocated = PETSC_TRUE; 
3566 mat->assembled = PETSC_TRUE; 3567 mat->insertmode = NOT_SET_VALUES; 3568 3569 a = (Mat_MPIBAIJ*)mat->data; 3570 mat->rmap->bs = matin->rmap->bs; 3571 a->bs2 = oldmat->bs2; 3572 a->mbs = oldmat->mbs; 3573 a->nbs = oldmat->nbs; 3574 a->Mbs = oldmat->Mbs; 3575 a->Nbs = oldmat->Nbs; 3576 3577 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3578 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3579 3580 a->size = oldmat->size; 3581 a->rank = oldmat->rank; 3582 a->donotstash = oldmat->donotstash; 3583 a->roworiented = oldmat->roworiented; 3584 a->rowindices = 0; 3585 a->rowvalues = 0; 3586 a->getrowactive = PETSC_FALSE; 3587 a->barray = 0; 3588 a->rstartbs = oldmat->rstartbs; 3589 a->rendbs = oldmat->rendbs; 3590 a->cstartbs = oldmat->cstartbs; 3591 a->cendbs = oldmat->cendbs; 3592 3593 /* hash table stuff */ 3594 a->ht = 0; 3595 a->hd = 0; 3596 a->ht_size = 0; 3597 a->ht_flag = oldmat->ht_flag; 3598 a->ht_fact = oldmat->ht_fact; 3599 a->ht_total_ct = 0; 3600 a->ht_insert_ct = 0; 3601 3602 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3603 if (oldmat->colmap) { 3604 #if defined (PETSC_USE_CTABLE) 3605 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3606 #else 3607 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3608 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3609 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3610 #endif 3611 } else a->colmap = 0; 3612 3613 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3614 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3615 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3616 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3617 } else a->garray = 0; 3618 3619 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3620 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3621 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3622 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3623 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3624 3625 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3626 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3627 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3628 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3629 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3630 *newmat = mat; 3631 3632 PetscFunctionReturn(0); 3633 } 3634 3635 #undef __FUNCT__ 3636 #define __FUNCT__ "MatLoad_MPIBAIJ" 3637 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer) 3638 { 3639 PetscErrorCode ierr; 3640 int fd; 3641 PetscInt i,nz,j,rstart,rend; 3642 PetscScalar *vals,*buf; 3643 MPI_Comm comm = ((PetscObject)viewer)->comm; 3644 MPI_Status status; 3645 PetscMPIInt rank,size,maxnz; 3646 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3647 PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 3648 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 3649 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3650 PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 3651 PetscInt dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols; 3652 3653 PetscFunctionBegin; 3654 ierr = 
PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3655 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3656 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3657 3658 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3659 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3660 if (!rank) { 3661 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3662 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3663 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3664 } 3665 3666 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 3667 3668 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3669 M = header[1]; N = header[2]; 3670 3671 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3672 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 3673 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 3674 3675 /* If global sizes are set, check if they are consistent with that given in the file */ 3676 if (sizesset) { 3677 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 3678 } 3679 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3680 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3681 3682 if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices"); 3683 3684 /* 3685 This code adds extra rows to make sure the number of rows is 3686 divisible by the blocksize 3687 */ 3688 Mbs = M/bs; 3689 extra_rows = bs - M + bs*Mbs; 3690 if (extra_rows == bs) extra_rows = 0; 3691 else Mbs++; 3692 if (extra_rows && !rank) { 3693 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3694 } 3695 3696 /* determine ownership of all rows */ 3697 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 3698 mbs = Mbs/size + ((Mbs % size) > rank); 3699 m = mbs*bs; 3700 } else { /* User set */ 3701 m = newmat->rmap->n; 3702 mbs = m/bs; 3703 } 3704 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 3705 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3706 3707 /* process 0 needs enough room for process with most rows */ 3708 if (!rank) { 3709 mmax = rowners[1]; 3710 for (i=2; i<size; i++) { 3711 mmax = PetscMax(mmax,rowners[i]); 3712 } 3713 mmax*=bs; 3714 } else mmax = m; 3715 3716 rowners[0] = 0; 3717 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3718 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3719 rstart = rowners[rank]; 3720 rend = rowners[rank+1]; 3721 3722 /* distribute row lengths to all processors */ 3723 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 3724 if (!rank) { 3725 mend = m; 3726 if (size == 1) mend = mend - extra_rows; 3727 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3728 for (j=mend; j<m; j++) locrowlens[j] = 1; 3729 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3730 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3731 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3732 for (j=0; j<m; j++) { 
3733 procsnz[0] += locrowlens[j]; 3734 } 3735 for (i=1; i<size; i++) { 3736 mend = browners[i+1] - browners[i]; 3737 if (i == size-1) mend = mend - extra_rows; 3738 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3739 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3740 /* calculate the number of nonzeros on each processor */ 3741 for (j=0; j<browners[i+1]-browners[i]; j++) { 3742 procsnz[i] += rowlengths[j]; 3743 } 3744 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3745 } 3746 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3747 } else { 3748 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3749 } 3750 3751 if (!rank) { 3752 /* determine max buffer needed and allocate it */ 3753 maxnz = procsnz[0]; 3754 for (i=1; i<size; i++) { 3755 maxnz = PetscMax(maxnz,procsnz[i]); 3756 } 3757 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3758 3759 /* read in my part of the matrix column indices */ 3760 nz = procsnz[0]; 3761 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3762 mycols = ibuf; 3763 if (size == 1) nz -= extra_rows; 3764 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3765 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 3766 3767 /* read in every ones (except the last) and ship off */ 3768 for (i=1; i<size-1; i++) { 3769 nz = procsnz[i]; 3770 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3771 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3772 } 3773 /* read in the stuff for the last proc */ 3774 if (size != 1) { 3775 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 3776 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3777 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3778 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3779 } 3780 ierr = PetscFree(cols);CHKERRQ(ierr); 3781 } else { 3782 /* determine buffer space needed for message */ 3783 nz = 0; 3784 for (i=0; i<m; i++) { 3785 nz += locrowlens[i]; 3786 } 3787 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3788 mycols = ibuf; 3789 /* receive message of column indices*/ 3790 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3791 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3792 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3793 } 3794 3795 /* loop over local rows, determining number of off diagonal entries */ 3796 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 3797 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 3798 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3799 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3800 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3801 rowcount = 0; nzcount = 0; 3802 for (i=0; i<mbs; i++) { 3803 dcount = 0; 3804 odcount = 0; 3805 for (j=0; j<bs; j++) { 3806 kmax = locrowlens[rowcount]; 3807 for (k=0; k<kmax; k++) { 3808 tmp = mycols[nzcount++]/bs; 3809 if (!mask[tmp]) { 3810 mask[tmp] = 1; 3811 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3812 else masked1[dcount++] = tmp; 3813 } 3814 } 3815 rowcount++; 3816 } 3817 3818 dlens[i] = dcount; 3819 odlens[i] = odcount; 3820 3821 /* zero out the mask elements we set */ 3822 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3823 for 
(j=0; j<odcount; j++) mask[masked2[j]] = 0; 3824 } 3825 3826 3827 if (!sizesset) { 3828 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3829 } 3830 ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3831 3832 if (!rank) { 3833 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3834 /* read in my part of the matrix numerical values */ 3835 nz = procsnz[0]; 3836 vals = buf; 3837 mycols = ibuf; 3838 if (size == 1) nz -= extra_rows; 3839 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3840 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 3841 3842 /* insert into matrix */ 3843 jj = rstart*bs; 3844 for (i=0; i<m; i++) { 3845 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3846 mycols += locrowlens[i]; 3847 vals += locrowlens[i]; 3848 jj++; 3849 } 3850 /* read in other processors (except the last one) and ship out */ 3851 for (i=1; i<size-1; i++) { 3852 nz = procsnz[i]; 3853 vals = buf; 3854 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3855 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3856 } 3857 /* the last proc */ 3858 if (size != 1){ 3859 nz = procsnz[i] - extra_rows; 3860 vals = buf; 3861 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3862 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3863 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3864 } 3865 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3866 } else { 3867 /* receive numeric values */ 3868 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3869 3870 /* receive message of values*/ 3871 vals = buf; 3872 mycols = ibuf; 3873 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 3874 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 3875 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3876 3877 /* insert into matrix */ 3878 jj = rstart*bs; 3879 for (i=0; i<m; i++) { 3880 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3881 mycols += locrowlens[i]; 3882 vals += locrowlens[i]; 3883 jj++; 3884 } 3885 } 3886 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3887 ierr = PetscFree(buf);CHKERRQ(ierr); 3888 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3889 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 3890 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 3891 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 3892 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3893 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3894 3895 PetscFunctionReturn(0); 3896 } 3897 3898 #undef __FUNCT__ 3899 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 3900 /*@ 3901 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3902 3903 Input Parameters: 3904 . mat - the matrix 3905 . 
fact - factor

   Not Collective, each process can use a different factor

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table <fact>

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@*/
PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
{
  Mat_MPIBAIJ *baij;

  PetscFunctionBegin;
  baij          = (Mat_MPIBAIJ*)mat->data;
  baij->ht_fact = fact;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran (to eliminate two function call overheads)
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
#endif

#undef __FUNCT__
#define __FUNCT__ "matmpibaijsetvaluesblocked"
/*@C
  MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()

  Collective on Mat

  Input Parameters:
+ mat    - the matrix
. min    - number of input rows
. im     - input rows
. nin    - number of input columns
. in     - input columns
. v      - numerical values input
- addvin - INSERT_VALUES or ADD_VALUES

  Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ(), which is terrible code un-reuse.
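
  From the C side this entry point is intended to be equivalent to the call it bypasses,
  shown here only for reference (the direct entry exists purely to avoid the extra
  function-call overhead when coming from Fortran):

.vb
   ierr = MatSetValuesBlocked(mat,m,im,n,in,v,addv);CHKERRQ(ierr);
.ve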
3979 3980 Level: advanced 3981 3982 .seealso: MatSetValuesBlocked() 3983 @*/ 3984 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3985 { 3986 /* convert input arguments to C version */ 3987 Mat mat = *matin; 3988 PetscInt m = *min, n = *nin; 3989 InsertMode addv = *addvin; 3990 3991 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3992 const MatScalar *value; 3993 MatScalar *barray=baij->barray; 3994 PetscBool roworiented = baij->roworiented; 3995 PetscErrorCode ierr; 3996 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3997 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3998 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3999 4000 PetscFunctionBegin; 4001 /* tasks normally handled by MatSetValuesBlocked() */ 4002 if (mat->insertmode == NOT_SET_VALUES) { 4003 mat->insertmode = addv; 4004 } 4005 #if defined(PETSC_USE_DEBUG) 4006 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 4007 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 4008 #endif 4009 if (mat->assembled) { 4010 mat->was_assembled = PETSC_TRUE; 4011 mat->assembled = PETSC_FALSE; 4012 } 4013 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 4014 4015 4016 if(!barray) { 4017 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 4018 baij->barray = barray; 4019 } 4020 4021 if (roworiented) { 4022 stepval = (n-1)*bs; 4023 } else { 4024 stepval = (m-1)*bs; 4025 } 4026 for (i=0; i<m; i++) { 4027 if (im[i] < 0) continue; 4028 #if defined(PETSC_USE_DEBUG) 4029 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 4030 #endif 4031 if (im[i] >= rstart && im[i] < rend) { 4032 row = im[i] - rstart; 4033 for (j=0; j<n; j++) { 4034 /* If NumCol = 1 then a copy is not required */ 4035 if ((roworiented) && (n == 1)) { 4036 barray = (MatScalar*)v + i*bs2; 4037 } else if((!roworiented) && (m == 1)) { 4038 barray = (MatScalar*)v + j*bs2; 4039 } else { /* Here a copy is required */ 4040 if (roworiented) { 4041 value = v + i*(stepval+bs)*bs + j*bs; 4042 } else { 4043 value = v + j*(stepval+bs)*bs + i*bs; 4044 } 4045 for (ii=0; ii<bs; ii++,value+=stepval) { 4046 for (jj=0; jj<bs; jj++) { 4047 *barray++ = *value++; 4048 } 4049 } 4050 barray -=bs2; 4051 } 4052 4053 if (in[j] >= cstart && in[j] < cend){ 4054 col = in[j] - cstart; 4055 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 4056 } 4057 else if (in[j] < 0) continue; 4058 #if defined(PETSC_USE_DEBUG) 4059 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 4060 #endif 4061 else { 4062 if (mat->was_assembled) { 4063 if (!baij->colmap) { 4064 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 4065 } 4066 4067 #if defined(PETSC_USE_DEBUG) 4068 #if defined (PETSC_USE_CTABLE) 4069 { PetscInt data; 4070 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 4071 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4072 } 4073 #else 4074 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4075 #endif 4076 #endif 4077 #if defined (PETSC_USE_CTABLE) 4078 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 4079 col = (col - 1)/bs; 4080 #else 
            col  = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIBAIJWithArrays"
/*@
   MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard
   CSR format the local rows.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based; the entries of i are offsets into the local j array.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  }
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr); /* must be MPIBAIJ to match the BAIJ preallocation routine below */
  ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
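
/*
   Example (an illustrative sketch): assembling two local rows of a bs=1 matrix with
   MatCreateMPIBAIJWithArrays(). The arrays are in standard CSR form with 0-based,
   process-local row offsets and global column indices; "comm" and the local/global
   sizes are assumed to be chosen by the caller.

      Mat         A;
      PetscInt    i[] = {0,2,3};          (* row 0 has 2 entries, row 1 has 1 entry *)
      PetscInt    j[] = {0,1,1};          (* global column indices *)
      PetscScalar a[] = {2.0,-1.0,3.0};   (* values matching j[] *)

      ierr = MatCreateMPIBAIJWithArrays(comm,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);CHKERRQ(ierr);
*/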