1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <petscblaslapack.h> 4 5 extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat); 6 extern PetscErrorCode DisAssemble_MPIBAIJ(Mat); 7 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []); 8 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 9 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 10 extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 11 extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 12 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec); 13 14 #undef __FUNCT__ 15 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ" 16 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[]) 17 { 18 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 19 PetscErrorCode ierr; 20 PetscInt i,*idxb = 0; 21 PetscScalar *va,*vb; 22 Vec vtmp; 23 24 PetscFunctionBegin; 25 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 26 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 27 if (idx) { 28 for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;} 29 } 30 31 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 32 if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);} 33 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 34 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 35 36 for (i=0; i<A->rmap->n; i++){ 37 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);} 38 } 39 40 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 41 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 42 ierr = PetscFree(idxb);CHKERRQ(ierr); 43 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 44 PetscFunctionReturn(0); 45 } 46 47 EXTERN_C_BEGIN 48 #undef __FUNCT__ 49 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 50 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 51 { 52 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 53 PetscErrorCode ierr; 54 55 PetscFunctionBegin; 56 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 57 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 58 PetscFunctionReturn(0); 59 } 60 EXTERN_C_END 61 62 EXTERN_C_BEGIN 63 #undef __FUNCT__ 64 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 65 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 66 { 67 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 68 PetscErrorCode ierr; 69 70 PetscFunctionBegin; 71 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 72 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 73 PetscFunctionReturn(0); 74 } 75 EXTERN_C_END 76 77 /* 78 Local utility routine that creates a mapping from the global column 79 number to the local number in the off-diagonal part of the local 80 storage of the matrix. This is done in a non scalable way since the 81 length of colmap equals the global matrix length. 82 */ 83 #undef __FUNCT__ 84 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 85 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat) 86 { 87 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 88 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 89 PetscErrorCode ierr; 90 PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 91 92 PetscFunctionBegin; 93 #if defined (PETSC_USE_CTABLE) 94 ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 95 for (i=0; i<nbs; i++){ 96 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 97 } 98 #else 99 ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr); 100 ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 101 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 102 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 103 #endif 104 PetscFunctionReturn(0); 105 } 106 107 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 108 { \ 109 \ 110 brow = row/bs; \ 111 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 112 rmax = aimax[brow]; nrow = ailen[brow]; \ 113 bcol = col/bs; \ 114 ridx = row % bs; cidx = col % bs; \ 115 low = 0; high = nrow; \ 116 while (high-low > 3) { \ 117 t = (low+high)/2; \ 118 if (rp[t] > bcol) high = t; \ 119 else low = t; \ 120 } \ 121 for (_i=low; _i<high; _i++) { \ 122 if (rp[_i] > bcol) break; \ 123 if (rp[_i] == bcol) { \ 124 bap = ap + bs2*_i + bs*cidx + ridx; \ 125 if (addv == ADD_VALUES) *bap += value; \ 126 else *bap = value; \ 127 goto a_noinsert; \ 128 } \ 129 } \ 130 if (a->nonew == 1) goto a_noinsert; \ 131 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 132 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 133 N = nrow++ - 1; \ 134 /* shift up all the later entries in this row */ \ 135 for (ii=N; ii>=_i; ii--) { \ 136 rp[ii+1] = rp[ii]; \ 137 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 138 } \ 139 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 140 rp[_i] = bcol; \ 141 ap[bs2*_i + bs*cidx + ridx] = value; \ 142 a_noinsert:; \ 143 ailen[brow] = nrow; \ 144 } 145 146 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 147 { \ 148 brow = row/bs; \ 149 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 150 rmax = bimax[brow]; nrow = bilen[brow]; \ 151 bcol = col/bs; \ 152 ridx = row % bs; cidx = col % bs; \ 153 low = 0; high = nrow; \ 154 while (high-low > 3) { \ 155 t = (low+high)/2; \ 156 if (rp[t] > bcol) high = t; \ 157 else low = t; \ 158 } \ 159 for (_i=low; _i<high; _i++) { \ 160 if (rp[_i] > bcol) break; \ 161 if (rp[_i] == bcol) { \ 162 bap = ap + bs2*_i + bs*cidx + ridx; \ 163 if (addv == ADD_VALUES) *bap += value; \ 164 else *bap = value; \ 165 goto b_noinsert; \ 166 } \ 167 } \ 168 if (b->nonew == 1) goto b_noinsert; \ 169 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 170 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 171 CHKMEMQ;\ 172 N = nrow++ - 1; \ 173 /* shift up all the later entries in this row */ \ 174 for (ii=N; ii>=_i; ii--) { \ 175 rp[ii+1] = rp[ii]; \ 176 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 177 } \ 178 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 179 rp[_i] = bcol; \ 180 ap[bs2*_i + bs*cidx + ridx] = value; \ 181 b_noinsert:; \ 182 bilen[brow] = nrow; \ 183 } 184 185 #undef __FUNCT__ 186 #define __FUNCT__ "MatSetValues_MPIBAIJ" 187 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 188 { 189 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 190 MatScalar value; 191 PetscBool roworiented = baij->roworiented; 192 PetscErrorCode ierr; 193 PetscInt i,j,row,col; 194 PetscInt rstart_orig=mat->rmap->rstart; 195 PetscInt rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart; 196 PetscInt cend_orig=mat->cmap->rend,bs=mat->rmap->bs; 197 198 /* Some Variables required in the macro */ 199 Mat A = baij->A; 200 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 201 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 202 MatScalar *aa=a->a; 203 204 Mat B = baij->B; 205 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 206 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 207 MatScalar *ba=b->a; 208 209 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 210 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 211 MatScalar *ap,*bap; 212 213 PetscFunctionBegin; 214 if (v) PetscValidScalarPointer(v,6); 215 for (i=0; i<m; i++) { 216 if (im[i] < 0) continue; 217 #if defined(PETSC_USE_DEBUG) 218 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 219 #endif 220 if (im[i] >= rstart_orig && im[i] < rend_orig) { 221 row = im[i] - rstart_orig; 222 for (j=0; j<n; j++) { 223 if (in[j] >= cstart_orig && in[j] < cend_orig){ 224 col = in[j] - cstart_orig; 225 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 226 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 227 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 228 } else if (in[j] < 0) continue; 229 #if defined(PETSC_USE_DEBUG) 230 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 231 #endif 232 else { 233 if (mat->was_assembled) { 234 if (!baij->colmap) { 235 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 236 } 237 #if defined (PETSC_USE_CTABLE) 238 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 239 col = col - 1; 240 #else 241 col = baij->colmap[in[j]/bs] - 1; 242 #endif 243 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 244 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 245 col = in[j]; 246 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 247 B = baij->B; 248 b = (Mat_SeqBAIJ*)(B)->data; 249 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 250 ba=b->a; 251 } else col += in[j]%bs; 252 } else col = in[j]; 253 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 254 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 255 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 256 } 257 } 258 } else { 259 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 260 if (!baij->donotstash) { 261 if (roworiented) { 262 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 263 } else { 264 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 265 } 266 } 267 } 268 } 269 PetscFunctionReturn(0); 270 } 271 272 #undef __FUNCT__ 273 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 274 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 275 { 276 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 277 const PetscScalar *value; 278 MatScalar *barray=baij->barray; 279 PetscBool roworiented = baij->roworiented; 280 PetscErrorCode ierr; 281 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 282 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 283 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 284 285 PetscFunctionBegin; 286 if(!barray) { 287 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 288 baij->barray = barray; 289 } 290 291 if (roworiented) { 292 stepval = (n-1)*bs; 293 } else { 294 stepval = (m-1)*bs; 295 } 296 for (i=0; i<m; i++) { 297 if (im[i] < 0) continue; 298 #if defined(PETSC_USE_DEBUG) 299 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 300 #endif 301 if (im[i] >= rstart && im[i] < rend) { 302 row = im[i] - rstart; 303 for (j=0; j<n; j++) { 304 /* If NumCol = 1 then a copy is not required */ 305 if ((roworiented) && (n == 1)) { 306 barray = (MatScalar*)v + i*bs2; 307 } else if((!roworiented) && (m == 1)) { 308 barray = (MatScalar*)v + j*bs2; 309 } else { /* Here a copy is required */ 310 if (roworiented) { 311 value = v + (i*(stepval+bs) + j)*bs; 312 } else { 313 value = v + (j*(stepval+bs) + i)*bs; 314 } 315 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 316 for (jj=0; jj<bs; jj++) { 317 barray[jj] = value[jj]; 318 } 319 barray += bs; 320 } 321 barray -= bs2; 322 } 323 324 if (in[j] >= cstart && in[j] < cend){ 325 col = in[j] - cstart; 326 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 327 } 328 else if (in[j] < 0) continue; 329 #if defined(PETSC_USE_DEBUG) 330 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 331 #endif 332 else { 333 if (mat->was_assembled) { 334 if (!baij->colmap) { 335 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 336 } 337 338 #if defined(PETSC_USE_DEBUG) 339 #if defined (PETSC_USE_CTABLE) 340 { PetscInt data; 341 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 342 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 343 } 344 #else 345 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 346 #endif 347 #endif 348 #if defined (PETSC_USE_CTABLE) 349 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 350 col = (col - 1)/bs; 351 #else 352 col = (baij->colmap[in[j]] - 1)/bs; 353 #endif 354 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 355 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 356 col = in[j]; 357 } 358 } 359 else col = in[j]; 360 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 361 } 362 } 363 } else { 364 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 365 if (!baij->donotstash) { 366 if (roworiented) { 367 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 368 } else { 369 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 370 } 371 } 372 } 373 } 374 PetscFunctionReturn(0); 375 } 376 377 #define HASH_KEY 0.6180339887 378 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 379 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 380 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 381 #undef __FUNCT__ 382 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 383 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 384 { 385 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 386 PetscBool roworiented = baij->roworiented; 387 PetscErrorCode ierr; 388 PetscInt i,j,row,col; 389 PetscInt rstart_orig=mat->rmap->rstart; 390 PetscInt rend_orig=mat->rmap->rend,Nbs=baij->Nbs; 391 PetscInt h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx; 392 PetscReal tmp; 393 MatScalar **HD = baij->hd,value; 394 #if defined(PETSC_USE_DEBUG) 395 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 396 #endif 397 398 PetscFunctionBegin; 399 if (v) PetscValidScalarPointer(v,6); 400 for (i=0; i<m; i++) { 401 #if defined(PETSC_USE_DEBUG) 402 if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 403 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 404 #endif 405 row = im[i]; 406 if (row >= rstart_orig && row < rend_orig) { 407 for (j=0; j<n; j++) { 408 col = in[j]; 409 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 410 /* Look up PetscInto the Hash Table */ 411 key = (row/bs)*Nbs+(col/bs)+1; 412 h1 = HASH(size,key,tmp); 413 414 415 idx = h1; 416 #if defined(PETSC_USE_DEBUG) 417 insert_ct++; 418 total_ct++; 419 if (HT[idx] != key) { 420 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 421 if (idx == size) { 422 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 423 if (idx == h1) { 424 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 425 } 426 } 427 } 428 #else 429 if (HT[idx] != key) { 430 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 431 if (idx == size) { 432 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 433 if (idx == h1) { 434 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 435 } 436 } 437 } 438 #endif 439 /* A HASH table entry is found, so insert the values at the correct address */ 440 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 441 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 442 } 443 } else { 444 if (!baij->donotstash) { 445 if (roworiented) { 446 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 447 } else { 448 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 449 } 450 } 451 } 452 } 453 #if defined(PETSC_USE_DEBUG) 454 baij->ht_total_ct = total_ct; 455 baij->ht_insert_ct = insert_ct; 456 #endif 457 PetscFunctionReturn(0); 458 } 459 460 #undef __FUNCT__ 461 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 462 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 463 { 464 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 465 PetscBool roworiented = baij->roworiented; 466 PetscErrorCode ierr; 467 PetscInt i,j,ii,jj,row,col; 468 PetscInt rstart=baij->rstartbs; 469 PetscInt rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 470 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 471 PetscReal tmp; 472 MatScalar **HD = baij->hd,*baij_a; 473 const PetscScalar *v_t,*value; 474 #if defined(PETSC_USE_DEBUG) 475 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 476 #endif 477 478 PetscFunctionBegin; 479 480 if (roworiented) { 481 stepval = (n-1)*bs; 482 } else { 483 stepval = (m-1)*bs; 484 } 485 for (i=0; i<m; i++) { 486 #if defined(PETSC_USE_DEBUG) 487 if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 488 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 489 #endif 490 row = im[i]; 491 v_t = v + i*nbs2; 492 if (row >= rstart && row < rend) { 493 for (j=0; j<n; j++) { 494 col = in[j]; 495 496 /* Look up into the Hash Table */ 497 key = row*Nbs+col+1; 498 h1 = HASH(size,key,tmp); 499 500 idx = h1; 501 #if defined(PETSC_USE_DEBUG) 502 total_ct++; 503 insert_ct++; 504 if (HT[idx] != key) { 505 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 506 if (idx == size) { 507 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 508 if (idx == h1) { 509 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 510 } 511 } 512 } 513 #else 514 if (HT[idx] != key) { 515 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 516 if (idx == size) { 517 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 518 if (idx == h1) { 519 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 520 } 521 } 522 } 523 #endif 524 baij_a = HD[idx]; 525 if (roworiented) { 526 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 527 /* value = v + (i*(stepval+bs)+j)*bs; */ 528 value = v_t; 529 v_t += bs; 530 if (addv == ADD_VALUES) { 531 for (ii=0; ii<bs; ii++,value+=stepval) { 532 for (jj=ii; jj<bs2; jj+=bs) { 533 baij_a[jj] += *value++; 534 } 535 } 536 } else { 537 for (ii=0; ii<bs; ii++,value+=stepval) { 538 for (jj=ii; jj<bs2; jj+=bs) { 539 baij_a[jj] = *value++; 540 } 541 } 542 } 543 } else { 544 value = v + j*(stepval+bs)*bs + i*bs; 545 if (addv == ADD_VALUES) { 546 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 547 for (jj=0; jj<bs; jj++) { 548 baij_a[jj] += *value++; 549 } 550 } 551 } else { 552 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 553 for (jj=0; jj<bs; jj++) { 554 baij_a[jj] = *value++; 555 } 556 } 557 } 558 } 559 } 560 } else { 561 if (!baij->donotstash) { 562 if (roworiented) { 563 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 564 } else { 565 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 566 } 567 } 568 } 569 } 570 #if defined(PETSC_USE_DEBUG) 571 baij->ht_total_ct = total_ct; 572 baij->ht_insert_ct = insert_ct; 573 #endif 574 PetscFunctionReturn(0); 575 } 576 577 #undef __FUNCT__ 578 #define __FUNCT__ "MatGetValues_MPIBAIJ" 579 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 580 { 581 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 582 PetscErrorCode ierr; 583 PetscInt bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 584 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 585 586 PetscFunctionBegin; 587 for (i=0; i<m; i++) { 588 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 589 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 590 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 591 row = idxm[i] - bsrstart; 592 for (j=0; j<n; j++) { 593 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 594 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 595 if (idxn[j] >= bscstart && idxn[j] < bscend){ 596 col = idxn[j] - bscstart; 597 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 598 } else { 599 if (!baij->colmap) { 600 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 601 } 602 #if defined (PETSC_USE_CTABLE) 603 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 604 data --; 605 #else 606 data = baij->colmap[idxn[j]/bs]-1; 607 #endif 608 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 609 else { 610 col = data + idxn[j]%bs; 611 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 612 } 613 } 614 } 615 } else { 616 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 617 } 618 } 619 PetscFunctionReturn(0); 620 } 621 622 #undef __FUNCT__ 623 #define __FUNCT__ "MatNorm_MPIBAIJ" 624 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 625 { 626 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 627 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 628 PetscErrorCode ierr; 629 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 630 PetscReal sum = 0.0; 631 MatScalar *v; 632 633 PetscFunctionBegin; 634 if (baij->size == 1) { 635 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 636 } else { 637 if (type == NORM_FROBENIUS) { 638 v = amat->a; 639 nz = amat->nz*bs2; 640 for (i=0; i<nz; i++) { 641 #if defined(PETSC_USE_COMPLEX) 642 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 643 #else 644 sum += (*v)*(*v); v++; 645 #endif 646 } 647 v = bmat->a; 648 nz = bmat->nz*bs2; 649 for (i=0; i<nz; i++) { 650 #if defined(PETSC_USE_COMPLEX) 651 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 652 #else 653 sum += (*v)*(*v); v++; 654 #endif 655 } 656 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 657 *nrm = sqrt(*nrm); 658 } else if (type == NORM_1) { /* max column sum */ 659 PetscReal *tmp,*tmp2; 660 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 661 ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr); 662 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 663 v = amat->a; jj = amat->j; 664 for (i=0; i<amat->nz; i++) { 665 for (j=0; j<bs; j++){ 666 col = bs*(cstart + *jj) + j; /* column index */ 667 for (row=0; row<bs; row++){ 668 tmp[col] += PetscAbsScalar(*v); v++; 669 } 670 } 671 jj++; 672 } 673 v = bmat->a; jj = bmat->j; 674 for (i=0; i<bmat->nz; i++) { 675 for (j=0; j<bs; j++){ 676 col = bs*garray[*jj] + j; 677 for (row=0; row<bs; row++){ 678 tmp[col] += PetscAbsScalar(*v); v++; 679 } 680 } 681 jj++; 682 } 683 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 684 *nrm = 0.0; 685 for (j=0; j<mat->cmap->N; j++) { 686 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 687 } 688 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 689 } else if (type == NORM_INFINITY) { /* max row sum */ 690 PetscReal *sums; 691 ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr); 692 sum = 0.0; 693 for (j=0; j<amat->mbs; j++) { 694 for (row=0; row<bs; row++) sums[row] = 0.0; 695 v = amat->a + bs2*amat->i[j]; 696 nz = amat->i[j+1]-amat->i[j]; 697 for (i=0; i<nz; i++) { 698 for (col=0; col<bs; col++){ 699 for (row=0; row<bs; row++){ 700 sums[row] += PetscAbsScalar(*v); v++; 701 } 702 } 703 } 704 v = bmat->a + bs2*bmat->i[j]; 705 nz = bmat->i[j+1]-bmat->i[j]; 706 for (i=0; i<nz; i++) { 707 for (col=0; col<bs; col++){ 708 for (row=0; row<bs; row++){ 709 sums[row] += PetscAbsScalar(*v); v++; 710 } 711 } 712 } 713 for (row=0; row<bs; row++){ 714 if (sums[row] > sum) sum = sums[row]; 715 } 716 } 717 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 718 ierr = PetscFree(sums);CHKERRQ(ierr); 719 } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet"); 720 } 721 PetscFunctionReturn(0); 722 } 723 724 /* 725 Creates the hash table, and sets the table 726 This table is created only once. 727 If new entried need to be added to the matrix 728 then the hash table has to be destroyed and 729 recreated. 730 */ 731 #undef __FUNCT__ 732 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 733 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 734 { 735 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 736 Mat A = baij->A,B=baij->B; 737 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 738 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 739 PetscErrorCode ierr; 740 PetscInt ht_size,bs2=baij->bs2,rstart=baij->rstartbs; 741 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 742 PetscInt *HT,key; 743 MatScalar **HD; 744 PetscReal tmp; 745 #if defined(PETSC_USE_INFO) 746 PetscInt ct=0,max=0; 747 #endif 748 749 PetscFunctionBegin; 750 if (baij->ht) PetscFunctionReturn(0); 751 752 baij->ht_size = (PetscInt)(factor*nz); 753 ht_size = baij->ht_size; 754 755 /* Allocate Memory for Hash Table */ 756 ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr); 757 ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr); 758 ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr); 759 HD = baij->hd; 760 HT = baij->ht; 761 762 /* Loop Over A */ 763 for (i=0; i<a->mbs; i++) { 764 for (j=ai[i]; j<ai[i+1]; j++) { 765 row = i+rstart; 766 col = aj[j]+cstart; 767 768 key = row*Nbs + col + 1; 769 h1 = HASH(ht_size,key,tmp); 770 for (k=0; k<ht_size; k++){ 771 if (!HT[(h1+k)%ht_size]) { 772 HT[(h1+k)%ht_size] = key; 773 HD[(h1+k)%ht_size] = a->a + j*bs2; 774 break; 775 #if defined(PETSC_USE_INFO) 776 } else { 777 ct++; 778 #endif 779 } 780 } 781 #if defined(PETSC_USE_INFO) 782 if (k> max) max = k; 783 #endif 784 } 785 } 786 /* Loop Over B */ 787 for (i=0; i<b->mbs; i++) { 788 for (j=bi[i]; j<bi[i+1]; j++) { 789 row = i+rstart; 790 col = garray[bj[j]]; 791 key = row*Nbs + col + 1; 792 h1 = HASH(ht_size,key,tmp); 793 for (k=0; k<ht_size; k++){ 794 if (!HT[(h1+k)%ht_size]) { 795 HT[(h1+k)%ht_size] = key; 796 HD[(h1+k)%ht_size] = b->a + j*bs2; 797 break; 798 #if defined(PETSC_USE_INFO) 799 } else { 800 ct++; 801 #endif 802 } 803 } 804 #if defined(PETSC_USE_INFO) 805 if (k> max) max = k; 806 #endif 807 } 808 } 809 810 /* Print Summary */ 811 #if defined(PETSC_USE_INFO) 812 for (i=0,j=0; i<ht_size; i++) { 813 if (HT[i]) {j++;} 814 } 815 ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 816 #endif 817 PetscFunctionReturn(0); 818 } 819 820 #undef __FUNCT__ 821 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 822 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 823 { 824 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 825 PetscErrorCode ierr; 826 PetscInt nstash,reallocs; 827 InsertMode addv; 828 829 PetscFunctionBegin; 830 if (baij->donotstash || mat->nooffprocentries) { 831 PetscFunctionReturn(0); 832 } 833 834 /* make sure all processors are either in INSERTMODE or ADDMODE */ 835 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 836 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 837 mat->insertmode = addv; /* in case this processor had no cache */ 838 839 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 840 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 841 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 842 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 843 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 844 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 845 PetscFunctionReturn(0); 846 } 847 848 #undef __FUNCT__ 849 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 850 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 851 { 852 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 853 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data; 854 PetscErrorCode ierr; 855 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 856 PetscInt *row,*col; 857 PetscBool r1,r2,r3,other_disassembled; 858 MatScalar *val; 859 InsertMode addv = mat->insertmode; 860 PetscMPIInt n; 861 862 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 863 PetscFunctionBegin; 864 if (!baij->donotstash && !mat->nooffprocentries) { 865 while (1) { 866 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 867 if (!flg) break; 868 869 for (i=0; i<n;) { 870 /* Now identify the consecutive vals belonging to the same row */ 871 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 872 if (j < n) ncols = j-i; 873 else ncols = n-i; 874 /* Now assemble all these values with a single function call */ 875 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 876 i = j; 877 } 878 } 879 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 880 /* Now process the block-stash. Since the values are stashed column-oriented, 881 set the roworiented flag to column oriented, and after MatSetValues() 882 restore the original flags */ 883 r1 = baij->roworiented; 884 r2 = a->roworiented; 885 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 886 baij->roworiented = PETSC_FALSE; 887 a->roworiented = PETSC_FALSE; 888 (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 889 while (1) { 890 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 891 if (!flg) break; 892 893 for (i=0; i<n;) { 894 /* Now identify the consecutive vals belonging to the same row */ 895 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 896 if (j < n) ncols = j-i; 897 else ncols = n-i; 898 ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 899 i = j; 900 } 901 } 902 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 903 baij->roworiented = r1; 904 a->roworiented = r2; 905 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 906 } 907 908 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 909 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 910 911 /* determine if any processor has disassembled, if so we must 912 also disassemble ourselfs, in order that we may reassemble. */ 913 /* 914 if nonzero structure of submatrix B cannot change then we know that 915 no processor disassembled thus we can skip this stuff 916 */ 917 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 918 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 919 if (mat->was_assembled && !other_disassembled) { 920 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 921 } 922 } 923 924 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 925 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 926 } 927 ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 928 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 929 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 930 931 #if defined(PETSC_USE_INFO) 932 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 933 ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr); 934 baij->ht_total_ct = 0; 935 baij->ht_insert_ct = 0; 936 } 937 #endif 938 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 939 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 940 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 941 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 942 } 943 944 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 945 baij->rowvalues = 0; 946 PetscFunctionReturn(0); 947 } 948 949 #undef __FUNCT__ 950 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 951 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 952 { 953 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 954 PetscErrorCode ierr; 955 PetscMPIInt size = baij->size,rank = baij->rank; 956 PetscInt bs = mat->rmap->bs; 957 PetscBool iascii,isdraw; 958 PetscViewer sviewer; 959 PetscViewerFormat format; 960 961 PetscFunctionBegin; 962 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 963 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 964 if (iascii) { 965 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 966 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 967 MatInfo info; 968 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 969 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 970 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 971 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 972 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 973 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 974 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 975 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 976 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 977 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 978 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 979 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 980 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 981 PetscFunctionReturn(0); 982 } else if (format == PETSC_VIEWER_ASCII_INFO) { 983 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 984 PetscFunctionReturn(0); 985 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 986 PetscFunctionReturn(0); 987 } 988 } 989 990 if (isdraw) { 991 PetscDraw draw; 992 PetscBool isnull; 993 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 994 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 995 } 996 997 if (size == 1) { 998 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 999 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1000 } else { 1001 /* assemble the entire matrix onto first processor. */ 1002 Mat A; 1003 Mat_SeqBAIJ *Aloc; 1004 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1005 MatScalar *a; 1006 1007 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1008 /* Perhaps this should be the type of mat? */ 1009 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1010 if (!rank) { 1011 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1012 } else { 1013 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1014 } 1015 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1016 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1017 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1018 1019 /* copy over the A part */ 1020 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1021 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1022 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1023 1024 for (i=0; i<mbs; i++) { 1025 rvals[0] = bs*(baij->rstartbs + i); 1026 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1027 for (j=ai[i]; j<ai[i+1]; j++) { 1028 col = (baij->cstartbs+aj[j])*bs; 1029 for (k=0; k<bs; k++) { 1030 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1031 col++; a += bs; 1032 } 1033 } 1034 } 1035 /* copy over the B part */ 1036 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1037 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1038 for (i=0; i<mbs; i++) { 1039 rvals[0] = bs*(baij->rstartbs + i); 1040 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1041 for (j=ai[i]; j<ai[i+1]; j++) { 1042 col = baij->garray[aj[j]]*bs; 1043 for (k=0; k<bs; k++) { 1044 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1045 col++; a += bs; 1046 } 1047 } 1048 } 1049 ierr = PetscFree(rvals);CHKERRQ(ierr); 1050 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1051 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1052 /* 1053 Everyone has to call to draw the matrix since the graphics waits are 1054 synchronized across all processors that share the PetscDraw object 1055 */ 1056 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1057 if (!rank) { 1058 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1059 /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/ 1060 PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ); 1061 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1062 } 1063 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1064 ierr = MatDestroy(&A);CHKERRQ(ierr); 1065 } 1066 PetscFunctionReturn(0); 1067 } 1068 1069 #undef __FUNCT__ 1070 #define __FUNCT__ "MatView_MPIBAIJ_Binary" 1071 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 1072 { 1073 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)mat->data; 1074 Mat_SeqBAIJ* A = (Mat_SeqBAIJ*)a->A->data; 1075 Mat_SeqBAIJ* B = (Mat_SeqBAIJ*)a->B->data; 1076 PetscErrorCode ierr; 1077 PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen; 1078 PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll; 1079 int fd; 1080 PetscScalar *column_values; 1081 FILE *file; 1082 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1083 PetscInt message_count,flowcontrolcount; 1084 1085 PetscFunctionBegin; 1086 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1087 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1088 nz = bs2*(A->nz + B->nz); 1089 rlen = mat->rmap->n; 1090 if (!rank) { 1091 header[0] = MAT_FILE_CLASSID; 1092 header[1] = mat->rmap->N; 1093 header[2] = mat->cmap->N; 1094 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1095 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1096 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1097 /* get largest number of rows any processor has */ 1098 range = mat->rmap->range; 1099 for (i=1; i<size; i++) { 1100 rlen = PetscMax(rlen,range[i+1] - range[i]); 1101 } 1102 } else { 1103 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1104 } 1105 1106 ierr = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr); 1107 /* compute lengths of each row */ 1108 for (i=0; i<a->mbs; i++) { 1109 crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1110 } 1111 /* store the row lengths to the file */ 1112 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1113 if (!rank) { 1114 MPI_Status status; 1115 ierr = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr); 1116 rlen = (range[1] - range[0])/bs; 1117 for (i=0; i<rlen; i++) { 1118 for (j=0; j<bs; j++) { 1119 row_lens[i*bs+j] = bs*crow_lens[i]; 1120 } 1121 } 1122 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1123 for (i=1; i<size; i++) { 1124 rlen = (range[i+1] - range[i])/bs; 1125 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1126 ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1127 for (k=0; k<rlen; k++) { 1128 for (j=0; j<bs; j++) { 1129 row_lens[k*bs+j] = bs*crow_lens[k]; 1130 } 1131 } 1132 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1133 } 1134 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1135 ierr = PetscFree(row_lens);CHKERRQ(ierr); 1136 } else { 1137 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1138 ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1139 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1140 } 1141 ierr = PetscFree(crow_lens);CHKERRQ(ierr); 1142 1143 /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the 1144 information needed to make it for each row from a block row. This does require more communication but still not more than 1145 the communication needed for the nonzero values */ 1146 nzmax = nz; /* space a largest processor needs */ 1147 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1148 ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1149 cnt = 0; 1150 for (i=0; i<a->mbs; i++) { 1151 pcnt = cnt; 1152 for (j=B->i[i]; j<B->i[i+1]; j++) { 1153 if ( (col = garray[B->j[j]]) > cstart) break; 1154 for (l=0; l<bs; l++) { 1155 column_indices[cnt++] = bs*col+l; 1156 } 1157 } 1158 for (k=A->i[i]; k<A->i[i+1]; k++) { 1159 for (l=0; l<bs; l++) { 1160 column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 1161 } 1162 } 1163 for (; j<B->i[i+1]; j++) { 1164 for (l=0; l<bs; l++) { 1165 column_indices[cnt++] = bs*garray[B->j[j]]+l; 1166 } 1167 } 1168 len = cnt - pcnt; 1169 for (k=1; k<bs; k++) { 1170 ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr); 1171 cnt += len; 1172 } 1173 } 1174 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1175 1176 /* store the columns to the file */ 1177 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1178 if (!rank) { 1179 MPI_Status status; 1180 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1181 for (i=1; i<size; i++) { 1182 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1183 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1184 ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1185 ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1186 } 1187 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1188 } else { 1189 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1190 ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1191 ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1192 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1193 } 1194 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1195 1196 /* load up the numerical values */ 1197 ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1198 cnt = 0; 1199 for (i=0; i<a->mbs; i++) { 1200 rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1201 for (j=B->i[i]; j<B->i[i+1]; j++) { 1202 if ( garray[B->j[j]] > cstart) break; 1203 for (l=0; l<bs; l++) { 1204 for (ll=0; ll<bs; ll++) { 1205 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1206 } 1207 } 1208 cnt += bs; 1209 } 1210 for (k=A->i[i]; k<A->i[i+1]; k++) { 1211 for (l=0; l<bs; l++) { 1212 for (ll=0; ll<bs; ll++) { 1213 column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1214 } 1215 } 1216 cnt += bs; 1217 } 1218 for (; j<B->i[i+1]; j++) { 1219 for (l=0; l<bs; l++) { 1220 for (ll=0; ll<bs; ll++) { 1221 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1222 } 1223 } 1224 cnt += bs; 1225 } 1226 cnt += (bs-1)*rlen; 1227 } 1228 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1229 1230 /* store the column values to the file */ 1231 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1232 if (!rank) { 1233 MPI_Status status; 1234 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1235 for (i=1; i<size; i++) { 1236 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1237 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1238 ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1239 ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1240 } 1241 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1242 } else { 1243 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1244 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1245 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1246 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1247 } 1248 ierr = PetscFree(column_values);CHKERRQ(ierr); 1249 1250 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1251 if (file) { 1252 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1253 } 1254 PetscFunctionReturn(0); 1255 } 1256 1257 #undef __FUNCT__ 1258 #define __FUNCT__ "MatView_MPIBAIJ" 1259 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1260 { 1261 PetscErrorCode ierr; 1262 PetscBool iascii,isdraw,issocket,isbinary; 1263 1264 PetscFunctionBegin; 1265 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1266 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1267 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1268 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1269 if (iascii || isdraw || issocket) { 1270 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1271 } else if (isbinary) { 1272 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1273 } else { 1274 SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1275 } 1276 PetscFunctionReturn(0); 1277 } 1278 1279 #undef __FUNCT__ 1280 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1281 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1282 { 1283 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1284 PetscErrorCode ierr; 1285 1286 PetscFunctionBegin; 1287 #if defined(PETSC_USE_LOG) 1288 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1289 #endif 1290 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1291 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1292 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 1293 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1294 #if defined (PETSC_USE_CTABLE) 1295 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 1296 #else 1297 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1298 #endif 1299 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1300 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 1301 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1302 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1303 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1304 ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr); 1305 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1306 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1307 1308 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1309 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1310 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1311 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1312 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1313 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1314 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1315 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr); 1316 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1317 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C","",PETSC_NULL);CHKERRQ(ierr); 1318 PetscFunctionReturn(0); 1319 } 1320 1321 #undef __FUNCT__ 1322 #define __FUNCT__ "MatMult_MPIBAIJ" 1323 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1324 { 1325 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1326 PetscErrorCode ierr; 1327 PetscInt nt; 1328 1329 PetscFunctionBegin; 1330 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1331 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1332 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1333 if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1334 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1335 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1336 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1337 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1338 PetscFunctionReturn(0); 1339 } 1340 1341 #undef __FUNCT__ 1342 #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1343 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1344 { 1345 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1346 PetscErrorCode ierr; 1347 1348 PetscFunctionBegin; 1349 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1350 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1351 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1352 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1353 PetscFunctionReturn(0); 1354 } 1355 1356 #undef __FUNCT__ 1357 #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1358 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1359 { 1360 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1361 PetscErrorCode ierr; 1362 PetscBool merged; 1363 1364 PetscFunctionBegin; 1365 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1366 /* do nondiagonal part */ 1367 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1368 if (!merged) { 1369 /* send it on its way */ 1370 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1371 /* do local part */ 1372 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1373 /* receive remote parts: note this assumes the values are not actually */ 1374 /* inserted in yy until the next line */ 1375 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1376 } else { 1377 /* do local part */ 1378 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1379 /* send it on its way */ 1380 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1381 /* values actually were received in the Begin() but we need to call this nop */ 1382 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1383 } 1384 PetscFunctionReturn(0); 1385 } 1386 1387 #undef __FUNCT__ 1388 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1389 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1390 { 1391 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1392 PetscErrorCode ierr; 1393 1394 PetscFunctionBegin; 1395 /* do nondiagonal part */ 1396 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1397 /* send it on its way */ 1398 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1399 /* do local part */ 1400 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1401 /* receive remote parts: note this assumes the values are not actually */ 1402 /* inserted in yy until the next line, which is true for my implementation*/ 1403 /* but is not perhaps always true. */ 1404 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1405 PetscFunctionReturn(0); 1406 } 1407 1408 /* 1409 This only works correctly for square matrices where the subblock A->A is the 1410 diagonal block 1411 */ 1412 #undef __FUNCT__ 1413 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1414 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1415 { 1416 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1417 PetscErrorCode ierr; 1418 1419 PetscFunctionBegin; 1420 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1421 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1422 PetscFunctionReturn(0); 1423 } 1424 1425 #undef __FUNCT__ 1426 #define __FUNCT__ "MatScale_MPIBAIJ" 1427 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1428 { 1429 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1430 PetscErrorCode ierr; 1431 1432 PetscFunctionBegin; 1433 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1434 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1435 PetscFunctionReturn(0); 1436 } 1437 1438 #undef __FUNCT__ 1439 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1440 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1441 { 1442 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1443 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1444 PetscErrorCode ierr; 1445 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1446 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1447 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1448 1449 PetscFunctionBegin; 1450 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1451 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1452 mat->getrowactive = PETSC_TRUE; 1453 1454 if (!mat->rowvalues && (idx || v)) { 1455 /* 1456 allocate enough space to hold information from the longest row. 1457 */ 1458 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1459 PetscInt max = 1,mbs = mat->mbs,tmp; 1460 for (i=0; i<mbs; i++) { 1461 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1462 if (max < tmp) { max = tmp; } 1463 } 1464 ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1465 } 1466 lrow = row - brstart; 1467 1468 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1469 if (!v) {pvA = 0; pvB = 0;} 1470 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1471 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1472 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1473 nztot = nzA + nzB; 1474 1475 cmap = mat->garray; 1476 if (v || idx) { 1477 if (nztot) { 1478 /* Sort by increasing column numbers, assuming A and B already sorted */ 1479 PetscInt imark = -1; 1480 if (v) { 1481 *v = v_p = mat->rowvalues; 1482 for (i=0; i<nzB; i++) { 1483 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1484 else break; 1485 } 1486 imark = i; 1487 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1488 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1489 } 1490 if (idx) { 1491 *idx = idx_p = mat->rowindices; 1492 if (imark > -1) { 1493 for (i=0; i<imark; i++) { 1494 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1495 } 1496 } else { 1497 for (i=0; i<nzB; i++) { 1498 if (cmap[cworkB[i]/bs] < cstart) 1499 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1500 else break; 1501 } 1502 imark = i; 1503 } 1504 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1505 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1506 } 1507 } else { 1508 if (idx) *idx = 0; 1509 if (v) *v = 0; 1510 } 1511 } 1512 *nz = nztot; 1513 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1514 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1515 PetscFunctionReturn(0); 1516 } 1517 1518 #undef __FUNCT__ 1519 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1520 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1521 { 1522 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1523 1524 PetscFunctionBegin; 1525 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1526 baij->getrowactive = PETSC_FALSE; 1527 PetscFunctionReturn(0); 1528 } 1529 1530 #undef __FUNCT__ 1531 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1532 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1533 { 1534 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1535 PetscErrorCode ierr; 1536 1537 PetscFunctionBegin; 1538 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1539 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1540 PetscFunctionReturn(0); 1541 } 1542 1543 #undef __FUNCT__ 1544 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1545 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1546 { 1547 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1548 Mat A = a->A,B = a->B; 1549 PetscErrorCode ierr; 1550 PetscReal isend[5],irecv[5]; 1551 1552 PetscFunctionBegin; 1553 info->block_size = (PetscReal)matin->rmap->bs; 1554 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1555 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1556 isend[3] = info->memory; isend[4] = info->mallocs; 1557 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1558 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1559 isend[3] += info->memory; isend[4] += info->mallocs; 1560 if (flag == MAT_LOCAL) { 1561 info->nz_used = isend[0]; 1562 info->nz_allocated = isend[1]; 1563 info->nz_unneeded = isend[2]; 1564 info->memory = isend[3]; 1565 info->mallocs = isend[4]; 1566 } else if (flag == MAT_GLOBAL_MAX) { 1567 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1568 info->nz_used = irecv[0]; 1569 info->nz_allocated = irecv[1]; 1570 info->nz_unneeded = irecv[2]; 1571 info->memory = irecv[3]; 1572 info->mallocs = irecv[4]; 1573 } else if (flag == MAT_GLOBAL_SUM) { 1574 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1575 info->nz_used = irecv[0]; 1576 info->nz_allocated = irecv[1]; 1577 info->nz_unneeded = irecv[2]; 1578 info->memory = irecv[3]; 1579 info->mallocs = irecv[4]; 1580 } else { 1581 SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1582 } 1583 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1584 info->fill_ratio_needed = 0; 1585 info->factor_mallocs = 0; 1586 PetscFunctionReturn(0); 1587 } 1588 1589 #undef __FUNCT__ 1590 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1591 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1592 { 1593 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1594 PetscErrorCode ierr; 1595 1596 PetscFunctionBegin; 1597 switch (op) { 1598 case MAT_NEW_NONZERO_LOCATIONS: 1599 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1600 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1601 case MAT_KEEP_NONZERO_PATTERN: 1602 case MAT_NEW_NONZERO_LOCATION_ERR: 1603 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1604 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1605 break; 1606 case MAT_ROW_ORIENTED: 1607 a->roworiented = flg; 1608 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1609 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1610 break; 1611 case MAT_NEW_DIAGONALS: 1612 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1613 break; 1614 case MAT_IGNORE_OFF_PROC_ENTRIES: 1615 a->donotstash = flg; 1616 break; 1617 case MAT_USE_HASH_TABLE: 1618 a->ht_flag = flg; 1619 break; 1620 case MAT_SYMMETRIC: 1621 case MAT_STRUCTURALLY_SYMMETRIC: 1622 case MAT_HERMITIAN: 1623 case MAT_SYMMETRY_ETERNAL: 1624 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1625 break; 1626 default: 1627 SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op); 1628 } 1629 PetscFunctionReturn(0); 1630 } 1631 1632 #undef __FUNCT__ 1633 #define __FUNCT__ "MatTranspose_MPIBAIJ" 1634 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1635 { 1636 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1637 Mat_SeqBAIJ *Aloc; 1638 Mat B; 1639 PetscErrorCode ierr; 1640 PetscInt M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1641 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1642 MatScalar *a; 1643 1644 PetscFunctionBegin; 1645 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1646 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1647 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1648 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1649 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1650 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1651 } else { 1652 B = *matout; 1653 } 1654 1655 /* copy over the A part */ 1656 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1657 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1658 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1659 1660 for (i=0; i<mbs; i++) { 1661 rvals[0] = bs*(baij->rstartbs + i); 1662 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1663 for (j=ai[i]; j<ai[i+1]; j++) { 1664 col = (baij->cstartbs+aj[j])*bs; 1665 for (k=0; k<bs; k++) { 1666 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1667 col++; a += bs; 1668 } 1669 } 1670 } 1671 /* copy over the B part */ 1672 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1673 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1674 for (i=0; i<mbs; i++) { 1675 rvals[0] = bs*(baij->rstartbs + i); 1676 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1677 for (j=ai[i]; j<ai[i+1]; j++) { 1678 col = baij->garray[aj[j]]*bs; 1679 for (k=0; k<bs; k++) { 1680 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1681 col++; a += bs; 1682 } 1683 } 1684 } 1685 ierr = PetscFree(rvals);CHKERRQ(ierr); 1686 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1687 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1688 1689 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1690 *matout = B; 1691 } else { 1692 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1693 } 1694 PetscFunctionReturn(0); 1695 } 1696 1697 #undef __FUNCT__ 1698 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1699 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1700 { 1701 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1702 Mat a = baij->A,b = baij->B; 1703 PetscErrorCode ierr; 1704 PetscInt s1,s2,s3; 1705 1706 PetscFunctionBegin; 1707 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1708 if (rr) { 1709 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1710 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1711 /* Overlap communication with computation. */ 1712 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1713 } 1714 if (ll) { 1715 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1716 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1717 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1718 } 1719 /* scale the diagonal block */ 1720 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1721 1722 if (rr) { 1723 /* Do a scatter end and then right scale the off-diagonal block */ 1724 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1725 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1726 } 1727 1728 PetscFunctionReturn(0); 1729 } 1730 1731 #undef __FUNCT__ 1732 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1733 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1734 { 1735 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1736 PetscErrorCode ierr; 1737 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1738 PetscInt i,*owners = A->rmap->range; 1739 PetscInt *nprocs,j,idx,nsends,row; 1740 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1741 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1; 1742 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap->rstart; 1743 MPI_Comm comm = ((PetscObject)A)->comm; 1744 MPI_Request *send_waits,*recv_waits; 1745 MPI_Status recv_status,*send_status; 1746 const PetscScalar *xx; 1747 PetscScalar *bb; 1748 #if defined(PETSC_DEBUG) 1749 PetscBool found = PETSC_FALSE; 1750 #endif 1751 1752 PetscFunctionBegin; 1753 /* first count number of contributors to each processor */ 1754 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1755 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1756 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1757 j = 0; 1758 for (i=0; i<N; i++) { 1759 if (lastidx > (idx = rows[i])) j = 0; 1760 lastidx = idx; 1761 for (; j<size; j++) { 1762 if (idx >= owners[j] && idx < owners[j+1]) { 1763 nprocs[2*j]++; 1764 nprocs[2*j+1] = 1; 1765 owner[i] = j; 1766 #if defined(PETSC_DEBUG) 1767 found = PETSC_TRUE; 1768 #endif 1769 break; 1770 } 1771 } 1772 #if defined(PETSC_DEBUG) 1773 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1774 found = PETSC_FALSE; 1775 #endif 1776 } 1777 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1778 1779 if (A->nooffproczerorows) { 1780 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 1781 nrecvs = nsends; 1782 nmax = N; 1783 } else { 1784 /* inform other processors of number of messages and max length*/ 1785 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1786 } 1787 1788 /* post receives: */ 1789 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1790 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1791 for (i=0; i<nrecvs; i++) { 1792 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1793 } 1794 1795 /* do sends: 1796 1) starts[i] gives the starting index in svalues for stuff going to 1797 the ith processor 1798 */ 1799 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1800 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1801 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1802 starts[0] = 0; 1803 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1804 for (i=0; i<N; i++) { 1805 svalues[starts[owner[i]]++] = rows[i]; 1806 } 1807 1808 starts[0] = 0; 1809 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1810 count = 0; 1811 for (i=0; i<size; i++) { 1812 if (nprocs[2*i+1]) { 1813 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1814 } 1815 } 1816 ierr = PetscFree(starts);CHKERRQ(ierr); 1817 1818 base = owners[rank]; 1819 1820 /* wait on receives */ 1821 ierr = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr); 1822 count = nrecvs; 1823 slen = 0; 1824 while (count) { 1825 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1826 /* unpack receives into our local space */ 1827 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1828 source[imdex] = recv_status.MPI_SOURCE; 1829 lens[imdex] = n; 1830 slen += n; 1831 count--; 1832 } 1833 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1834 1835 /* move the data into the send scatter */ 1836 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1837 count = 0; 1838 for (i=0; i<nrecvs; i++) { 1839 values = rvalues + i*nmax; 1840 for (j=0; j<lens[i]; j++) { 1841 lrows[count++] = values[j] - base; 1842 } 1843 } 1844 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1845 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1846 ierr = PetscFree(owner);CHKERRQ(ierr); 1847 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1848 1849 /* fix right hand side if needed */ 1850 if (x && b) { 1851 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1852 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1853 for (i=0; i<slen; i++) { 1854 bb[lrows[i]] = diag*xx[lrows[i]]; 1855 } 1856 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1857 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1858 } 1859 1860 /* actually zap the local rows */ 1861 /* 1862 Zero the required rows. If the "diagonal block" of the matrix 1863 is square and the user wishes to set the diagonal we use separate 1864 code so that MatSetValues() is not called for each diagonal allocating 1865 new memory, thus calling lots of mallocs and slowing things down. 1866 1867 */ 1868 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1869 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1870 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1871 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 1872 } else if (diag != 0.0) { 1873 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1874 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1875 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1876 for (i=0; i<slen; i++) { 1877 row = lrows[i] + rstart_bs; 1878 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1879 } 1880 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1881 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1882 } else { 1883 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1884 } 1885 1886 ierr = PetscFree(lrows);CHKERRQ(ierr); 1887 1888 /* wait on sends */ 1889 if (nsends) { 1890 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1891 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1892 ierr = PetscFree(send_status);CHKERRQ(ierr); 1893 } 1894 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1895 ierr = PetscFree(svalues);CHKERRQ(ierr); 1896 1897 PetscFunctionReturn(0); 1898 } 1899 1900 #undef __FUNCT__ 1901 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1902 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1903 { 1904 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1905 PetscErrorCode ierr; 1906 1907 PetscFunctionBegin; 1908 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1909 PetscFunctionReturn(0); 1910 } 1911 1912 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1913 1914 #undef __FUNCT__ 1915 #define __FUNCT__ "MatEqual_MPIBAIJ" 1916 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1917 { 1918 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1919 Mat a,b,c,d; 1920 PetscBool flg; 1921 PetscErrorCode ierr; 1922 1923 PetscFunctionBegin; 1924 a = matA->A; b = matA->B; 1925 c = matB->A; d = matB->B; 1926 1927 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1928 if (flg) { 1929 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1930 } 1931 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1932 PetscFunctionReturn(0); 1933 } 1934 1935 #undef __FUNCT__ 1936 #define __FUNCT__ "MatCopy_MPIBAIJ" 1937 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1938 { 1939 PetscErrorCode ierr; 1940 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1941 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1942 1943 PetscFunctionBegin; 1944 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1945 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1946 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1947 } else { 1948 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1949 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1950 } 1951 PetscFunctionReturn(0); 1952 } 1953 1954 #undef __FUNCT__ 1955 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1956 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1957 { 1958 PetscErrorCode ierr; 1959 1960 PetscFunctionBegin; 1961 ierr = MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1962 PetscFunctionReturn(0); 1963 } 1964 1965 #undef __FUNCT__ 1966 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1967 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1968 { 1969 PetscErrorCode ierr; 1970 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1971 PetscBLASInt bnz,one=1; 1972 Mat_SeqBAIJ *x,*y; 1973 1974 PetscFunctionBegin; 1975 if (str == SAME_NONZERO_PATTERN) { 1976 PetscScalar alpha = a; 1977 x = (Mat_SeqBAIJ *)xx->A->data; 1978 y = (Mat_SeqBAIJ *)yy->A->data; 1979 bnz = PetscBLASIntCast(x->nz); 1980 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1981 x = (Mat_SeqBAIJ *)xx->B->data; 1982 y = (Mat_SeqBAIJ *)yy->B->data; 1983 bnz = PetscBLASIntCast(x->nz); 1984 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1985 } else { 1986 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1987 } 1988 PetscFunctionReturn(0); 1989 } 1990 1991 #undef __FUNCT__ 1992 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ" 1993 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs) 1994 { 1995 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1996 PetscInt rbs,cbs; 1997 PetscErrorCode ierr; 1998 1999 PetscFunctionBegin; 2000 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 2001 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 2002 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 2003 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 2004 if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs); 2005 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs); 2006 PetscFunctionReturn(0); 2007 } 2008 2009 #undef __FUNCT__ 2010 #define __FUNCT__ "MatRealPart_MPIBAIJ" 2011 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 2012 { 2013 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2014 PetscErrorCode ierr; 2015 2016 PetscFunctionBegin; 2017 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2018 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2019 PetscFunctionReturn(0); 2020 } 2021 2022 #undef __FUNCT__ 2023 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 2024 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 2025 { 2026 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2027 PetscErrorCode ierr; 2028 2029 PetscFunctionBegin; 2030 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2031 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2032 PetscFunctionReturn(0); 2033 } 2034 2035 #undef __FUNCT__ 2036 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 2037 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 2038 { 2039 PetscErrorCode ierr; 2040 IS iscol_local; 2041 PetscInt csize; 2042 2043 PetscFunctionBegin; 2044 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2045 if (call == MAT_REUSE_MATRIX) { 2046 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 2047 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2048 } else { 2049 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2050 } 2051 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 2052 if (call == MAT_INITIAL_MATRIX) { 2053 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 2054 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 2055 } 2056 PetscFunctionReturn(0); 2057 } 2058 2059 #undef __FUNCT__ 2060 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private" 2061 /* 2062 Not great since it makes two copies of the submatrix, first an SeqBAIJ 2063 in local and then by concatenating the local matrices the end result. 2064 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ() 2065 */ 2066 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2067 { 2068 PetscErrorCode ierr; 2069 PetscMPIInt rank,size; 2070 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 2071 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2072 Mat *local,M,Mreuse; 2073 MatScalar *vwork,*aa; 2074 MPI_Comm comm = ((PetscObject)mat)->comm; 2075 Mat_SeqBAIJ *aij; 2076 2077 2078 PetscFunctionBegin; 2079 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2080 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2081 2082 if (call == MAT_REUSE_MATRIX) { 2083 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2084 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2085 local = &Mreuse; 2086 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2087 } else { 2088 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2089 Mreuse = *local; 2090 ierr = PetscFree(local);CHKERRQ(ierr); 2091 } 2092 2093 /* 2094 m - number of local rows 2095 n - number of columns (same on all processors) 2096 rstart - first row in new global matrix generated 2097 */ 2098 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2099 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2100 m = m/bs; 2101 n = n/bs; 2102 2103 if (call == MAT_INITIAL_MATRIX) { 2104 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2105 ii = aij->i; 2106 jj = aij->j; 2107 2108 /* 2109 Determine the number of non-zeros in the diagonal and off-diagonal 2110 portions of the matrix in order to do correct preallocation 2111 */ 2112 2113 /* first get start and end of "diagonal" columns */ 2114 if (csize == PETSC_DECIDE) { 2115 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2116 if (mglobal == n*bs) { /* square matrix */ 2117 nlocal = m; 2118 } else { 2119 nlocal = n/size + ((n % size) > rank); 2120 } 2121 } else { 2122 nlocal = csize/bs; 2123 } 2124 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2125 rstart = rend - nlocal; 2126 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2127 2128 /* next, compute all the lengths */ 2129 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2130 olens = dlens + m; 2131 for (i=0; i<m; i++) { 2132 jend = ii[i+1] - ii[i]; 2133 olen = 0; 2134 dlen = 0; 2135 for (j=0; j<jend; j++) { 2136 if (*jj < rstart || *jj >= rend) olen++; 2137 else dlen++; 2138 jj++; 2139 } 2140 olens[i] = olen; 2141 dlens[i] = dlen; 2142 } 2143 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2144 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 2145 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2146 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2147 ierr = PetscFree(dlens);CHKERRQ(ierr); 2148 } else { 2149 PetscInt ml,nl; 2150 2151 M = *newmat; 2152 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2153 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2154 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2155 /* 2156 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2157 rather than the slower MatSetValues(). 2158 */ 2159 M->was_assembled = PETSC_TRUE; 2160 M->assembled = PETSC_FALSE; 2161 } 2162 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2163 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2164 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2165 ii = aij->i; 2166 jj = aij->j; 2167 aa = aij->a; 2168 for (i=0; i<m; i++) { 2169 row = rstart/bs + i; 2170 nz = ii[i+1] - ii[i]; 2171 cwork = jj; jj += nz; 2172 vwork = aa; aa += nz; 2173 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2174 } 2175 2176 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2177 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2178 *newmat = M; 2179 2180 /* save submatrix used in processor for next request */ 2181 if (call == MAT_INITIAL_MATRIX) { 2182 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2183 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2184 } 2185 2186 PetscFunctionReturn(0); 2187 } 2188 2189 #undef __FUNCT__ 2190 #define __FUNCT__ "MatPermute_MPIBAIJ" 2191 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2192 { 2193 MPI_Comm comm,pcomm; 2194 PetscInt first,local_size,nrows; 2195 const PetscInt *rows; 2196 PetscMPIInt size; 2197 IS crowp,growp,irowp,lrowp,lcolp,icolp; 2198 PetscErrorCode ierr; 2199 2200 PetscFunctionBegin; 2201 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2202 /* make a collective version of 'rowp' */ 2203 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2204 if (pcomm==comm) { 2205 crowp = rowp; 2206 } else { 2207 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2208 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2209 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2210 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2211 } 2212 /* collect the global row permutation and invert it */ 2213 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 2214 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 2215 if (pcomm!=comm) { 2216 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 2217 } 2218 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 2219 /* get the local target indices */ 2220 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 2221 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 2222 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 2223 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 2224 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 2225 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 2226 /* the column permutation is so much easier; 2227 make a local version of 'colp' and invert it */ 2228 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2229 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2230 if (size==1) { 2231 lcolp = colp; 2232 } else { 2233 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 2234 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 2235 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr); 2236 } 2237 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2238 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 2239 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 2240 if (size>1) { 2241 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 2242 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 2243 } 2244 /* now we just get the submatrix */ 2245 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2246 /* clean up */ 2247 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 2248 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 2249 PetscFunctionReturn(0); 2250 } 2251 2252 #undef __FUNCT__ 2253 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2254 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2255 { 2256 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2257 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2258 2259 PetscFunctionBegin; 2260 if (nghosts) { *nghosts = B->nbs;} 2261 if (ghosts) {*ghosts = baij->garray;} 2262 PetscFunctionReturn(0); 2263 } 2264 2265 extern PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat); 2266 2267 #undef __FUNCT__ 2268 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ" 2269 /* 2270 This routine is almost identical to MatFDColoringCreate_MPIBAIJ()! 2271 */ 2272 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 2273 { 2274 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 2275 PetscErrorCode ierr; 2276 PetscMPIInt size,*ncolsonproc,*disp,nn; 2277 PetscInt bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col; 2278 const PetscInt *is; 2279 PetscInt nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj; 2280 PetscInt *rowhit,M,cstart,cend,colb; 2281 PetscInt *columnsforrow,l; 2282 IS *isa; 2283 PetscBool done,flg; 2284 ISLocalToGlobalMapping map = mat->cbmapping; 2285 PetscInt *ltog = (map ? map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype; 2286 2287 PetscFunctionBegin; 2288 if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();"); 2289 if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock"); 2290 2291 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 2292 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2293 M = mat->rmap->n/bs; 2294 cstart = mat->cmap->rstart/bs; 2295 cend = mat->cmap->rend/bs; 2296 c->M = mat->rmap->N/bs; /* set the global rows and columns and local rows */ 2297 c->N = mat->cmap->N/bs; 2298 c->m = mat->rmap->n/bs; 2299 c->rstart = mat->rmap->rstart/bs; 2300 2301 c->ncolors = nis; 2302 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 2303 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 2304 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 2305 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 2306 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 2307 ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr); 2308 2309 /* Allow access to data structures of local part of matrix */ 2310 if (!baij->colmap) { 2311 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 2312 } 2313 ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2314 ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2315 2316 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 2317 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 2318 2319 for (i=0; i<nis; i++) { 2320 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 2321 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 2322 c->ncolumns[i] = n; 2323 if (n) { 2324 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 2325 ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr); 2326 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 2327 } else { 2328 c->columns[i] = 0; 2329 } 2330 2331 if (ctype == IS_COLORING_GLOBAL){ 2332 /* Determine the total (parallel) number of columns of this color */ 2333 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 2334 ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr); 2335 2336 nn = PetscMPIIntCast(n); 2337 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2338 nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];} 2339 if (!nctot) { 2340 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 2341 } 2342 2343 disp[0] = 0; 2344 for (j=1; j<size; j++) { 2345 disp[j] = disp[j-1] + ncolsonproc[j-1]; 2346 } 2347 2348 /* Get complete list of columns for color on each processor */ 2349 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2350 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2351 ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr); 2352 } else if (ctype == IS_COLORING_GHOSTED){ 2353 /* Determine local number of columns of this color on this process, including ghost points */ 2354 nctot = n; 2355 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2356 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 2357 } else { 2358 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 2359 } 2360 2361 /* 2362 Mark all rows affect by these columns 2363 */ 2364 /* Temporary option to allow for debugging/testing */ 2365 flg = PETSC_FALSE; 2366 ierr = PetscOptionsGetBool(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr); 2367 if (!flg) {/*-----------------------------------------------------------------------------*/ 2368 /* crude, fast version */ 2369 ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr); 2370 /* loop over columns*/ 2371 for (j=0; j<nctot; j++) { 2372 if (ctype == IS_COLORING_GHOSTED) { 2373 col = ltog[cols[j]]; 2374 } else { 2375 col = cols[j]; 2376 } 2377 if (col >= cstart && col < cend) { 2378 /* column is in diagonal block of matrix */ 2379 rows = A_cj + A_ci[col-cstart]; 2380 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2381 } else { 2382 #if defined (PETSC_USE_CTABLE) 2383 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2384 colb --; 2385 #else 2386 colb = baij->colmap[col] - 1; 2387 #endif 2388 if (colb == -1) { 2389 m = 0; 2390 } else { 2391 colb = colb/bs; 2392 rows = B_cj + B_ci[colb]; 2393 m = B_ci[colb+1] - B_ci[colb]; 2394 } 2395 } 2396 /* loop over columns marking them in rowhit */ 2397 for (k=0; k<m; k++) { 2398 rowhit[*rows++] = col + 1; 2399 } 2400 } 2401 2402 /* count the number of hits */ 2403 nrows = 0; 2404 for (j=0; j<M; j++) { 2405 if (rowhit[j]) nrows++; 2406 } 2407 c->nrows[i] = nrows; 2408 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2409 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2410 ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2411 nrows = 0; 2412 for (j=0; j<M; j++) { 2413 if (rowhit[j]) { 2414 c->rows[i][nrows] = j; 2415 c->columnsforrow[i][nrows] = rowhit[j] - 1; 2416 nrows++; 2417 } 2418 } 2419 } else {/*-------------------------------------------------------------------------------*/ 2420 /* slow version, using rowhit as a linked list */ 2421 PetscInt currentcol,fm,mfm; 2422 rowhit[M] = M; 2423 nrows = 0; 2424 /* loop over columns*/ 2425 for (j=0; j<nctot; j++) { 2426 if (ctype == IS_COLORING_GHOSTED) { 2427 col = ltog[cols[j]]; 2428 } else { 2429 col = cols[j]; 2430 } 2431 if (col >= cstart && col < cend) { 2432 /* column is in diagonal block of matrix */ 2433 rows = A_cj + A_ci[col-cstart]; 2434 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2435 } else { 2436 #if defined (PETSC_USE_CTABLE) 2437 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2438 colb --; 2439 #else 2440 colb = baij->colmap[col] - 1; 2441 #endif 2442 if (colb == -1) { 2443 m = 0; 2444 } else { 2445 colb = colb/bs; 2446 rows = B_cj + B_ci[colb]; 2447 m = B_ci[colb+1] - B_ci[colb]; 2448 } 2449 } 2450 2451 /* loop over columns marking them in rowhit */ 2452 fm = M; /* fm points to first entry in linked list */ 2453 for (k=0; k<m; k++) { 2454 currentcol = *rows++; 2455 /* is it already in the list? */ 2456 do { 2457 mfm = fm; 2458 fm = rowhit[fm]; 2459 } while (fm < currentcol); 2460 /* not in list so add it */ 2461 if (fm != currentcol) { 2462 nrows++; 2463 columnsforrow[currentcol] = col; 2464 /* next three lines insert new entry into linked list */ 2465 rowhit[mfm] = currentcol; 2466 rowhit[currentcol] = fm; 2467 fm = currentcol; 2468 /* fm points to present position in list since we know the columns are sorted */ 2469 } else { 2470 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected"); 2471 } 2472 } 2473 } 2474 c->nrows[i] = nrows; 2475 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2476 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2477 ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2478 /* now store the linked list of rows into c->rows[i] */ 2479 nrows = 0; 2480 fm = rowhit[M]; 2481 do { 2482 c->rows[i][nrows] = fm; 2483 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 2484 fm = rowhit[fm]; 2485 } while (fm < M); 2486 } /* ---------------------------------------------------------------------------------------*/ 2487 ierr = PetscFree(cols);CHKERRQ(ierr); 2488 } 2489 2490 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 2491 /* 2492 vscale will contain the "diagonal" on processor scalings followed by the off processor 2493 */ 2494 if (ctype == IS_COLORING_GLOBAL) { 2495 PetscInt *garray; 2496 ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2497 for (i=0; i<baij->B->cmap->n/bs; i++) { 2498 for (j=0; j<bs; j++) { 2499 garray[i*bs+j] = bs*baij->garray[i]+j; 2500 } 2501 } 2502 ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr); 2503 ierr = PetscFree(garray);CHKERRQ(ierr); 2504 CHKMEMQ; 2505 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2506 for (k=0; k<c->ncolors; k++) { 2507 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2508 for (l=0; l<c->nrows[k]; l++) { 2509 col = c->columnsforrow[k][l]; 2510 if (col >= cstart && col < cend) { 2511 /* column is in diagonal block of matrix */ 2512 colb = col - cstart; 2513 } else { 2514 /* column is in "off-processor" part */ 2515 #if defined (PETSC_USE_CTABLE) 2516 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2517 colb --; 2518 #else 2519 colb = baij->colmap[col] - 1; 2520 #endif 2521 colb = colb/bs; 2522 colb += cend - cstart; 2523 } 2524 c->vscaleforrow[k][l] = colb; 2525 } 2526 } 2527 } else if (ctype == IS_COLORING_GHOSTED) { 2528 /* Get gtol mapping */ 2529 PetscInt N = mat->cmap->N, *gtol; 2530 ierr = PetscMalloc((N+1)*sizeof(PetscInt),>ol);CHKERRQ(ierr); 2531 for (i=0; i<N; i++) gtol[i] = -1; 2532 for (i=0; i<map->n; i++) gtol[ltog[i]] = i; 2533 2534 c->vscale = 0; /* will be created in MatFDColoringApply() */ 2535 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2536 for (k=0; k<c->ncolors; k++) { 2537 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2538 for (l=0; l<c->nrows[k]; l++) { 2539 col = c->columnsforrow[k][l]; /* global column index */ 2540 c->vscaleforrow[k][l] = gtol[col]; /* local column index */ 2541 } 2542 } 2543 ierr = PetscFree(gtol);CHKERRQ(ierr); 2544 } 2545 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 2546 2547 ierr = PetscFree(rowhit);CHKERRQ(ierr); 2548 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 2549 ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2550 ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2551 CHKMEMQ; 2552 PetscFunctionReturn(0); 2553 } 2554 2555 #undef __FUNCT__ 2556 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ" 2557 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat) 2558 { 2559 Mat B; 2560 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2561 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2562 Mat_SeqAIJ *b; 2563 PetscErrorCode ierr; 2564 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2565 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2566 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2567 2568 PetscFunctionBegin; 2569 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2570 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2571 2572 /* ---------------------------------------------------------------- 2573 Tell every processor the number of nonzeros per row 2574 */ 2575 ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 2576 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2577 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2578 } 2579 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2580 ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr); 2581 displs = recvcounts + size; 2582 for (i=0; i<size; i++) { 2583 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2584 displs[i] = A->rmap->range[i]/bs; 2585 } 2586 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2587 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2588 #else 2589 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2590 #endif 2591 /* --------------------------------------------------------------- 2592 Create the sequential matrix of the same type as the local block diagonal 2593 */ 2594 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2595 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2596 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2597 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2598 b = (Mat_SeqAIJ *)B->data; 2599 2600 /*-------------------------------------------------------------------- 2601 Copy my part of matrix column indices over 2602 */ 2603 sendcount = ad->nz + bd->nz; 2604 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2605 a_jsendbuf = ad->j; 2606 b_jsendbuf = bd->j; 2607 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2608 cnt = 0; 2609 for (i=0; i<n; i++) { 2610 2611 /* put in lower diagonal portion */ 2612 m = bd->i[i+1] - bd->i[i]; 2613 while (m > 0) { 2614 /* is it above diagonal (in bd (compressed) numbering) */ 2615 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2616 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2617 m--; 2618 } 2619 2620 /* put in diagonal portion */ 2621 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2622 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2623 } 2624 2625 /* put in upper diagonal portion */ 2626 while (m-- > 0) { 2627 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2628 } 2629 } 2630 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt); 2631 2632 /*-------------------------------------------------------------------- 2633 Gather all column indices to all processors 2634 */ 2635 for (i=0; i<size; i++) { 2636 recvcounts[i] = 0; 2637 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2638 recvcounts[i] += lens[j]; 2639 } 2640 } 2641 displs[0] = 0; 2642 for (i=1; i<size; i++) { 2643 displs[i] = displs[i-1] + recvcounts[i-1]; 2644 } 2645 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2646 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2647 #else 2648 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2649 #endif 2650 /*-------------------------------------------------------------------- 2651 Assemble the matrix into useable form (note numerical values not yet set) 2652 */ 2653 /* set the b->ilen (length of each row) values */ 2654 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2655 /* set the b->i indices */ 2656 b->i[0] = 0; 2657 for (i=1; i<=A->rmap->N/bs; i++) { 2658 b->i[i] = b->i[i-1] + lens[i-1]; 2659 } 2660 ierr = PetscFree(lens);CHKERRQ(ierr); 2661 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2662 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2663 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2664 2665 if (A->symmetric){ 2666 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2667 } else if (A->hermitian) { 2668 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2669 } else if (A->structurally_symmetric) { 2670 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2671 } 2672 *newmat = B; 2673 PetscFunctionReturn(0); 2674 } 2675 2676 #undef __FUNCT__ 2677 #define __FUNCT__ "MatSOR_MPIBAIJ" 2678 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2679 { 2680 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2681 PetscErrorCode ierr; 2682 Vec bb1 = 0; 2683 2684 PetscFunctionBegin; 2685 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2686 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2687 } 2688 2689 if (flag == SOR_APPLY_UPPER) { 2690 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2691 PetscFunctionReturn(0); 2692 } 2693 2694 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2695 if (flag & SOR_ZERO_INITIAL_GUESS) { 2696 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2697 its--; 2698 } 2699 2700 while (its--) { 2701 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2702 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2703 2704 /* update rhs: bb1 = bb - B*x */ 2705 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2706 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2707 2708 /* local sweep */ 2709 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2710 } 2711 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 2712 if (flag & SOR_ZERO_INITIAL_GUESS) { 2713 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2714 its--; 2715 } 2716 while (its--) { 2717 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2718 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2719 2720 /* update rhs: bb1 = bb - B*x */ 2721 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2722 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2723 2724 /* local sweep */ 2725 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2726 } 2727 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 2728 if (flag & SOR_ZERO_INITIAL_GUESS) { 2729 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2730 its--; 2731 } 2732 while (its--) { 2733 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2734 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2735 2736 /* update rhs: bb1 = bb - B*x */ 2737 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2738 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2739 2740 /* local sweep */ 2741 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2742 } 2743 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2744 2745 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2746 PetscFunctionReturn(0); 2747 } 2748 2749 extern PetscErrorCode MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2750 2751 2752 /* -------------------------------------------------------------------*/ 2753 static struct _MatOps MatOps_Values = { 2754 MatSetValues_MPIBAIJ, 2755 MatGetRow_MPIBAIJ, 2756 MatRestoreRow_MPIBAIJ, 2757 MatMult_MPIBAIJ, 2758 /* 4*/ MatMultAdd_MPIBAIJ, 2759 MatMultTranspose_MPIBAIJ, 2760 MatMultTransposeAdd_MPIBAIJ, 2761 0, 2762 0, 2763 0, 2764 /*10*/ 0, 2765 0, 2766 0, 2767 MatSOR_MPIBAIJ, 2768 MatTranspose_MPIBAIJ, 2769 /*15*/ MatGetInfo_MPIBAIJ, 2770 MatEqual_MPIBAIJ, 2771 MatGetDiagonal_MPIBAIJ, 2772 MatDiagonalScale_MPIBAIJ, 2773 MatNorm_MPIBAIJ, 2774 /*20*/ MatAssemblyBegin_MPIBAIJ, 2775 MatAssemblyEnd_MPIBAIJ, 2776 MatSetOption_MPIBAIJ, 2777 MatZeroEntries_MPIBAIJ, 2778 /*24*/ MatZeroRows_MPIBAIJ, 2779 0, 2780 0, 2781 0, 2782 0, 2783 /*29*/ MatSetUpPreallocation_MPIBAIJ, 2784 0, 2785 0, 2786 0, 2787 0, 2788 /*34*/ MatDuplicate_MPIBAIJ, 2789 0, 2790 0, 2791 0, 2792 0, 2793 /*39*/ MatAXPY_MPIBAIJ, 2794 MatGetSubMatrices_MPIBAIJ, 2795 MatIncreaseOverlap_MPIBAIJ, 2796 MatGetValues_MPIBAIJ, 2797 MatCopy_MPIBAIJ, 2798 /*44*/ 0, 2799 MatScale_MPIBAIJ, 2800 0, 2801 0, 2802 0, 2803 /*49*/ MatSetBlockSize_MPIBAIJ, 2804 0, 2805 0, 2806 0, 2807 0, 2808 /*54*/ MatFDColoringCreate_MPIBAIJ, 2809 0, 2810 MatSetUnfactored_MPIBAIJ, 2811 MatPermute_MPIBAIJ, 2812 MatSetValuesBlocked_MPIBAIJ, 2813 /*59*/ MatGetSubMatrix_MPIBAIJ, 2814 MatDestroy_MPIBAIJ, 2815 MatView_MPIBAIJ, 2816 0, 2817 0, 2818 /*64*/ 0, 2819 0, 2820 0, 2821 0, 2822 0, 2823 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2824 0, 2825 0, 2826 0, 2827 0, 2828 /*74*/ 0, 2829 MatFDColoringApply_BAIJ, 2830 0, 2831 0, 2832 0, 2833 /*79*/ 0, 2834 0, 2835 0, 2836 0, 2837 MatLoad_MPIBAIJ, 2838 /*84*/ 0, 2839 0, 2840 0, 2841 0, 2842 0, 2843 /*89*/ 0, 2844 0, 2845 0, 2846 0, 2847 0, 2848 /*94*/ 0, 2849 0, 2850 0, 2851 0, 2852 0, 2853 /*99*/ 0, 2854 0, 2855 0, 2856 0, 2857 0, 2858 /*104*/0, 2859 MatRealPart_MPIBAIJ, 2860 MatImaginaryPart_MPIBAIJ, 2861 0, 2862 0, 2863 /*109*/0, 2864 0, 2865 0, 2866 0, 2867 0, 2868 /*114*/MatGetSeqNonzerostructure_MPIBAIJ, 2869 0, 2870 MatGetGhosts_MPIBAIJ, 2871 0, 2872 0, 2873 /*119*/0, 2874 0, 2875 0, 2876 0 2877 }; 2878 2879 EXTERN_C_BEGIN 2880 #undef __FUNCT__ 2881 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2882 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 2883 { 2884 PetscFunctionBegin; 2885 *a = ((Mat_MPIBAIJ *)A->data)->A; 2886 PetscFunctionReturn(0); 2887 } 2888 EXTERN_C_END 2889 2890 EXTERN_C_BEGIN 2891 extern PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2892 EXTERN_C_END 2893 2894 EXTERN_C_BEGIN 2895 #undef __FUNCT__ 2896 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2897 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2898 { 2899 PetscInt m,rstart,cstart,cend; 2900 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2901 const PetscInt *JJ=0; 2902 PetscScalar *values=0; 2903 PetscErrorCode ierr; 2904 2905 PetscFunctionBegin; 2906 2907 if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2908 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2909 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2910 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2911 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2912 m = B->rmap->n/bs; 2913 rstart = B->rmap->rstart/bs; 2914 cstart = B->cmap->rstart/bs; 2915 cend = B->cmap->rend/bs; 2916 2917 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2918 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 2919 for (i=0; i<m; i++) { 2920 nz = ii[i+1] - ii[i]; 2921 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2922 nz_max = PetscMax(nz_max,nz); 2923 JJ = jj + ii[i]; 2924 for (j=0; j<nz; j++) { 2925 if (*JJ >= cstart) break; 2926 JJ++; 2927 } 2928 d = 0; 2929 for (; j<nz; j++) { 2930 if (*JJ++ >= cend) break; 2931 d++; 2932 } 2933 d_nnz[i] = d; 2934 o_nnz[i] = nz - d; 2935 } 2936 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2937 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2938 2939 values = (PetscScalar*)V; 2940 if (!values) { 2941 ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2942 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2943 } 2944 for (i=0; i<m; i++) { 2945 PetscInt row = i + rstart; 2946 PetscInt ncols = ii[i+1] - ii[i]; 2947 const PetscInt *icols = jj + ii[i]; 2948 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2949 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2950 } 2951 2952 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2953 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2954 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2955 2956 PetscFunctionReturn(0); 2957 } 2958 EXTERN_C_END 2959 2960 #undef __FUNCT__ 2961 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2962 /*@C 2963 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2964 (the default parallel PETSc format). 2965 2966 Collective on MPI_Comm 2967 2968 Input Parameters: 2969 + A - the matrix 2970 . bs - the block size 2971 . i - the indices into j for the start of each local row (starts with zero) 2972 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2973 - v - optional values in the matrix 2974 2975 Level: developer 2976 2977 .keywords: matrix, aij, compressed row, sparse, parallel 2978 2979 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2980 @*/ 2981 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2982 { 2983 PetscErrorCode ierr; 2984 2985 PetscFunctionBegin; 2986 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2987 PetscFunctionReturn(0); 2988 } 2989 2990 EXTERN_C_BEGIN 2991 #undef __FUNCT__ 2992 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2993 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2994 { 2995 Mat_MPIBAIJ *b; 2996 PetscErrorCode ierr; 2997 PetscInt i, newbs = PetscAbs(bs); 2998 2999 PetscFunctionBegin; 3000 if (bs < 0) { 3001 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr); 3002 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 3003 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3004 bs = PetscAbs(bs); 3005 } 3006 if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz"); 3007 bs = newbs; 3008 3009 3010 if (bs < 1) SETERRQ(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 3011 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3012 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3013 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3014 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3015 3016 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3017 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3018 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3019 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3020 3021 if (d_nnz) { 3022 for (i=0; i<B->rmap->n/bs; i++) { 3023 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 3024 } 3025 } 3026 if (o_nnz) { 3027 for (i=0; i<B->rmap->n/bs; i++) { 3028 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 3029 } 3030 } 3031 3032 b = (Mat_MPIBAIJ*)B->data; 3033 b->bs2 = bs*bs; 3034 b->mbs = B->rmap->n/bs; 3035 b->nbs = B->cmap->n/bs; 3036 b->Mbs = B->rmap->N/bs; 3037 b->Nbs = B->cmap->N/bs; 3038 3039 for (i=0; i<=b->size; i++) { 3040 b->rangebs[i] = B->rmap->range[i]/bs; 3041 } 3042 b->rstartbs = B->rmap->rstart/bs; 3043 b->rendbs = B->rmap->rend/bs; 3044 b->cstartbs = B->cmap->rstart/bs; 3045 b->cendbs = B->cmap->rend/bs; 3046 3047 if (!B->preallocated) { 3048 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3049 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3050 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 3051 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3052 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3053 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3054 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 3055 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3056 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 3057 } 3058 3059 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3060 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 3061 B->preallocated = PETSC_TRUE; 3062 PetscFunctionReturn(0); 3063 } 3064 EXTERN_C_END 3065 3066 EXTERN_C_BEGIN 3067 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 3068 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 3069 EXTERN_C_END 3070 3071 3072 EXTERN_C_BEGIN 3073 #undef __FUNCT__ 3074 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 3075 PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj) 3076 { 3077 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 3078 PetscErrorCode ierr; 3079 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 3080 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 3081 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 3082 3083 PetscFunctionBegin; 3084 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr); 3085 ii[0] = 0; 3086 CHKMEMQ; 3087 for (i=0; i<M; i++) { 3088 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 3089 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 3090 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 3091 /* remove one from count of matrix has diagonal */ 3092 for (j=id[i]; j<id[i+1]; j++) { 3093 if (jd[j] == i) {ii[i+1]--;break;} 3094 } 3095 CHKMEMQ; 3096 } 3097 ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3098 cnt = 0; 3099 for (i=0; i<M; i++) { 3100 for (j=io[i]; j<io[i+1]; j++) { 3101 if (garray[jo[j]] > rstart) break; 3102 jj[cnt++] = garray[jo[j]]; 3103 CHKMEMQ; 3104 } 3105 for (k=id[i]; k<id[i+1]; k++) { 3106 if (jd[k] != i) { 3107 jj[cnt++] = rstart + jd[k]; 3108 CHKMEMQ; 3109 } 3110 } 3111 for (;j<io[i+1]; j++) { 3112 jj[cnt++] = garray[jo[j]]; 3113 CHKMEMQ; 3114 } 3115 } 3116 ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr); 3117 PetscFunctionReturn(0); 3118 } 3119 EXTERN_C_END 3120 3121 #include <../src/mat/impls/aij/mpi/mpiaij.h> 3122 EXTERN_C_BEGIN 3123 PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*); 3124 EXTERN_C_END 3125 3126 EXTERN_C_BEGIN 3127 #undef __FUNCT__ 3128 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ" 3129 PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,const MatType newtype,MatReuse reuse,Mat *newmat) 3130 { 3131 PetscErrorCode ierr; 3132 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3133 Mat B; 3134 Mat_MPIAIJ *b; 3135 3136 PetscFunctionBegin; 3137 if (!A->assembled) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Matrix must be assembled"); 3138 3139 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 3140 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3141 ierr = MatSetType(B,newtype);CHKERRQ(ierr); 3142 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 3143 b = (Mat_MPIAIJ*) B->data; 3144 3145 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 3146 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 3147 ierr = DisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 3148 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 3149 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 3150 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3151 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3152 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3153 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3154 if (reuse == MAT_REUSE_MATRIX) { 3155 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 3156 } else { 3157 *newmat = B; 3158 } 3159 PetscFunctionReturn(0); 3160 } 3161 EXTERN_C_END 3162 3163 EXTERN_C_BEGIN 3164 #if defined(PETSC_HAVE_MUMPS) 3165 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3166 #endif 3167 EXTERN_C_END 3168 3169 /*MC 3170 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 3171 3172 Options Database Keys: 3173 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 3174 . -mat_block_size <bs> - set the blocksize used to store the matrix 3175 - -mat_use_hash_table <fact> 3176 3177 Level: beginner 3178 3179 .seealso: MatCreateMPIBAIJ 3180 M*/ 3181 3182 EXTERN_C_BEGIN 3183 extern PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,const MatType,MatReuse,Mat*); 3184 EXTERN_C_END 3185 3186 EXTERN_C_BEGIN 3187 #undef __FUNCT__ 3188 #define __FUNCT__ "MatCreate_MPIBAIJ" 3189 PetscErrorCode MatCreate_MPIBAIJ(Mat B) 3190 { 3191 Mat_MPIBAIJ *b; 3192 PetscErrorCode ierr; 3193 PetscBool flg; 3194 3195 PetscFunctionBegin; 3196 ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr); 3197 B->data = (void*)b; 3198 3199 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3200 B->assembled = PETSC_FALSE; 3201 3202 B->insertmode = NOT_SET_VALUES; 3203 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 3204 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 3205 3206 /* build local table of row and column ownerships */ 3207 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 3208 3209 /* build cache for off array entries formed */ 3210 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 3211 b->donotstash = PETSC_FALSE; 3212 b->colmap = PETSC_NULL; 3213 b->garray = PETSC_NULL; 3214 b->roworiented = PETSC_TRUE; 3215 3216 /* stuff used in block assembly */ 3217 b->barray = 0; 3218 3219 /* stuff used for matrix vector multiply */ 3220 b->lvec = 0; 3221 b->Mvctx = 0; 3222 3223 /* stuff for MatGetRow() */ 3224 b->rowindices = 0; 3225 b->rowvalues = 0; 3226 b->getrowactive = PETSC_FALSE; 3227 3228 /* hash table stuff */ 3229 b->ht = 0; 3230 b->hd = 0; 3231 b->ht_size = 0; 3232 b->ht_flag = PETSC_FALSE; 3233 b->ht_fact = 0; 3234 b->ht_total_ct = 0; 3235 b->ht_insert_ct = 0; 3236 3237 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 3238 b->ijonly = PETSC_FALSE; 3239 3240 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 3241 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 3242 if (flg) { 3243 PetscReal fact = 1.39; 3244 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 3245 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 3246 if (fact <= 1.0) fact = 1.39; 3247 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 3248 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 3249 } 3250 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3251 3252 #if defined(PETSC_HAVE_MUMPS) 3253 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr); 3254 #endif 3255 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C", 3256 "MatConvert_MPIBAIJ_MPIAdj", 3257 MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3258 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiaij_C", 3259 "MatConvert_MPIBAIJ_MPIAIJ", 3260 MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 3261 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C", 3262 "MatConvert_MPIBAIJ_MPISBAIJ", 3263 MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr); 3264 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3265 "MatStoreValues_MPIBAIJ", 3266 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3267 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3268 "MatRetrieveValues_MPIBAIJ", 3269 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3270 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3271 "MatGetDiagonalBlock_MPIBAIJ", 3272 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3273 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 3274 "MatMPIBAIJSetPreallocation_MPIBAIJ", 3275 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3276 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 3277 "MatMPIBAIJSetPreallocationCSR_MPIBAIJ", 3278 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3279 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3280 "MatDiagonalScaleLocal_MPIBAIJ", 3281 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3282 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 3283 "MatSetHashTableFactor_MPIBAIJ", 3284 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3285 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C", 3286 "MatConvert_MPIBAIJ_MPIBSTRM", 3287 MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr); 3288 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3289 PetscFunctionReturn(0); 3290 } 3291 EXTERN_C_END 3292 3293 /*MC 3294 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3295 3296 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3297 and MATMPIBAIJ otherwise. 3298 3299 Options Database Keys: 3300 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3301 3302 Level: beginner 3303 3304 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3305 M*/ 3306 3307 EXTERN_C_BEGIN 3308 #undef __FUNCT__ 3309 #define __FUNCT__ "MatCreate_BAIJ" 3310 PetscErrorCode MatCreate_BAIJ(Mat A) 3311 { 3312 PetscErrorCode ierr; 3313 PetscMPIInt size; 3314 3315 PetscFunctionBegin; 3316 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 3317 if (size == 1) { 3318 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 3319 } else { 3320 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 3321 } 3322 PetscFunctionReturn(0); 3323 } 3324 EXTERN_C_END 3325 3326 #undef __FUNCT__ 3327 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 3328 /*@C 3329 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3330 (block compressed row). For good matrix assembly performance 3331 the user should preallocate the matrix storage by setting the parameters 3332 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3333 performance can be increased by more than a factor of 50. 3334 3335 Collective on Mat 3336 3337 Input Parameters: 3338 + A - the matrix 3339 . bs - size of blockk 3340 . d_nz - number of block nonzeros per block row in diagonal portion of local 3341 submatrix (same for all local rows) 3342 . d_nnz - array containing the number of block nonzeros in the various block rows 3343 of the in diagonal portion of the local (possibly different for each block 3344 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 3345 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 3346 submatrix (same for all local rows). 3347 - o_nnz - array containing the number of nonzeros in the various block rows of the 3348 off-diagonal portion of the local submatrix (possibly different for 3349 each block row) or PETSC_NULL. 3350 3351 If the *_nnz parameter is given then the *_nz parameter is ignored 3352 3353 Options Database Keys: 3354 + -mat_block_size - size of the blocks to use 3355 - -mat_use_hash_table <fact> 3356 3357 Notes: 3358 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3359 than it must be used on all processors that share the object for that argument. 3360 3361 Storage Information: 3362 For a square global matrix we define each processor's diagonal portion 3363 to be its local rows and the corresponding columns (a square submatrix); 3364 each processor's off-diagonal portion encompasses the remainder of the 3365 local matrix (a rectangular submatrix). 3366 3367 The user can specify preallocated storage for the diagonal part of 3368 the local submatrix with either d_nz or d_nnz (not both). Set 3369 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3370 memory allocation. Likewise, specify preallocated storage for the 3371 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3372 3373 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3374 the figure below we depict these three local rows and all columns (0-11). 3375 3376 .vb 3377 0 1 2 3 4 5 6 7 8 9 10 11 3378 ------------------- 3379 row 3 | o o o d d d o o o o o o 3380 row 4 | o o o d d d o o o o o o 3381 row 5 | o o o d d d o o o o o o 3382 ------------------- 3383 .ve 3384 3385 Thus, any entries in the d locations are stored in the d (diagonal) 3386 submatrix, and any entries in the o locations are stored in the 3387 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3388 stored simply in the MATSEQBAIJ format for compressed row storage. 3389 3390 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3391 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3392 In general, for PDE problems in which most nonzeros are near the diagonal, 3393 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3394 or you will get TERRIBLE performance; see the users' manual chapter on 3395 matrices. 3396 3397 You can call MatGetInfo() to get information on how effective the preallocation was; 3398 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3399 You can also run with the option -info and look for messages with the string 3400 malloc in them to see if additional memory allocation was needed. 3401 3402 Level: intermediate 3403 3404 .keywords: matrix, block, aij, compressed row, sparse, parallel 3405 3406 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR() 3407 @*/ 3408 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3409 { 3410 PetscErrorCode ierr; 3411 3412 PetscFunctionBegin; 3413 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3414 PetscFunctionReturn(0); 3415 } 3416 3417 #undef __FUNCT__ 3418 #define __FUNCT__ "MatCreateMPIBAIJ" 3419 /*@C 3420 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 3421 (block compressed row). For good matrix assembly performance 3422 the user should preallocate the matrix storage by setting the parameters 3423 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3424 performance can be increased by more than a factor of 50. 3425 3426 Collective on MPI_Comm 3427 3428 Input Parameters: 3429 + comm - MPI communicator 3430 . bs - size of blockk 3431 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3432 This value should be the same as the local size used in creating the 3433 y vector for the matrix-vector product y = Ax. 3434 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 3435 This value should be the same as the local size used in creating the 3436 x vector for the matrix-vector product y = Ax. 3437 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3438 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3439 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3440 submatrix (same for all local rows) 3441 . d_nnz - array containing the number of nonzero blocks in the various block rows 3442 of the in diagonal portion of the local (possibly different for each block 3443 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 3444 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3445 submatrix (same for all local rows). 3446 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3447 off-diagonal portion of the local submatrix (possibly different for 3448 each block row) or PETSC_NULL. 3449 3450 Output Parameter: 3451 . A - the matrix 3452 3453 Options Database Keys: 3454 + -mat_block_size - size of the blocks to use 3455 - -mat_use_hash_table <fact> 3456 3457 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3458 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3459 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3460 3461 Notes: 3462 If the *_nnz parameter is given then the *_nz parameter is ignored 3463 3464 A nonzero block is any block that as 1 or more nonzeros in it 3465 3466 The user MUST specify either the local or global matrix dimensions 3467 (possibly both). 3468 3469 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3470 than it must be used on all processors that share the object for that argument. 3471 3472 Storage Information: 3473 For a square global matrix we define each processor's diagonal portion 3474 to be its local rows and the corresponding columns (a square submatrix); 3475 each processor's off-diagonal portion encompasses the remainder of the 3476 local matrix (a rectangular submatrix). 3477 3478 The user can specify preallocated storage for the diagonal part of 3479 the local submatrix with either d_nz or d_nnz (not both). Set 3480 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3481 memory allocation. Likewise, specify preallocated storage for the 3482 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3483 3484 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3485 the figure below we depict these three local rows and all columns (0-11). 3486 3487 .vb 3488 0 1 2 3 4 5 6 7 8 9 10 11 3489 ------------------- 3490 row 3 | o o o d d d o o o o o o 3491 row 4 | o o o d d d o o o o o o 3492 row 5 | o o o d d d o o o o o o 3493 ------------------- 3494 .ve 3495 3496 Thus, any entries in the d locations are stored in the d (diagonal) 3497 submatrix, and any entries in the o locations are stored in the 3498 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3499 stored simply in the MATSEQBAIJ format for compressed row storage. 3500 3501 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3502 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3503 In general, for PDE problems in which most nonzeros are near the diagonal, 3504 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3505 or you will get TERRIBLE performance; see the users' manual chapter on 3506 matrices. 3507 3508 Level: intermediate 3509 3510 .keywords: matrix, block, aij, compressed row, sparse, parallel 3511 3512 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3513 @*/ 3514 PetscErrorCode MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3515 { 3516 PetscErrorCode ierr; 3517 PetscMPIInt size; 3518 3519 PetscFunctionBegin; 3520 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3521 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3522 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3523 if (size > 1) { 3524 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3525 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3526 } else { 3527 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3528 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3529 } 3530 PetscFunctionReturn(0); 3531 } 3532 3533 #undef __FUNCT__ 3534 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3535 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3536 { 3537 Mat mat; 3538 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3539 PetscErrorCode ierr; 3540 PetscInt len=0; 3541 3542 PetscFunctionBegin; 3543 *newmat = 0; 3544 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3545 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3546 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3547 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3548 3549 mat->factortype = matin->factortype; 3550 mat->preallocated = PETSC_TRUE; 3551 mat->assembled = PETSC_TRUE; 3552 mat->insertmode = NOT_SET_VALUES; 3553 3554 a = (Mat_MPIBAIJ*)mat->data; 3555 mat->rmap->bs = matin->rmap->bs; 3556 a->bs2 = oldmat->bs2; 3557 a->mbs = oldmat->mbs; 3558 a->nbs = oldmat->nbs; 3559 a->Mbs = oldmat->Mbs; 3560 a->Nbs = oldmat->Nbs; 3561 3562 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3563 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3564 3565 a->size = oldmat->size; 3566 a->rank = oldmat->rank; 3567 a->donotstash = oldmat->donotstash; 3568 a->roworiented = oldmat->roworiented; 3569 a->rowindices = 0; 3570 a->rowvalues = 0; 3571 a->getrowactive = PETSC_FALSE; 3572 a->barray = 0; 3573 a->rstartbs = oldmat->rstartbs; 3574 a->rendbs = oldmat->rendbs; 3575 a->cstartbs = oldmat->cstartbs; 3576 a->cendbs = oldmat->cendbs; 3577 3578 /* hash table stuff */ 3579 a->ht = 0; 3580 a->hd = 0; 3581 a->ht_size = 0; 3582 a->ht_flag = oldmat->ht_flag; 3583 a->ht_fact = oldmat->ht_fact; 3584 a->ht_total_ct = 0; 3585 a->ht_insert_ct = 0; 3586 3587 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3588 if (oldmat->colmap) { 3589 #if defined (PETSC_USE_CTABLE) 3590 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3591 #else 3592 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3593 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3594 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3595 #endif 3596 } else a->colmap = 0; 3597 3598 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3599 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3600 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3601 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3602 } else a->garray = 0; 3603 3604 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3605 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3606 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3607 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3608 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3609 3610 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3611 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3612 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3613 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3614 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3615 *newmat = mat; 3616 3617 PetscFunctionReturn(0); 3618 } 3619 3620 #undef __FUNCT__ 3621 #define __FUNCT__ "MatLoad_MPIBAIJ" 3622 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer) 3623 { 3624 PetscErrorCode ierr; 3625 int fd; 3626 PetscInt i,nz,j,rstart,rend; 3627 PetscScalar *vals,*buf; 3628 MPI_Comm comm = ((PetscObject)viewer)->comm; 3629 MPI_Status status; 3630 PetscMPIInt rank,size,maxnz; 3631 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3632 PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 3633 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 3634 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3635 PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 3636 PetscInt dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols; 3637 3638 PetscFunctionBegin; 3639 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3640 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3641 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3642 3643 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3644 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3645 if (!rank) { 3646 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3647 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3648 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3649 } 3650 3651 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 3652 3653 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3654 M = header[1]; N = header[2]; 3655 3656 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3657 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 3658 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 3659 3660 /* If global sizes are set, check if they are consistent with that given in the file */ 3661 if (sizesset) { 3662 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 3663 } 3664 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3665 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3666 3667 if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices"); 3668 3669 /* 3670 This code adds extra rows to make sure the number of rows is 3671 divisible by the blocksize 3672 */ 3673 Mbs = M/bs; 3674 extra_rows = bs - M + bs*Mbs; 3675 if (extra_rows == bs) extra_rows = 0; 3676 else Mbs++; 3677 if (extra_rows && !rank) { 3678 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3679 } 3680 3681 /* determine ownership of all rows */ 3682 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 3683 mbs = Mbs/size + ((Mbs % size) > rank); 3684 m = mbs*bs; 3685 } else { /* User set */ 3686 m = newmat->rmap->n; 3687 mbs = m/bs; 3688 } 3689 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 3690 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3691 3692 /* process 0 needs enough room for process with most rows */ 3693 if (!rank) { 3694 mmax = rowners[1]; 3695 for (i=2; i<size; i++) { 3696 mmax = PetscMax(mmax,rowners[i]); 3697 } 3698 mmax*=bs; 3699 } else mmax = m; 3700 3701 rowners[0] = 0; 3702 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3703 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3704 rstart = rowners[rank]; 3705 rend = rowners[rank+1]; 3706 3707 /* distribute row lengths to all processors */ 3708 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 3709 if (!rank) { 3710 mend = m; 3711 if (size == 1) mend = mend - extra_rows; 3712 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3713 for (j=mend; j<m; j++) locrowlens[j] = 1; 3714 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3715 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3716 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3717 for (j=0; j<m; j++) { 3718 procsnz[0] += locrowlens[j]; 3719 } 3720 for (i=1; i<size; i++) { 3721 mend = browners[i+1] - browners[i]; 3722 if (i == size-1) mend = mend - extra_rows; 3723 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3724 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3725 /* calculate the number of nonzeros on each processor */ 3726 for (j=0; j<browners[i+1]-browners[i]; j++) { 3727 procsnz[i] += rowlengths[j]; 3728 } 3729 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3730 } 3731 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3732 } else { 3733 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3734 } 3735 3736 if (!rank) { 3737 /* determine max buffer needed and allocate it */ 3738 maxnz = procsnz[0]; 3739 for (i=1; i<size; i++) { 3740 maxnz = PetscMax(maxnz,procsnz[i]); 3741 } 3742 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3743 3744 /* read in my part of the matrix column indices */ 3745 nz = procsnz[0]; 3746 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3747 mycols = ibuf; 3748 if (size == 1) nz -= extra_rows; 3749 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3750 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 3751 3752 /* read in every ones (except the last) and ship off */ 3753 for (i=1; i<size-1; i++) { 3754 nz = procsnz[i]; 3755 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3756 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3757 } 3758 /* read in the stuff for the last proc */ 3759 if (size != 1) { 3760 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 3761 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3762 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3763 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3764 } 3765 ierr = PetscFree(cols);CHKERRQ(ierr); 3766 } else { 3767 /* determine buffer space needed for message */ 3768 nz = 0; 3769 for (i=0; i<m; i++) { 3770 nz += locrowlens[i]; 3771 } 3772 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3773 mycols = ibuf; 3774 /* receive message of column indices*/ 3775 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3776 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3777 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3778 } 3779 3780 /* loop over local rows, determining number of off diagonal entries */ 3781 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 3782 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 3783 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3784 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3785 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3786 rowcount = 0; nzcount = 0; 3787 for (i=0; i<mbs; i++) { 3788 dcount = 0; 3789 odcount = 0; 3790 for (j=0; j<bs; j++) { 3791 kmax = locrowlens[rowcount]; 3792 for (k=0; k<kmax; k++) { 3793 tmp = mycols[nzcount++]/bs; 3794 if (!mask[tmp]) { 3795 mask[tmp] = 1; 3796 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3797 else masked1[dcount++] = tmp; 3798 } 3799 } 3800 rowcount++; 3801 } 3802 3803 dlens[i] = dcount; 3804 odlens[i] = odcount; 3805 3806 /* zero out the mask elements we set */ 3807 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3808 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 3809 } 3810 3811 3812 if (!sizesset) { 3813 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3814 } 3815 ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3816 3817 if (!rank) { 3818 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3819 /* read in my part of the matrix numerical values */ 3820 nz = procsnz[0]; 3821 vals = buf; 3822 mycols = ibuf; 3823 if (size == 1) nz -= extra_rows; 3824 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3825 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 3826 3827 /* insert into matrix */ 3828 jj = rstart*bs; 3829 for (i=0; i<m; i++) { 3830 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3831 mycols += locrowlens[i]; 3832 vals += locrowlens[i]; 3833 jj++; 3834 } 3835 /* read in other processors (except the last one) and ship out */ 3836 for (i=1; i<size-1; i++) { 3837 nz = procsnz[i]; 3838 vals = buf; 3839 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3840 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3841 } 3842 /* the last proc */ 3843 if (size != 1){ 3844 nz = procsnz[i] - extra_rows; 3845 vals = buf; 3846 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3847 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3848 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3849 } 3850 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3851 } else { 3852 /* receive numeric values */ 3853 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3854 3855 /* receive message of values*/ 3856 vals = buf; 3857 mycols = ibuf; 3858 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 3859 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 3860 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3861 3862 /* insert into matrix */ 3863 jj = rstart*bs; 3864 for (i=0; i<m; i++) { 3865 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3866 mycols += locrowlens[i]; 3867 vals += locrowlens[i]; 3868 jj++; 3869 } 3870 } 3871 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3872 ierr = PetscFree(buf);CHKERRQ(ierr); 3873 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3874 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 3875 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 3876 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 3877 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3878 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3879 3880 PetscFunctionReturn(0); 3881 } 3882 3883 #undef __FUNCT__ 3884 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 3885 /*@ 3886 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3887 3888 Input Parameters: 3889 . mat - the matrix 3890 . fact - factor 3891 3892 Not Collective, each process can use a different factor 3893 3894 Level: advanced 3895 3896 Notes: 3897 This can also be set by the command line option: -mat_use_hash_table <fact> 3898 3899 .keywords: matrix, hashtable, factor, HT 3900 3901 .seealso: MatSetOption() 3902 @*/ 3903 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3904 { 3905 PetscErrorCode ierr; 3906 3907 PetscFunctionBegin; 3908 ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr); 3909 PetscFunctionReturn(0); 3910 } 3911 3912 EXTERN_C_BEGIN 3913 #undef __FUNCT__ 3914 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 3915 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3916 { 3917 Mat_MPIBAIJ *baij; 3918 3919 PetscFunctionBegin; 3920 baij = (Mat_MPIBAIJ*)mat->data; 3921 baij->ht_fact = fact; 3922 PetscFunctionReturn(0); 3923 } 3924 EXTERN_C_END 3925 3926 #undef __FUNCT__ 3927 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 3928 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3929 { 3930 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3931 PetscFunctionBegin; 3932 *Ad = a->A; 3933 *Ao = a->B; 3934 *colmap = a->garray; 3935 PetscFunctionReturn(0); 3936 } 3937 3938 /* 3939 Special version for direct calls from Fortran (to eliminate two function call overheads 3940 */ 3941 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3942 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3943 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3944 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3945 #endif 3946 3947 #undef __FUNCT__ 3948 #define __FUNCT__ "matmpibiajsetvaluesblocked" 3949 /*@C 3950 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 3951 3952 Collective on Mat 3953 3954 Input Parameters: 3955 + mat - the matrix 3956 . min - number of input rows 3957 . im - input rows 3958 . nin - number of input columns 3959 . in - input columns 3960 . v - numerical values input 3961 - addvin - INSERT_VALUES or ADD_VALUES 3962 3963 Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse. 3964 3965 Level: advanced 3966 3967 .seealso: MatSetValuesBlocked() 3968 @*/ 3969 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3970 { 3971 /* convert input arguments to C version */ 3972 Mat mat = *matin; 3973 PetscInt m = *min, n = *nin; 3974 InsertMode addv = *addvin; 3975 3976 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3977 const MatScalar *value; 3978 MatScalar *barray=baij->barray; 3979 PetscBool roworiented = baij->roworiented; 3980 PetscErrorCode ierr; 3981 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3982 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3983 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3984 3985 PetscFunctionBegin; 3986 /* tasks normally handled by MatSetValuesBlocked() */ 3987 if (mat->insertmode == NOT_SET_VALUES) { 3988 mat->insertmode = addv; 3989 } 3990 #if defined(PETSC_USE_DEBUG) 3991 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3992 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3993 #endif 3994 if (mat->assembled) { 3995 mat->was_assembled = PETSC_TRUE; 3996 mat->assembled = PETSC_FALSE; 3997 } 3998 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3999 4000 4001 if(!barray) { 4002 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 4003 baij->barray = barray; 4004 } 4005 4006 if (roworiented) { 4007 stepval = (n-1)*bs; 4008 } else { 4009 stepval = (m-1)*bs; 4010 } 4011 for (i=0; i<m; i++) { 4012 if (im[i] < 0) continue; 4013 #if defined(PETSC_USE_DEBUG) 4014 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 4015 #endif 4016 if (im[i] >= rstart && im[i] < rend) { 4017 row = im[i] - rstart; 4018 for (j=0; j<n; j++) { 4019 /* If NumCol = 1 then a copy is not required */ 4020 if ((roworiented) && (n == 1)) { 4021 barray = (MatScalar*)v + i*bs2; 4022 } else if((!roworiented) && (m == 1)) { 4023 barray = (MatScalar*)v + j*bs2; 4024 } else { /* Here a copy is required */ 4025 if (roworiented) { 4026 value = v + i*(stepval+bs)*bs + j*bs; 4027 } else { 4028 value = v + j*(stepval+bs)*bs + i*bs; 4029 } 4030 for (ii=0; ii<bs; ii++,value+=stepval) { 4031 for (jj=0; jj<bs; jj++) { 4032 *barray++ = *value++; 4033 } 4034 } 4035 barray -=bs2; 4036 } 4037 4038 if (in[j] >= cstart && in[j] < cend){ 4039 col = in[j] - cstart; 4040 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 4041 } 4042 else if (in[j] < 0) continue; 4043 #if defined(PETSC_USE_DEBUG) 4044 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 4045 #endif 4046 else { 4047 if (mat->was_assembled) { 4048 if (!baij->colmap) { 4049 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 4050 } 4051 4052 #if defined(PETSC_USE_DEBUG) 4053 #if defined (PETSC_USE_CTABLE) 4054 { PetscInt data; 4055 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 4056 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4057 } 4058 #else 4059 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4060 #endif 4061 #endif 4062 #if defined (PETSC_USE_CTABLE) 4063 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 4064 col = (col - 1)/bs; 4065 #else 4066 col = (baij->colmap[in[j]] - 1)/bs; 4067 #endif 4068 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 4069 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 4070 col = in[j]; 4071 } 4072 } 4073 else col = in[j]; 4074 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 4075 } 4076 } 4077 } else { 4078 if (!baij->donotstash) { 4079 if (roworiented) { 4080 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 4081 } else { 4082 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 4083 } 4084 } 4085 } 4086 } 4087 4088 /* task normally handled by MatSetValuesBlocked() */ 4089 ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 4090 PetscFunctionReturn(0); 4091 } 4092 4093 #undef __FUNCT__ 4094 #define __FUNCT__ "MatCreateMPIBAIJWithArrays" 4095 /*@ 4096 MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard 4097 CSR format the local rows. 4098 4099 Collective on MPI_Comm 4100 4101 Input Parameters: 4102 + comm - MPI communicator 4103 . bs - the block size, only a block size of 1 is supported 4104 . m - number of local rows (Cannot be PETSC_DECIDE) 4105 . n - This value should be the same as the local size used in creating the 4106 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4107 calculated if N is given) For square matrices n is almost always m. 4108 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4109 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4110 . i - row indices 4111 . j - column indices 4112 - a - matrix values 4113 4114 Output Parameter: 4115 . mat - the matrix 4116 4117 Level: intermediate 4118 4119 Notes: 4120 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4121 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4122 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4123 4124 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4125 4126 .keywords: matrix, aij, compressed row, sparse, parallel 4127 4128 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4129 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 4130 @*/ 4131 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4132 { 4133 PetscErrorCode ierr; 4134 4135 4136 PetscFunctionBegin; 4137 if (i[0]) { 4138 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4139 } 4140 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4141 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4142 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4143 ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 4144 ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 4145 PetscFunctionReturn(0); 4146 } 4147