1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <petscblaslapack.h> 4 5 extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat); 6 extern PetscErrorCode DisAssemble_MPIBAIJ(Mat); 7 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []); 8 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 9 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 10 extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 11 extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 12 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec); 13 14 #undef __FUNCT__ 15 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ" 16 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[]) 17 { 18 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 19 PetscErrorCode ierr; 20 PetscInt i,*idxb = 0; 21 PetscScalar *va,*vb; 22 Vec vtmp; 23 24 PetscFunctionBegin; 25 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 26 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 27 if (idx) { 28 for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;} 29 } 30 31 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 32 if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);} 33 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 34 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 35 36 for (i=0; i<A->rmap->n; i++){ 37 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);} 38 } 39 40 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 41 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 42 ierr = PetscFree(idxb);CHKERRQ(ierr); 43 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 44 PetscFunctionReturn(0); 45 } 46 47 EXTERN_C_BEGIN 48 #undef __FUNCT__ 49 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 50 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 51 { 52 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 53 PetscErrorCode ierr; 54 55 PetscFunctionBegin; 56 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 57 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 58 PetscFunctionReturn(0); 59 } 60 EXTERN_C_END 61 62 EXTERN_C_BEGIN 63 #undef __FUNCT__ 64 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 65 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 66 { 67 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 68 PetscErrorCode ierr; 69 70 PetscFunctionBegin; 71 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 72 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 73 PetscFunctionReturn(0); 74 } 75 EXTERN_C_END 76 77 /* 78 Local utility routine that creates a mapping from the global column 79 number to the local number in the off-diagonal part of the local 80 storage of the matrix. This is done in a non scalable way since the 81 length of colmap equals the global matrix length. 82 */ 83 #undef __FUNCT__ 84 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 85 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat) 86 { 87 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 88 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 89 PetscErrorCode ierr; 90 PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 91 92 PetscFunctionBegin; 93 #if defined (PETSC_USE_CTABLE) 94 ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 95 for (i=0; i<nbs; i++){ 96 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 97 } 98 #else 99 ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr); 100 ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 101 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 102 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 103 #endif 104 PetscFunctionReturn(0); 105 } 106 107 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 108 { \ 109 \ 110 brow = row/bs; \ 111 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 112 rmax = aimax[brow]; nrow = ailen[brow]; \ 113 bcol = col/bs; \ 114 ridx = row % bs; cidx = col % bs; \ 115 low = 0; high = nrow; \ 116 while (high-low > 3) { \ 117 t = (low+high)/2; \ 118 if (rp[t] > bcol) high = t; \ 119 else low = t; \ 120 } \ 121 for (_i=low; _i<high; _i++) { \ 122 if (rp[_i] > bcol) break; \ 123 if (rp[_i] == bcol) { \ 124 bap = ap + bs2*_i + bs*cidx + ridx; \ 125 if (addv == ADD_VALUES) *bap += value; \ 126 else *bap = value; \ 127 goto a_noinsert; \ 128 } \ 129 } \ 130 if (a->nonew == 1) goto a_noinsert; \ 131 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 132 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 133 N = nrow++ - 1; \ 134 /* shift up all the later entries in this row */ \ 135 for (ii=N; ii>=_i; ii--) { \ 136 rp[ii+1] = rp[ii]; \ 137 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 138 } \ 139 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 140 rp[_i] = bcol; \ 141 ap[bs2*_i + bs*cidx + ridx] = value; \ 142 a_noinsert:; \ 143 ailen[brow] = nrow; \ 144 } 145 146 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 147 { \ 148 brow = row/bs; \ 149 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 150 rmax = bimax[brow]; nrow = bilen[brow]; \ 151 bcol = col/bs; \ 152 ridx = row % bs; cidx = col % bs; \ 153 low = 0; high = nrow; \ 154 while (high-low > 3) { \ 155 t = (low+high)/2; \ 156 if (rp[t] > bcol) high = t; \ 157 else low = t; \ 158 } \ 159 for (_i=low; _i<high; _i++) { \ 160 if (rp[_i] > bcol) break; \ 161 if (rp[_i] == bcol) { \ 162 bap = ap + bs2*_i + bs*cidx + ridx; \ 163 if (addv == ADD_VALUES) *bap += value; \ 164 else *bap = value; \ 165 goto b_noinsert; \ 166 } \ 167 } \ 168 if (b->nonew == 1) goto b_noinsert; \ 169 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 170 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 171 CHKMEMQ;\ 172 N = nrow++ - 1; \ 173 /* shift up all the later entries in this row */ \ 174 for (ii=N; ii>=_i; ii--) { \ 175 rp[ii+1] = rp[ii]; \ 176 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 177 } \ 178 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 179 rp[_i] = bcol; \ 180 ap[bs2*_i + bs*cidx + ridx] = value; \ 181 b_noinsert:; \ 182 bilen[brow] = nrow; \ 183 } 184 185 #undef __FUNCT__ 186 #define __FUNCT__ "MatSetValues_MPIBAIJ" 187 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 188 { 189 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 190 MatScalar value; 191 PetscBool roworiented = baij->roworiented; 192 PetscErrorCode ierr; 193 PetscInt i,j,row,col; 194 PetscInt rstart_orig=mat->rmap->rstart; 195 PetscInt rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart; 196 PetscInt cend_orig=mat->cmap->rend,bs=mat->rmap->bs; 197 198 /* Some Variables required in the macro */ 199 Mat A = baij->A; 200 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 201 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 202 MatScalar *aa=a->a; 203 204 Mat B = baij->B; 205 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 206 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 207 MatScalar *ba=b->a; 208 209 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 210 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 211 MatScalar *ap,*bap; 212 213 PetscFunctionBegin; 214 if (v) PetscValidScalarPointer(v,6); 215 for (i=0; i<m; i++) { 216 if (im[i] < 0) continue; 217 #if defined(PETSC_USE_DEBUG) 218 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 219 #endif 220 if (im[i] >= rstart_orig && im[i] < rend_orig) { 221 row = im[i] - rstart_orig; 222 for (j=0; j<n; j++) { 223 if (in[j] >= cstart_orig && in[j] < cend_orig){ 224 col = in[j] - cstart_orig; 225 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 226 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 227 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 228 } else if (in[j] < 0) continue; 229 #if defined(PETSC_USE_DEBUG) 230 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 231 #endif 232 else { 233 if (mat->was_assembled) { 234 if (!baij->colmap) { 235 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 236 } 237 #if defined (PETSC_USE_CTABLE) 238 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 239 col = col - 1; 240 #else 241 col = baij->colmap[in[j]/bs] - 1; 242 #endif 243 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 244 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 245 col = in[j]; 246 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 247 B = baij->B; 248 b = (Mat_SeqBAIJ*)(B)->data; 249 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 250 ba=b->a; 251 } else col += in[j]%bs; 252 } else col = in[j]; 253 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 254 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 255 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 256 } 257 } 258 } else { 259 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 260 if (!baij->donotstash) { 261 if (roworiented) { 262 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 263 } else { 264 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 265 } 266 } 267 } 268 } 269 PetscFunctionReturn(0); 270 } 271 272 #undef __FUNCT__ 273 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 274 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 275 { 276 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 277 const PetscScalar *value; 278 MatScalar *barray=baij->barray; 279 PetscBool roworiented = baij->roworiented; 280 PetscErrorCode ierr; 281 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 282 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 283 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 284 285 PetscFunctionBegin; 286 if(!barray) { 287 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 288 baij->barray = barray; 289 } 290 291 if (roworiented) { 292 stepval = (n-1)*bs; 293 } else { 294 stepval = (m-1)*bs; 295 } 296 for (i=0; i<m; i++) { 297 if (im[i] < 0) continue; 298 #if defined(PETSC_USE_DEBUG) 299 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 300 #endif 301 if (im[i] >= rstart && im[i] < rend) { 302 row = im[i] - rstart; 303 for (j=0; j<n; j++) { 304 /* If NumCol = 1 then a copy is not required */ 305 if ((roworiented) && (n == 1)) { 306 barray = (MatScalar*)v + i*bs2; 307 } else if((!roworiented) && (m == 1)) { 308 barray = (MatScalar*)v + j*bs2; 309 } else { /* Here a copy is required */ 310 if (roworiented) { 311 value = v + (i*(stepval+bs) + j)*bs; 312 } else { 313 value = v + (j*(stepval+bs) + i)*bs; 314 } 315 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 316 for (jj=0; jj<bs; jj++) { 317 barray[jj] = value[jj]; 318 } 319 barray += bs; 320 } 321 barray -= bs2; 322 } 323 324 if (in[j] >= cstart && in[j] < cend){ 325 col = in[j] - cstart; 326 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 327 } 328 else if (in[j] < 0) continue; 329 #if defined(PETSC_USE_DEBUG) 330 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 331 #endif 332 else { 333 if (mat->was_assembled) { 334 if (!baij->colmap) { 335 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 336 } 337 338 #if defined(PETSC_USE_DEBUG) 339 #if defined (PETSC_USE_CTABLE) 340 { PetscInt data; 341 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 342 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 343 } 344 #else 345 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 346 #endif 347 #endif 348 #if defined (PETSC_USE_CTABLE) 349 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 350 col = (col - 1)/bs; 351 #else 352 col = (baij->colmap[in[j]] - 1)/bs; 353 #endif 354 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 355 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 356 col = in[j]; 357 } 358 } 359 else col = in[j]; 360 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 361 } 362 } 363 } else { 364 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 365 if (!baij->donotstash) { 366 if (roworiented) { 367 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 368 } else { 369 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 370 } 371 } 372 } 373 } 374 PetscFunctionReturn(0); 375 } 376 377 #define HASH_KEY 0.6180339887 378 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 379 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 380 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 381 #undef __FUNCT__ 382 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 383 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 384 { 385 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 386 PetscBool roworiented = baij->roworiented; 387 PetscErrorCode ierr; 388 PetscInt i,j,row,col; 389 PetscInt rstart_orig=mat->rmap->rstart; 390 PetscInt rend_orig=mat->rmap->rend,Nbs=baij->Nbs; 391 PetscInt h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx; 392 PetscReal tmp; 393 MatScalar **HD = baij->hd,value; 394 #if defined(PETSC_USE_DEBUG) 395 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 396 #endif 397 398 PetscFunctionBegin; 399 if (v) PetscValidScalarPointer(v,6); 400 for (i=0; i<m; i++) { 401 #if defined(PETSC_USE_DEBUG) 402 if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 403 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 404 #endif 405 row = im[i]; 406 if (row >= rstart_orig && row < rend_orig) { 407 for (j=0; j<n; j++) { 408 col = in[j]; 409 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 410 /* Look up PetscInto the Hash Table */ 411 key = (row/bs)*Nbs+(col/bs)+1; 412 h1 = HASH(size,key,tmp); 413 414 415 idx = h1; 416 #if defined(PETSC_USE_DEBUG) 417 insert_ct++; 418 total_ct++; 419 if (HT[idx] != key) { 420 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 421 if (idx == size) { 422 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 423 if (idx == h1) { 424 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 425 } 426 } 427 } 428 #else 429 if (HT[idx] != key) { 430 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 431 if (idx == size) { 432 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 433 if (idx == h1) { 434 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 435 } 436 } 437 } 438 #endif 439 /* A HASH table entry is found, so insert the values at the correct address */ 440 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 441 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 442 } 443 } else { 444 if (!baij->donotstash) { 445 if (roworiented) { 446 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 447 } else { 448 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 449 } 450 } 451 } 452 } 453 #if defined(PETSC_USE_DEBUG) 454 baij->ht_total_ct = total_ct; 455 baij->ht_insert_ct = insert_ct; 456 #endif 457 PetscFunctionReturn(0); 458 } 459 460 #undef __FUNCT__ 461 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 462 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 463 { 464 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 465 PetscBool roworiented = baij->roworiented; 466 PetscErrorCode ierr; 467 PetscInt i,j,ii,jj,row,col; 468 PetscInt rstart=baij->rstartbs; 469 PetscInt rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 470 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 471 PetscReal tmp; 472 MatScalar **HD = baij->hd,*baij_a; 473 const PetscScalar *v_t,*value; 474 #if defined(PETSC_USE_DEBUG) 475 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 476 #endif 477 478 PetscFunctionBegin; 479 480 if (roworiented) { 481 stepval = (n-1)*bs; 482 } else { 483 stepval = (m-1)*bs; 484 } 485 for (i=0; i<m; i++) { 486 #if defined(PETSC_USE_DEBUG) 487 if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 488 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 489 #endif 490 row = im[i]; 491 v_t = v + i*nbs2; 492 if (row >= rstart && row < rend) { 493 for (j=0; j<n; j++) { 494 col = in[j]; 495 496 /* Look up into the Hash Table */ 497 key = row*Nbs+col+1; 498 h1 = HASH(size,key,tmp); 499 500 idx = h1; 501 #if defined(PETSC_USE_DEBUG) 502 total_ct++; 503 insert_ct++; 504 if (HT[idx] != key) { 505 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 506 if (idx == size) { 507 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 508 if (idx == h1) { 509 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 510 } 511 } 512 } 513 #else 514 if (HT[idx] != key) { 515 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 516 if (idx == size) { 517 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 518 if (idx == h1) { 519 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 520 } 521 } 522 } 523 #endif 524 baij_a = HD[idx]; 525 if (roworiented) { 526 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 527 /* value = v + (i*(stepval+bs)+j)*bs; */ 528 value = v_t; 529 v_t += bs; 530 if (addv == ADD_VALUES) { 531 for (ii=0; ii<bs; ii++,value+=stepval) { 532 for (jj=ii; jj<bs2; jj+=bs) { 533 baij_a[jj] += *value++; 534 } 535 } 536 } else { 537 for (ii=0; ii<bs; ii++,value+=stepval) { 538 for (jj=ii; jj<bs2; jj+=bs) { 539 baij_a[jj] = *value++; 540 } 541 } 542 } 543 } else { 544 value = v + j*(stepval+bs)*bs + i*bs; 545 if (addv == ADD_VALUES) { 546 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 547 for (jj=0; jj<bs; jj++) { 548 baij_a[jj] += *value++; 549 } 550 } 551 } else { 552 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 553 for (jj=0; jj<bs; jj++) { 554 baij_a[jj] = *value++; 555 } 556 } 557 } 558 } 559 } 560 } else { 561 if (!baij->donotstash) { 562 if (roworiented) { 563 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 564 } else { 565 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 566 } 567 } 568 } 569 } 570 #if defined(PETSC_USE_DEBUG) 571 baij->ht_total_ct = total_ct; 572 baij->ht_insert_ct = insert_ct; 573 #endif 574 PetscFunctionReturn(0); 575 } 576 577 #undef __FUNCT__ 578 #define __FUNCT__ "MatGetValues_MPIBAIJ" 579 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 580 { 581 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 582 PetscErrorCode ierr; 583 PetscInt bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 584 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 585 586 PetscFunctionBegin; 587 for (i=0; i<m; i++) { 588 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 589 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 590 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 591 row = idxm[i] - bsrstart; 592 for (j=0; j<n; j++) { 593 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 594 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 595 if (idxn[j] >= bscstart && idxn[j] < bscend){ 596 col = idxn[j] - bscstart; 597 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 598 } else { 599 if (!baij->colmap) { 600 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 601 } 602 #if defined (PETSC_USE_CTABLE) 603 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 604 data --; 605 #else 606 data = baij->colmap[idxn[j]/bs]-1; 607 #endif 608 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 609 else { 610 col = data + idxn[j]%bs; 611 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 612 } 613 } 614 } 615 } else { 616 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 617 } 618 } 619 PetscFunctionReturn(0); 620 } 621 622 #undef __FUNCT__ 623 #define __FUNCT__ "MatNorm_MPIBAIJ" 624 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 625 { 626 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 627 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 628 PetscErrorCode ierr; 629 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 630 PetscReal sum = 0.0; 631 MatScalar *v; 632 633 PetscFunctionBegin; 634 if (baij->size == 1) { 635 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 636 } else { 637 if (type == NORM_FROBENIUS) { 638 v = amat->a; 639 nz = amat->nz*bs2; 640 for (i=0; i<nz; i++) { 641 #if defined(PETSC_USE_COMPLEX) 642 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 643 #else 644 sum += (*v)*(*v); v++; 645 #endif 646 } 647 v = bmat->a; 648 nz = bmat->nz*bs2; 649 for (i=0; i<nz; i++) { 650 #if defined(PETSC_USE_COMPLEX) 651 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 652 #else 653 sum += (*v)*(*v); v++; 654 #endif 655 } 656 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 657 *nrm = sqrt(*nrm); 658 } else if (type == NORM_1) { /* max column sum */ 659 PetscReal *tmp,*tmp2; 660 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 661 ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr); 662 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 663 v = amat->a; jj = amat->j; 664 for (i=0; i<amat->nz; i++) { 665 for (j=0; j<bs; j++){ 666 col = bs*(cstart + *jj) + j; /* column index */ 667 for (row=0; row<bs; row++){ 668 tmp[col] += PetscAbsScalar(*v); v++; 669 } 670 } 671 jj++; 672 } 673 v = bmat->a; jj = bmat->j; 674 for (i=0; i<bmat->nz; i++) { 675 for (j=0; j<bs; j++){ 676 col = bs*garray[*jj] + j; 677 for (row=0; row<bs; row++){ 678 tmp[col] += PetscAbsScalar(*v); v++; 679 } 680 } 681 jj++; 682 } 683 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 684 *nrm = 0.0; 685 for (j=0; j<mat->cmap->N; j++) { 686 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 687 } 688 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 689 } else if (type == NORM_INFINITY) { /* max row sum */ 690 PetscReal *sums; 691 ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr); 692 sum = 0.0; 693 for (j=0; j<amat->mbs; j++) { 694 for (row=0; row<bs; row++) sums[row] = 0.0; 695 v = amat->a + bs2*amat->i[j]; 696 nz = amat->i[j+1]-amat->i[j]; 697 for (i=0; i<nz; i++) { 698 for (col=0; col<bs; col++){ 699 for (row=0; row<bs; row++){ 700 sums[row] += PetscAbsScalar(*v); v++; 701 } 702 } 703 } 704 v = bmat->a + bs2*bmat->i[j]; 705 nz = bmat->i[j+1]-bmat->i[j]; 706 for (i=0; i<nz; i++) { 707 for (col=0; col<bs; col++){ 708 for (row=0; row<bs; row++){ 709 sums[row] += PetscAbsScalar(*v); v++; 710 } 711 } 712 } 713 for (row=0; row<bs; row++){ 714 if (sums[row] > sum) sum = sums[row]; 715 } 716 } 717 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 718 ierr = PetscFree(sums);CHKERRQ(ierr); 719 } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet"); 720 } 721 PetscFunctionReturn(0); 722 } 723 724 /* 725 Creates the hash table, and sets the table 726 This table is created only once. 727 If new entried need to be added to the matrix 728 then the hash table has to be destroyed and 729 recreated. 730 */ 731 #undef __FUNCT__ 732 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 733 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 734 { 735 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 736 Mat A = baij->A,B=baij->B; 737 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 738 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 739 PetscErrorCode ierr; 740 PetscInt ht_size,bs2=baij->bs2,rstart=baij->rstartbs; 741 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 742 PetscInt *HT,key; 743 MatScalar **HD; 744 PetscReal tmp; 745 #if defined(PETSC_USE_INFO) 746 PetscInt ct=0,max=0; 747 #endif 748 749 PetscFunctionBegin; 750 if (baij->ht) PetscFunctionReturn(0); 751 752 baij->ht_size = (PetscInt)(factor*nz); 753 ht_size = baij->ht_size; 754 755 /* Allocate Memory for Hash Table */ 756 ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr); 757 ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr); 758 ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr); 759 HD = baij->hd; 760 HT = baij->ht; 761 762 /* Loop Over A */ 763 for (i=0; i<a->mbs; i++) { 764 for (j=ai[i]; j<ai[i+1]; j++) { 765 row = i+rstart; 766 col = aj[j]+cstart; 767 768 key = row*Nbs + col + 1; 769 h1 = HASH(ht_size,key,tmp); 770 for (k=0; k<ht_size; k++){ 771 if (!HT[(h1+k)%ht_size]) { 772 HT[(h1+k)%ht_size] = key; 773 HD[(h1+k)%ht_size] = a->a + j*bs2; 774 break; 775 #if defined(PETSC_USE_INFO) 776 } else { 777 ct++; 778 #endif 779 } 780 } 781 #if defined(PETSC_USE_INFO) 782 if (k> max) max = k; 783 #endif 784 } 785 } 786 /* Loop Over B */ 787 for (i=0; i<b->mbs; i++) { 788 for (j=bi[i]; j<bi[i+1]; j++) { 789 row = i+rstart; 790 col = garray[bj[j]]; 791 key = row*Nbs + col + 1; 792 h1 = HASH(ht_size,key,tmp); 793 for (k=0; k<ht_size; k++){ 794 if (!HT[(h1+k)%ht_size]) { 795 HT[(h1+k)%ht_size] = key; 796 HD[(h1+k)%ht_size] = b->a + j*bs2; 797 break; 798 #if defined(PETSC_USE_INFO) 799 } else { 800 ct++; 801 #endif 802 } 803 } 804 #if defined(PETSC_USE_INFO) 805 if (k> max) max = k; 806 #endif 807 } 808 } 809 810 /* Print Summary */ 811 #if defined(PETSC_USE_INFO) 812 for (i=0,j=0; i<ht_size; i++) { 813 if (HT[i]) {j++;} 814 } 815 ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 816 #endif 817 PetscFunctionReturn(0); 818 } 819 820 #undef __FUNCT__ 821 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 822 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 823 { 824 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 825 PetscErrorCode ierr; 826 PetscInt nstash,reallocs; 827 InsertMode addv; 828 829 PetscFunctionBegin; 830 if (baij->donotstash || mat->nooffprocentries) { 831 PetscFunctionReturn(0); 832 } 833 834 /* make sure all processors are either in INSERTMODE or ADDMODE */ 835 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 836 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 837 mat->insertmode = addv; /* in case this processor had no cache */ 838 839 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 840 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 841 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 842 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 843 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 844 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 845 PetscFunctionReturn(0); 846 } 847 848 #undef __FUNCT__ 849 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 850 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 851 { 852 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 853 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data; 854 PetscErrorCode ierr; 855 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 856 PetscInt *row,*col; 857 PetscBool r1,r2,r3,other_disassembled; 858 MatScalar *val; 859 InsertMode addv = mat->insertmode; 860 PetscMPIInt n; 861 862 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 863 PetscFunctionBegin; 864 if (!baij->donotstash && !mat->nooffprocentries) { 865 while (1) { 866 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 867 if (!flg) break; 868 869 for (i=0; i<n;) { 870 /* Now identify the consecutive vals belonging to the same row */ 871 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 872 if (j < n) ncols = j-i; 873 else ncols = n-i; 874 /* Now assemble all these values with a single function call */ 875 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 876 i = j; 877 } 878 } 879 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 880 /* Now process the block-stash. Since the values are stashed column-oriented, 881 set the roworiented flag to column oriented, and after MatSetValues() 882 restore the original flags */ 883 r1 = baij->roworiented; 884 r2 = a->roworiented; 885 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 886 baij->roworiented = PETSC_FALSE; 887 a->roworiented = PETSC_FALSE; 888 (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 889 while (1) { 890 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 891 if (!flg) break; 892 893 for (i=0; i<n;) { 894 /* Now identify the consecutive vals belonging to the same row */ 895 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 896 if (j < n) ncols = j-i; 897 else ncols = n-i; 898 ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 899 i = j; 900 } 901 } 902 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 903 baij->roworiented = r1; 904 a->roworiented = r2; 905 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 906 } 907 908 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 909 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 910 911 /* determine if any processor has disassembled, if so we must 912 also disassemble ourselfs, in order that we may reassemble. */ 913 /* 914 if nonzero structure of submatrix B cannot change then we know that 915 no processor disassembled thus we can skip this stuff 916 */ 917 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 918 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 919 if (mat->was_assembled && !other_disassembled) { 920 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 921 } 922 } 923 924 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 925 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 926 } 927 ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 928 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 929 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 930 931 #if defined(PETSC_USE_INFO) 932 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 933 ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr); 934 baij->ht_total_ct = 0; 935 baij->ht_insert_ct = 0; 936 } 937 #endif 938 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 939 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 940 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 941 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 942 } 943 944 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 945 baij->rowvalues = 0; 946 PetscFunctionReturn(0); 947 } 948 949 #undef __FUNCT__ 950 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 951 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 952 { 953 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 954 PetscErrorCode ierr; 955 PetscMPIInt size = baij->size,rank = baij->rank; 956 PetscInt bs = mat->rmap->bs; 957 PetscBool iascii,isdraw; 958 PetscViewer sviewer; 959 PetscViewerFormat format; 960 961 PetscFunctionBegin; 962 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 963 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 964 if (iascii) { 965 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 966 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 967 MatInfo info; 968 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 969 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 970 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 971 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 972 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 973 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 974 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 975 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 976 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 977 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 978 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 979 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 980 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 981 PetscFunctionReturn(0); 982 } else if (format == PETSC_VIEWER_ASCII_INFO) { 983 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 984 PetscFunctionReturn(0); 985 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 986 PetscFunctionReturn(0); 987 } 988 } 989 990 if (isdraw) { 991 PetscDraw draw; 992 PetscBool isnull; 993 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 994 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 995 } 996 997 if (size == 1) { 998 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 999 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1000 } else { 1001 /* assemble the entire matrix onto first processor. */ 1002 Mat A; 1003 Mat_SeqBAIJ *Aloc; 1004 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1005 MatScalar *a; 1006 1007 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1008 /* Perhaps this should be the type of mat? */ 1009 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1010 if (!rank) { 1011 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1012 } else { 1013 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1014 } 1015 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1016 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1017 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1018 1019 /* copy over the A part */ 1020 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1021 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1022 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1023 1024 for (i=0; i<mbs; i++) { 1025 rvals[0] = bs*(baij->rstartbs + i); 1026 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1027 for (j=ai[i]; j<ai[i+1]; j++) { 1028 col = (baij->cstartbs+aj[j])*bs; 1029 for (k=0; k<bs; k++) { 1030 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1031 col++; a += bs; 1032 } 1033 } 1034 } 1035 /* copy over the B part */ 1036 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1037 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1038 for (i=0; i<mbs; i++) { 1039 rvals[0] = bs*(baij->rstartbs + i); 1040 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1041 for (j=ai[i]; j<ai[i+1]; j++) { 1042 col = baij->garray[aj[j]]*bs; 1043 for (k=0; k<bs; k++) { 1044 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1045 col++; a += bs; 1046 } 1047 } 1048 } 1049 ierr = PetscFree(rvals);CHKERRQ(ierr); 1050 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1051 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1052 /* 1053 Everyone has to call to draw the matrix since the graphics waits are 1054 synchronized across all processors that share the PetscDraw object 1055 */ 1056 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1057 if (!rank) { 1058 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1059 /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/ 1060 PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ); 1061 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1062 } 1063 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1064 ierr = MatDestroy(&A);CHKERRQ(ierr); 1065 } 1066 PetscFunctionReturn(0); 1067 } 1068 1069 #undef __FUNCT__ 1070 #define __FUNCT__ "MatView_MPIBAIJ_Binary" 1071 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 1072 { 1073 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)mat->data; 1074 Mat_SeqBAIJ* A = (Mat_SeqBAIJ*)a->A->data; 1075 Mat_SeqBAIJ* B = (Mat_SeqBAIJ*)a->B->data; 1076 PetscErrorCode ierr; 1077 PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen; 1078 PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll; 1079 int fd; 1080 PetscScalar *column_values; 1081 FILE *file; 1082 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1083 PetscInt message_count,flowcontrolcount; 1084 1085 PetscFunctionBegin; 1086 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1087 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1088 nz = bs2*(A->nz + B->nz); 1089 rlen = mat->rmap->n; 1090 if (!rank) { 1091 header[0] = MAT_FILE_CLASSID; 1092 header[1] = mat->rmap->N; 1093 header[2] = mat->cmap->N; 1094 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1095 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1096 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1097 /* get largest number of rows any processor has */ 1098 range = mat->rmap->range; 1099 for (i=1; i<size; i++) { 1100 rlen = PetscMax(rlen,range[i+1] - range[i]); 1101 } 1102 } else { 1103 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1104 } 1105 1106 ierr = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr); 1107 /* compute lengths of each row */ 1108 for (i=0; i<a->mbs; i++) { 1109 crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1110 } 1111 /* store the row lengths to the file */ 1112 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1113 if (!rank) { 1114 MPI_Status status; 1115 ierr = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr); 1116 rlen = (range[1] - range[0])/bs; 1117 for (i=0; i<rlen; i++) { 1118 for (j=0; j<bs; j++) { 1119 row_lens[i*bs+j] = bs*crow_lens[i]; 1120 } 1121 } 1122 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1123 for (i=1; i<size; i++) { 1124 rlen = (range[i+1] - range[i])/bs; 1125 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1126 ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1127 for (k=0; k<rlen; k++) { 1128 for (j=0; j<bs; j++) { 1129 row_lens[k*bs+j] = bs*crow_lens[k]; 1130 } 1131 } 1132 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1133 } 1134 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1135 ierr = PetscFree(row_lens);CHKERRQ(ierr); 1136 } else { 1137 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1138 ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1139 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1140 } 1141 ierr = PetscFree(crow_lens);CHKERRQ(ierr); 1142 1143 /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the 1144 information needed to make it for each row from a block row. This does require more communication but still not more than 1145 the communication needed for the nonzero values */ 1146 nzmax = nz; /* space a largest processor needs */ 1147 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1148 ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1149 cnt = 0; 1150 for (i=0; i<a->mbs; i++) { 1151 pcnt = cnt; 1152 for (j=B->i[i]; j<B->i[i+1]; j++) { 1153 if ( (col = garray[B->j[j]]) > cstart) break; 1154 for (l=0; l<bs; l++) { 1155 column_indices[cnt++] = bs*col+l; 1156 } 1157 } 1158 for (k=A->i[i]; k<A->i[i+1]; k++) { 1159 for (l=0; l<bs; l++) { 1160 column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 1161 } 1162 } 1163 for (; j<B->i[i+1]; j++) { 1164 for (l=0; l<bs; l++) { 1165 column_indices[cnt++] = bs*garray[B->j[j]]+l; 1166 } 1167 } 1168 len = cnt - pcnt; 1169 for (k=1; k<bs; k++) { 1170 ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr); 1171 cnt += len; 1172 } 1173 } 1174 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1175 1176 /* store the columns to the file */ 1177 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1178 if (!rank) { 1179 MPI_Status status; 1180 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1181 for (i=1; i<size; i++) { 1182 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1183 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1184 ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1185 ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1186 } 1187 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1188 } else { 1189 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1190 ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1191 ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1192 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1193 } 1194 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1195 1196 /* load up the numerical values */ 1197 ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1198 cnt = 0; 1199 for (i=0; i<a->mbs; i++) { 1200 rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1201 for (j=B->i[i]; j<B->i[i+1]; j++) { 1202 if ( garray[B->j[j]] > cstart) break; 1203 for (l=0; l<bs; l++) { 1204 for (ll=0; ll<bs; ll++) { 1205 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1206 } 1207 } 1208 cnt += bs; 1209 } 1210 for (k=A->i[i]; k<A->i[i+1]; k++) { 1211 for (l=0; l<bs; l++) { 1212 for (ll=0; ll<bs; ll++) { 1213 column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1214 } 1215 } 1216 cnt += bs; 1217 } 1218 for (; j<B->i[i+1]; j++) { 1219 for (l=0; l<bs; l++) { 1220 for (ll=0; ll<bs; ll++) { 1221 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1222 } 1223 } 1224 cnt += bs; 1225 } 1226 cnt += (bs-1)*rlen; 1227 } 1228 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1229 1230 /* store the column values to the file */ 1231 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1232 if (!rank) { 1233 MPI_Status status; 1234 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1235 for (i=1; i<size; i++) { 1236 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1237 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1238 ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1239 ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1240 } 1241 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1242 } else { 1243 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1244 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1245 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1246 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1247 } 1248 ierr = PetscFree(column_values);CHKERRQ(ierr); 1249 1250 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1251 if (file) { 1252 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1253 } 1254 PetscFunctionReturn(0); 1255 } 1256 1257 #undef __FUNCT__ 1258 #define __FUNCT__ "MatView_MPIBAIJ" 1259 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1260 { 1261 PetscErrorCode ierr; 1262 PetscBool iascii,isdraw,issocket,isbinary; 1263 1264 PetscFunctionBegin; 1265 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1266 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1267 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1268 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1269 if (iascii || isdraw || issocket) { 1270 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1271 } else if (isbinary) { 1272 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1273 } else { 1274 SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1275 } 1276 PetscFunctionReturn(0); 1277 } 1278 1279 #undef __FUNCT__ 1280 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1281 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1282 { 1283 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1284 PetscErrorCode ierr; 1285 1286 PetscFunctionBegin; 1287 #if defined(PETSC_USE_LOG) 1288 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1289 #endif 1290 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1291 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1292 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 1293 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1294 #if defined (PETSC_USE_CTABLE) 1295 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 1296 #else 1297 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1298 #endif 1299 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1300 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 1301 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1302 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1303 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1304 ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr); 1305 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1306 ierr = PetscFree(baij);CHKERRQ(ierr); 1307 1308 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1309 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1310 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1311 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1312 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1313 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1314 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1315 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr); 1316 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1317 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C","",PETSC_NULL);CHKERRQ(ierr); 1318 PetscFunctionReturn(0); 1319 } 1320 1321 #undef __FUNCT__ 1322 #define __FUNCT__ "MatMult_MPIBAIJ" 1323 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1324 { 1325 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1326 PetscErrorCode ierr; 1327 PetscInt nt; 1328 1329 PetscFunctionBegin; 1330 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1331 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1332 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1333 if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1334 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1335 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1336 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1337 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1338 PetscFunctionReturn(0); 1339 } 1340 1341 #undef __FUNCT__ 1342 #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1343 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1344 { 1345 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1346 PetscErrorCode ierr; 1347 1348 PetscFunctionBegin; 1349 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1350 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1351 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1352 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1353 PetscFunctionReturn(0); 1354 } 1355 1356 #undef __FUNCT__ 1357 #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1358 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1359 { 1360 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1361 PetscErrorCode ierr; 1362 PetscBool merged; 1363 1364 PetscFunctionBegin; 1365 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1366 /* do nondiagonal part */ 1367 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1368 if (!merged) { 1369 /* send it on its way */ 1370 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1371 /* do local part */ 1372 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1373 /* receive remote parts: note this assumes the values are not actually */ 1374 /* inserted in yy until the next line */ 1375 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1376 } else { 1377 /* do local part */ 1378 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1379 /* send it on its way */ 1380 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1381 /* values actually were received in the Begin() but we need to call this nop */ 1382 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1383 } 1384 PetscFunctionReturn(0); 1385 } 1386 1387 #undef __FUNCT__ 1388 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1389 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1390 { 1391 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1392 PetscErrorCode ierr; 1393 1394 PetscFunctionBegin; 1395 /* do nondiagonal part */ 1396 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1397 /* send it on its way */ 1398 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1399 /* do local part */ 1400 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1401 /* receive remote parts: note this assumes the values are not actually */ 1402 /* inserted in yy until the next line, which is true for my implementation*/ 1403 /* but is not perhaps always true. */ 1404 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1405 PetscFunctionReturn(0); 1406 } 1407 1408 /* 1409 This only works correctly for square matrices where the subblock A->A is the 1410 diagonal block 1411 */ 1412 #undef __FUNCT__ 1413 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1414 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1415 { 1416 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1417 PetscErrorCode ierr; 1418 1419 PetscFunctionBegin; 1420 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1421 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1422 PetscFunctionReturn(0); 1423 } 1424 1425 #undef __FUNCT__ 1426 #define __FUNCT__ "MatScale_MPIBAIJ" 1427 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1428 { 1429 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1430 PetscErrorCode ierr; 1431 1432 PetscFunctionBegin; 1433 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1434 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1435 PetscFunctionReturn(0); 1436 } 1437 1438 #undef __FUNCT__ 1439 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1440 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1441 { 1442 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1443 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1444 PetscErrorCode ierr; 1445 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1446 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1447 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1448 1449 PetscFunctionBegin; 1450 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1451 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1452 mat->getrowactive = PETSC_TRUE; 1453 1454 if (!mat->rowvalues && (idx || v)) { 1455 /* 1456 allocate enough space to hold information from the longest row. 1457 */ 1458 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1459 PetscInt max = 1,mbs = mat->mbs,tmp; 1460 for (i=0; i<mbs; i++) { 1461 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1462 if (max < tmp) { max = tmp; } 1463 } 1464 ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1465 } 1466 lrow = row - brstart; 1467 1468 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1469 if (!v) {pvA = 0; pvB = 0;} 1470 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1471 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1472 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1473 nztot = nzA + nzB; 1474 1475 cmap = mat->garray; 1476 if (v || idx) { 1477 if (nztot) { 1478 /* Sort by increasing column numbers, assuming A and B already sorted */ 1479 PetscInt imark = -1; 1480 if (v) { 1481 *v = v_p = mat->rowvalues; 1482 for (i=0; i<nzB; i++) { 1483 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1484 else break; 1485 } 1486 imark = i; 1487 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1488 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1489 } 1490 if (idx) { 1491 *idx = idx_p = mat->rowindices; 1492 if (imark > -1) { 1493 for (i=0; i<imark; i++) { 1494 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1495 } 1496 } else { 1497 for (i=0; i<nzB; i++) { 1498 if (cmap[cworkB[i]/bs] < cstart) 1499 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1500 else break; 1501 } 1502 imark = i; 1503 } 1504 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1505 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1506 } 1507 } else { 1508 if (idx) *idx = 0; 1509 if (v) *v = 0; 1510 } 1511 } 1512 *nz = nztot; 1513 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1514 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1515 PetscFunctionReturn(0); 1516 } 1517 1518 #undef __FUNCT__ 1519 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1520 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1521 { 1522 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1523 1524 PetscFunctionBegin; 1525 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1526 baij->getrowactive = PETSC_FALSE; 1527 PetscFunctionReturn(0); 1528 } 1529 1530 #undef __FUNCT__ 1531 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1532 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1533 { 1534 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1535 PetscErrorCode ierr; 1536 1537 PetscFunctionBegin; 1538 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1539 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1540 PetscFunctionReturn(0); 1541 } 1542 1543 #undef __FUNCT__ 1544 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1545 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1546 { 1547 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1548 Mat A = a->A,B = a->B; 1549 PetscErrorCode ierr; 1550 PetscReal isend[5],irecv[5]; 1551 1552 PetscFunctionBegin; 1553 info->block_size = (PetscReal)matin->rmap->bs; 1554 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1555 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1556 isend[3] = info->memory; isend[4] = info->mallocs; 1557 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1558 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1559 isend[3] += info->memory; isend[4] += info->mallocs; 1560 if (flag == MAT_LOCAL) { 1561 info->nz_used = isend[0]; 1562 info->nz_allocated = isend[1]; 1563 info->nz_unneeded = isend[2]; 1564 info->memory = isend[3]; 1565 info->mallocs = isend[4]; 1566 } else if (flag == MAT_GLOBAL_MAX) { 1567 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1568 info->nz_used = irecv[0]; 1569 info->nz_allocated = irecv[1]; 1570 info->nz_unneeded = irecv[2]; 1571 info->memory = irecv[3]; 1572 info->mallocs = irecv[4]; 1573 } else if (flag == MAT_GLOBAL_SUM) { 1574 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1575 info->nz_used = irecv[0]; 1576 info->nz_allocated = irecv[1]; 1577 info->nz_unneeded = irecv[2]; 1578 info->memory = irecv[3]; 1579 info->mallocs = irecv[4]; 1580 } else { 1581 SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1582 } 1583 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1584 info->fill_ratio_needed = 0; 1585 info->factor_mallocs = 0; 1586 PetscFunctionReturn(0); 1587 } 1588 1589 #undef __FUNCT__ 1590 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1591 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1592 { 1593 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1594 PetscErrorCode ierr; 1595 1596 PetscFunctionBegin; 1597 switch (op) { 1598 case MAT_NEW_NONZERO_LOCATIONS: 1599 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1600 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1601 case MAT_KEEP_NONZERO_PATTERN: 1602 case MAT_NEW_NONZERO_LOCATION_ERR: 1603 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1604 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1605 break; 1606 case MAT_ROW_ORIENTED: 1607 a->roworiented = flg; 1608 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1609 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1610 break; 1611 case MAT_NEW_DIAGONALS: 1612 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1613 break; 1614 case MAT_IGNORE_OFF_PROC_ENTRIES: 1615 a->donotstash = flg; 1616 break; 1617 case MAT_USE_HASH_TABLE: 1618 a->ht_flag = flg; 1619 break; 1620 case MAT_SYMMETRIC: 1621 case MAT_STRUCTURALLY_SYMMETRIC: 1622 case MAT_HERMITIAN: 1623 case MAT_SYMMETRY_ETERNAL: 1624 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1625 break; 1626 default: 1627 SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op); 1628 } 1629 PetscFunctionReturn(0); 1630 } 1631 1632 #undef __FUNCT__ 1633 #define __FUNCT__ "MatTranspose_MPIBAIJ" 1634 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1635 { 1636 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1637 Mat_SeqBAIJ *Aloc; 1638 Mat B; 1639 PetscErrorCode ierr; 1640 PetscInt M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1641 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1642 MatScalar *a; 1643 1644 PetscFunctionBegin; 1645 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1646 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1647 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1648 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1649 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1650 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1651 } else { 1652 B = *matout; 1653 } 1654 1655 /* copy over the A part */ 1656 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1657 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1658 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1659 1660 for (i=0; i<mbs; i++) { 1661 rvals[0] = bs*(baij->rstartbs + i); 1662 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1663 for (j=ai[i]; j<ai[i+1]; j++) { 1664 col = (baij->cstartbs+aj[j])*bs; 1665 for (k=0; k<bs; k++) { 1666 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1667 col++; a += bs; 1668 } 1669 } 1670 } 1671 /* copy over the B part */ 1672 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1673 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1674 for (i=0; i<mbs; i++) { 1675 rvals[0] = bs*(baij->rstartbs + i); 1676 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1677 for (j=ai[i]; j<ai[i+1]; j++) { 1678 col = baij->garray[aj[j]]*bs; 1679 for (k=0; k<bs; k++) { 1680 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1681 col++; a += bs; 1682 } 1683 } 1684 } 1685 ierr = PetscFree(rvals);CHKERRQ(ierr); 1686 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1687 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1688 1689 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1690 *matout = B; 1691 } else { 1692 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1693 } 1694 PetscFunctionReturn(0); 1695 } 1696 1697 #undef __FUNCT__ 1698 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1699 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1700 { 1701 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1702 Mat a = baij->A,b = baij->B; 1703 PetscErrorCode ierr; 1704 PetscInt s1,s2,s3; 1705 1706 PetscFunctionBegin; 1707 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1708 if (rr) { 1709 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1710 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1711 /* Overlap communication with computation. */ 1712 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1713 } 1714 if (ll) { 1715 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1716 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1717 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1718 } 1719 /* scale the diagonal block */ 1720 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1721 1722 if (rr) { 1723 /* Do a scatter end and then right scale the off-diagonal block */ 1724 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1725 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1726 } 1727 1728 PetscFunctionReturn(0); 1729 } 1730 1731 #undef __FUNCT__ 1732 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1733 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1734 { 1735 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1736 PetscErrorCode ierr; 1737 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1738 PetscInt i,*owners = A->rmap->range; 1739 PetscInt *nprocs,j,idx,nsends,row; 1740 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1741 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1; 1742 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap->rstart; 1743 MPI_Comm comm = ((PetscObject)A)->comm; 1744 MPI_Request *send_waits,*recv_waits; 1745 MPI_Status recv_status,*send_status; 1746 const PetscScalar *xx; 1747 PetscScalar *bb; 1748 #if defined(PETSC_DEBUG) 1749 PetscBool found = PETSC_FALSE; 1750 #endif 1751 1752 PetscFunctionBegin; 1753 /* first count number of contributors to each processor */ 1754 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1755 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1756 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1757 j = 0; 1758 for (i=0; i<N; i++) { 1759 if (lastidx > (idx = rows[i])) j = 0; 1760 lastidx = idx; 1761 for (; j<size; j++) { 1762 if (idx >= owners[j] && idx < owners[j+1]) { 1763 nprocs[2*j]++; 1764 nprocs[2*j+1] = 1; 1765 owner[i] = j; 1766 #if defined(PETSC_DEBUG) 1767 found = PETSC_TRUE; 1768 #endif 1769 break; 1770 } 1771 } 1772 #if defined(PETSC_DEBUG) 1773 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1774 found = PETSC_FALSE; 1775 #endif 1776 } 1777 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1778 1779 if (A->nooffproczerorows) { 1780 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 1781 nrecvs = nsends; 1782 nmax = N; 1783 } else { 1784 /* inform other processors of number of messages and max length*/ 1785 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1786 } 1787 1788 /* post receives: */ 1789 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1790 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1791 for (i=0; i<nrecvs; i++) { 1792 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1793 } 1794 1795 /* do sends: 1796 1) starts[i] gives the starting index in svalues for stuff going to 1797 the ith processor 1798 */ 1799 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1800 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1801 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1802 starts[0] = 0; 1803 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1804 for (i=0; i<N; i++) { 1805 svalues[starts[owner[i]]++] = rows[i]; 1806 } 1807 1808 starts[0] = 0; 1809 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1810 count = 0; 1811 for (i=0; i<size; i++) { 1812 if (nprocs[2*i+1]) { 1813 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1814 } 1815 } 1816 ierr = PetscFree(starts);CHKERRQ(ierr); 1817 1818 base = owners[rank]; 1819 1820 /* wait on receives */ 1821 ierr = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr); 1822 count = nrecvs; 1823 slen = 0; 1824 while (count) { 1825 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1826 /* unpack receives into our local space */ 1827 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1828 source[imdex] = recv_status.MPI_SOURCE; 1829 lens[imdex] = n; 1830 slen += n; 1831 count--; 1832 } 1833 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1834 1835 /* move the data into the send scatter */ 1836 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1837 count = 0; 1838 for (i=0; i<nrecvs; i++) { 1839 values = rvalues + i*nmax; 1840 for (j=0; j<lens[i]; j++) { 1841 lrows[count++] = values[j] - base; 1842 } 1843 } 1844 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1845 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1846 ierr = PetscFree(owner);CHKERRQ(ierr); 1847 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1848 1849 /* fix right hand side if needed */ 1850 if (x && b) { 1851 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1852 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1853 for (i=0; i<slen; i++) { 1854 bb[lrows[i]] = diag*xx[lrows[i]]; 1855 } 1856 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1857 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1858 } 1859 1860 /* actually zap the local rows */ 1861 /* 1862 Zero the required rows. If the "diagonal block" of the matrix 1863 is square and the user wishes to set the diagonal we use separate 1864 code so that MatSetValues() is not called for each diagonal allocating 1865 new memory, thus calling lots of mallocs and slowing things down. 1866 1867 */ 1868 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1869 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1870 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1871 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 1872 } else if (diag != 0.0) { 1873 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1874 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1875 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1876 for (i=0; i<slen; i++) { 1877 row = lrows[i] + rstart_bs; 1878 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1879 } 1880 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1881 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1882 } else { 1883 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1884 } 1885 1886 ierr = PetscFree(lrows);CHKERRQ(ierr); 1887 1888 /* wait on sends */ 1889 if (nsends) { 1890 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1891 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1892 ierr = PetscFree(send_status);CHKERRQ(ierr); 1893 } 1894 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1895 ierr = PetscFree(svalues);CHKERRQ(ierr); 1896 1897 PetscFunctionReturn(0); 1898 } 1899 1900 #undef __FUNCT__ 1901 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1902 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1903 { 1904 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1905 PetscErrorCode ierr; 1906 1907 PetscFunctionBegin; 1908 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1909 PetscFunctionReturn(0); 1910 } 1911 1912 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1913 1914 #undef __FUNCT__ 1915 #define __FUNCT__ "MatEqual_MPIBAIJ" 1916 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1917 { 1918 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1919 Mat a,b,c,d; 1920 PetscBool flg; 1921 PetscErrorCode ierr; 1922 1923 PetscFunctionBegin; 1924 a = matA->A; b = matA->B; 1925 c = matB->A; d = matB->B; 1926 1927 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1928 if (flg) { 1929 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1930 } 1931 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1932 PetscFunctionReturn(0); 1933 } 1934 1935 #undef __FUNCT__ 1936 #define __FUNCT__ "MatCopy_MPIBAIJ" 1937 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1938 { 1939 PetscErrorCode ierr; 1940 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1941 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1942 1943 PetscFunctionBegin; 1944 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1945 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1946 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1947 } else { 1948 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1949 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1950 } 1951 PetscFunctionReturn(0); 1952 } 1953 1954 #undef __FUNCT__ 1955 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1956 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1957 { 1958 PetscErrorCode ierr; 1959 1960 PetscFunctionBegin; 1961 ierr = MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1962 PetscFunctionReturn(0); 1963 } 1964 1965 #undef __FUNCT__ 1966 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1967 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1968 { 1969 PetscErrorCode ierr; 1970 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1971 PetscBLASInt bnz,one=1; 1972 Mat_SeqBAIJ *x,*y; 1973 1974 PetscFunctionBegin; 1975 if (str == SAME_NONZERO_PATTERN) { 1976 PetscScalar alpha = a; 1977 x = (Mat_SeqBAIJ *)xx->A->data; 1978 y = (Mat_SeqBAIJ *)yy->A->data; 1979 bnz = PetscBLASIntCast(x->nz); 1980 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1981 x = (Mat_SeqBAIJ *)xx->B->data; 1982 y = (Mat_SeqBAIJ *)yy->B->data; 1983 bnz = PetscBLASIntCast(x->nz); 1984 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1985 } else { 1986 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1987 } 1988 PetscFunctionReturn(0); 1989 } 1990 1991 #undef __FUNCT__ 1992 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ" 1993 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs) 1994 { 1995 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1996 PetscInt rbs,cbs; 1997 PetscErrorCode ierr; 1998 1999 PetscFunctionBegin; 2000 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 2001 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 2002 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 2003 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 2004 if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs); 2005 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs); 2006 PetscFunctionReturn(0); 2007 } 2008 2009 #undef __FUNCT__ 2010 #define __FUNCT__ "MatRealPart_MPIBAIJ" 2011 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 2012 { 2013 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2014 PetscErrorCode ierr; 2015 2016 PetscFunctionBegin; 2017 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2018 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2019 PetscFunctionReturn(0); 2020 } 2021 2022 #undef __FUNCT__ 2023 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 2024 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 2025 { 2026 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2027 PetscErrorCode ierr; 2028 2029 PetscFunctionBegin; 2030 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2031 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2032 PetscFunctionReturn(0); 2033 } 2034 2035 #undef __FUNCT__ 2036 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 2037 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 2038 { 2039 PetscErrorCode ierr; 2040 IS iscol_local; 2041 PetscInt csize; 2042 2043 PetscFunctionBegin; 2044 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2045 if (call == MAT_REUSE_MATRIX) { 2046 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 2047 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2048 } else { 2049 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2050 } 2051 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 2052 if (call == MAT_INITIAL_MATRIX) { 2053 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 2054 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 2055 } 2056 PetscFunctionReturn(0); 2057 } 2058 2059 #undef __FUNCT__ 2060 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private" 2061 /* 2062 Not great since it makes two copies of the submatrix, first an SeqBAIJ 2063 in local and then by concatenating the local matrices the end result. 2064 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ() 2065 */ 2066 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2067 { 2068 PetscErrorCode ierr; 2069 PetscMPIInt rank,size; 2070 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 2071 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2072 Mat *local,M,Mreuse; 2073 MatScalar *vwork,*aa; 2074 MPI_Comm comm = ((PetscObject)mat)->comm; 2075 Mat_SeqBAIJ *aij; 2076 2077 2078 PetscFunctionBegin; 2079 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2080 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2081 2082 if (call == MAT_REUSE_MATRIX) { 2083 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2084 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2085 local = &Mreuse; 2086 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2087 } else { 2088 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2089 Mreuse = *local; 2090 ierr = PetscFree(local);CHKERRQ(ierr); 2091 } 2092 2093 /* 2094 m - number of local rows 2095 n - number of columns (same on all processors) 2096 rstart - first row in new global matrix generated 2097 */ 2098 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2099 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2100 m = m/bs; 2101 n = n/bs; 2102 2103 if (call == MAT_INITIAL_MATRIX) { 2104 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2105 ii = aij->i; 2106 jj = aij->j; 2107 2108 /* 2109 Determine the number of non-zeros in the diagonal and off-diagonal 2110 portions of the matrix in order to do correct preallocation 2111 */ 2112 2113 /* first get start and end of "diagonal" columns */ 2114 if (csize == PETSC_DECIDE) { 2115 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2116 if (mglobal == n*bs) { /* square matrix */ 2117 nlocal = m; 2118 } else { 2119 nlocal = n/size + ((n % size) > rank); 2120 } 2121 } else { 2122 nlocal = csize/bs; 2123 } 2124 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2125 rstart = rend - nlocal; 2126 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2127 2128 /* next, compute all the lengths */ 2129 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2130 olens = dlens + m; 2131 for (i=0; i<m; i++) { 2132 jend = ii[i+1] - ii[i]; 2133 olen = 0; 2134 dlen = 0; 2135 for (j=0; j<jend; j++) { 2136 if (*jj < rstart || *jj >= rend) olen++; 2137 else dlen++; 2138 jj++; 2139 } 2140 olens[i] = olen; 2141 dlens[i] = dlen; 2142 } 2143 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2144 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 2145 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2146 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2147 ierr = PetscFree(dlens);CHKERRQ(ierr); 2148 } else { 2149 PetscInt ml,nl; 2150 2151 M = *newmat; 2152 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2153 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2154 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2155 /* 2156 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2157 rather than the slower MatSetValues(). 2158 */ 2159 M->was_assembled = PETSC_TRUE; 2160 M->assembled = PETSC_FALSE; 2161 } 2162 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2163 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2164 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2165 ii = aij->i; 2166 jj = aij->j; 2167 aa = aij->a; 2168 for (i=0; i<m; i++) { 2169 row = rstart/bs + i; 2170 nz = ii[i+1] - ii[i]; 2171 cwork = jj; jj += nz; 2172 vwork = aa; aa += nz; 2173 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2174 } 2175 2176 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2177 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2178 *newmat = M; 2179 2180 /* save submatrix used in processor for next request */ 2181 if (call == MAT_INITIAL_MATRIX) { 2182 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2183 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2184 } 2185 2186 PetscFunctionReturn(0); 2187 } 2188 2189 #undef __FUNCT__ 2190 #define __FUNCT__ "MatPermute_MPIBAIJ" 2191 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2192 { 2193 MPI_Comm comm,pcomm; 2194 PetscInt first,local_size,nrows; 2195 const PetscInt *rows; 2196 PetscMPIInt size; 2197 IS crowp,growp,irowp,lrowp,lcolp,icolp; 2198 PetscErrorCode ierr; 2199 2200 PetscFunctionBegin; 2201 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2202 /* make a collective version of 'rowp' */ 2203 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2204 if (pcomm==comm) { 2205 crowp = rowp; 2206 } else { 2207 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2208 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2209 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2210 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2211 } 2212 /* collect the global row permutation and invert it */ 2213 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 2214 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 2215 if (pcomm!=comm) { 2216 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 2217 } 2218 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 2219 /* get the local target indices */ 2220 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 2221 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 2222 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 2223 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 2224 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 2225 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 2226 /* the column permutation is so much easier; 2227 make a local version of 'colp' and invert it */ 2228 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2229 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2230 if (size==1) { 2231 lcolp = colp; 2232 } else { 2233 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 2234 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 2235 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr); 2236 } 2237 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2238 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 2239 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 2240 if (size>1) { 2241 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 2242 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 2243 } 2244 /* now we just get the submatrix */ 2245 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2246 /* clean up */ 2247 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 2248 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 2249 PetscFunctionReturn(0); 2250 } 2251 2252 #undef __FUNCT__ 2253 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2254 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2255 { 2256 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2257 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2258 2259 PetscFunctionBegin; 2260 if (nghosts) { *nghosts = B->nbs;} 2261 if (ghosts) {*ghosts = baij->garray;} 2262 PetscFunctionReturn(0); 2263 } 2264 2265 extern PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat); 2266 2267 #undef __FUNCT__ 2268 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ" 2269 /* 2270 This routine is almost identical to MatFDColoringCreate_MPIBAIJ()! 2271 */ 2272 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 2273 { 2274 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 2275 PetscErrorCode ierr; 2276 PetscMPIInt size,*ncolsonproc,*disp,nn; 2277 PetscInt bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col; 2278 const PetscInt *is; 2279 PetscInt nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj; 2280 PetscInt *rowhit,M,cstart,cend,colb; 2281 PetscInt *columnsforrow,l; 2282 IS *isa; 2283 PetscBool done,flg; 2284 ISLocalToGlobalMapping map = mat->cbmapping; 2285 PetscInt *ltog = (map ? map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype; 2286 2287 PetscFunctionBegin; 2288 if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();"); 2289 if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock"); 2290 2291 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 2292 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2293 M = mat->rmap->n/bs; 2294 cstart = mat->cmap->rstart/bs; 2295 cend = mat->cmap->rend/bs; 2296 c->M = mat->rmap->N/bs; /* set the global rows and columns and local rows */ 2297 c->N = mat->cmap->N/bs; 2298 c->m = mat->rmap->n/bs; 2299 c->rstart = mat->rmap->rstart/bs; 2300 2301 c->ncolors = nis; 2302 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 2303 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 2304 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 2305 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 2306 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 2307 ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr); 2308 2309 /* Allow access to data structures of local part of matrix */ 2310 if (!baij->colmap) { 2311 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 2312 } 2313 ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2314 ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2315 2316 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 2317 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 2318 2319 for (i=0; i<nis; i++) { 2320 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 2321 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 2322 c->ncolumns[i] = n; 2323 if (n) { 2324 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 2325 ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr); 2326 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 2327 } else { 2328 c->columns[i] = 0; 2329 } 2330 2331 if (ctype == IS_COLORING_GLOBAL){ 2332 /* Determine the total (parallel) number of columns of this color */ 2333 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 2334 ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr); 2335 2336 nn = PetscMPIIntCast(n); 2337 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2338 nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];} 2339 if (!nctot) { 2340 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 2341 } 2342 2343 disp[0] = 0; 2344 for (j=1; j<size; j++) { 2345 disp[j] = disp[j-1] + ncolsonproc[j-1]; 2346 } 2347 2348 /* Get complete list of columns for color on each processor */ 2349 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2350 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2351 ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr); 2352 } else if (ctype == IS_COLORING_GHOSTED){ 2353 /* Determine local number of columns of this color on this process, including ghost points */ 2354 nctot = n; 2355 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2356 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 2357 } else { 2358 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 2359 } 2360 2361 /* 2362 Mark all rows affect by these columns 2363 */ 2364 /* Temporary option to allow for debugging/testing */ 2365 flg = PETSC_FALSE; 2366 ierr = PetscOptionsGetBool(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr); 2367 if (!flg) {/*-----------------------------------------------------------------------------*/ 2368 /* crude, fast version */ 2369 ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr); 2370 /* loop over columns*/ 2371 for (j=0; j<nctot; j++) { 2372 if (ctype == IS_COLORING_GHOSTED) { 2373 col = ltog[cols[j]]; 2374 } else { 2375 col = cols[j]; 2376 } 2377 if (col >= cstart && col < cend) { 2378 /* column is in diagonal block of matrix */ 2379 rows = A_cj + A_ci[col-cstart]; 2380 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2381 } else { 2382 #if defined (PETSC_USE_CTABLE) 2383 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2384 colb --; 2385 #else 2386 colb = baij->colmap[col] - 1; 2387 #endif 2388 if (colb == -1) { 2389 m = 0; 2390 } else { 2391 colb = colb/bs; 2392 rows = B_cj + B_ci[colb]; 2393 m = B_ci[colb+1] - B_ci[colb]; 2394 } 2395 } 2396 /* loop over columns marking them in rowhit */ 2397 for (k=0; k<m; k++) { 2398 rowhit[*rows++] = col + 1; 2399 } 2400 } 2401 2402 /* count the number of hits */ 2403 nrows = 0; 2404 for (j=0; j<M; j++) { 2405 if (rowhit[j]) nrows++; 2406 } 2407 c->nrows[i] = nrows; 2408 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2409 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2410 ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2411 nrows = 0; 2412 for (j=0; j<M; j++) { 2413 if (rowhit[j]) { 2414 c->rows[i][nrows] = j; 2415 c->columnsforrow[i][nrows] = rowhit[j] - 1; 2416 nrows++; 2417 } 2418 } 2419 } else {/*-------------------------------------------------------------------------------*/ 2420 /* slow version, using rowhit as a linked list */ 2421 PetscInt currentcol,fm,mfm; 2422 rowhit[M] = M; 2423 nrows = 0; 2424 /* loop over columns*/ 2425 for (j=0; j<nctot; j++) { 2426 if (ctype == IS_COLORING_GHOSTED) { 2427 col = ltog[cols[j]]; 2428 } else { 2429 col = cols[j]; 2430 } 2431 if (col >= cstart && col < cend) { 2432 /* column is in diagonal block of matrix */ 2433 rows = A_cj + A_ci[col-cstart]; 2434 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2435 } else { 2436 #if defined (PETSC_USE_CTABLE) 2437 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2438 colb --; 2439 #else 2440 colb = baij->colmap[col] - 1; 2441 #endif 2442 if (colb == -1) { 2443 m = 0; 2444 } else { 2445 colb = colb/bs; 2446 rows = B_cj + B_ci[colb]; 2447 m = B_ci[colb+1] - B_ci[colb]; 2448 } 2449 } 2450 2451 /* loop over columns marking them in rowhit */ 2452 fm = M; /* fm points to first entry in linked list */ 2453 for (k=0; k<m; k++) { 2454 currentcol = *rows++; 2455 /* is it already in the list? */ 2456 do { 2457 mfm = fm; 2458 fm = rowhit[fm]; 2459 } while (fm < currentcol); 2460 /* not in list so add it */ 2461 if (fm != currentcol) { 2462 nrows++; 2463 columnsforrow[currentcol] = col; 2464 /* next three lines insert new entry into linked list */ 2465 rowhit[mfm] = currentcol; 2466 rowhit[currentcol] = fm; 2467 fm = currentcol; 2468 /* fm points to present position in list since we know the columns are sorted */ 2469 } else { 2470 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected"); 2471 } 2472 } 2473 } 2474 c->nrows[i] = nrows; 2475 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2476 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2477 ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2478 /* now store the linked list of rows into c->rows[i] */ 2479 nrows = 0; 2480 fm = rowhit[M]; 2481 do { 2482 c->rows[i][nrows] = fm; 2483 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 2484 fm = rowhit[fm]; 2485 } while (fm < M); 2486 } /* ---------------------------------------------------------------------------------------*/ 2487 ierr = PetscFree(cols);CHKERRQ(ierr); 2488 } 2489 2490 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 2491 /* 2492 vscale will contain the "diagonal" on processor scalings followed by the off processor 2493 */ 2494 if (ctype == IS_COLORING_GLOBAL) { 2495 PetscInt *garray; 2496 ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2497 for (i=0; i<baij->B->cmap->n/bs; i++) { 2498 for (j=0; j<bs; j++) { 2499 garray[i*bs+j] = bs*baij->garray[i]+j; 2500 } 2501 } 2502 ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr); 2503 ierr = PetscFree(garray);CHKERRQ(ierr); 2504 CHKMEMQ; 2505 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2506 for (k=0; k<c->ncolors; k++) { 2507 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2508 for (l=0; l<c->nrows[k]; l++) { 2509 col = c->columnsforrow[k][l]; 2510 if (col >= cstart && col < cend) { 2511 /* column is in diagonal block of matrix */ 2512 colb = col - cstart; 2513 } else { 2514 /* column is in "off-processor" part */ 2515 #if defined (PETSC_USE_CTABLE) 2516 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2517 colb --; 2518 #else 2519 colb = baij->colmap[col] - 1; 2520 #endif 2521 colb = colb/bs; 2522 colb += cend - cstart; 2523 } 2524 c->vscaleforrow[k][l] = colb; 2525 } 2526 } 2527 } else if (ctype == IS_COLORING_GHOSTED) { 2528 /* Get gtol mapping */ 2529 PetscInt N = mat->cmap->N, *gtol; 2530 ierr = PetscMalloc((N+1)*sizeof(PetscInt),>ol);CHKERRQ(ierr); 2531 for (i=0; i<N; i++) gtol[i] = -1; 2532 for (i=0; i<map->n; i++) gtol[ltog[i]] = i; 2533 2534 c->vscale = 0; /* will be created in MatFDColoringApply() */ 2535 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2536 for (k=0; k<c->ncolors; k++) { 2537 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2538 for (l=0; l<c->nrows[k]; l++) { 2539 col = c->columnsforrow[k][l]; /* global column index */ 2540 c->vscaleforrow[k][l] = gtol[col]; /* local column index */ 2541 } 2542 } 2543 ierr = PetscFree(gtol);CHKERRQ(ierr); 2544 } 2545 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 2546 2547 ierr = PetscFree(rowhit);CHKERRQ(ierr); 2548 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 2549 ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2550 ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2551 CHKMEMQ; 2552 PetscFunctionReturn(0); 2553 } 2554 2555 #undef __FUNCT__ 2556 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ" 2557 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat) 2558 { 2559 Mat B; 2560 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2561 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2562 Mat_SeqAIJ *b; 2563 PetscErrorCode ierr; 2564 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2565 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2566 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2567 2568 PetscFunctionBegin; 2569 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2570 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2571 2572 /* ---------------------------------------------------------------- 2573 Tell every processor the number of nonzeros per row 2574 */ 2575 ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 2576 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2577 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2578 } 2579 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2580 ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr); 2581 displs = recvcounts + size; 2582 for (i=0; i<size; i++) { 2583 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2584 displs[i] = A->rmap->range[i]/bs; 2585 } 2586 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2587 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2588 #else 2589 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2590 #endif 2591 /* --------------------------------------------------------------- 2592 Create the sequential matrix of the same type as the local block diagonal 2593 */ 2594 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2595 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2596 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2597 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2598 b = (Mat_SeqAIJ *)B->data; 2599 2600 /*-------------------------------------------------------------------- 2601 Copy my part of matrix column indices over 2602 */ 2603 sendcount = ad->nz + bd->nz; 2604 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2605 a_jsendbuf = ad->j; 2606 b_jsendbuf = bd->j; 2607 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2608 cnt = 0; 2609 for (i=0; i<n; i++) { 2610 2611 /* put in lower diagonal portion */ 2612 m = bd->i[i+1] - bd->i[i]; 2613 while (m > 0) { 2614 /* is it above diagonal (in bd (compressed) numbering) */ 2615 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2616 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2617 m--; 2618 } 2619 2620 /* put in diagonal portion */ 2621 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2622 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2623 } 2624 2625 /* put in upper diagonal portion */ 2626 while (m-- > 0) { 2627 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2628 } 2629 } 2630 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt); 2631 2632 /*-------------------------------------------------------------------- 2633 Gather all column indices to all processors 2634 */ 2635 for (i=0; i<size; i++) { 2636 recvcounts[i] = 0; 2637 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2638 recvcounts[i] += lens[j]; 2639 } 2640 } 2641 displs[0] = 0; 2642 for (i=1; i<size; i++) { 2643 displs[i] = displs[i-1] + recvcounts[i-1]; 2644 } 2645 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2646 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2647 #else 2648 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2649 #endif 2650 /*-------------------------------------------------------------------- 2651 Assemble the matrix into useable form (note numerical values not yet set) 2652 */ 2653 /* set the b->ilen (length of each row) values */ 2654 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2655 /* set the b->i indices */ 2656 b->i[0] = 0; 2657 for (i=1; i<=A->rmap->N/bs; i++) { 2658 b->i[i] = b->i[i-1] + lens[i-1]; 2659 } 2660 ierr = PetscFree(lens);CHKERRQ(ierr); 2661 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2662 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2663 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2664 2665 if (A->symmetric){ 2666 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2667 } else if (A->hermitian) { 2668 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2669 } else if (A->structurally_symmetric) { 2670 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2671 } 2672 *newmat = B; 2673 PetscFunctionReturn(0); 2674 } 2675 2676 #undef __FUNCT__ 2677 #define __FUNCT__ "MatSOR_MPIBAIJ" 2678 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2679 { 2680 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2681 PetscErrorCode ierr; 2682 Vec bb1 = 0; 2683 2684 PetscFunctionBegin; 2685 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2686 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2687 } 2688 2689 if (flag == SOR_APPLY_UPPER) { 2690 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2691 PetscFunctionReturn(0); 2692 } 2693 2694 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2695 if (flag & SOR_ZERO_INITIAL_GUESS) { 2696 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2697 its--; 2698 } 2699 2700 while (its--) { 2701 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2702 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2703 2704 /* update rhs: bb1 = bb - B*x */ 2705 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2706 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2707 2708 /* local sweep */ 2709 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2710 } 2711 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 2712 if (flag & SOR_ZERO_INITIAL_GUESS) { 2713 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2714 its--; 2715 } 2716 while (its--) { 2717 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2718 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2719 2720 /* update rhs: bb1 = bb - B*x */ 2721 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2722 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2723 2724 /* local sweep */ 2725 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2726 } 2727 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 2728 if (flag & SOR_ZERO_INITIAL_GUESS) { 2729 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2730 its--; 2731 } 2732 while (its--) { 2733 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2734 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2735 2736 /* update rhs: bb1 = bb - B*x */ 2737 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2738 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2739 2740 /* local sweep */ 2741 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2742 } 2743 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2744 2745 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2746 PetscFunctionReturn(0); 2747 } 2748 2749 extern PetscErrorCode MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2750 2751 2752 /* -------------------------------------------------------------------*/ 2753 static struct _MatOps MatOps_Values = { 2754 MatSetValues_MPIBAIJ, 2755 MatGetRow_MPIBAIJ, 2756 MatRestoreRow_MPIBAIJ, 2757 MatMult_MPIBAIJ, 2758 /* 4*/ MatMultAdd_MPIBAIJ, 2759 MatMultTranspose_MPIBAIJ, 2760 MatMultTransposeAdd_MPIBAIJ, 2761 0, 2762 0, 2763 0, 2764 /*10*/ 0, 2765 0, 2766 0, 2767 MatSOR_MPIBAIJ, 2768 MatTranspose_MPIBAIJ, 2769 /*15*/ MatGetInfo_MPIBAIJ, 2770 MatEqual_MPIBAIJ, 2771 MatGetDiagonal_MPIBAIJ, 2772 MatDiagonalScale_MPIBAIJ, 2773 MatNorm_MPIBAIJ, 2774 /*20*/ MatAssemblyBegin_MPIBAIJ, 2775 MatAssemblyEnd_MPIBAIJ, 2776 MatSetOption_MPIBAIJ, 2777 MatZeroEntries_MPIBAIJ, 2778 /*24*/ MatZeroRows_MPIBAIJ, 2779 0, 2780 0, 2781 0, 2782 0, 2783 /*29*/ MatSetUpPreallocation_MPIBAIJ, 2784 0, 2785 0, 2786 0, 2787 0, 2788 /*34*/ MatDuplicate_MPIBAIJ, 2789 0, 2790 0, 2791 0, 2792 0, 2793 /*39*/ MatAXPY_MPIBAIJ, 2794 MatGetSubMatrices_MPIBAIJ, 2795 MatIncreaseOverlap_MPIBAIJ, 2796 MatGetValues_MPIBAIJ, 2797 MatCopy_MPIBAIJ, 2798 /*44*/ 0, 2799 MatScale_MPIBAIJ, 2800 0, 2801 0, 2802 0, 2803 /*49*/ MatSetBlockSize_MPIBAIJ, 2804 0, 2805 0, 2806 0, 2807 0, 2808 /*54*/ MatFDColoringCreate_MPIBAIJ, 2809 0, 2810 MatSetUnfactored_MPIBAIJ, 2811 MatPermute_MPIBAIJ, 2812 MatSetValuesBlocked_MPIBAIJ, 2813 /*59*/ MatGetSubMatrix_MPIBAIJ, 2814 MatDestroy_MPIBAIJ, 2815 MatView_MPIBAIJ, 2816 0, 2817 0, 2818 /*64*/ 0, 2819 0, 2820 0, 2821 0, 2822 0, 2823 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2824 0, 2825 0, 2826 0, 2827 0, 2828 /*74*/ 0, 2829 MatFDColoringApply_BAIJ, 2830 0, 2831 0, 2832 0, 2833 /*79*/ 0, 2834 0, 2835 0, 2836 0, 2837 MatLoad_MPIBAIJ, 2838 /*84*/ 0, 2839 0, 2840 0, 2841 0, 2842 0, 2843 /*89*/ 0, 2844 0, 2845 0, 2846 0, 2847 0, 2848 /*94*/ 0, 2849 0, 2850 0, 2851 0, 2852 0, 2853 /*99*/ 0, 2854 0, 2855 0, 2856 0, 2857 0, 2858 /*104*/0, 2859 MatRealPart_MPIBAIJ, 2860 MatImaginaryPart_MPIBAIJ, 2861 0, 2862 0, 2863 /*109*/0, 2864 0, 2865 0, 2866 0, 2867 0, 2868 /*114*/MatGetSeqNonzerostructure_MPIBAIJ, 2869 0, 2870 MatGetGhosts_MPIBAIJ, 2871 0, 2872 0, 2873 /*119*/0, 2874 0, 2875 0, 2876 0 2877 }; 2878 2879 EXTERN_C_BEGIN 2880 #undef __FUNCT__ 2881 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2882 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscBool *iscopy,MatReuse reuse,Mat *a) 2883 { 2884 PetscFunctionBegin; 2885 *a = ((Mat_MPIBAIJ *)A->data)->A; 2886 *iscopy = PETSC_FALSE; 2887 PetscFunctionReturn(0); 2888 } 2889 EXTERN_C_END 2890 2891 EXTERN_C_BEGIN 2892 extern PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2893 EXTERN_C_END 2894 2895 EXTERN_C_BEGIN 2896 #undef __FUNCT__ 2897 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2898 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2899 { 2900 PetscInt m,rstart,cstart,cend; 2901 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2902 const PetscInt *JJ=0; 2903 PetscScalar *values=0; 2904 PetscErrorCode ierr; 2905 2906 PetscFunctionBegin; 2907 2908 if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2909 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2910 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2911 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2912 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2913 m = B->rmap->n/bs; 2914 rstart = B->rmap->rstart/bs; 2915 cstart = B->cmap->rstart/bs; 2916 cend = B->cmap->rend/bs; 2917 2918 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2919 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 2920 for (i=0; i<m; i++) { 2921 nz = ii[i+1] - ii[i]; 2922 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2923 nz_max = PetscMax(nz_max,nz); 2924 JJ = jj + ii[i]; 2925 for (j=0; j<nz; j++) { 2926 if (*JJ >= cstart) break; 2927 JJ++; 2928 } 2929 d = 0; 2930 for (; j<nz; j++) { 2931 if (*JJ++ >= cend) break; 2932 d++; 2933 } 2934 d_nnz[i] = d; 2935 o_nnz[i] = nz - d; 2936 } 2937 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2938 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2939 2940 values = (PetscScalar*)V; 2941 if (!values) { 2942 ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2943 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2944 } 2945 for (i=0; i<m; i++) { 2946 PetscInt row = i + rstart; 2947 PetscInt ncols = ii[i+1] - ii[i]; 2948 const PetscInt *icols = jj + ii[i]; 2949 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2950 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2951 } 2952 2953 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2954 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2955 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2956 2957 PetscFunctionReturn(0); 2958 } 2959 EXTERN_C_END 2960 2961 #undef __FUNCT__ 2962 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2963 /*@C 2964 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2965 (the default parallel PETSc format). 2966 2967 Collective on MPI_Comm 2968 2969 Input Parameters: 2970 + A - the matrix 2971 . bs - the block size 2972 . i - the indices into j for the start of each local row (starts with zero) 2973 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2974 - v - optional values in the matrix 2975 2976 Level: developer 2977 2978 .keywords: matrix, aij, compressed row, sparse, parallel 2979 2980 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2981 @*/ 2982 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2983 { 2984 PetscErrorCode ierr; 2985 2986 PetscFunctionBegin; 2987 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2988 PetscFunctionReturn(0); 2989 } 2990 2991 EXTERN_C_BEGIN 2992 #undef __FUNCT__ 2993 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2994 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2995 { 2996 Mat_MPIBAIJ *b; 2997 PetscErrorCode ierr; 2998 PetscInt i, newbs = PetscAbs(bs); 2999 3000 PetscFunctionBegin; 3001 if (bs < 0) { 3002 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr); 3003 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 3004 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3005 bs = PetscAbs(bs); 3006 } 3007 if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz"); 3008 bs = newbs; 3009 3010 3011 if (bs < 1) SETERRQ(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 3012 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3013 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3014 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3015 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3016 3017 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3018 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3019 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3020 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3021 3022 if (d_nnz) { 3023 for (i=0; i<B->rmap->n/bs; i++) { 3024 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 3025 } 3026 } 3027 if (o_nnz) { 3028 for (i=0; i<B->rmap->n/bs; i++) { 3029 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 3030 } 3031 } 3032 3033 b = (Mat_MPIBAIJ*)B->data; 3034 b->bs2 = bs*bs; 3035 b->mbs = B->rmap->n/bs; 3036 b->nbs = B->cmap->n/bs; 3037 b->Mbs = B->rmap->N/bs; 3038 b->Nbs = B->cmap->N/bs; 3039 3040 for (i=0; i<=b->size; i++) { 3041 b->rangebs[i] = B->rmap->range[i]/bs; 3042 } 3043 b->rstartbs = B->rmap->rstart/bs; 3044 b->rendbs = B->rmap->rend/bs; 3045 b->cstartbs = B->cmap->rstart/bs; 3046 b->cendbs = B->cmap->rend/bs; 3047 3048 if (!B->preallocated) { 3049 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3050 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3051 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 3052 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3053 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3054 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3055 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 3056 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3057 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 3058 } 3059 3060 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3061 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 3062 B->preallocated = PETSC_TRUE; 3063 PetscFunctionReturn(0); 3064 } 3065 EXTERN_C_END 3066 3067 EXTERN_C_BEGIN 3068 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 3069 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 3070 EXTERN_C_END 3071 3072 3073 EXTERN_C_BEGIN 3074 #undef __FUNCT__ 3075 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 3076 PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj) 3077 { 3078 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 3079 PetscErrorCode ierr; 3080 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 3081 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 3082 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 3083 3084 PetscFunctionBegin; 3085 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr); 3086 ii[0] = 0; 3087 CHKMEMQ; 3088 for (i=0; i<M; i++) { 3089 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 3090 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 3091 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 3092 /* remove one from count of matrix has diagonal */ 3093 for (j=id[i]; j<id[i+1]; j++) { 3094 if (jd[j] == i) {ii[i+1]--;break;} 3095 } 3096 CHKMEMQ; 3097 } 3098 ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3099 cnt = 0; 3100 for (i=0; i<M; i++) { 3101 for (j=io[i]; j<io[i+1]; j++) { 3102 if (garray[jo[j]] > rstart) break; 3103 jj[cnt++] = garray[jo[j]]; 3104 CHKMEMQ; 3105 } 3106 for (k=id[i]; k<id[i+1]; k++) { 3107 if (jd[k] != i) { 3108 jj[cnt++] = rstart + jd[k]; 3109 CHKMEMQ; 3110 } 3111 } 3112 for (;j<io[i+1]; j++) { 3113 jj[cnt++] = garray[jo[j]]; 3114 CHKMEMQ; 3115 } 3116 } 3117 ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr); 3118 PetscFunctionReturn(0); 3119 } 3120 EXTERN_C_END 3121 3122 #include <../src/mat/impls/aij/mpi/mpiaij.h> 3123 EXTERN_C_BEGIN 3124 PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*); 3125 EXTERN_C_END 3126 3127 EXTERN_C_BEGIN 3128 #undef __FUNCT__ 3129 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ" 3130 PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,const MatType newtype,MatReuse reuse,Mat *newmat) 3131 { 3132 PetscErrorCode ierr; 3133 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3134 Mat B; 3135 Mat_MPIAIJ *b; 3136 3137 PetscFunctionBegin; 3138 if (!A->assembled) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Matrix must be assembled"); 3139 3140 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 3141 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3142 ierr = MatSetType(B,newtype);CHKERRQ(ierr); 3143 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 3144 b = (Mat_MPIAIJ*) B->data; 3145 3146 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 3147 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 3148 ierr = DisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 3149 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 3150 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 3151 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3152 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3153 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3154 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3155 if (reuse == MAT_REUSE_MATRIX) { 3156 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 3157 } else { 3158 *newmat = B; 3159 } 3160 PetscFunctionReturn(0); 3161 } 3162 EXTERN_C_END 3163 3164 EXTERN_C_BEGIN 3165 #if defined(PETSC_HAVE_MUMPS) 3166 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3167 #endif 3168 EXTERN_C_END 3169 3170 /*MC 3171 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 3172 3173 Options Database Keys: 3174 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 3175 . -mat_block_size <bs> - set the blocksize used to store the matrix 3176 - -mat_use_hash_table <fact> 3177 3178 Level: beginner 3179 3180 .seealso: MatCreateMPIBAIJ 3181 M*/ 3182 3183 EXTERN_C_BEGIN 3184 extern PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,const MatType,MatReuse,Mat*); 3185 EXTERN_C_END 3186 3187 EXTERN_C_BEGIN 3188 #undef __FUNCT__ 3189 #define __FUNCT__ "MatCreate_MPIBAIJ" 3190 PetscErrorCode MatCreate_MPIBAIJ(Mat B) 3191 { 3192 Mat_MPIBAIJ *b; 3193 PetscErrorCode ierr; 3194 PetscBool flg; 3195 3196 PetscFunctionBegin; 3197 ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr); 3198 B->data = (void*)b; 3199 3200 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3201 B->assembled = PETSC_FALSE; 3202 3203 B->insertmode = NOT_SET_VALUES; 3204 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 3205 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 3206 3207 /* build local table of row and column ownerships */ 3208 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 3209 3210 /* build cache for off array entries formed */ 3211 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 3212 b->donotstash = PETSC_FALSE; 3213 b->colmap = PETSC_NULL; 3214 b->garray = PETSC_NULL; 3215 b->roworiented = PETSC_TRUE; 3216 3217 /* stuff used in block assembly */ 3218 b->barray = 0; 3219 3220 /* stuff used for matrix vector multiply */ 3221 b->lvec = 0; 3222 b->Mvctx = 0; 3223 3224 /* stuff for MatGetRow() */ 3225 b->rowindices = 0; 3226 b->rowvalues = 0; 3227 b->getrowactive = PETSC_FALSE; 3228 3229 /* hash table stuff */ 3230 b->ht = 0; 3231 b->hd = 0; 3232 b->ht_size = 0; 3233 b->ht_flag = PETSC_FALSE; 3234 b->ht_fact = 0; 3235 b->ht_total_ct = 0; 3236 b->ht_insert_ct = 0; 3237 3238 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 3239 b->ijonly = PETSC_FALSE; 3240 3241 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 3242 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 3243 if (flg) { 3244 PetscReal fact = 1.39; 3245 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 3246 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 3247 if (fact <= 1.0) fact = 1.39; 3248 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 3249 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 3250 } 3251 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3252 3253 #if defined(PETSC_HAVE_MUMPS) 3254 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr); 3255 #endif 3256 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C", 3257 "MatConvert_MPIBAIJ_MPIAdj", 3258 MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3259 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiaij_C", 3260 "MatConvert_MPIBAIJ_MPIAIJ", 3261 MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 3262 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C", 3263 "MatConvert_MPIBAIJ_MPISBAIJ", 3264 MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr); 3265 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3266 "MatStoreValues_MPIBAIJ", 3267 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3268 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3269 "MatRetrieveValues_MPIBAIJ", 3270 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3271 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3272 "MatGetDiagonalBlock_MPIBAIJ", 3273 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3274 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 3275 "MatMPIBAIJSetPreallocation_MPIBAIJ", 3276 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3277 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 3278 "MatMPIBAIJSetPreallocationCSR_MPIBAIJ", 3279 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3280 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3281 "MatDiagonalScaleLocal_MPIBAIJ", 3282 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3283 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 3284 "MatSetHashTableFactor_MPIBAIJ", 3285 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3286 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C", 3287 "MatConvert_MPIBAIJ_MPIBSTRM", 3288 MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr); 3289 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3290 PetscFunctionReturn(0); 3291 } 3292 EXTERN_C_END 3293 3294 /*MC 3295 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3296 3297 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3298 and MATMPIBAIJ otherwise. 3299 3300 Options Database Keys: 3301 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3302 3303 Level: beginner 3304 3305 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3306 M*/ 3307 3308 EXTERN_C_BEGIN 3309 #undef __FUNCT__ 3310 #define __FUNCT__ "MatCreate_BAIJ" 3311 PetscErrorCode MatCreate_BAIJ(Mat A) 3312 { 3313 PetscErrorCode ierr; 3314 PetscMPIInt size; 3315 3316 PetscFunctionBegin; 3317 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 3318 if (size == 1) { 3319 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 3320 } else { 3321 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 3322 } 3323 PetscFunctionReturn(0); 3324 } 3325 EXTERN_C_END 3326 3327 #undef __FUNCT__ 3328 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 3329 /*@C 3330 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3331 (block compressed row). For good matrix assembly performance 3332 the user should preallocate the matrix storage by setting the parameters 3333 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3334 performance can be increased by more than a factor of 50. 3335 3336 Collective on Mat 3337 3338 Input Parameters: 3339 + A - the matrix 3340 . bs - size of blockk 3341 . d_nz - number of block nonzeros per block row in diagonal portion of local 3342 submatrix (same for all local rows) 3343 . d_nnz - array containing the number of block nonzeros in the various block rows 3344 of the in diagonal portion of the local (possibly different for each block 3345 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 3346 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 3347 submatrix (same for all local rows). 3348 - o_nnz - array containing the number of nonzeros in the various block rows of the 3349 off-diagonal portion of the local submatrix (possibly different for 3350 each block row) or PETSC_NULL. 3351 3352 If the *_nnz parameter is given then the *_nz parameter is ignored 3353 3354 Options Database Keys: 3355 + -mat_block_size - size of the blocks to use 3356 - -mat_use_hash_table <fact> 3357 3358 Notes: 3359 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3360 than it must be used on all processors that share the object for that argument. 3361 3362 Storage Information: 3363 For a square global matrix we define each processor's diagonal portion 3364 to be its local rows and the corresponding columns (a square submatrix); 3365 each processor's off-diagonal portion encompasses the remainder of the 3366 local matrix (a rectangular submatrix). 3367 3368 The user can specify preallocated storage for the diagonal part of 3369 the local submatrix with either d_nz or d_nnz (not both). Set 3370 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3371 memory allocation. Likewise, specify preallocated storage for the 3372 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3373 3374 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3375 the figure below we depict these three local rows and all columns (0-11). 3376 3377 .vb 3378 0 1 2 3 4 5 6 7 8 9 10 11 3379 ------------------- 3380 row 3 | o o o d d d o o o o o o 3381 row 4 | o o o d d d o o o o o o 3382 row 5 | o o o d d d o o o o o o 3383 ------------------- 3384 .ve 3385 3386 Thus, any entries in the d locations are stored in the d (diagonal) 3387 submatrix, and any entries in the o locations are stored in the 3388 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3389 stored simply in the MATSEQBAIJ format for compressed row storage. 3390 3391 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3392 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3393 In general, for PDE problems in which most nonzeros are near the diagonal, 3394 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3395 or you will get TERRIBLE performance; see the users' manual chapter on 3396 matrices. 3397 3398 You can call MatGetInfo() to get information on how effective the preallocation was; 3399 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3400 You can also run with the option -info and look for messages with the string 3401 malloc in them to see if additional memory allocation was needed. 3402 3403 Level: intermediate 3404 3405 .keywords: matrix, block, aij, compressed row, sparse, parallel 3406 3407 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR() 3408 @*/ 3409 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3410 { 3411 PetscErrorCode ierr; 3412 3413 PetscFunctionBegin; 3414 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3415 PetscFunctionReturn(0); 3416 } 3417 3418 #undef __FUNCT__ 3419 #define __FUNCT__ "MatCreateMPIBAIJ" 3420 /*@C 3421 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 3422 (block compressed row). For good matrix assembly performance 3423 the user should preallocate the matrix storage by setting the parameters 3424 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3425 performance can be increased by more than a factor of 50. 3426 3427 Collective on MPI_Comm 3428 3429 Input Parameters: 3430 + comm - MPI communicator 3431 . bs - size of blockk 3432 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3433 This value should be the same as the local size used in creating the 3434 y vector for the matrix-vector product y = Ax. 3435 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 3436 This value should be the same as the local size used in creating the 3437 x vector for the matrix-vector product y = Ax. 3438 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3439 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3440 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3441 submatrix (same for all local rows) 3442 . d_nnz - array containing the number of nonzero blocks in the various block rows 3443 of the in diagonal portion of the local (possibly different for each block 3444 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 3445 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3446 submatrix (same for all local rows). 3447 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3448 off-diagonal portion of the local submatrix (possibly different for 3449 each block row) or PETSC_NULL. 3450 3451 Output Parameter: 3452 . A - the matrix 3453 3454 Options Database Keys: 3455 + -mat_block_size - size of the blocks to use 3456 - -mat_use_hash_table <fact> 3457 3458 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3459 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3460 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3461 3462 Notes: 3463 If the *_nnz parameter is given then the *_nz parameter is ignored 3464 3465 A nonzero block is any block that as 1 or more nonzeros in it 3466 3467 The user MUST specify either the local or global matrix dimensions 3468 (possibly both). 3469 3470 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3471 than it must be used on all processors that share the object for that argument. 3472 3473 Storage Information: 3474 For a square global matrix we define each processor's diagonal portion 3475 to be its local rows and the corresponding columns (a square submatrix); 3476 each processor's off-diagonal portion encompasses the remainder of the 3477 local matrix (a rectangular submatrix). 3478 3479 The user can specify preallocated storage for the diagonal part of 3480 the local submatrix with either d_nz or d_nnz (not both). Set 3481 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3482 memory allocation. Likewise, specify preallocated storage for the 3483 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3484 3485 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3486 the figure below we depict these three local rows and all columns (0-11). 3487 3488 .vb 3489 0 1 2 3 4 5 6 7 8 9 10 11 3490 ------------------- 3491 row 3 | o o o d d d o o o o o o 3492 row 4 | o o o d d d o o o o o o 3493 row 5 | o o o d d d o o o o o o 3494 ------------------- 3495 .ve 3496 3497 Thus, any entries in the d locations are stored in the d (diagonal) 3498 submatrix, and any entries in the o locations are stored in the 3499 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3500 stored simply in the MATSEQBAIJ format for compressed row storage. 3501 3502 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3503 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3504 In general, for PDE problems in which most nonzeros are near the diagonal, 3505 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3506 or you will get TERRIBLE performance; see the users' manual chapter on 3507 matrices. 3508 3509 Level: intermediate 3510 3511 .keywords: matrix, block, aij, compressed row, sparse, parallel 3512 3513 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3514 @*/ 3515 PetscErrorCode MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3516 { 3517 PetscErrorCode ierr; 3518 PetscMPIInt size; 3519 3520 PetscFunctionBegin; 3521 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3522 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3523 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3524 if (size > 1) { 3525 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3526 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3527 } else { 3528 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3529 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3530 } 3531 PetscFunctionReturn(0); 3532 } 3533 3534 #undef __FUNCT__ 3535 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3536 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3537 { 3538 Mat mat; 3539 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3540 PetscErrorCode ierr; 3541 PetscInt len=0; 3542 3543 PetscFunctionBegin; 3544 *newmat = 0; 3545 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3546 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3547 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3548 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3549 3550 mat->factortype = matin->factortype; 3551 mat->preallocated = PETSC_TRUE; 3552 mat->assembled = PETSC_TRUE; 3553 mat->insertmode = NOT_SET_VALUES; 3554 3555 a = (Mat_MPIBAIJ*)mat->data; 3556 mat->rmap->bs = matin->rmap->bs; 3557 a->bs2 = oldmat->bs2; 3558 a->mbs = oldmat->mbs; 3559 a->nbs = oldmat->nbs; 3560 a->Mbs = oldmat->Mbs; 3561 a->Nbs = oldmat->Nbs; 3562 3563 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3564 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3565 3566 a->size = oldmat->size; 3567 a->rank = oldmat->rank; 3568 a->donotstash = oldmat->donotstash; 3569 a->roworiented = oldmat->roworiented; 3570 a->rowindices = 0; 3571 a->rowvalues = 0; 3572 a->getrowactive = PETSC_FALSE; 3573 a->barray = 0; 3574 a->rstartbs = oldmat->rstartbs; 3575 a->rendbs = oldmat->rendbs; 3576 a->cstartbs = oldmat->cstartbs; 3577 a->cendbs = oldmat->cendbs; 3578 3579 /* hash table stuff */ 3580 a->ht = 0; 3581 a->hd = 0; 3582 a->ht_size = 0; 3583 a->ht_flag = oldmat->ht_flag; 3584 a->ht_fact = oldmat->ht_fact; 3585 a->ht_total_ct = 0; 3586 a->ht_insert_ct = 0; 3587 3588 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3589 if (oldmat->colmap) { 3590 #if defined (PETSC_USE_CTABLE) 3591 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3592 #else 3593 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3594 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3595 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3596 #endif 3597 } else a->colmap = 0; 3598 3599 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3600 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3601 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3602 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3603 } else a->garray = 0; 3604 3605 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3606 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3607 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3608 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3609 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3610 3611 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3612 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3613 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3614 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3615 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3616 *newmat = mat; 3617 3618 PetscFunctionReturn(0); 3619 } 3620 3621 #undef __FUNCT__ 3622 #define __FUNCT__ "MatLoad_MPIBAIJ" 3623 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer) 3624 { 3625 PetscErrorCode ierr; 3626 int fd; 3627 PetscInt i,nz,j,rstart,rend; 3628 PetscScalar *vals,*buf; 3629 MPI_Comm comm = ((PetscObject)viewer)->comm; 3630 MPI_Status status; 3631 PetscMPIInt rank,size,maxnz; 3632 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3633 PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 3634 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 3635 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3636 PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 3637 PetscInt dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols; 3638 3639 PetscFunctionBegin; 3640 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3641 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3642 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3643 3644 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3645 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3646 if (!rank) { 3647 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3648 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3649 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3650 } 3651 3652 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 3653 3654 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3655 M = header[1]; N = header[2]; 3656 3657 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3658 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 3659 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 3660 3661 /* If global sizes are set, check if they are consistent with that given in the file */ 3662 if (sizesset) { 3663 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 3664 } 3665 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3666 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3667 3668 if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices"); 3669 3670 /* 3671 This code adds extra rows to make sure the number of rows is 3672 divisible by the blocksize 3673 */ 3674 Mbs = M/bs; 3675 extra_rows = bs - M + bs*Mbs; 3676 if (extra_rows == bs) extra_rows = 0; 3677 else Mbs++; 3678 if (extra_rows && !rank) { 3679 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3680 } 3681 3682 /* determine ownership of all rows */ 3683 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 3684 mbs = Mbs/size + ((Mbs % size) > rank); 3685 m = mbs*bs; 3686 } else { /* User set */ 3687 m = newmat->rmap->n; 3688 mbs = m/bs; 3689 } 3690 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 3691 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3692 3693 /* process 0 needs enough room for process with most rows */ 3694 if (!rank) { 3695 mmax = rowners[1]; 3696 for (i=2; i<size; i++) { 3697 mmax = PetscMax(mmax,rowners[i]); 3698 } 3699 mmax*=bs; 3700 } else mmax = m; 3701 3702 rowners[0] = 0; 3703 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3704 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3705 rstart = rowners[rank]; 3706 rend = rowners[rank+1]; 3707 3708 /* distribute row lengths to all processors */ 3709 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 3710 if (!rank) { 3711 mend = m; 3712 if (size == 1) mend = mend - extra_rows; 3713 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3714 for (j=mend; j<m; j++) locrowlens[j] = 1; 3715 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3716 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3717 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3718 for (j=0; j<m; j++) { 3719 procsnz[0] += locrowlens[j]; 3720 } 3721 for (i=1; i<size; i++) { 3722 mend = browners[i+1] - browners[i]; 3723 if (i == size-1) mend = mend - extra_rows; 3724 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3725 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3726 /* calculate the number of nonzeros on each processor */ 3727 for (j=0; j<browners[i+1]-browners[i]; j++) { 3728 procsnz[i] += rowlengths[j]; 3729 } 3730 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3731 } 3732 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3733 } else { 3734 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3735 } 3736 3737 if (!rank) { 3738 /* determine max buffer needed and allocate it */ 3739 maxnz = procsnz[0]; 3740 for (i=1; i<size; i++) { 3741 maxnz = PetscMax(maxnz,procsnz[i]); 3742 } 3743 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3744 3745 /* read in my part of the matrix column indices */ 3746 nz = procsnz[0]; 3747 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3748 mycols = ibuf; 3749 if (size == 1) nz -= extra_rows; 3750 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3751 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 3752 3753 /* read in every ones (except the last) and ship off */ 3754 for (i=1; i<size-1; i++) { 3755 nz = procsnz[i]; 3756 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3757 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3758 } 3759 /* read in the stuff for the last proc */ 3760 if (size != 1) { 3761 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 3762 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3763 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3764 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3765 } 3766 ierr = PetscFree(cols);CHKERRQ(ierr); 3767 } else { 3768 /* determine buffer space needed for message */ 3769 nz = 0; 3770 for (i=0; i<m; i++) { 3771 nz += locrowlens[i]; 3772 } 3773 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3774 mycols = ibuf; 3775 /* receive message of column indices*/ 3776 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3777 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3778 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3779 } 3780 3781 /* loop over local rows, determining number of off diagonal entries */ 3782 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 3783 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 3784 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3785 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3786 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3787 rowcount = 0; nzcount = 0; 3788 for (i=0; i<mbs; i++) { 3789 dcount = 0; 3790 odcount = 0; 3791 for (j=0; j<bs; j++) { 3792 kmax = locrowlens[rowcount]; 3793 for (k=0; k<kmax; k++) { 3794 tmp = mycols[nzcount++]/bs; 3795 if (!mask[tmp]) { 3796 mask[tmp] = 1; 3797 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3798 else masked1[dcount++] = tmp; 3799 } 3800 } 3801 rowcount++; 3802 } 3803 3804 dlens[i] = dcount; 3805 odlens[i] = odcount; 3806 3807 /* zero out the mask elements we set */ 3808 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3809 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 3810 } 3811 3812 3813 if (!sizesset) { 3814 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3815 } 3816 ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3817 3818 if (!rank) { 3819 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3820 /* read in my part of the matrix numerical values */ 3821 nz = procsnz[0]; 3822 vals = buf; 3823 mycols = ibuf; 3824 if (size == 1) nz -= extra_rows; 3825 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3826 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 3827 3828 /* insert into matrix */ 3829 jj = rstart*bs; 3830 for (i=0; i<m; i++) { 3831 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3832 mycols += locrowlens[i]; 3833 vals += locrowlens[i]; 3834 jj++; 3835 } 3836 /* read in other processors (except the last one) and ship out */ 3837 for (i=1; i<size-1; i++) { 3838 nz = procsnz[i]; 3839 vals = buf; 3840 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3841 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3842 } 3843 /* the last proc */ 3844 if (size != 1){ 3845 nz = procsnz[i] - extra_rows; 3846 vals = buf; 3847 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3848 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3849 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3850 } 3851 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3852 } else { 3853 /* receive numeric values */ 3854 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3855 3856 /* receive message of values*/ 3857 vals = buf; 3858 mycols = ibuf; 3859 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 3860 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 3861 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3862 3863 /* insert into matrix */ 3864 jj = rstart*bs; 3865 for (i=0; i<m; i++) { 3866 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3867 mycols += locrowlens[i]; 3868 vals += locrowlens[i]; 3869 jj++; 3870 } 3871 } 3872 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3873 ierr = PetscFree(buf);CHKERRQ(ierr); 3874 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3875 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 3876 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 3877 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 3878 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3879 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3880 3881 PetscFunctionReturn(0); 3882 } 3883 3884 #undef __FUNCT__ 3885 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 3886 /*@ 3887 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3888 3889 Input Parameters: 3890 . mat - the matrix 3891 . fact - factor 3892 3893 Not Collective, each process can use a different factor 3894 3895 Level: advanced 3896 3897 Notes: 3898 This can also be set by the command line option: -mat_use_hash_table <fact> 3899 3900 .keywords: matrix, hashtable, factor, HT 3901 3902 .seealso: MatSetOption() 3903 @*/ 3904 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3905 { 3906 PetscErrorCode ierr; 3907 3908 PetscFunctionBegin; 3909 ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr); 3910 PetscFunctionReturn(0); 3911 } 3912 3913 EXTERN_C_BEGIN 3914 #undef __FUNCT__ 3915 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 3916 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3917 { 3918 Mat_MPIBAIJ *baij; 3919 3920 PetscFunctionBegin; 3921 baij = (Mat_MPIBAIJ*)mat->data; 3922 baij->ht_fact = fact; 3923 PetscFunctionReturn(0); 3924 } 3925 EXTERN_C_END 3926 3927 #undef __FUNCT__ 3928 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 3929 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3930 { 3931 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3932 PetscFunctionBegin; 3933 *Ad = a->A; 3934 *Ao = a->B; 3935 *colmap = a->garray; 3936 PetscFunctionReturn(0); 3937 } 3938 3939 /* 3940 Special version for direct calls from Fortran (to eliminate two function call overheads 3941 */ 3942 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3943 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3944 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3945 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3946 #endif 3947 3948 #undef __FUNCT__ 3949 #define __FUNCT__ "matmpibiajsetvaluesblocked" 3950 /*@C 3951 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 3952 3953 Collective on Mat 3954 3955 Input Parameters: 3956 + mat - the matrix 3957 . min - number of input rows 3958 . im - input rows 3959 . nin - number of input columns 3960 . in - input columns 3961 . v - numerical values input 3962 - addvin - INSERT_VALUES or ADD_VALUES 3963 3964 Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse. 3965 3966 Level: advanced 3967 3968 .seealso: MatSetValuesBlocked() 3969 @*/ 3970 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3971 { 3972 /* convert input arguments to C version */ 3973 Mat mat = *matin; 3974 PetscInt m = *min, n = *nin; 3975 InsertMode addv = *addvin; 3976 3977 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3978 const MatScalar *value; 3979 MatScalar *barray=baij->barray; 3980 PetscBool roworiented = baij->roworiented; 3981 PetscErrorCode ierr; 3982 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3983 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3984 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3985 3986 PetscFunctionBegin; 3987 /* tasks normally handled by MatSetValuesBlocked() */ 3988 if (mat->insertmode == NOT_SET_VALUES) { 3989 mat->insertmode = addv; 3990 } 3991 #if defined(PETSC_USE_DEBUG) 3992 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3993 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3994 #endif 3995 if (mat->assembled) { 3996 mat->was_assembled = PETSC_TRUE; 3997 mat->assembled = PETSC_FALSE; 3998 } 3999 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 4000 4001 4002 if(!barray) { 4003 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 4004 baij->barray = barray; 4005 } 4006 4007 if (roworiented) { 4008 stepval = (n-1)*bs; 4009 } else { 4010 stepval = (m-1)*bs; 4011 } 4012 for (i=0; i<m; i++) { 4013 if (im[i] < 0) continue; 4014 #if defined(PETSC_USE_DEBUG) 4015 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 4016 #endif 4017 if (im[i] >= rstart && im[i] < rend) { 4018 row = im[i] - rstart; 4019 for (j=0; j<n; j++) { 4020 /* If NumCol = 1 then a copy is not required */ 4021 if ((roworiented) && (n == 1)) { 4022 barray = (MatScalar*)v + i*bs2; 4023 } else if((!roworiented) && (m == 1)) { 4024 barray = (MatScalar*)v + j*bs2; 4025 } else { /* Here a copy is required */ 4026 if (roworiented) { 4027 value = v + i*(stepval+bs)*bs + j*bs; 4028 } else { 4029 value = v + j*(stepval+bs)*bs + i*bs; 4030 } 4031 for (ii=0; ii<bs; ii++,value+=stepval) { 4032 for (jj=0; jj<bs; jj++) { 4033 *barray++ = *value++; 4034 } 4035 } 4036 barray -=bs2; 4037 } 4038 4039 if (in[j] >= cstart && in[j] < cend){ 4040 col = in[j] - cstart; 4041 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 4042 } 4043 else if (in[j] < 0) continue; 4044 #if defined(PETSC_USE_DEBUG) 4045 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 4046 #endif 4047 else { 4048 if (mat->was_assembled) { 4049 if (!baij->colmap) { 4050 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 4051 } 4052 4053 #if defined(PETSC_USE_DEBUG) 4054 #if defined (PETSC_USE_CTABLE) 4055 { PetscInt data; 4056 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 4057 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4058 } 4059 #else 4060 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4061 #endif 4062 #endif 4063 #if defined (PETSC_USE_CTABLE) 4064 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 4065 col = (col - 1)/bs; 4066 #else 4067 col = (baij->colmap[in[j]] - 1)/bs; 4068 #endif 4069 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 4070 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 4071 col = in[j]; 4072 } 4073 } 4074 else col = in[j]; 4075 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 4076 } 4077 } 4078 } else { 4079 if (!baij->donotstash) { 4080 if (roworiented) { 4081 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 4082 } else { 4083 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 4084 } 4085 } 4086 } 4087 } 4088 4089 /* task normally handled by MatSetValuesBlocked() */ 4090 ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 4091 PetscFunctionReturn(0); 4092 } 4093 4094 #undef __FUNCT__ 4095 #define __FUNCT__ "MatCreateMPIBAIJWithArrays" 4096 /*@ 4097 MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard 4098 CSR format the local rows. 4099 4100 Collective on MPI_Comm 4101 4102 Input Parameters: 4103 + comm - MPI communicator 4104 . bs - the block size, only a block size of 1 is supported 4105 . m - number of local rows (Cannot be PETSC_DECIDE) 4106 . n - This value should be the same as the local size used in creating the 4107 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4108 calculated if N is given) For square matrices n is almost always m. 4109 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4110 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4111 . i - row indices 4112 . j - column indices 4113 - a - matrix values 4114 4115 Output Parameter: 4116 . mat - the matrix 4117 4118 Level: intermediate 4119 4120 Notes: 4121 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4122 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4123 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4124 4125 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4126 4127 .keywords: matrix, aij, compressed row, sparse, parallel 4128 4129 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4130 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 4131 @*/ 4132 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4133 { 4134 PetscErrorCode ierr; 4135 4136 4137 PetscFunctionBegin; 4138 if (i[0]) { 4139 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4140 } 4141 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4142 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4143 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4144 ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 4145 ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 4146 PetscFunctionReturn(0); 4147 } 4148