#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
#include <petscblaslapack.h>

/* Routines implemented in other BAIJ compilation units */
extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
extern PetscErrorCode DisAssemble_MPIBAIJ(Mat);
extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
/*
   MatGetRowMaxAbs_MPIBAIJ - for each locally owned row returns in v the
   entry of largest absolute value and (optionally) in idx[] its global
   column index.  The maximum is taken first over the diagonal block a->A,
   then over the off-diagonal block a->B; for each row the larger wins.
*/
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  /* maxima over the diagonal block; indices come back local, shift to global */
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
  }

  /* maxima over the off-diagonal block, computed into a scratch vector;
     idxb is only needed (and only allocated) when the caller wants indices */
  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  /* keep whichever block produced the larger magnitude; the off-diagonal
     index is translated to a global index through garray (block level) */
  for (i=0; i<A->rmap->n; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);   /* PetscFree(0) is a no-op */
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
/* Saves a copy of the current numerical values of both local blocks so that
   they can later be restored with MatRetrieveValues(). */
PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
/* Restores the numerical values previously saved with MatStoreValues(). */
PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
/*
   CreateColmap_MPIBAIJ_Private - builds baij->colmap, mapping a global
   block column to bs*(local block column in B) + 1, with 0 meaning "not
   present on this process".  With PETSC_USE_CTABLE the map is a hash table
   keyed by global block column + 1; otherwise it is a dense array of
   length Nbs (non-scalable).
*/
PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    /* keys and values are shifted by +1 so that 0 can signal "absent" */
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}

/*
   MatSetValues_SeqBAIJ_A_Private - inlined insertion of one scalar at local
   point indices (row,col) into the diagonal block A.  Does a short binary
   search followed by a linear scan of the block row; when the block column
   is absent and new nonzeros are allowed, reallocates (via
   MatSeqXAIJReallocateAIJ) and shifts later entries up to make room.
   Relies on the working variables declared in MatSetValues_MPIBAIJ().
*/
#define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i] = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    a_noinsert:; \
    ailen[brow] = nrow; \
  }

/*
   MatSetValues_SeqBAIJ_B_Private - same insertion scheme as the A variant
   above, but operating on the off-diagonal block B (arrays bi/bj/ba,
   bimax/bilen, flag b->nonew, label b_noinsert).
*/
#define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    CHKMEMQ;\
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \
    rp[_i] = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    b_noinsert:; \
    bilen[brow] = nrow; \
  }

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
/*
   MatSetValues_MPIBAIJ - scalar-indexed MatSetValues for the parallel BAIJ
   format.  Locally owned entries are inserted directly through the macros
   above; entries for off-process rows are stashed and communicated at
   assembly time.
*/
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented =
baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat            A = baij->A;
  Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
  PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar      *aa=a->a;

  Mat            B = baij->B;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar      *ba=b->a;

  PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar      *ap,*bap;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;   /* negative rows are ignored by convention */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {   /* row owned by this process */
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diagonal block A */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {                                           /* off-diagonal block B */
          if (mat->was_assembled) {
            /* after a previous assembly, global columns must be translated to
               B's local numbering through colmap (built lazily) */
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            /* col < 0 means this block column is not yet present in B */
            if (col < 0 &&
!((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* new nonzeros are allowed: convert back to a scatterable form */
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {   /* off-process row: stash for communication during assembly */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
/*
   MatSetValuesBlocked_MPIBAIJ - block-indexed MatSetValues: im[]/in[] are
   block row/column indices and v holds m*n dense bs x bs blocks.  Blocks
   may need to be copied into the column-major scratch buffer baij->barray
   before being handed to the sequential routines.
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray=baij->barray;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* lazily allocate the bs2-sized scratch block used when a copy is needed */
  if(!barray) {
    ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* stride between consecutive rows of one input block inside v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) {
              barray[jj] = value[jj];
            }
            barray += bs;
          }
          barray -= bs2;   /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend){   /* diagonal block */
          col = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {                                  /* off-diagonal block */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* new block column and new nonzeros allowed: disassemble */
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {   /* off-process block row: stash for assembly-time communication */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/* Multiplicative hash on the golden ratio; tmp is scratch for the caller */
#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
/*
   MatSetValues_MPIBAIJ_HT - scalar MatSetValues using the hash table built
   by MatCreateHashTable_MPIBAIJ_Private(); only existing nonzero blocks can
   be addressed (probing that wraps back to the start and fails errors out).
*/
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table: key identifies the bs x bs block */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          /* linear probing, wrapping around once before giving up */
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
/*
   MatSetValuesBlocked_MPIBAIJ_HT - block-indexed MatSetValues through the
   hash table; each hash entry points directly at the bs2 storage of one
   existing block.
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij =
(Mat_MPIBAIJ*)mat->data;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col;
  PetscInt          rstart=baij->rstartbs;
  PetscInt          rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  /* stride between consecutive rows of one input block inside v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row = im[i];
    v_t = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          /* linear probing, wrapping around once before giving up */
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        baij_a = HD[idx];   /* direct pointer to the block's bs2 storage */
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value =
v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          /* copy row-oriented input into the column-major block storage */
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
/*
   MatGetValues_MPIBAIJ - retrieves entries at global indices; only rows
   owned by this process are supported.  Absent off-diagonal entries are
   returned as 0.0.
*/
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for
(j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){   /* diagonal block */
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {                                        /* off-diagonal block */
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data --;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* block column absent on this process -> the entry is zero */
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
/*
   MatNorm_MPIBAIJ - Frobenius, 1- (max column sum) and infinity-
   (max row sum) norms, combining local contributions of the A and B
   blocks with an MPI reduction.
*/
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    /* single process: defer entirely to the sequential norm */
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum +=
PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      /* NOTE(review): cstart is taken from rstartbs (block ROW start);
         correct only when the row and column layouts coincide -- verify
         for rectangular matrices */
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      v = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*garray[*jj] + j;   /* global column via garray */
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
      sum = 0.0;
      for (j=0; j<amat->mbs; j++) {
        /* accumulate the bs point-row sums of block row j over A and B */
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++){
          if (sums[row] > sum) sum =
sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

/*
  Creates the hash table, and sets the table
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
/*
   MatCreateHashTable_MPIBAIJ_Private - builds the (key -> block storage)
   hash table used by the _HT MatSetValues variants.  Table size is
   factor * (number of local nonzero blocks); collisions are resolved by
   linear probing.  A key of 0 marks an empty slot, hence the +1 shift.
*/
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt       ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);   /* already built */

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
  ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
  HD = baij->hd;
  HT = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++){
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;   /* count probe collisions for the summary below */
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];   /* global block column of the off-diagonal entry */
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++){
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) {j++;}
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
/*
   MatAssemblyBegin_MPIBAIJ - verifies the insert mode is consistent across
   the communicator and starts scattering the scalar and block stashes of
   off-process entries to their owners.
*/
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr =
MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
/*
   MatAssemblyEnd_MPIBAIJ - drains the scalar and block stashes into the
   local blocks, finalizes assembly of A and B, handles the case where any
   process disassembled, and (optionally) builds the hash table for the
   _HT insertion paths.
*/
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash.
Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble.
*/
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    /* was_assembled == PETSC_FALSE on any process forces the product to 0 */
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* once the nonzero pattern is final, switch to hash-table insertion */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr =
PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 963 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 964 if (iascii) { 965 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 966 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 967 MatInfo info; 968 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 969 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 970 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 971 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 972 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 973 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 974 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 975 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 976 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 977 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 978 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 979 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 980 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 981 PetscFunctionReturn(0); 982 } else if (format == PETSC_VIEWER_ASCII_INFO) { 983 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 984 PetscFunctionReturn(0); 985 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 986 PetscFunctionReturn(0); 987 } 988 } 989 990 if (isdraw) { 991 PetscDraw draw; 992 PetscBool isnull; 993 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 994 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 995 } 996 997 if (size == 1) { 
998 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 999 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1000 } else { 1001 /* assemble the entire matrix onto first processor. */ 1002 Mat A; 1003 Mat_SeqBAIJ *Aloc; 1004 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1005 MatScalar *a; 1006 1007 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1008 /* Perhaps this should be the type of mat? */ 1009 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1010 if (!rank) { 1011 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1012 } else { 1013 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1014 } 1015 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1016 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1017 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1018 1019 /* copy over the A part */ 1020 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1021 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1022 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1023 1024 for (i=0; i<mbs; i++) { 1025 rvals[0] = bs*(baij->rstartbs + i); 1026 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1027 for (j=ai[i]; j<ai[i+1]; j++) { 1028 col = (baij->cstartbs+aj[j])*bs; 1029 for (k=0; k<bs; k++) { 1030 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1031 col++; a += bs; 1032 } 1033 } 1034 } 1035 /* copy over the B part */ 1036 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1037 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1038 for (i=0; i<mbs; i++) { 1039 rvals[0] = bs*(baij->rstartbs + i); 1040 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1041 for (j=ai[i]; j<ai[i+1]; j++) { 1042 col = baij->garray[aj[j]]*bs; 1043 for (k=0; k<bs; k++) { 1044 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1045 col++; a += bs; 1046 } 1047 } 1048 } 1049 ierr = PetscFree(rvals);CHKERRQ(ierr); 1050 ierr = 
MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1051 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1052 /* 1053 Everyone has to call to draw the matrix since the graphics waits are 1054 synchronized across all processors that share the PetscDraw object 1055 */ 1056 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1057 if (!rank) { 1058 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1059 /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/ 1060 PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ); 1061 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1062 } 1063 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1064 ierr = MatDestroy(&A);CHKERRQ(ierr); 1065 } 1066 PetscFunctionReturn(0); 1067 } 1068 1069 #undef __FUNCT__ 1070 #define __FUNCT__ "MatView_MPIBAIJ_Binary" 1071 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 1072 { 1073 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)mat->data; 1074 Mat_SeqBAIJ* A = (Mat_SeqBAIJ*)a->A->data; 1075 Mat_SeqBAIJ* B = (Mat_SeqBAIJ*)a->B->data; 1076 PetscErrorCode ierr; 1077 PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen; 1078 PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll; 1079 int fd; 1080 PetscScalar *column_values; 1081 FILE *file; 1082 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1083 PetscInt message_count,flowcontrolcount; 1084 1085 PetscFunctionBegin; 1086 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1087 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1088 nz = bs2*(A->nz + B->nz); 1089 rlen = mat->rmap->n; 1090 if (!rank) { 1091 header[0] = MAT_FILE_CLASSID; 1092 header[1] = mat->rmap->N; 1093 header[2] = 
mat->cmap->N; 1094 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1095 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1096 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1097 /* get largest number of rows any processor has */ 1098 range = mat->rmap->range; 1099 for (i=1; i<size; i++) { 1100 rlen = PetscMax(rlen,range[i+1] - range[i]); 1101 } 1102 } else { 1103 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1104 } 1105 1106 ierr = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr); 1107 /* compute lengths of each row */ 1108 for (i=0; i<a->mbs; i++) { 1109 crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1110 } 1111 /* store the row lengths to the file */ 1112 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1113 if (!rank) { 1114 MPI_Status status; 1115 ierr = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr); 1116 rlen = (range[1] - range[0])/bs; 1117 for (i=0; i<rlen; i++) { 1118 for (j=0; j<bs; j++) { 1119 row_lens[i*bs+j] = bs*crow_lens[i]; 1120 } 1121 } 1122 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1123 for (i=1; i<size; i++) { 1124 rlen = (range[i+1] - range[i])/bs; 1125 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1126 ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1127 for (k=0; k<rlen; k++) { 1128 for (j=0; j<bs; j++) { 1129 row_lens[k*bs+j] = bs*crow_lens[k]; 1130 } 1131 } 1132 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1133 } 1134 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1135 ierr = PetscFree(row_lens);CHKERRQ(ierr); 1136 } else { 1137 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1138 ierr = 
MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1139 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1140 } 1141 ierr = PetscFree(crow_lens);CHKERRQ(ierr); 1142 1143 /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the 1144 information needed to make it for each row from a block row. This does require more communication but still not more than 1145 the communication needed for the nonzero values */ 1146 nzmax = nz; /* space a largest processor needs */ 1147 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1148 ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1149 cnt = 0; 1150 for (i=0; i<a->mbs; i++) { 1151 pcnt = cnt; 1152 for (j=B->i[i]; j<B->i[i+1]; j++) { 1153 if ( (col = garray[B->j[j]]) > cstart) break; 1154 for (l=0; l<bs; l++) { 1155 column_indices[cnt++] = bs*col+l; 1156 } 1157 } 1158 for (k=A->i[i]; k<A->i[i+1]; k++) { 1159 for (l=0; l<bs; l++) { 1160 column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 1161 } 1162 } 1163 for (; j<B->i[i+1]; j++) { 1164 for (l=0; l<bs; l++) { 1165 column_indices[cnt++] = bs*garray[B->j[j]]+l; 1166 } 1167 } 1168 len = cnt - pcnt; 1169 for (k=1; k<bs; k++) { 1170 ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr); 1171 cnt += len; 1172 } 1173 } 1174 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1175 1176 /* store the columns to the file */ 1177 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1178 if (!rank) { 1179 MPI_Status status; 1180 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1181 for (i=1; i<size; i++) { 1182 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1183 ierr = 
MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1184 ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1185 ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1186 } 1187 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1188 } else { 1189 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1190 ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1191 ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1192 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1193 } 1194 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1195 1196 /* load up the numerical values */ 1197 ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1198 cnt = 0; 1199 for (i=0; i<a->mbs; i++) { 1200 rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1201 for (j=B->i[i]; j<B->i[i+1]; j++) { 1202 if ( garray[B->j[j]] > cstart) break; 1203 for (l=0; l<bs; l++) { 1204 for (ll=0; ll<bs; ll++) { 1205 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1206 } 1207 } 1208 cnt += bs; 1209 } 1210 for (k=A->i[i]; k<A->i[i+1]; k++) { 1211 for (l=0; l<bs; l++) { 1212 for (ll=0; ll<bs; ll++) { 1213 column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1214 } 1215 } 1216 cnt += bs; 1217 } 1218 for (; j<B->i[i+1]; j++) { 1219 for (l=0; l<bs; l++) { 1220 for (ll=0; ll<bs; ll++) { 1221 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1222 } 1223 } 1224 cnt += bs; 1225 } 1226 cnt += (bs-1)*rlen; 1227 } 1228 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1229 1230 /* store the column values to the file */ 1231 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1232 if (!rank) { 1233 MPI_Status status; 1234 ierr = 
PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1235 for (i=1; i<size; i++) { 1236 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1237 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1238 ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1239 ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1240 } 1241 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1242 } else { 1243 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1244 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1245 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1246 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1247 } 1248 ierr = PetscFree(column_values);CHKERRQ(ierr); 1249 1250 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1251 if (file) { 1252 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1253 } 1254 PetscFunctionReturn(0); 1255 } 1256 1257 #undef __FUNCT__ 1258 #define __FUNCT__ "MatView_MPIBAIJ" 1259 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1260 { 1261 PetscErrorCode ierr; 1262 PetscBool iascii,isdraw,issocket,isbinary; 1263 1264 PetscFunctionBegin; 1265 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1266 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1267 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1268 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1269 if (iascii || isdraw || issocket) { 1270 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1271 } else if (isbinary) { 1272 ierr = 
MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1273 } else { 1274 SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1275 } 1276 PetscFunctionReturn(0); 1277 } 1278 1279 #undef __FUNCT__ 1280 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1281 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1282 { 1283 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1284 PetscErrorCode ierr; 1285 1286 PetscFunctionBegin; 1287 #if defined(PETSC_USE_LOG) 1288 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1289 #endif 1290 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1291 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1292 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 1293 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1294 #if defined (PETSC_USE_CTABLE) 1295 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 1296 #else 1297 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1298 #endif 1299 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1300 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 1301 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1302 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1303 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1304 ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr); 1305 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1306 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1307 1308 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1309 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1310 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1311 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1312 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1313 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1314 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1315 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr); 1316 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1317 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C","",PETSC_NULL);CHKERRQ(ierr); 1318 PetscFunctionReturn(0); 1319 } 1320 1321 #undef __FUNCT__ 1322 #define __FUNCT__ "MatMult_MPIBAIJ" 1323 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1324 { 1325 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1326 PetscErrorCode ierr; 1327 PetscInt nt; 1328 1329 PetscFunctionBegin; 1330 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1331 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1332 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1333 if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1334 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1335 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1336 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1337 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1338 PetscFunctionReturn(0); 1339 } 1340 1341 #undef __FUNCT__ 1342 #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1343 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1344 { 1345 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1346 PetscErrorCode ierr; 1347 1348 PetscFunctionBegin; 1349 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1350 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1351 ierr = 
VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1352 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1353 PetscFunctionReturn(0); 1354 } 1355 1356 #undef __FUNCT__ 1357 #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1358 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1359 { 1360 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1361 PetscErrorCode ierr; 1362 PetscBool merged; 1363 1364 PetscFunctionBegin; 1365 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1366 /* do nondiagonal part */ 1367 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1368 if (!merged) { 1369 /* send it on its way */ 1370 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1371 /* do local part */ 1372 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1373 /* receive remote parts: note this assumes the values are not actually */ 1374 /* inserted in yy until the next line */ 1375 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1376 } else { 1377 /* do local part */ 1378 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1379 /* send it on its way */ 1380 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1381 /* values actually were received in the Begin() but we need to call this nop */ 1382 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1383 } 1384 PetscFunctionReturn(0); 1385 } 1386 1387 #undef __FUNCT__ 1388 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1389 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1390 { 1391 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1392 PetscErrorCode ierr; 1393 1394 PetscFunctionBegin; 1395 /* do nondiagonal part */ 1396 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1397 /* send it on its way */ 1398 ierr = 
VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1399 /* do local part */ 1400 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1401 /* receive remote parts: note this assumes the values are not actually */ 1402 /* inserted in yy until the next line, which is true for my implementation*/ 1403 /* but is not perhaps always true. */ 1404 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1405 PetscFunctionReturn(0); 1406 } 1407 1408 /* 1409 This only works correctly for square matrices where the subblock A->A is the 1410 diagonal block 1411 */ 1412 #undef __FUNCT__ 1413 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1414 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1415 { 1416 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1417 PetscErrorCode ierr; 1418 1419 PetscFunctionBegin; 1420 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1421 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1422 PetscFunctionReturn(0); 1423 } 1424 1425 #undef __FUNCT__ 1426 #define __FUNCT__ "MatScale_MPIBAIJ" 1427 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1428 { 1429 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1430 PetscErrorCode ierr; 1431 1432 PetscFunctionBegin; 1433 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1434 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1435 PetscFunctionReturn(0); 1436 } 1437 1438 #undef __FUNCT__ 1439 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1440 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1441 { 1442 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1443 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1444 PetscErrorCode ierr; 1445 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1446 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1447 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1448 1449 
PetscFunctionBegin; 1450 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1451 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1452 mat->getrowactive = PETSC_TRUE; 1453 1454 if (!mat->rowvalues && (idx || v)) { 1455 /* 1456 allocate enough space to hold information from the longest row. 1457 */ 1458 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1459 PetscInt max = 1,mbs = mat->mbs,tmp; 1460 for (i=0; i<mbs; i++) { 1461 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1462 if (max < tmp) { max = tmp; } 1463 } 1464 ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1465 } 1466 lrow = row - brstart; 1467 1468 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1469 if (!v) {pvA = 0; pvB = 0;} 1470 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1471 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1472 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1473 nztot = nzA + nzB; 1474 1475 cmap = mat->garray; 1476 if (v || idx) { 1477 if (nztot) { 1478 /* Sort by increasing column numbers, assuming A and B already sorted */ 1479 PetscInt imark = -1; 1480 if (v) { 1481 *v = v_p = mat->rowvalues; 1482 for (i=0; i<nzB; i++) { 1483 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1484 else break; 1485 } 1486 imark = i; 1487 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1488 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1489 } 1490 if (idx) { 1491 *idx = idx_p = mat->rowindices; 1492 if (imark > -1) { 1493 for (i=0; i<imark; i++) { 1494 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1495 } 1496 } else { 1497 for (i=0; i<nzB; i++) { 1498 if (cmap[cworkB[i]/bs] < cstart) 1499 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1500 else break; 1501 } 1502 imark = i; 1503 } 1504 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1505 for 
(i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1506 } 1507 } else { 1508 if (idx) *idx = 0; 1509 if (v) *v = 0; 1510 } 1511 } 1512 *nz = nztot; 1513 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1514 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1515 PetscFunctionReturn(0); 1516 } 1517 1518 #undef __FUNCT__ 1519 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1520 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1521 { 1522 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1523 1524 PetscFunctionBegin; 1525 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1526 baij->getrowactive = PETSC_FALSE; 1527 PetscFunctionReturn(0); 1528 } 1529 1530 #undef __FUNCT__ 1531 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1532 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1533 { 1534 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1535 PetscErrorCode ierr; 1536 1537 PetscFunctionBegin; 1538 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1539 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1540 PetscFunctionReturn(0); 1541 } 1542 1543 #undef __FUNCT__ 1544 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1545 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1546 { 1547 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1548 Mat A = a->A,B = a->B; 1549 PetscErrorCode ierr; 1550 PetscReal isend[5],irecv[5]; 1551 1552 PetscFunctionBegin; 1553 info->block_size = (PetscReal)matin->rmap->bs; 1554 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1555 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1556 isend[3] = info->memory; isend[4] = info->mallocs; 1557 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1558 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1559 isend[3] += info->memory; isend[4] += info->mallocs; 1560 if (flag == 
MAT_LOCAL) { 1561 info->nz_used = isend[0]; 1562 info->nz_allocated = isend[1]; 1563 info->nz_unneeded = isend[2]; 1564 info->memory = isend[3]; 1565 info->mallocs = isend[4]; 1566 } else if (flag == MAT_GLOBAL_MAX) { 1567 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1568 info->nz_used = irecv[0]; 1569 info->nz_allocated = irecv[1]; 1570 info->nz_unneeded = irecv[2]; 1571 info->memory = irecv[3]; 1572 info->mallocs = irecv[4]; 1573 } else if (flag == MAT_GLOBAL_SUM) { 1574 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1575 info->nz_used = irecv[0]; 1576 info->nz_allocated = irecv[1]; 1577 info->nz_unneeded = irecv[2]; 1578 info->memory = irecv[3]; 1579 info->mallocs = irecv[4]; 1580 } else { 1581 SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1582 } 1583 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1584 info->fill_ratio_needed = 0; 1585 info->factor_mallocs = 0; 1586 PetscFunctionReturn(0); 1587 } 1588 1589 #undef __FUNCT__ 1590 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1591 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1592 { 1593 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1594 PetscErrorCode ierr; 1595 1596 PetscFunctionBegin; 1597 switch (op) { 1598 case MAT_NEW_NONZERO_LOCATIONS: 1599 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1600 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1601 case MAT_KEEP_NONZERO_PATTERN: 1602 case MAT_NEW_NONZERO_LOCATION_ERR: 1603 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1604 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1605 break; 1606 case MAT_ROW_ORIENTED: 1607 a->roworiented = flg; 1608 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1609 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1610 break; 1611 case MAT_NEW_DIAGONALS: 1612 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1613 break; 1614 case 
MAT_IGNORE_OFF_PROC_ENTRIES: 1615 a->donotstash = flg; 1616 break; 1617 case MAT_USE_HASH_TABLE: 1618 a->ht_flag = flg; 1619 break; 1620 case MAT_SYMMETRIC: 1621 case MAT_STRUCTURALLY_SYMMETRIC: 1622 case MAT_HERMITIAN: 1623 case MAT_SYMMETRY_ETERNAL: 1624 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1625 break; 1626 default: 1627 SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op); 1628 } 1629 PetscFunctionReturn(0); 1630 } 1631 1632 #undef __FUNCT__ 1633 #define __FUNCT__ "MatTranspose_MPIBAIJ" 1634 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1635 { 1636 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1637 Mat_SeqBAIJ *Aloc; 1638 Mat B; 1639 PetscErrorCode ierr; 1640 PetscInt M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1641 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1642 MatScalar *a; 1643 1644 PetscFunctionBegin; 1645 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1646 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1647 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1648 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1649 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1650 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1651 } else { 1652 B = *matout; 1653 } 1654 1655 /* copy over the A part */ 1656 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1657 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1658 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1659 1660 for (i=0; i<mbs; i++) { 1661 rvals[0] = bs*(baij->rstartbs + i); 1662 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1663 for (j=ai[i]; j<ai[i+1]; j++) { 1664 col = (baij->cstartbs+aj[j])*bs; 1665 for (k=0; k<bs; k++) { 1666 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1667 col++; a += bs; 1668 } 1669 } 1670 } 1671 /* copy over the B part */ 1672 
Aloc = (Mat_SeqBAIJ*)baij->B->data; 1673 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1674 for (i=0; i<mbs; i++) { 1675 rvals[0] = bs*(baij->rstartbs + i); 1676 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1677 for (j=ai[i]; j<ai[i+1]; j++) { 1678 col = baij->garray[aj[j]]*bs; 1679 for (k=0; k<bs; k++) { 1680 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1681 col++; a += bs; 1682 } 1683 } 1684 } 1685 ierr = PetscFree(rvals);CHKERRQ(ierr); 1686 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1687 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1688 1689 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1690 *matout = B; 1691 } else { 1692 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1693 } 1694 PetscFunctionReturn(0); 1695 } 1696 1697 #undef __FUNCT__ 1698 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1699 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1700 { 1701 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1702 Mat a = baij->A,b = baij->B; 1703 PetscErrorCode ierr; 1704 PetscInt s1,s2,s3; 1705 1706 PetscFunctionBegin; 1707 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1708 if (rr) { 1709 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1710 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1711 /* Overlap communication with computation. 
*/ 1712 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1713 } 1714 if (ll) { 1715 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1716 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1717 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1718 } 1719 /* scale the diagonal block */ 1720 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1721 1722 if (rr) { 1723 /* Do a scatter end and then right scale the off-diagonal block */ 1724 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1725 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1726 } 1727 1728 PetscFunctionReturn(0); 1729 } 1730 1731 #undef __FUNCT__ 1732 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1733 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1734 { 1735 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1736 PetscErrorCode ierr; 1737 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1738 PetscInt i,*owners = A->rmap->range; 1739 PetscInt *nprocs,j,idx,nsends,row; 1740 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1741 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1; 1742 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap->rstart; 1743 MPI_Comm comm = ((PetscObject)A)->comm; 1744 MPI_Request *send_waits,*recv_waits; 1745 MPI_Status recv_status,*send_status; 1746 const PetscScalar *xx; 1747 PetscScalar *bb; 1748 #if defined(PETSC_DEBUG) 1749 PetscBool found = PETSC_FALSE; 1750 #endif 1751 1752 PetscFunctionBegin; 1753 /* first count number of contributors to each processor */ 1754 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1755 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1756 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1757 j = 0; 1758 for (i=0; i<N; i++) { 1759 if (lastidx 
> (idx = rows[i])) j = 0; 1760 lastidx = idx; 1761 for (; j<size; j++) { 1762 if (idx >= owners[j] && idx < owners[j+1]) { 1763 nprocs[2*j]++; 1764 nprocs[2*j+1] = 1; 1765 owner[i] = j; 1766 #if defined(PETSC_DEBUG) 1767 found = PETSC_TRUE; 1768 #endif 1769 break; 1770 } 1771 } 1772 #if defined(PETSC_DEBUG) 1773 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1774 found = PETSC_FALSE; 1775 #endif 1776 } 1777 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1778 1779 if (A->nooffproczerorows) { 1780 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 1781 nrecvs = nsends; 1782 nmax = N; 1783 } else { 1784 /* inform other processors of number of messages and max length*/ 1785 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1786 } 1787 1788 /* post receives: */ 1789 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1790 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1791 for (i=0; i<nrecvs; i++) { 1792 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1793 } 1794 1795 /* do sends: 1796 1) starts[i] gives the starting index in svalues for stuff going to 1797 the ith processor 1798 */ 1799 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1800 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1801 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1802 starts[0] = 0; 1803 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1804 for (i=0; i<N; i++) { 1805 svalues[starts[owner[i]]++] = rows[i]; 1806 } 1807 1808 starts[0] = 0; 1809 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1810 count = 0; 1811 for (i=0; i<size; i++) { 1812 if (nprocs[2*i+1]) { 1813 ierr = 
MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1814 } 1815 } 1816 ierr = PetscFree(starts);CHKERRQ(ierr); 1817 1818 base = owners[rank]; 1819 1820 /* wait on receives */ 1821 ierr = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr); 1822 count = nrecvs; 1823 slen = 0; 1824 while (count) { 1825 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1826 /* unpack receives into our local space */ 1827 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1828 source[imdex] = recv_status.MPI_SOURCE; 1829 lens[imdex] = n; 1830 slen += n; 1831 count--; 1832 } 1833 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1834 1835 /* move the data into the send scatter */ 1836 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1837 count = 0; 1838 for (i=0; i<nrecvs; i++) { 1839 values = rvalues + i*nmax; 1840 for (j=0; j<lens[i]; j++) { 1841 lrows[count++] = values[j] - base; 1842 } 1843 } 1844 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1845 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1846 ierr = PetscFree(owner);CHKERRQ(ierr); 1847 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1848 1849 /* fix right hand side if needed */ 1850 if (x && b) { 1851 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1852 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1853 for (i=0; i<slen; i++) { 1854 bb[lrows[i]] = diag*xx[lrows[i]]; 1855 } 1856 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1857 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1858 } 1859 1860 /* actually zap the local rows */ 1861 /* 1862 Zero the required rows. If the "diagonal block" of the matrix 1863 is square and the user wishes to set the diagonal we use separate 1864 code so that MatSetValues() is not called for each diagonal allocating 1865 new memory, thus calling lots of mallocs and slowing things down. 
1866 1867 */ 1868 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1869 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1870 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1871 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 1872 } else if (diag != 0.0) { 1873 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1874 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1875 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1876 for (i=0; i<slen; i++) { 1877 row = lrows[i] + rstart_bs; 1878 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1879 } 1880 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1881 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1882 } else { 1883 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1884 } 1885 1886 ierr = PetscFree(lrows);CHKERRQ(ierr); 1887 1888 /* wait on sends */ 1889 if (nsends) { 1890 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1891 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1892 ierr = PetscFree(send_status);CHKERRQ(ierr); 1893 } 1894 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1895 ierr = PetscFree(svalues);CHKERRQ(ierr); 1896 1897 PetscFunctionReturn(0); 1898 } 1899 1900 #undef __FUNCT__ 1901 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1902 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1903 { 1904 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1905 PetscErrorCode ierr; 1906 1907 PetscFunctionBegin; 1908 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1909 PetscFunctionReturn(0); 1910 } 1911 1912 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1913 1914 #undef __FUNCT__ 1915 #define __FUNCT__ "MatEqual_MPIBAIJ" 1916 PetscErrorCode MatEqual_MPIBAIJ(Mat 
A,Mat B,PetscBool *flag) 1917 { 1918 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1919 Mat a,b,c,d; 1920 PetscBool flg; 1921 PetscErrorCode ierr; 1922 1923 PetscFunctionBegin; 1924 a = matA->A; b = matA->B; 1925 c = matB->A; d = matB->B; 1926 1927 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1928 if (flg) { 1929 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1930 } 1931 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1932 PetscFunctionReturn(0); 1933 } 1934 1935 #undef __FUNCT__ 1936 #define __FUNCT__ "MatCopy_MPIBAIJ" 1937 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1938 { 1939 PetscErrorCode ierr; 1940 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1941 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1942 1943 PetscFunctionBegin; 1944 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1945 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1946 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1947 } else { 1948 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1949 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1950 } 1951 PetscFunctionReturn(0); 1952 } 1953 1954 #undef __FUNCT__ 1955 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1956 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1957 { 1958 PetscErrorCode ierr; 1959 1960 PetscFunctionBegin; 1961 ierr = MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1962 PetscFunctionReturn(0); 1963 } 1964 1965 #undef __FUNCT__ 1966 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1967 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1968 { 1969 PetscErrorCode ierr; 1970 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1971 PetscBLASInt bnz,one=1; 1972 Mat_SeqBAIJ *x,*y; 1973 1974 PetscFunctionBegin; 1975 if (str == SAME_NONZERO_PATTERN) { 1976 PetscScalar alpha = a; 1977 x = (Mat_SeqBAIJ *)xx->A->data; 1978 y = 
(Mat_SeqBAIJ *)yy->A->data; 1979 bnz = PetscBLASIntCast(x->nz); 1980 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1981 x = (Mat_SeqBAIJ *)xx->B->data; 1982 y = (Mat_SeqBAIJ *)yy->B->data; 1983 bnz = PetscBLASIntCast(x->nz); 1984 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1985 } else { 1986 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1987 } 1988 PetscFunctionReturn(0); 1989 } 1990 1991 #undef __FUNCT__ 1992 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ" 1993 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs) 1994 { 1995 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1996 PetscInt rbs,cbs; 1997 PetscErrorCode ierr; 1998 1999 PetscFunctionBegin; 2000 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 2001 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 2002 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 2003 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 2004 if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs); 2005 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs); 2006 PetscFunctionReturn(0); 2007 } 2008 2009 #undef __FUNCT__ 2010 #define __FUNCT__ "MatRealPart_MPIBAIJ" 2011 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 2012 { 2013 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2014 PetscErrorCode ierr; 2015 2016 PetscFunctionBegin; 2017 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2018 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2019 PetscFunctionReturn(0); 2020 } 2021 2022 #undef __FUNCT__ 2023 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 2024 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 2025 { 2026 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2027 PetscErrorCode ierr; 2028 2029 PetscFunctionBegin; 2030 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2031 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2032 PetscFunctionReturn(0); 2033 } 2034 2035 #undef __FUNCT__ 2036 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 2037 PetscErrorCode 
MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 2038 { 2039 PetscErrorCode ierr; 2040 IS iscol_local; 2041 PetscInt csize; 2042 2043 PetscFunctionBegin; 2044 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2045 if (call == MAT_REUSE_MATRIX) { 2046 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 2047 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2048 } else { 2049 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2050 } 2051 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 2052 if (call == MAT_INITIAL_MATRIX) { 2053 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 2054 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 2055 } 2056 PetscFunctionReturn(0); 2057 } 2058 2059 #undef __FUNCT__ 2060 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private" 2061 /* 2062 Not great since it makes two copies of the submatrix, first an SeqBAIJ 2063 in local and then by concatenating the local matrices the end result. 
2064 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ() 2065 */ 2066 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2067 { 2068 PetscErrorCode ierr; 2069 PetscMPIInt rank,size; 2070 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 2071 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2072 Mat *local,M,Mreuse; 2073 MatScalar *vwork,*aa; 2074 MPI_Comm comm = ((PetscObject)mat)->comm; 2075 Mat_SeqBAIJ *aij; 2076 2077 2078 PetscFunctionBegin; 2079 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2080 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2081 2082 if (call == MAT_REUSE_MATRIX) { 2083 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2084 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2085 local = &Mreuse; 2086 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2087 } else { 2088 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2089 Mreuse = *local; 2090 ierr = PetscFree(local);CHKERRQ(ierr); 2091 } 2092 2093 /* 2094 m - number of local rows 2095 n - number of columns (same on all processors) 2096 rstart - first row in new global matrix generated 2097 */ 2098 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2099 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2100 m = m/bs; 2101 n = n/bs; 2102 2103 if (call == MAT_INITIAL_MATRIX) { 2104 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2105 ii = aij->i; 2106 jj = aij->j; 2107 2108 /* 2109 Determine the number of non-zeros in the diagonal and off-diagonal 2110 portions of the matrix in order to do correct preallocation 2111 */ 2112 2113 /* first get start and end of "diagonal" columns */ 2114 if (csize == PETSC_DECIDE) { 2115 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2116 if (mglobal == n*bs) { /* square matrix */ 2117 nlocal = m; 2118 } else 
{ 2119 nlocal = n/size + ((n % size) > rank); 2120 } 2121 } else { 2122 nlocal = csize/bs; 2123 } 2124 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2125 rstart = rend - nlocal; 2126 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2127 2128 /* next, compute all the lengths */ 2129 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2130 olens = dlens + m; 2131 for (i=0; i<m; i++) { 2132 jend = ii[i+1] - ii[i]; 2133 olen = 0; 2134 dlen = 0; 2135 for (j=0; j<jend; j++) { 2136 if (*jj < rstart || *jj >= rend) olen++; 2137 else dlen++; 2138 jj++; 2139 } 2140 olens[i] = olen; 2141 dlens[i] = dlen; 2142 } 2143 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2144 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 2145 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2146 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2147 ierr = PetscFree(dlens);CHKERRQ(ierr); 2148 } else { 2149 PetscInt ml,nl; 2150 2151 M = *newmat; 2152 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2153 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2154 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2155 /* 2156 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2157 rather than the slower MatSetValues(). 
2158 */ 2159 M->was_assembled = PETSC_TRUE; 2160 M->assembled = PETSC_FALSE; 2161 } 2162 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2163 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2164 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2165 ii = aij->i; 2166 jj = aij->j; 2167 aa = aij->a; 2168 for (i=0; i<m; i++) { 2169 row = rstart/bs + i; 2170 nz = ii[i+1] - ii[i]; 2171 cwork = jj; jj += nz; 2172 vwork = aa; aa += nz; 2173 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2174 } 2175 2176 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2177 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2178 *newmat = M; 2179 2180 /* save submatrix used in processor for next request */ 2181 if (call == MAT_INITIAL_MATRIX) { 2182 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2183 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2184 } 2185 2186 PetscFunctionReturn(0); 2187 } 2188 2189 #undef __FUNCT__ 2190 #define __FUNCT__ "MatPermute_MPIBAIJ" 2191 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2192 { 2193 MPI_Comm comm,pcomm; 2194 PetscInt first,local_size,nrows; 2195 const PetscInt *rows; 2196 PetscMPIInt size; 2197 IS crowp,growp,irowp,lrowp,lcolp,icolp; 2198 PetscErrorCode ierr; 2199 2200 PetscFunctionBegin; 2201 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2202 /* make a collective version of 'rowp' */ 2203 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2204 if (pcomm==comm) { 2205 crowp = rowp; 2206 } else { 2207 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2208 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2209 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2210 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2211 } 2212 /* collect the global row permutation and invert it */ 2213 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 2214 
ierr = ISSetPermutation(growp);CHKERRQ(ierr); 2215 if (pcomm!=comm) { 2216 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 2217 } 2218 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 2219 /* get the local target indices */ 2220 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 2221 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 2222 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 2223 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 2224 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 2225 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 2226 /* the column permutation is so much easier; 2227 make a local version of 'colp' and invert it */ 2228 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2229 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2230 if (size==1) { 2231 lcolp = colp; 2232 } else { 2233 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 2234 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 2235 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr); 2236 } 2237 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2238 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 2239 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 2240 if (size>1) { 2241 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 2242 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 2243 } 2244 /* now we just get the submatrix */ 2245 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2246 /* clean up */ 2247 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 2248 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 2249 PetscFunctionReturn(0); 2250 } 2251 2252 #undef __FUNCT__ 2253 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2254 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2255 { 2256 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2257 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2258 2259 
PetscFunctionBegin; 2260 if (nghosts) { *nghosts = B->nbs;} 2261 if (ghosts) {*ghosts = baij->garray;} 2262 PetscFunctionReturn(0); 2263 } 2264 2265 extern PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat); 2266 2267 #undef __FUNCT__ 2268 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ" 2269 /* 2270 This routine is almost identical to MatFDColoringCreate_MPIBAIJ()! 2271 */ 2272 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 2273 { 2274 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 2275 PetscErrorCode ierr; 2276 PetscMPIInt size,*ncolsonproc,*disp,nn; 2277 PetscInt bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col; 2278 const PetscInt *is; 2279 PetscInt nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj; 2280 PetscInt *rowhit,M,cstart,cend,colb; 2281 PetscInt *columnsforrow,l; 2282 IS *isa; 2283 PetscBool done,flg; 2284 ISLocalToGlobalMapping map = mat->cmap->bmapping; 2285 PetscInt *ltog = (map ? map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype; 2286 2287 PetscFunctionBegin; 2288 if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();"); 2289 if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock"); 2290 2291 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 2292 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2293 M = mat->rmap->n/bs; 2294 cstart = mat->cmap->rstart/bs; 2295 cend = mat->cmap->rend/bs; 2296 c->M = mat->rmap->N/bs; /* set the global rows and columns and local rows */ 2297 c->N = mat->cmap->N/bs; 2298 c->m = mat->rmap->n/bs; 2299 c->rstart = mat->rmap->rstart/bs; 2300 2301 c->ncolors = nis; 2302 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 2303 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 2304 ierr = 
PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 2305 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 2306 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 2307 ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr); 2308 2309 /* Allow access to data structures of local part of matrix */ 2310 if (!baij->colmap) { 2311 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 2312 } 2313 ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2314 ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2315 2316 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 2317 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 2318 2319 for (i=0; i<nis; i++) { 2320 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 2321 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 2322 c->ncolumns[i] = n; 2323 if (n) { 2324 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 2325 ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr); 2326 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 2327 } else { 2328 c->columns[i] = 0; 2329 } 2330 2331 if (ctype == IS_COLORING_GLOBAL){ 2332 /* Determine the total (parallel) number of columns of this color */ 2333 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 2334 ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr); 2335 2336 nn = PetscMPIIntCast(n); 2337 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2338 nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];} 2339 if (!nctot) { 2340 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 2341 } 2342 2343 disp[0] = 0; 2344 for (j=1; j<size; j++) { 2345 disp[j] = disp[j-1] + ncolsonproc[j-1]; 2346 } 2347 2348 
/* Get complete list of columns for color on each processor */ 2349 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2350 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2351 ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr); 2352 } else if (ctype == IS_COLORING_GHOSTED){ 2353 /* Determine local number of columns of this color on this process, including ghost points */ 2354 nctot = n; 2355 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2356 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 2357 } else { 2358 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 2359 } 2360 2361 /* 2362 Mark all rows affect by these columns 2363 */ 2364 /* Temporary option to allow for debugging/testing */ 2365 flg = PETSC_FALSE; 2366 ierr = PetscOptionsGetBool(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr); 2367 if (!flg) {/*-----------------------------------------------------------------------------*/ 2368 /* crude, fast version */ 2369 ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr); 2370 /* loop over columns*/ 2371 for (j=0; j<nctot; j++) { 2372 if (ctype == IS_COLORING_GHOSTED) { 2373 col = ltog[cols[j]]; 2374 } else { 2375 col = cols[j]; 2376 } 2377 if (col >= cstart && col < cend) { 2378 /* column is in diagonal block of matrix */ 2379 rows = A_cj + A_ci[col-cstart]; 2380 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2381 } else { 2382 #if defined (PETSC_USE_CTABLE) 2383 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2384 colb --; 2385 #else 2386 colb = baij->colmap[col] - 1; 2387 #endif 2388 if (colb == -1) { 2389 m = 0; 2390 } else { 2391 colb = colb/bs; 2392 rows = B_cj + B_ci[colb]; 2393 m = B_ci[colb+1] - B_ci[colb]; 2394 } 2395 } 2396 /* loop over columns marking them in rowhit */ 2397 for (k=0; k<m; k++) { 2398 rowhit[*rows++] = col + 1; 2399 } 2400 } 2401 2402 /* count the number 
of hits */ 2403 nrows = 0; 2404 for (j=0; j<M; j++) { 2405 if (rowhit[j]) nrows++; 2406 } 2407 c->nrows[i] = nrows; 2408 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2409 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2410 ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2411 nrows = 0; 2412 for (j=0; j<M; j++) { 2413 if (rowhit[j]) { 2414 c->rows[i][nrows] = j; 2415 c->columnsforrow[i][nrows] = rowhit[j] - 1; 2416 nrows++; 2417 } 2418 } 2419 } else {/*-------------------------------------------------------------------------------*/ 2420 /* slow version, using rowhit as a linked list */ 2421 PetscInt currentcol,fm,mfm; 2422 rowhit[M] = M; 2423 nrows = 0; 2424 /* loop over columns*/ 2425 for (j=0; j<nctot; j++) { 2426 if (ctype == IS_COLORING_GHOSTED) { 2427 col = ltog[cols[j]]; 2428 } else { 2429 col = cols[j]; 2430 } 2431 if (col >= cstart && col < cend) { 2432 /* column is in diagonal block of matrix */ 2433 rows = A_cj + A_ci[col-cstart]; 2434 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2435 } else { 2436 #if defined (PETSC_USE_CTABLE) 2437 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2438 colb --; 2439 #else 2440 colb = baij->colmap[col] - 1; 2441 #endif 2442 if (colb == -1) { 2443 m = 0; 2444 } else { 2445 colb = colb/bs; 2446 rows = B_cj + B_ci[colb]; 2447 m = B_ci[colb+1] - B_ci[colb]; 2448 } 2449 } 2450 2451 /* loop over columns marking them in rowhit */ 2452 fm = M; /* fm points to first entry in linked list */ 2453 for (k=0; k<m; k++) { 2454 currentcol = *rows++; 2455 /* is it already in the list? 
*/ 2456 do { 2457 mfm = fm; 2458 fm = rowhit[fm]; 2459 } while (fm < currentcol); 2460 /* not in list so add it */ 2461 if (fm != currentcol) { 2462 nrows++; 2463 columnsforrow[currentcol] = col; 2464 /* next three lines insert new entry into linked list */ 2465 rowhit[mfm] = currentcol; 2466 rowhit[currentcol] = fm; 2467 fm = currentcol; 2468 /* fm points to present position in list since we know the columns are sorted */ 2469 } else { 2470 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected"); 2471 } 2472 } 2473 } 2474 c->nrows[i] = nrows; 2475 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2476 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2477 ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2478 /* now store the linked list of rows into c->rows[i] */ 2479 nrows = 0; 2480 fm = rowhit[M]; 2481 do { 2482 c->rows[i][nrows] = fm; 2483 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 2484 fm = rowhit[fm]; 2485 } while (fm < M); 2486 } /* ---------------------------------------------------------------------------------------*/ 2487 ierr = PetscFree(cols);CHKERRQ(ierr); 2488 } 2489 2490 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 2491 /* 2492 vscale will contain the "diagonal" on processor scalings followed by the off processor 2493 */ 2494 if (ctype == IS_COLORING_GLOBAL) { 2495 PetscInt *garray; 2496 ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2497 for (i=0; i<baij->B->cmap->n/bs; i++) { 2498 for (j=0; j<bs; j++) { 2499 garray[i*bs+j] = bs*baij->garray[i]+j; 2500 } 2501 } 2502 ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr); 2503 ierr = PetscFree(garray);CHKERRQ(ierr); 2504 CHKMEMQ; 2505 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2506 for (k=0; k<c->ncolors; k++) { 2507 ierr = 
PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2508 for (l=0; l<c->nrows[k]; l++) { 2509 col = c->columnsforrow[k][l]; 2510 if (col >= cstart && col < cend) { 2511 /* column is in diagonal block of matrix */ 2512 colb = col - cstart; 2513 } else { 2514 /* column is in "off-processor" part */ 2515 #if defined (PETSC_USE_CTABLE) 2516 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2517 colb --; 2518 #else 2519 colb = baij->colmap[col] - 1; 2520 #endif 2521 colb = colb/bs; 2522 colb += cend - cstart; 2523 } 2524 c->vscaleforrow[k][l] = colb; 2525 } 2526 } 2527 } else if (ctype == IS_COLORING_GHOSTED) { 2528 /* Get gtol mapping */ 2529 PetscInt N = mat->cmap->N, *gtol; 2530 ierr = PetscMalloc((N+1)*sizeof(PetscInt),>ol);CHKERRQ(ierr); 2531 for (i=0; i<N; i++) gtol[i] = -1; 2532 for (i=0; i<map->n; i++) gtol[ltog[i]] = i; 2533 2534 c->vscale = 0; /* will be created in MatFDColoringApply() */ 2535 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2536 for (k=0; k<c->ncolors; k++) { 2537 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2538 for (l=0; l<c->nrows[k]; l++) { 2539 col = c->columnsforrow[k][l]; /* global column index */ 2540 c->vscaleforrow[k][l] = gtol[col]; /* local column index */ 2541 } 2542 } 2543 ierr = PetscFree(gtol);CHKERRQ(ierr); 2544 } 2545 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 2546 2547 ierr = PetscFree(rowhit);CHKERRQ(ierr); 2548 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 2549 ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2550 ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2551 CHKMEMQ; 2552 PetscFunctionReturn(0); 2553 } 2554 2555 #undef __FUNCT__ 2556 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ" 2557 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat) 2558 { 2559 Mat B; 
2560 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2561 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2562 Mat_SeqAIJ *b; 2563 PetscErrorCode ierr; 2564 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2565 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2566 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2567 2568 PetscFunctionBegin; 2569 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2570 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2571 2572 /* ---------------------------------------------------------------- 2573 Tell every processor the number of nonzeros per row 2574 */ 2575 ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 2576 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2577 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2578 } 2579 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2580 ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr); 2581 displs = recvcounts + size; 2582 for (i=0; i<size; i++) { 2583 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2584 displs[i] = A->rmap->range[i]/bs; 2585 } 2586 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2587 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2588 #else 2589 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2590 #endif 2591 /* --------------------------------------------------------------- 2592 Create the sequential matrix of the same type as the local block diagonal 2593 */ 2594 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2595 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2596 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 
2597 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2598 b = (Mat_SeqAIJ *)B->data; 2599 2600 /*-------------------------------------------------------------------- 2601 Copy my part of matrix column indices over 2602 */ 2603 sendcount = ad->nz + bd->nz; 2604 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2605 a_jsendbuf = ad->j; 2606 b_jsendbuf = bd->j; 2607 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2608 cnt = 0; 2609 for (i=0; i<n; i++) { 2610 2611 /* put in lower diagonal portion */ 2612 m = bd->i[i+1] - bd->i[i]; 2613 while (m > 0) { 2614 /* is it above diagonal (in bd (compressed) numbering) */ 2615 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2616 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2617 m--; 2618 } 2619 2620 /* put in diagonal portion */ 2621 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2622 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2623 } 2624 2625 /* put in upper diagonal portion */ 2626 while (m-- > 0) { 2627 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2628 } 2629 } 2630 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt); 2631 2632 /*-------------------------------------------------------------------- 2633 Gather all column indices to all processors 2634 */ 2635 for (i=0; i<size; i++) { 2636 recvcounts[i] = 0; 2637 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2638 recvcounts[i] += lens[j]; 2639 } 2640 } 2641 displs[0] = 0; 2642 for (i=1; i<size; i++) { 2643 displs[i] = displs[i-1] + recvcounts[i-1]; 2644 } 2645 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2646 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2647 #else 2648 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2649 #endif 2650 /*-------------------------------------------------------------------- 2651 Assemble the matrix into useable 
form (note numerical values not yet set) 2652 */ 2653 /* set the b->ilen (length of each row) values */ 2654 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2655 /* set the b->i indices */ 2656 b->i[0] = 0; 2657 for (i=1; i<=A->rmap->N/bs; i++) { 2658 b->i[i] = b->i[i-1] + lens[i-1]; 2659 } 2660 ierr = PetscFree(lens);CHKERRQ(ierr); 2661 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2662 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2663 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2664 2665 if (A->symmetric){ 2666 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2667 } else if (A->hermitian) { 2668 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2669 } else if (A->structurally_symmetric) { 2670 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2671 } 2672 *newmat = B; 2673 PetscFunctionReturn(0); 2674 } 2675 2676 #undef __FUNCT__ 2677 #define __FUNCT__ "MatSOR_MPIBAIJ" 2678 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2679 { 2680 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2681 PetscErrorCode ierr; 2682 Vec bb1 = 0; 2683 2684 PetscFunctionBegin; 2685 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2686 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2687 } 2688 2689 if (flag == SOR_APPLY_UPPER) { 2690 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2691 PetscFunctionReturn(0); 2692 } 2693 2694 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2695 if (flag & SOR_ZERO_INITIAL_GUESS) { 2696 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2697 its--; 2698 } 2699 2700 while (its--) { 2701 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2702 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2703 2704 /* 
update rhs: bb1 = bb - B*x */ 2705 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2706 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2707 2708 /* local sweep */ 2709 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2710 } 2711 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 2712 if (flag & SOR_ZERO_INITIAL_GUESS) { 2713 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2714 its--; 2715 } 2716 while (its--) { 2717 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2718 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2719 2720 /* update rhs: bb1 = bb - B*x */ 2721 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2722 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2723 2724 /* local sweep */ 2725 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2726 } 2727 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 2728 if (flag & SOR_ZERO_INITIAL_GUESS) { 2729 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2730 its--; 2731 } 2732 while (its--) { 2733 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2734 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2735 2736 /* update rhs: bb1 = bb - B*x */ 2737 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2738 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2739 2740 /* local sweep */ 2741 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2742 } 2743 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2744 2745 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2746 PetscFunctionReturn(0); 2747 } 2748 2749 extern PetscErrorCode 
MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);

#undef __FUNCT__
#define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ"
/*
   MatInvertBlockDiagonal_MPIBAIJ - Inverts the point-block diagonal of the
   matrix by delegating to the sequential on-diagonal block a->A (the block
   diagonal of an MPIBAIJ matrix lives entirely in the local diagonal part).

   Output Parameter:
.  values - the inverted blocks, as produced by the sequential implementation
*/
PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,PetscScalar **values)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}


/* -------------------------------------------------------------------*/
/* Operation table for the MPIBAIJ matrix type; a 0 entry means the
   operation is not implemented for this type.  The numeric comments give
   the slot index in struct _MatOps. */
static struct _MatOps MatOps_Values = {
       MatSetValues_MPIBAIJ,
       MatGetRow_MPIBAIJ,
       MatRestoreRow_MPIBAIJ,
       MatMult_MPIBAIJ,
/* 4*/ MatMultAdd_MPIBAIJ,
       MatMultTranspose_MPIBAIJ,
       MatMultTransposeAdd_MPIBAIJ,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       MatSOR_MPIBAIJ,
       MatTranspose_MPIBAIJ,
/*15*/ MatGetInfo_MPIBAIJ,
       MatEqual_MPIBAIJ,
       MatGetDiagonal_MPIBAIJ,
       MatDiagonalScale_MPIBAIJ,
       MatNorm_MPIBAIJ,
/*20*/ MatAssemblyBegin_MPIBAIJ,
       MatAssemblyEnd_MPIBAIJ,
       MatSetOption_MPIBAIJ,
       MatZeroEntries_MPIBAIJ,
/*24*/ MatZeroRows_MPIBAIJ,
       0,
       0,
       0,
       0,
/*29*/ MatSetUpPreallocation_MPIBAIJ,
       0,
       0,
       0,
       0,
/*34*/ MatDuplicate_MPIBAIJ,
       0,
       0,
       0,
       0,
/*39*/ MatAXPY_MPIBAIJ,
       MatGetSubMatrices_MPIBAIJ,
       MatIncreaseOverlap_MPIBAIJ,
       MatGetValues_MPIBAIJ,
       MatCopy_MPIBAIJ,
/*44*/ 0,
       MatScale_MPIBAIJ,
       0,
       0,
       0,
/*49*/ MatSetBlockSize_MPIBAIJ,
       0,
       0,
       0,
       0,
/*54*/ MatFDColoringCreate_MPIBAIJ,
       0,
       MatSetUnfactored_MPIBAIJ,
       MatPermute_MPIBAIJ,
       MatSetValuesBlocked_MPIBAIJ,
/*59*/ MatGetSubMatrix_MPIBAIJ,
       MatDestroy_MPIBAIJ,
       MatView_MPIBAIJ,
       0,
       0,
/*64*/ 0,
       0,
       0,
       0,
       0,
/*69*/ MatGetRowMaxAbs_MPIBAIJ,
       0,
       0,
       0,
       0,
/*74*/ 0,
       MatFDColoringApply_BAIJ,
       0,
       0,
       0,
/*79*/ 0,
       0,
       0,
       0,
       MatLoad_MPIBAIJ,
/*84*/ 0,
       0,
       0,
       0,
       0,
/*89*/ 0,
       0,
       0,
       0,
       0,
/*94*/ 0,
       0,
       0,
       0,
       0,
/*99*/ 0,
       0,
       0,
       0,
       0,
/*104*/0,
       MatRealPart_MPIBAIJ,
       MatImaginaryPart_MPIBAIJ,
       0,
       0,
/*109*/0,
       0,
       0,
       0,
       0,
/*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
       0,
       MatGetGhosts_MPIBAIJ,
       0,
       0,
/*119*/0,
       0,
       0,
       0,
       0,
/*124*/0,
       0,
       MatInvertBlockDiagonal_MPIBAIJ
};

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
/* Returns (without copying) the sequential on-diagonal block of the matrix. */
PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
{
  PetscFunctionBegin;
  *a = ((Mat_MPIBAIJ *)A->data)->A;
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
extern PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
/*
   Type-specific implementation behind MatMPIBAIJSetPreallocationCSR().
   Counts, per local block row, the number of column blocks falling inside
   [cstart,cend) (diagonal part) versus outside it (off-diagonal part),
   preallocates accordingly, then inserts the (optional) values.
   NOTE(review): the split below assumes the column indices jj are sorted
   within each row, as the manpage requires.
*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
{
  PetscInt       m,rstart,cstart,cend;
  PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
  const PetscInt *JJ=0;
  PetscScalar    *values=0;
  PetscErrorCode ierr;

  PetscFunctionBegin;

  if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
  ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  /* all quantities below are in units of blocks, not scalar entries */
  m      = B->rmap->n/bs;
  rstart = B->rmap->rstart/bs;
  cstart = B->cmap->rstart/bs;
  cend   = B->cmap->rend/bs;

  if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
  ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    nz = ii[i+1] - ii[i];
    if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
    nz_max = PetscMax(nz_max,nz);
    JJ = jj + ii[i];
    /* skip columns left of the diagonal range */
    for (j=0; j<nz; j++) {
      if (*JJ >= cstart) break;
      JJ++;
    }
    /* count columns inside [cstart,cend) */
    d = 0;
    for (; j<nz; j++) {
      if (*JJ++ >= cend) break;
      d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nz - d;
  }
  ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  /* with no user values, insert (explicit) zeros from a scratch buffer */
  values = (PetscScalar*)V;
  if (!values) {
    ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  for (i=0; i<m; i++) {
    PetscInt          row    = i + rstart;
    PetscInt          ncols  = ii[i+1] - ii[i];
    const PetscInt    *icols = jj + ii[i];
    const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
    ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
  }

  if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
/*@C
   MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
   (the default parallel PETSc format).

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local row (starts with zero)
.
   j - the column indices for each local row (starts with zero) these must be sorted for each row
-  v - optional values in the matrix

   Level: developer

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
@*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  /* dispatch to the type-specific implementation, if any */
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
/*
   Type-specific implementation behind MatMPIBAIJSetPreallocation().
   Validates the block size (optionally read from -mat_block_size when bs<0),
   sets up the block layouts, fills the block-index bookkeeping in Mat_MPIBAIJ
   and preallocates the two sequential pieces b->A (diagonal) and b->B
   (off-diagonal).
*/
PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
{
  Mat_MPIBAIJ    *b;
  PetscErrorCode ierr;
  PetscInt       i, newbs = PetscAbs(bs);

  PetscFunctionBegin;
  if (bs < 0) {
    /* negative bs means: take |bs| as default but allow the options database to override */
    ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);
    bs   = PetscAbs(bs);
  }
  /* d_nnz/o_nnz are counted per block row, so they become invalid if the blocksize changes */
  if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
  bs = newbs;


  if (bs < 1) SETERRQ(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);

  ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);

  if (d_nnz) {
    for (i=0; i<B->rmap->n/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->rmap->n/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
    }
  }

  /* cache sizes and ownership ranges expressed in blocks */
  b      = (Mat_MPIBAIJ*)B->data;
  b->bs2 = bs*bs;
  b->mbs = B->rmap->n/bs;
  b->nbs = B->cmap->n/bs;
  b->Mbs = B->rmap->N/bs;
  b->Nbs = B->cmap->N/bs;

  for (i=0; i<=b->size; i++) {
    b->rangebs[i] = B->rmap->range[i]/bs;
  }
  b->rstartbs = B->rmap->rstart/bs;
  b->rendbs   = B->rmap->rend/bs;
  b->cstartbs = B->cmap->rstart/bs;
  b->cendbs   = B->cmap->rend/bs;

  if (!B->preallocated) {
    /* first call: create the two sequential pieces and the block stash */
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
    ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
    ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
    ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
  }

  ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
  B->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
EXTERN_C_END


EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
/*
   Builds an MPIAdj (adjacency) matrix from the block structure of an MPIBAIJ
   matrix: one vertex per block row, diagonal blocks omitted, block column
   indices merged in globally ascending order from the o (off-diagonal) and
   d (diagonal) parts.
*/
PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
  PetscErrorCode ierr;
  Mat_SeqBAIJ    *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
  PetscInt       M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
  const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;

  PetscFunctionBegin;
  ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
  ii[0] = 0;
  CHKMEMQ;
  for (i=0; i<M; i++) {
    if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
    if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
    ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
    /* remove one from count of matrix has diagonal */
    for (j=id[i]; j<id[i+1]; j++) {
      if (jd[j] == i) {ii[i+1]--;break;}
    }
    CHKMEMQ;
  }
  ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<M; i++) {
    /* off-diagonal entries whose global block column precedes this process's range */
    for (j=io[i]; j<io[i+1]; j++) {
      if (garray[jo[j]] > rstart) break;
      jj[cnt++] = garray[jo[j]];
      CHKMEMQ;
    }
    /* diagonal-part entries, skipping the diagonal block itself */
    for (k=id[i]; k<id[i+1]; k++) {
      if (jd[k] != i) {
        jj[cnt++] = rstart + jd[k];
        CHKMEMQ;
      }
    }
    /* remaining off-diagonal entries (to the right of the local range) */
    for (;j<io[i+1]; j++) {
      jj[cnt++] = garray[jo[j]];
      CHKMEMQ;
    }
  }
  ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#include <../src/mat/impls/aij/mpi/mpiaij.h>
EXTERN_C_BEGIN
PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*);
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
/*
   Converts an assembled MPIBAIJ matrix to MPIAIJ by building a fresh MPIAIJ
   shell and replacing its two sequential pieces with AIJ conversions of the
   BAIJ pieces.  NOTE(review): DisAssemble_MPIBAIJ() modifies A's off-diagonal
   representation, which is why A is re-assembled afterwards.
*/
PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,const MatType newtype,MatReuse reuse,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  Mat            B;
  Mat_MPIAIJ     *b;

  PetscFunctionBegin;
  if (!A->assembled) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Matrix must be assembled");

  ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
  b = (Mat_MPIAIJ*) B->data;

  /* discard the placeholder sequential pieces and substitute AIJ conversions */
  ierr = MatDestroy(&b->A);CHKERRQ(ierr);
  ierr = MatDestroy(&b->B);CHKERRQ(ierr);
  ierr = DisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
  ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
  ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#if defined(PETSC_HAVE_MUMPS)
extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
#endif
EXTERN_C_END

/*MC
   MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.

   Options Database Keys:
+ -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
. -mat_block_size <bs> - set the blocksize used to store the matrix
- -mat_use_hash_table <fact>

  Level: beginner

.seealso: MatCreateMPIBAIJ
M*/

EXTERN_C_BEGIN
extern PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,const MatType,MatReuse,Mat*);
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIBAIJ"
/*
   Constructor for the MPIBAIJ type: installs the operation table, initializes
   the Mat_MPIBAIJ bookkeeping fields, processes the hash-table option, and
   composes the type-specific methods used by the generic Mat interface.
*/
PetscErrorCode MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ    *b;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr    = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;

  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = PETSC_NULL;
  b->garray      = PETSC_NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
    /* factor must exceed 1.0 for the table to have spare slots */
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",
                                           "MatConvert_MPIBAIJ_MPIAdj",
                                           MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",
                                           "MatConvert_MPIBAIJ_MPIAIJ",
                                           MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",
                                           "MatConvert_MPIBAIJ_MPISBAIJ",
                                           MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                           "MatStoreValues_MPIBAIJ",
                                           MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                           "MatRetrieveValues_MPIBAIJ",
                                           MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIBAIJ",
                                           MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
                                           "MatMPIBAIJSetPreallocation_MPIBAIJ",
                                           MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
                                           "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
                                           MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                           "MatDiagonalScaleLocal_MPIBAIJ",
                                           MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
                                           "MatSetHashTableFactor_MPIBAIJ",
                                           MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",
                                           "MatConvert_MPIBAIJ_MPIBSTRM",
                                           MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.

   This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
   and MATMPIBAIJ otherwise.

   Options Database Keys:
.
 -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
M*/

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocation"
/*@C
   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  A - the matrix
.  bs   - size of block
.  d_nz  - number of block nonzeros per block row in diagonal portion of local
           submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           of the in diagonal portion of the local (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact>

   Notes:
   If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  |  o o o d d d o o o o o o
   row 4  |  o o o d d d o o o o o o
   row 5  |  o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
@*/
PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  /* dispatch to the type-specific implementation, if any */
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIBAIJ"
/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.
   n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzero blocks per block row in diagonal portion of local
           submatrix  (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           of the in diagonal portion of the local (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact>

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   A nonzero block is any block that has 1 or more nonzeros in it

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  |  o o o d d d o o o o o o
   row 4  |  o o o d d d o o o o o o
   row 5  |  o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
3511 3512 Level: intermediate 3513 3514 .keywords: matrix, block, aij, compressed row, sparse, parallel 3515 3516 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3517 @*/ 3518 PetscErrorCode MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3519 { 3520 PetscErrorCode ierr; 3521 PetscMPIInt size; 3522 3523 PetscFunctionBegin; 3524 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3525 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3526 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3527 if (size > 1) { 3528 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3529 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3530 } else { 3531 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3532 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3533 } 3534 PetscFunctionReturn(0); 3535 } 3536 3537 #undef __FUNCT__ 3538 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3539 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3540 { 3541 Mat mat; 3542 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3543 PetscErrorCode ierr; 3544 PetscInt len=0; 3545 3546 PetscFunctionBegin; 3547 *newmat = 0; 3548 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3549 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3550 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3551 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3552 3553 mat->factortype = matin->factortype; 3554 mat->preallocated = PETSC_TRUE; 3555 mat->assembled = PETSC_TRUE; 3556 mat->insertmode = NOT_SET_VALUES; 3557 3558 a = (Mat_MPIBAIJ*)mat->data; 3559 mat->rmap->bs = matin->rmap->bs; 3560 a->bs2 = oldmat->bs2; 3561 a->mbs = oldmat->mbs; 3562 
a->nbs = oldmat->nbs; 3563 a->Mbs = oldmat->Mbs; 3564 a->Nbs = oldmat->Nbs; 3565 3566 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3567 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3568 3569 a->size = oldmat->size; 3570 a->rank = oldmat->rank; 3571 a->donotstash = oldmat->donotstash; 3572 a->roworiented = oldmat->roworiented; 3573 a->rowindices = 0; 3574 a->rowvalues = 0; 3575 a->getrowactive = PETSC_FALSE; 3576 a->barray = 0; 3577 a->rstartbs = oldmat->rstartbs; 3578 a->rendbs = oldmat->rendbs; 3579 a->cstartbs = oldmat->cstartbs; 3580 a->cendbs = oldmat->cendbs; 3581 3582 /* hash table stuff */ 3583 a->ht = 0; 3584 a->hd = 0; 3585 a->ht_size = 0; 3586 a->ht_flag = oldmat->ht_flag; 3587 a->ht_fact = oldmat->ht_fact; 3588 a->ht_total_ct = 0; 3589 a->ht_insert_ct = 0; 3590 3591 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3592 if (oldmat->colmap) { 3593 #if defined (PETSC_USE_CTABLE) 3594 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3595 #else 3596 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3597 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3598 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3599 #endif 3600 } else a->colmap = 0; 3601 3602 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3603 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3604 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3605 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3606 } else a->garray = 0; 3607 3608 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3609 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3610 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3611 ierr = 
VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3612 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3613 3614 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3615 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3616 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3617 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3618 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3619 *newmat = mat; 3620 3621 PetscFunctionReturn(0); 3622 } 3623 3624 #undef __FUNCT__ 3625 #define __FUNCT__ "MatLoad_MPIBAIJ" 3626 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer) 3627 { 3628 PetscErrorCode ierr; 3629 int fd; 3630 PetscInt i,nz,j,rstart,rend; 3631 PetscScalar *vals,*buf; 3632 MPI_Comm comm = ((PetscObject)viewer)->comm; 3633 MPI_Status status; 3634 PetscMPIInt rank,size,maxnz; 3635 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3636 PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 3637 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 3638 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3639 PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 3640 PetscInt dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols; 3641 3642 PetscFunctionBegin; 3643 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3644 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3645 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3646 3647 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3648 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3649 if (!rank) { 3650 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3651 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3652 if (header[0] != MAT_FILE_CLASSID) 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3653 } 3654 3655 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 3656 3657 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3658 M = header[1]; N = header[2]; 3659 3660 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3661 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 3662 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 3663 3664 /* If global sizes are set, check if they are consistent with that given in the file */ 3665 if (sizesset) { 3666 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 3667 } 3668 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3669 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3670 3671 if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices"); 3672 3673 /* 3674 This code adds extra rows to make sure the number of rows is 3675 divisible by the blocksize 3676 */ 3677 Mbs = M/bs; 3678 extra_rows = bs - M + bs*Mbs; 3679 if (extra_rows == bs) extra_rows = 0; 3680 else Mbs++; 3681 if (extra_rows && !rank) { 3682 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3683 } 3684 3685 /* determine ownership of all rows */ 3686 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 3687 mbs = Mbs/size + ((Mbs % size) > rank); 3688 m = mbs*bs; 3689 } else { /* User set */ 3690 m = newmat->rmap->n; 3691 mbs = m/bs; 3692 } 3693 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 3694 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3695 3696 /* process 0 needs enough 
room for process with most rows */ 3697 if (!rank) { 3698 mmax = rowners[1]; 3699 for (i=2; i<size; i++) { 3700 mmax = PetscMax(mmax,rowners[i]); 3701 } 3702 mmax*=bs; 3703 } else mmax = m; 3704 3705 rowners[0] = 0; 3706 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3707 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3708 rstart = rowners[rank]; 3709 rend = rowners[rank+1]; 3710 3711 /* distribute row lengths to all processors */ 3712 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 3713 if (!rank) { 3714 mend = m; 3715 if (size == 1) mend = mend - extra_rows; 3716 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3717 for (j=mend; j<m; j++) locrowlens[j] = 1; 3718 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3719 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3720 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3721 for (j=0; j<m; j++) { 3722 procsnz[0] += locrowlens[j]; 3723 } 3724 for (i=1; i<size; i++) { 3725 mend = browners[i+1] - browners[i]; 3726 if (i == size-1) mend = mend - extra_rows; 3727 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3728 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3729 /* calculate the number of nonzeros on each processor */ 3730 for (j=0; j<browners[i+1]-browners[i]; j++) { 3731 procsnz[i] += rowlengths[j]; 3732 } 3733 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3734 } 3735 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3736 } else { 3737 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3738 } 3739 3740 if (!rank) { 3741 /* determine max buffer needed and allocate it */ 3742 maxnz = procsnz[0]; 3743 for (i=1; i<size; i++) { 3744 maxnz = PetscMax(maxnz,procsnz[i]); 3745 } 3746 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3747 3748 /* read in my part of the matrix column indices */ 3749 nz = 
procsnz[0]; 3750 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3751 mycols = ibuf; 3752 if (size == 1) nz -= extra_rows; 3753 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3754 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 3755 3756 /* read in every ones (except the last) and ship off */ 3757 for (i=1; i<size-1; i++) { 3758 nz = procsnz[i]; 3759 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3760 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3761 } 3762 /* read in the stuff for the last proc */ 3763 if (size != 1) { 3764 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 3765 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3766 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3767 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3768 } 3769 ierr = PetscFree(cols);CHKERRQ(ierr); 3770 } else { 3771 /* determine buffer space needed for message */ 3772 nz = 0; 3773 for (i=0; i<m; i++) { 3774 nz += locrowlens[i]; 3775 } 3776 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3777 mycols = ibuf; 3778 /* receive message of column indices*/ 3779 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3780 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3781 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3782 } 3783 3784 /* loop over local rows, determining number of off diagonal entries */ 3785 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 3786 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 3787 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3788 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3789 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3790 rowcount = 0; nzcount = 0; 3791 for (i=0; 
i<mbs; i++) { 3792 dcount = 0; 3793 odcount = 0; 3794 for (j=0; j<bs; j++) { 3795 kmax = locrowlens[rowcount]; 3796 for (k=0; k<kmax; k++) { 3797 tmp = mycols[nzcount++]/bs; 3798 if (!mask[tmp]) { 3799 mask[tmp] = 1; 3800 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3801 else masked1[dcount++] = tmp; 3802 } 3803 } 3804 rowcount++; 3805 } 3806 3807 dlens[i] = dcount; 3808 odlens[i] = odcount; 3809 3810 /* zero out the mask elements we set */ 3811 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3812 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 3813 } 3814 3815 3816 if (!sizesset) { 3817 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3818 } 3819 ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3820 3821 if (!rank) { 3822 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3823 /* read in my part of the matrix numerical values */ 3824 nz = procsnz[0]; 3825 vals = buf; 3826 mycols = ibuf; 3827 if (size == 1) nz -= extra_rows; 3828 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3829 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 3830 3831 /* insert into matrix */ 3832 jj = rstart*bs; 3833 for (i=0; i<m; i++) { 3834 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3835 mycols += locrowlens[i]; 3836 vals += locrowlens[i]; 3837 jj++; 3838 } 3839 /* read in other processors (except the last one) and ship out */ 3840 for (i=1; i<size-1; i++) { 3841 nz = procsnz[i]; 3842 vals = buf; 3843 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3844 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3845 } 3846 /* the last proc */ 3847 if (size != 1){ 3848 nz = procsnz[i] - extra_rows; 3849 vals = buf; 3850 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3851 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3852 ierr = 
MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3853 } 3854 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3855 } else { 3856 /* receive numeric values */ 3857 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3858 3859 /* receive message of values*/ 3860 vals = buf; 3861 mycols = ibuf; 3862 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 3863 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 3864 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3865 3866 /* insert into matrix */ 3867 jj = rstart*bs; 3868 for (i=0; i<m; i++) { 3869 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3870 mycols += locrowlens[i]; 3871 vals += locrowlens[i]; 3872 jj++; 3873 } 3874 } 3875 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3876 ierr = PetscFree(buf);CHKERRQ(ierr); 3877 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3878 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 3879 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 3880 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 3881 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3882 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3883 3884 PetscFunctionReturn(0); 3885 } 3886 3887 #undef __FUNCT__ 3888 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 3889 /*@ 3890 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3891 3892 Input Parameters: 3893 . mat - the matrix 3894 . 
fact - factor 3895 3896 Not Collective, each process can use a different factor 3897 3898 Level: advanced 3899 3900 Notes: 3901 This can also be set by the command line option: -mat_use_hash_table <fact> 3902 3903 .keywords: matrix, hashtable, factor, HT 3904 3905 .seealso: MatSetOption() 3906 @*/ 3907 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3908 { 3909 PetscErrorCode ierr; 3910 3911 PetscFunctionBegin; 3912 ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr); 3913 PetscFunctionReturn(0); 3914 } 3915 3916 EXTERN_C_BEGIN 3917 #undef __FUNCT__ 3918 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 3919 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3920 { 3921 Mat_MPIBAIJ *baij; 3922 3923 PetscFunctionBegin; 3924 baij = (Mat_MPIBAIJ*)mat->data; 3925 baij->ht_fact = fact; 3926 PetscFunctionReturn(0); 3927 } 3928 EXTERN_C_END 3929 3930 #undef __FUNCT__ 3931 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 3932 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3933 { 3934 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3935 PetscFunctionBegin; 3936 *Ad = a->A; 3937 *Ao = a->B; 3938 *colmap = a->garray; 3939 PetscFunctionReturn(0); 3940 } 3941 3942 /* 3943 Special version for direct calls from Fortran (to eliminate two function call overheads 3944 */ 3945 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3946 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3947 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3948 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3949 #endif 3950 3951 #undef __FUNCT__ 3952 #define __FUNCT__ "matmpibiajsetvaluesblocked" 3953 /*@C 3954 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 3955 3956 Collective on Mat 3957 3958 Input Parameters: 3959 + mat - the matrix 3960 . min - number of input rows 3961 . im - input rows 3962 . nin - number of input columns 3963 . 
in - input columns 3964 . v - numerical values input 3965 - addvin - INSERT_VALUES or ADD_VALUES 3966 3967 Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse. 3968 3969 Level: advanced 3970 3971 .seealso: MatSetValuesBlocked() 3972 @*/ 3973 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3974 { 3975 /* convert input arguments to C version */ 3976 Mat mat = *matin; 3977 PetscInt m = *min, n = *nin; 3978 InsertMode addv = *addvin; 3979 3980 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3981 const MatScalar *value; 3982 MatScalar *barray=baij->barray; 3983 PetscBool roworiented = baij->roworiented; 3984 PetscErrorCode ierr; 3985 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3986 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3987 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3988 3989 PetscFunctionBegin; 3990 /* tasks normally handled by MatSetValuesBlocked() */ 3991 if (mat->insertmode == NOT_SET_VALUES) { 3992 mat->insertmode = addv; 3993 } 3994 #if defined(PETSC_USE_DEBUG) 3995 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3996 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3997 #endif 3998 if (mat->assembled) { 3999 mat->was_assembled = PETSC_TRUE; 4000 mat->assembled = PETSC_FALSE; 4001 } 4002 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 4003 4004 4005 if(!barray) { 4006 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 4007 baij->barray = barray; 4008 } 4009 4010 if (roworiented) { 4011 stepval = (n-1)*bs; 4012 } else { 4013 stepval = (m-1)*bs; 4014 } 4015 for (i=0; i<m; i++) { 4016 if (im[i] < 0) continue; 4017 #if defined(PETSC_USE_DEBUG) 4018 if (im[i] >= baij->Mbs) 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 4019 #endif 4020 if (im[i] >= rstart && im[i] < rend) { 4021 row = im[i] - rstart; 4022 for (j=0; j<n; j++) { 4023 /* If NumCol = 1 then a copy is not required */ 4024 if ((roworiented) && (n == 1)) { 4025 barray = (MatScalar*)v + i*bs2; 4026 } else if((!roworiented) && (m == 1)) { 4027 barray = (MatScalar*)v + j*bs2; 4028 } else { /* Here a copy is required */ 4029 if (roworiented) { 4030 value = v + i*(stepval+bs)*bs + j*bs; 4031 } else { 4032 value = v + j*(stepval+bs)*bs + i*bs; 4033 } 4034 for (ii=0; ii<bs; ii++,value+=stepval) { 4035 for (jj=0; jj<bs; jj++) { 4036 *barray++ = *value++; 4037 } 4038 } 4039 barray -=bs2; 4040 } 4041 4042 if (in[j] >= cstart && in[j] < cend){ 4043 col = in[j] - cstart; 4044 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 4045 } 4046 else if (in[j] < 0) continue; 4047 #if defined(PETSC_USE_DEBUG) 4048 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 4049 #endif 4050 else { 4051 if (mat->was_assembled) { 4052 if (!baij->colmap) { 4053 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 4054 } 4055 4056 #if defined(PETSC_USE_DEBUG) 4057 #if defined (PETSC_USE_CTABLE) 4058 { PetscInt data; 4059 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 4060 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4061 } 4062 #else 4063 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4064 #endif 4065 #endif 4066 #if defined (PETSC_USE_CTABLE) 4067 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 4068 col = (col - 1)/bs; 4069 #else 4070 col = (baij->colmap[in[j]] - 1)/bs; 4071 #endif 4072 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 4073 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 4074 col = in[j]; 4075 } 
4076 } 4077 else col = in[j]; 4078 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 4079 } 4080 } 4081 } else { 4082 if (!baij->donotstash) { 4083 if (roworiented) { 4084 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 4085 } else { 4086 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 4087 } 4088 } 4089 } 4090 } 4091 4092 /* task normally handled by MatSetValuesBlocked() */ 4093 ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 4094 PetscFunctionReturn(0); 4095 } 4096 4097 #undef __FUNCT__ 4098 #define __FUNCT__ "MatCreateMPIBAIJWithArrays" 4099 /*@ 4100 MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard 4101 CSR format the local rows. 4102 4103 Collective on MPI_Comm 4104 4105 Input Parameters: 4106 + comm - MPI communicator 4107 . bs - the block size, only a block size of 1 is supported 4108 . m - number of local rows (Cannot be PETSC_DECIDE) 4109 . n - This value should be the same as the local size used in creating the 4110 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4111 calculated if N is given) For square matrices n is almost always m. 4112 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4113 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4114 . i - row indices 4115 . j - column indices 4116 - a - matrix values 4117 4118 Output Parameter: 4119 . mat - the matrix 4120 4121 Level: intermediate 4122 4123 Notes: 4124 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4125 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4126 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 
4127 4128 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4129 4130 .keywords: matrix, aij, compressed row, sparse, parallel 4131 4132 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4133 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 4134 @*/ 4135 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4136 { 4137 PetscErrorCode ierr; 4138 4139 4140 PetscFunctionBegin; 4141 if (i[0]) { 4142 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4143 } 4144 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4145 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4146 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4147 ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 4148 ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 4149 PetscFunctionReturn(0); 4150 } 4151