1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <petscblaslapack.h> 4 5 extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat); 6 extern PetscErrorCode DisAssemble_MPIBAIJ(Mat); 7 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []); 8 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 9 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 10 extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 11 extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 12 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec); 13 14 #undef __FUNCT__ 15 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ" 16 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[]) 17 { 18 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 19 PetscErrorCode ierr; 20 PetscInt i,*idxb = 0; 21 PetscScalar *va,*vb; 22 Vec vtmp; 23 24 PetscFunctionBegin; 25 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 26 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 27 if (idx) { 28 for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;} 29 } 30 31 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 32 if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);} 33 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 34 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 35 36 for (i=0; i<A->rmap->n; i++){ 37 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);} 38 } 39 40 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 41 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 42 ierr = PetscFree(idxb);CHKERRQ(ierr); 43 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 44 PetscFunctionReturn(0); 45 } 46 47 EXTERN_C_BEGIN 48 #undef __FUNCT__ 49 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 50 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 51 { 52 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 53 PetscErrorCode ierr; 54 55 PetscFunctionBegin; 56 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 57 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 58 PetscFunctionReturn(0); 59 } 60 EXTERN_C_END 61 62 EXTERN_C_BEGIN 63 #undef __FUNCT__ 64 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 65 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 66 { 67 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 68 PetscErrorCode ierr; 69 70 PetscFunctionBegin; 71 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 72 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 73 PetscFunctionReturn(0); 74 } 75 EXTERN_C_END 76 77 /* 78 Local utility routine that creates a mapping from the global column 79 number to the local number in the off-diagonal part of the local 80 storage of the matrix. This is done in a non scalable way since the 81 length of colmap equals the global matrix length. 
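    When PETSC_USE_CTABLE is defined a PetscTable is used instead, and only the
    block columns that actually occur in the off-diagonal part are stored, so the
    memory use stays proportional to the number of local off-diagonal block columns.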
82 */ 83 #undef __FUNCT__ 84 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 85 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat) 86 { 87 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 88 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 89 PetscErrorCode ierr; 90 PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 91 92 PetscFunctionBegin; 93 #if defined (PETSC_USE_CTABLE) 94 ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 95 for (i=0; i<nbs; i++){ 96 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 97 } 98 #else 99 ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr); 100 ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 101 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 102 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 103 #endif 104 PetscFunctionReturn(0); 105 } 106 107 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 108 { \ 109 \ 110 brow = row/bs; \ 111 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 112 rmax = aimax[brow]; nrow = ailen[brow]; \ 113 bcol = col/bs; \ 114 ridx = row % bs; cidx = col % bs; \ 115 low = 0; high = nrow; \ 116 while (high-low > 3) { \ 117 t = (low+high)/2; \ 118 if (rp[t] > bcol) high = t; \ 119 else low = t; \ 120 } \ 121 for (_i=low; _i<high; _i++) { \ 122 if (rp[_i] > bcol) break; \ 123 if (rp[_i] == bcol) { \ 124 bap = ap + bs2*_i + bs*cidx + ridx; \ 125 if (addv == ADD_VALUES) *bap += value; \ 126 else *bap = value; \ 127 goto a_noinsert; \ 128 } \ 129 } \ 130 if (a->nonew == 1) goto a_noinsert; \ 131 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 132 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 133 N = nrow++ - 1; \ 134 /* shift up all the later entries in this row */ \ 135 for (ii=N; ii>=_i; ii--) { \ 136 rp[ii+1] = rp[ii]; \ 137 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 138 } \ 139 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 140 rp[_i] = bcol; \ 141 ap[bs2*_i + bs*cidx + ridx] = value; \ 142 a_noinsert:; \ 143 ailen[brow] = nrow; \ 144 } 145 146 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 147 { \ 148 brow = row/bs; \ 149 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 150 rmax = bimax[brow]; nrow = bilen[brow]; \ 151 bcol = col/bs; \ 152 ridx = row % bs; cidx = col % bs; \ 153 low = 0; high = nrow; \ 154 while (high-low > 3) { \ 155 t = (low+high)/2; \ 156 if (rp[t] > bcol) high = t; \ 157 else low = t; \ 158 } \ 159 for (_i=low; _i<high; _i++) { \ 160 if (rp[_i] > bcol) break; \ 161 if (rp[_i] == bcol) { \ 162 bap = ap + bs2*_i + bs*cidx + ridx; \ 163 if (addv == ADD_VALUES) *bap += value; \ 164 else *bap = value; \ 165 goto b_noinsert; \ 166 } \ 167 } \ 168 if (b->nonew == 1) goto b_noinsert; \ 169 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 170 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 171 CHKMEMQ;\ 172 N = nrow++ - 1; \ 173 /* shift up all the later entries in this row */ \ 174 for (ii=N; ii>=_i; ii--) { \ 175 rp[ii+1] = rp[ii]; \ 176 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 177 } \ 178 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 179 rp[_i] = bcol; \ 
180 ap[bs2*_i + bs*cidx + ridx] = value; \ 181 b_noinsert:; \ 182 bilen[brow] = nrow; \ 183 } 184 185 #undef __FUNCT__ 186 #define __FUNCT__ "MatSetValues_MPIBAIJ" 187 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 188 { 189 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 190 MatScalar value; 191 PetscBool roworiented = baij->roworiented; 192 PetscErrorCode ierr; 193 PetscInt i,j,row,col; 194 PetscInt rstart_orig=mat->rmap->rstart; 195 PetscInt rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart; 196 PetscInt cend_orig=mat->cmap->rend,bs=mat->rmap->bs; 197 198 /* Some Variables required in the macro */ 199 Mat A = baij->A; 200 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 201 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 202 MatScalar *aa=a->a; 203 204 Mat B = baij->B; 205 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 206 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 207 MatScalar *ba=b->a; 208 209 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 210 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 211 MatScalar *ap,*bap; 212 213 PetscFunctionBegin; 214 if (v) PetscValidScalarPointer(v,6); 215 for (i=0; i<m; i++) { 216 if (im[i] < 0) continue; 217 #if defined(PETSC_USE_DEBUG) 218 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 219 #endif 220 if (im[i] >= rstart_orig && im[i] < rend_orig) { 221 row = im[i] - rstart_orig; 222 for (j=0; j<n; j++) { 223 if (in[j] >= cstart_orig && in[j] < cend_orig){ 224 col = in[j] - cstart_orig; 225 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 226 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 227 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 228 } else if (in[j] < 0) continue; 229 #if defined(PETSC_USE_DEBUG) 230 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 231 #endif 232 else { 233 if (mat->was_assembled) { 234 if (!baij->colmap) { 235 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 236 } 237 #if defined (PETSC_USE_CTABLE) 238 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 239 col = col - 1; 240 #else 241 col = baij->colmap[in[j]/bs] - 1; 242 #endif 243 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 244 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 245 col = in[j]; 246 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 247 B = baij->B; 248 b = (Mat_SeqBAIJ*)(B)->data; 249 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 250 ba=b->a; 251 } else col += in[j]%bs; 252 } else col = in[j]; 253 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 254 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 255 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 256 } 257 } 258 } else { 259 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 260 if (!baij->donotstash) { 261 if (roworiented) { 262 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 263 } else { 264 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 265 } 266 } 267 } 268 } 269 PetscFunctionReturn(0); 270 } 271 272 #undef __FUNCT__ 273 #define __FUNCT__ 
"MatSetValuesBlocked_MPIBAIJ" 274 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 275 { 276 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 277 const PetscScalar *value; 278 MatScalar *barray=baij->barray; 279 PetscBool roworiented = baij->roworiented; 280 PetscErrorCode ierr; 281 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 282 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 283 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 284 285 PetscFunctionBegin; 286 if(!barray) { 287 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 288 baij->barray = barray; 289 } 290 291 if (roworiented) { 292 stepval = (n-1)*bs; 293 } else { 294 stepval = (m-1)*bs; 295 } 296 for (i=0; i<m; i++) { 297 if (im[i] < 0) continue; 298 #if defined(PETSC_USE_DEBUG) 299 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 300 #endif 301 if (im[i] >= rstart && im[i] < rend) { 302 row = im[i] - rstart; 303 for (j=0; j<n; j++) { 304 /* If NumCol = 1 then a copy is not required */ 305 if ((roworiented) && (n == 1)) { 306 barray = (MatScalar*)v + i*bs2; 307 } else if((!roworiented) && (m == 1)) { 308 barray = (MatScalar*)v + j*bs2; 309 } else { /* Here a copy is required */ 310 if (roworiented) { 311 value = v + (i*(stepval+bs) + j)*bs; 312 } else { 313 value = v + (j*(stepval+bs) + i)*bs; 314 } 315 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 316 for (jj=0; jj<bs; jj++) { 317 barray[jj] = value[jj]; 318 } 319 barray += bs; 320 } 321 barray -= bs2; 322 } 323 324 if (in[j] >= cstart && in[j] < cend){ 325 col = in[j] - cstart; 326 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 327 } 328 else if (in[j] < 0) continue; 329 #if defined(PETSC_USE_DEBUG) 330 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 331 #endif 332 else { 333 if (mat->was_assembled) { 334 if (!baij->colmap) { 335 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 336 } 337 338 #if defined(PETSC_USE_DEBUG) 339 #if defined (PETSC_USE_CTABLE) 340 { PetscInt data; 341 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 342 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 343 } 344 #else 345 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 346 #endif 347 #endif 348 #if defined (PETSC_USE_CTABLE) 349 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 350 col = (col - 1)/bs; 351 #else 352 col = (baij->colmap[in[j]] - 1)/bs; 353 #endif 354 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 355 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 356 col = in[j]; 357 } 358 } 359 else col = in[j]; 360 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 361 } 362 } 363 } else { 364 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 365 if (!baij->donotstash) { 366 if (roworiented) { 367 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 368 } else { 369 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 370 } 371 } 372 } 373 } 374 PetscFunctionReturn(0); 375 } 376 377 #define HASH_KEY 0.6180339887 378 
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs)) = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,ii,jj,row,col;
  PetscInt       rstart=baij->rstartbs;
  PetscInt       rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt
h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 471 PetscReal tmp; 472 MatScalar **HD = baij->hd,*baij_a; 473 const PetscScalar *v_t,*value; 474 #if defined(PETSC_USE_DEBUG) 475 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 476 #endif 477 478 PetscFunctionBegin; 479 480 if (roworiented) { 481 stepval = (n-1)*bs; 482 } else { 483 stepval = (m-1)*bs; 484 } 485 for (i=0; i<m; i++) { 486 #if defined(PETSC_USE_DEBUG) 487 if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 488 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 489 #endif 490 row = im[i]; 491 v_t = v + i*nbs2; 492 if (row >= rstart && row < rend) { 493 for (j=0; j<n; j++) { 494 col = in[j]; 495 496 /* Look up into the Hash Table */ 497 key = row*Nbs+col+1; 498 h1 = HASH(size,key,tmp); 499 500 idx = h1; 501 #if defined(PETSC_USE_DEBUG) 502 total_ct++; 503 insert_ct++; 504 if (HT[idx] != key) { 505 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 506 if (idx == size) { 507 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 508 if (idx == h1) { 509 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 510 } 511 } 512 } 513 #else 514 if (HT[idx] != key) { 515 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 516 if (idx == size) { 517 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 518 if (idx == h1) { 519 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 520 } 521 } 522 } 523 #endif 524 baij_a = HD[idx]; 525 if (roworiented) { 526 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 527 /* value = v + (i*(stepval+bs)+j)*bs; */ 528 value = v_t; 529 v_t += bs; 530 if (addv == ADD_VALUES) { 531 for (ii=0; ii<bs; ii++,value+=stepval) { 532 for (jj=ii; jj<bs2; jj+=bs) { 533 baij_a[jj] += *value++; 534 } 535 } 536 } else { 537 for (ii=0; ii<bs; ii++,value+=stepval) { 538 for (jj=ii; jj<bs2; jj+=bs) { 539 baij_a[jj] = *value++; 540 } 541 } 542 } 543 } else { 544 value = v + j*(stepval+bs)*bs + i*bs; 545 if (addv == ADD_VALUES) { 546 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 547 for (jj=0; jj<bs; jj++) { 548 baij_a[jj] += *value++; 549 } 550 } 551 } else { 552 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 553 for (jj=0; jj<bs; jj++) { 554 baij_a[jj] = *value++; 555 } 556 } 557 } 558 } 559 } 560 } else { 561 if (!baij->donotstash) { 562 if (roworiented) { 563 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 564 } else { 565 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 566 } 567 } 568 } 569 } 570 #if defined(PETSC_USE_DEBUG) 571 baij->ht_total_ct = total_ct; 572 baij->ht_insert_ct = insert_ct; 573 #endif 574 PetscFunctionReturn(0); 575 } 576 577 #undef __FUNCT__ 578 #define __FUNCT__ "MatGetValues_MPIBAIJ" 579 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 580 { 581 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 582 PetscErrorCode ierr; 583 PetscInt bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 584 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 585 586 PetscFunctionBegin; 587 for (i=0; i<m; i++) { 588 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 589 if (idxm[i] >= mat->rmap->N) 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 590 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 591 row = idxm[i] - bsrstart; 592 for (j=0; j<n; j++) { 593 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 594 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 595 if (idxn[j] >= bscstart && idxn[j] < bscend){ 596 col = idxn[j] - bscstart; 597 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 598 } else { 599 if (!baij->colmap) { 600 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 601 } 602 #if defined (PETSC_USE_CTABLE) 603 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 604 data --; 605 #else 606 data = baij->colmap[idxn[j]/bs]-1; 607 #endif 608 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 609 else { 610 col = data + idxn[j]%bs; 611 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 612 } 613 } 614 } 615 } else { 616 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 617 } 618 } 619 PetscFunctionReturn(0); 620 } 621 622 #undef __FUNCT__ 623 #define __FUNCT__ "MatNorm_MPIBAIJ" 624 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 625 { 626 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 627 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 628 PetscErrorCode ierr; 629 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 630 PetscReal sum = 0.0; 631 MatScalar *v; 632 633 PetscFunctionBegin; 634 if (baij->size == 1) { 635 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 636 } else { 637 if (type == NORM_FROBENIUS) { 638 v = amat->a; 639 nz = amat->nz*bs2; 640 for (i=0; i<nz; i++) { 641 #if defined(PETSC_USE_COMPLEX) 642 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 643 #else 644 sum += (*v)*(*v); v++; 645 #endif 646 } 647 v = bmat->a; 648 nz = bmat->nz*bs2; 649 for (i=0; i<nz; i++) { 650 #if defined(PETSC_USE_COMPLEX) 651 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 652 #else 653 sum += (*v)*(*v); v++; 654 #endif 655 } 656 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 657 *nrm = sqrt(*nrm); 658 } else if (type == NORM_1) { /* max column sum */ 659 PetscReal *tmp,*tmp2; 660 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 661 ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr); 662 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 663 v = amat->a; jj = amat->j; 664 for (i=0; i<amat->nz; i++) { 665 for (j=0; j<bs; j++){ 666 col = bs*(cstart + *jj) + j; /* column index */ 667 for (row=0; row<bs; row++){ 668 tmp[col] += PetscAbsScalar(*v); v++; 669 } 670 } 671 jj++; 672 } 673 v = bmat->a; jj = bmat->j; 674 for (i=0; i<bmat->nz; i++) { 675 for (j=0; j<bs; j++){ 676 col = bs*garray[*jj] + j; 677 for (row=0; row<bs; row++){ 678 tmp[col] += PetscAbsScalar(*v); v++; 679 } 680 } 681 jj++; 682 } 683 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 684 *nrm = 0.0; 685 for (j=0; j<mat->cmap->N; j++) { 686 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 687 } 688 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 689 } else if (type == NORM_INFINITY) { /* max row sum */ 690 PetscReal *sums; 691 ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr); 692 
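      /*
         Row sums: each block row contributes bs scalar rows.  Values inside a
         bs x bs block are stored column-major (row index varies fastest), so
         the loops below accumulate |a_ij| into sums[row] for the diagonal (A)
         and off-diagonal (B) parts; the largest local row sum is then reduced
         with MPI_MAX across all processes.
      */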
      sum = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++){
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

/*
   Creates and fills the hash table.
   This table is created only once.
   If new entries need to be added to the matrix
   then the hash table has to be destroyed and
   recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A     = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt       ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
  ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++){
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++){
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) {j++;}
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f, max search = %D\n",(!j)?
0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 816 #endif 817 PetscFunctionReturn(0); 818 } 819 820 #undef __FUNCT__ 821 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 822 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 823 { 824 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 825 PetscErrorCode ierr; 826 PetscInt nstash,reallocs; 827 InsertMode addv; 828 829 PetscFunctionBegin; 830 if (baij->donotstash || mat->nooffprocentries) { 831 PetscFunctionReturn(0); 832 } 833 834 /* make sure all processors are either in INSERTMODE or ADDMODE */ 835 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 836 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 837 mat->insertmode = addv; /* in case this processor had no cache */ 838 839 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 840 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 841 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 842 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 843 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 844 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 845 PetscFunctionReturn(0); 846 } 847 848 #undef __FUNCT__ 849 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 850 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 851 { 852 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 853 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data; 854 PetscErrorCode ierr; 855 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 856 PetscInt *row,*col; 857 PetscBool r1,r2,r3,other_disassembled; 858 MatScalar *val; 859 InsertMode addv = mat->insertmode; 860 PetscMPIInt n; 861 862 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 863 PetscFunctionBegin; 864 if (!baij->donotstash && !mat->nooffprocentries) { 865 while (1) { 866 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 867 if (!flg) break; 868 869 for (i=0; i<n;) { 870 /* Now identify the consecutive vals belonging to the same row */ 871 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 872 if (j < n) ncols = j-i; 873 else ncols = n-i; 874 /* Now assemble all these values with a single function call */ 875 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 876 i = j; 877 } 878 } 879 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 880 /* Now process the block-stash. 
       Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, and thus we can skip this step
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format ==
PETSC_VIEWER_ASCII_INFO_DETAIL) { 967 MatInfo info; 968 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 969 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 970 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 971 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 972 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 973 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 974 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 975 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 976 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 977 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 978 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 979 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 980 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 981 PetscFunctionReturn(0); 982 } else if (format == PETSC_VIEWER_ASCII_INFO) { 983 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 984 PetscFunctionReturn(0); 985 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 986 PetscFunctionReturn(0); 987 } 988 } 989 990 if (isdraw) { 991 PetscDraw draw; 992 PetscBool isnull; 993 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 994 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 995 } 996 997 if (size == 1) { 998 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 999 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1000 } else { 1001 /* assemble the entire matrix onto first processor. */ 1002 Mat A; 1003 Mat_SeqBAIJ *Aloc; 1004 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1005 MatScalar *a; 1006 1007 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1008 /* Perhaps this should be the type of mat? 
*/ 1009 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1010 if (!rank) { 1011 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1012 } else { 1013 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1014 } 1015 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1016 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1017 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1018 1019 /* copy over the A part */ 1020 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1021 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1022 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1023 1024 for (i=0; i<mbs; i++) { 1025 rvals[0] = bs*(baij->rstartbs + i); 1026 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1027 for (j=ai[i]; j<ai[i+1]; j++) { 1028 col = (baij->cstartbs+aj[j])*bs; 1029 for (k=0; k<bs; k++) { 1030 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1031 col++; a += bs; 1032 } 1033 } 1034 } 1035 /* copy over the B part */ 1036 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1037 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1038 for (i=0; i<mbs; i++) { 1039 rvals[0] = bs*(baij->rstartbs + i); 1040 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1041 for (j=ai[i]; j<ai[i+1]; j++) { 1042 col = baij->garray[aj[j]]*bs; 1043 for (k=0; k<bs; k++) { 1044 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1045 col++; a += bs; 1046 } 1047 } 1048 } 1049 ierr = PetscFree(rvals);CHKERRQ(ierr); 1050 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1051 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1052 /* 1053 Everyone has to call to draw the matrix since the graphics waits are 1054 synchronized across all processors that share the PetscDraw object 1055 */ 1056 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1057 if (!rank) { 1058 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1059 /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/ 1060 PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ); 1061 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1062 } 1063 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1064 ierr = MatDestroy(&A);CHKERRQ(ierr); 1065 } 1066 PetscFunctionReturn(0); 1067 } 1068 1069 #undef __FUNCT__ 1070 #define __FUNCT__ "MatView_MPIBAIJ_Binary" 1071 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 1072 { 1073 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)mat->data; 1074 Mat_SeqBAIJ* A = (Mat_SeqBAIJ*)a->A->data; 1075 Mat_SeqBAIJ* B = (Mat_SeqBAIJ*)a->B->data; 1076 PetscErrorCode ierr; 1077 PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen; 1078 PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll; 1079 int fd; 1080 PetscScalar *column_values; 1081 FILE *file; 1082 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1083 PetscInt message_count,flowcontrolcount; 1084 1085 PetscFunctionBegin; 1086 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1087 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1088 nz = bs2*(A->nz + B->nz); 1089 rlen = mat->rmap->n; 1090 if (!rank) { 1091 header[0] = MAT_FILE_CLASSID; 1092 header[1] = mat->rmap->N; 1093 header[2] = mat->cmap->N; 1094 ierr = 
MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1095 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1096 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1097 /* get largest number of rows any processor has */ 1098 range = mat->rmap->range; 1099 for (i=1; i<size; i++) { 1100 rlen = PetscMax(rlen,range[i+1] - range[i]); 1101 } 1102 } else { 1103 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1104 } 1105 1106 ierr = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr); 1107 /* compute lengths of each row */ 1108 for (i=0; i<a->mbs; i++) { 1109 crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1110 } 1111 /* store the row lengths to the file */ 1112 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1113 if (!rank) { 1114 MPI_Status status; 1115 ierr = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr); 1116 rlen = (range[1] - range[0])/bs; 1117 for (i=0; i<rlen; i++) { 1118 for (j=0; j<bs; j++) { 1119 row_lens[i*bs+j] = bs*crow_lens[i]; 1120 } 1121 } 1122 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1123 for (i=1; i<size; i++) { 1124 rlen = (range[i+1] - range[i])/bs; 1125 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1126 ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1127 for (k=0; k<rlen; k++) { 1128 for (j=0; j<bs; j++) { 1129 row_lens[k*bs+j] = bs*crow_lens[k]; 1130 } 1131 } 1132 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1133 } 1134 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1135 ierr = PetscFree(row_lens);CHKERRQ(ierr); 1136 } else { 1137 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1138 ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1139 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1140 } 1141 ierr = PetscFree(crow_lens);CHKERRQ(ierr); 1142 1143 /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the 1144 information needed to make it for each row from a block row. 
This does require more communication but still not more than 1145 the communication needed for the nonzero values */ 1146 nzmax = nz; /* space a largest processor needs */ 1147 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1148 ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1149 cnt = 0; 1150 for (i=0; i<a->mbs; i++) { 1151 pcnt = cnt; 1152 for (j=B->i[i]; j<B->i[i+1]; j++) { 1153 if ( (col = garray[B->j[j]]) > cstart) break; 1154 for (l=0; l<bs; l++) { 1155 column_indices[cnt++] = bs*col+l; 1156 } 1157 } 1158 for (k=A->i[i]; k<A->i[i+1]; k++) { 1159 for (l=0; l<bs; l++) { 1160 column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 1161 } 1162 } 1163 for (; j<B->i[i+1]; j++) { 1164 for (l=0; l<bs; l++) { 1165 column_indices[cnt++] = bs*garray[B->j[j]]+l; 1166 } 1167 } 1168 len = cnt - pcnt; 1169 for (k=1; k<bs; k++) { 1170 ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr); 1171 cnt += len; 1172 } 1173 } 1174 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1175 1176 /* store the columns to the file */ 1177 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1178 if (!rank) { 1179 MPI_Status status; 1180 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1181 for (i=1; i<size; i++) { 1182 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1183 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1184 ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1185 ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1186 } 1187 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1188 } else { 1189 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1190 ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1191 ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1192 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1193 } 1194 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1195 1196 /* load up the numerical values */ 1197 ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1198 cnt = 0; 1199 for (i=0; i<a->mbs; i++) { 1200 rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1201 for (j=B->i[i]; j<B->i[i+1]; j++) { 1202 if ( garray[B->j[j]] > cstart) break; 1203 for (l=0; l<bs; l++) { 1204 for (ll=0; ll<bs; ll++) { 1205 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1206 } 1207 } 1208 cnt += bs; 1209 } 1210 for (k=A->i[i]; k<A->i[i+1]; k++) { 1211 for (l=0; l<bs; l++) { 1212 for (ll=0; ll<bs; ll++) { 1213 column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1214 } 1215 } 1216 cnt += bs; 1217 } 1218 for (; j<B->i[i+1]; j++) { 1219 for (l=0; l<bs; l++) { 1220 for (ll=0; ll<bs; ll++) { 1221 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1222 } 1223 } 1224 cnt += bs; 1225 } 1226 cnt += (bs-1)*rlen; 1227 } 1228 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1229 1230 /* store the column values to the file */ 1231 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1232 if (!rank) { 1233 MPI_Status status; 1234 ierr = 
PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1235 for (i=1; i<size; i++) { 1236 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1237 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1238 ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1239 ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1240 } 1241 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1242 } else { 1243 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1244 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1245 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1246 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1247 } 1248 ierr = PetscFree(column_values);CHKERRQ(ierr); 1249 1250 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1251 if (file) { 1252 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1253 } 1254 PetscFunctionReturn(0); 1255 } 1256 1257 #undef __FUNCT__ 1258 #define __FUNCT__ "MatView_MPIBAIJ" 1259 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1260 { 1261 PetscErrorCode ierr; 1262 PetscBool iascii,isdraw,issocket,isbinary; 1263 1264 PetscFunctionBegin; 1265 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1266 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1267 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1268 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1269 if (iascii || isdraw || issocket) { 1270 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1271 } else if (isbinary) { 1272 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1273 } else { 1274 SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1275 } 1276 PetscFunctionReturn(0); 1277 } 1278 1279 #undef __FUNCT__ 1280 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1281 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1282 { 1283 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1284 PetscErrorCode ierr; 1285 1286 PetscFunctionBegin; 1287 #if defined(PETSC_USE_LOG) 1288 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1289 #endif 1290 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1291 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1292 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 1293 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1294 #if defined (PETSC_USE_CTABLE) 1295 if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);} 1296 #else 1297 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1298 #endif 1299 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1300 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 1301 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1302 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1303 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1304 ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr); 1305 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1306 ierr = PetscFree(baij);CHKERRQ(ierr); 1307 1308 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1309 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr =
VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1380 /* values actually were received in the Begin() but we need to call this nop */ 1381 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1382 } 1383 PetscFunctionReturn(0); 1384 } 1385 1386 #undef __FUNCT__ 1387 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1388 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1389 { 1390 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1391 PetscErrorCode ierr; 1392 1393 PetscFunctionBegin; 1394 /* do nondiagonal part */ 1395 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1396 /* send it on its way */ 1397 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1398 /* do local part */ 1399 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1400 /* receive remote parts: note this assumes the values are not actually */ 1401 /* inserted in yy until the next line, which is true for my implementation*/ 1402 /* but is not perhaps always true. */ 1403 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1404 PetscFunctionReturn(0); 1405 } 1406 1407 /* 1408 This only works correctly for square matrices where the subblock A->A is the 1409 diagonal block 1410 */ 1411 #undef __FUNCT__ 1412 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1413 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1414 { 1415 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1416 PetscErrorCode ierr; 1417 1418 PetscFunctionBegin; 1419 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1420 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1421 PetscFunctionReturn(0); 1422 } 1423 1424 #undef __FUNCT__ 1425 #define __FUNCT__ "MatScale_MPIBAIJ" 1426 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1427 { 1428 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1429 PetscErrorCode ierr; 1430 1431 PetscFunctionBegin; 1432 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1433 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1434 PetscFunctionReturn(0); 1435 } 1436 1437 #undef __FUNCT__ 1438 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1439 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1440 { 1441 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1442 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1443 PetscErrorCode ierr; 1444 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1445 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1446 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1447 1448 PetscFunctionBegin; 1449 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1450 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1451 mat->getrowactive = PETSC_TRUE; 1452 1453 if (!mat->rowvalues && (idx || v)) { 1454 /* 1455 allocate enough space to hold information from the longest row. 
1456 */ 1457 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1458 PetscInt max = 1,mbs = mat->mbs,tmp; 1459 for (i=0; i<mbs; i++) { 1460 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1461 if (max < tmp) { max = tmp; } 1462 } 1463 ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1464 } 1465 lrow = row - brstart; 1466 1467 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1468 if (!v) {pvA = 0; pvB = 0;} 1469 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1470 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1471 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1472 nztot = nzA + nzB; 1473 1474 cmap = mat->garray; 1475 if (v || idx) { 1476 if (nztot) { 1477 /* Sort by increasing column numbers, assuming A and B already sorted */ 1478 PetscInt imark = -1; 1479 if (v) { 1480 *v = v_p = mat->rowvalues; 1481 for (i=0; i<nzB; i++) { 1482 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1483 else break; 1484 } 1485 imark = i; 1486 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1487 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1488 } 1489 if (idx) { 1490 *idx = idx_p = mat->rowindices; 1491 if (imark > -1) { 1492 for (i=0; i<imark; i++) { 1493 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1494 } 1495 } else { 1496 for (i=0; i<nzB; i++) { 1497 if (cmap[cworkB[i]/bs] < cstart) 1498 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1499 else break; 1500 } 1501 imark = i; 1502 } 1503 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1504 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1505 } 1506 } else { 1507 if (idx) *idx = 0; 1508 if (v) *v = 0; 1509 } 1510 } 1511 *nz = nztot; 1512 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1513 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1514 PetscFunctionReturn(0); 1515 } 1516 1517 #undef __FUNCT__ 1518 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1519 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1520 { 1521 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1522 1523 PetscFunctionBegin; 1524 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1525 baij->getrowactive = PETSC_FALSE; 1526 PetscFunctionReturn(0); 1527 } 1528 1529 #undef __FUNCT__ 1530 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1531 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1532 { 1533 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1534 PetscErrorCode ierr; 1535 1536 PetscFunctionBegin; 1537 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1538 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1539 PetscFunctionReturn(0); 1540 } 1541 1542 #undef __FUNCT__ 1543 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1544 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1545 { 1546 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1547 Mat A = a->A,B = a->B; 1548 PetscErrorCode ierr; 1549 PetscReal isend[5],irecv[5]; 1550 1551 PetscFunctionBegin; 1552 info->block_size = (PetscReal)matin->rmap->bs; 1553 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1554 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1555 isend[3] = info->memory; isend[4] = info->mallocs; 1556 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1557 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1558 isend[3] += 
info->memory; isend[4] += info->mallocs; 1559 if (flag == MAT_LOCAL) { 1560 info->nz_used = isend[0]; 1561 info->nz_allocated = isend[1]; 1562 info->nz_unneeded = isend[2]; 1563 info->memory = isend[3]; 1564 info->mallocs = isend[4]; 1565 } else if (flag == MAT_GLOBAL_MAX) { 1566 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1567 info->nz_used = irecv[0]; 1568 info->nz_allocated = irecv[1]; 1569 info->nz_unneeded = irecv[2]; 1570 info->memory = irecv[3]; 1571 info->mallocs = irecv[4]; 1572 } else if (flag == MAT_GLOBAL_SUM) { 1573 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1574 info->nz_used = irecv[0]; 1575 info->nz_allocated = irecv[1]; 1576 info->nz_unneeded = irecv[2]; 1577 info->memory = irecv[3]; 1578 info->mallocs = irecv[4]; 1579 } else { 1580 SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1581 } 1582 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1583 info->fill_ratio_needed = 0; 1584 info->factor_mallocs = 0; 1585 PetscFunctionReturn(0); 1586 } 1587 1588 #undef __FUNCT__ 1589 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1590 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1591 { 1592 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1593 PetscErrorCode ierr; 1594 1595 PetscFunctionBegin; 1596 switch (op) { 1597 case MAT_NEW_NONZERO_LOCATIONS: 1598 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1599 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1600 case MAT_KEEP_NONZERO_PATTERN: 1601 case MAT_NEW_NONZERO_LOCATION_ERR: 1602 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1603 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1604 break; 1605 case MAT_ROW_ORIENTED: 1606 a->roworiented = flg; 1607 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1608 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1609 break; 1610 case MAT_NEW_DIAGONALS: 1611 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1612 break; 1613 case MAT_IGNORE_OFF_PROC_ENTRIES: 1614 a->donotstash = flg; 1615 break; 1616 case MAT_USE_HASH_TABLE: 1617 a->ht_flag = flg; 1618 break; 1619 case MAT_SYMMETRIC: 1620 case MAT_STRUCTURALLY_SYMMETRIC: 1621 case MAT_HERMITIAN: 1622 case MAT_SYMMETRY_ETERNAL: 1623 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1624 break; 1625 default: 1626 SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op); 1627 } 1628 PetscFunctionReturn(0); 1629 } 1630 1631 #undef __FUNCT__ 1632 #define __FUNCT__ "MatTranspose_MPIBAIJ" 1633 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1634 { 1635 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1636 Mat_SeqBAIJ *Aloc; 1637 Mat B; 1638 PetscErrorCode ierr; 1639 PetscInt M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1640 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1641 MatScalar *a; 1642 1643 PetscFunctionBegin; 1644 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1645 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1646 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1647 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1648 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1649 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1650 } else { 1651 B = *matout; 1652 } 1653 1654 /* copy over the A part */ 1655 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1656 ai = Aloc->i; aj = 
Aloc->j; a = Aloc->a; 1657 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1658 1659 for (i=0; i<mbs; i++) { 1660 rvals[0] = bs*(baij->rstartbs + i); 1661 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1662 for (j=ai[i]; j<ai[i+1]; j++) { 1663 col = (baij->cstartbs+aj[j])*bs; 1664 for (k=0; k<bs; k++) { 1665 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1666 col++; a += bs; 1667 } 1668 } 1669 } 1670 /* copy over the B part */ 1671 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1672 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1673 for (i=0; i<mbs; i++) { 1674 rvals[0] = bs*(baij->rstartbs + i); 1675 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1676 for (j=ai[i]; j<ai[i+1]; j++) { 1677 col = baij->garray[aj[j]]*bs; 1678 for (k=0; k<bs; k++) { 1679 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1680 col++; a += bs; 1681 } 1682 } 1683 } 1684 ierr = PetscFree(rvals);CHKERRQ(ierr); 1685 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1686 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1687 1688 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1689 *matout = B; 1690 } else { 1691 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1692 } 1693 PetscFunctionReturn(0); 1694 } 1695 1696 #undef __FUNCT__ 1697 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1698 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1699 { 1700 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1701 Mat a = baij->A,b = baij->B; 1702 PetscErrorCode ierr; 1703 PetscInt s1,s2,s3; 1704 1705 PetscFunctionBegin; 1706 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1707 if (rr) { 1708 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1709 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1710 /* Overlap communication with computation. 
*/ 1711 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1712 } 1713 if (ll) { 1714 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1715 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1716 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1717 } 1718 /* scale the diagonal block */ 1719 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1720 1721 if (rr) { 1722 /* Do a scatter end and then right scale the off-diagonal block */ 1723 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1724 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1725 } 1726 1727 PetscFunctionReturn(0); 1728 } 1729 1730 #undef __FUNCT__ 1731 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1732 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1733 { 1734 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1735 PetscErrorCode ierr; 1736 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1737 PetscInt i,*owners = A->rmap->range; 1738 PetscInt *nprocs,j,idx,nsends,row; 1739 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1740 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1; 1741 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap->rstart; 1742 MPI_Comm comm = ((PetscObject)A)->comm; 1743 MPI_Request *send_waits,*recv_waits; 1744 MPI_Status recv_status,*send_status; 1745 const PetscScalar *xx; 1746 PetscScalar *bb; 1747 #if defined(PETSC_DEBUG) 1748 PetscBool found = PETSC_FALSE; 1749 #endif 1750 1751 PetscFunctionBegin; 1752 /* first count number of contributors to each processor */ 1753 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1754 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1755 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1756 j = 0; 1757 for (i=0; i<N; i++) { 1758 if (lastidx > (idx = rows[i])) j = 0; 1759 lastidx = idx; 1760 for (; j<size; j++) { 1761 if (idx >= owners[j] && idx < owners[j+1]) { 1762 nprocs[2*j]++; 1763 nprocs[2*j+1] = 1; 1764 owner[i] = j; 1765 #if defined(PETSC_DEBUG) 1766 found = PETSC_TRUE; 1767 #endif 1768 break; 1769 } 1770 } 1771 #if defined(PETSC_DEBUG) 1772 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1773 found = PETSC_FALSE; 1774 #endif 1775 } 1776 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1777 1778 if (A->nooffproczerorows) { 1779 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 1780 nrecvs = nsends; 1781 nmax = N; 1782 } else { 1783 /* inform other processors of number of messages and max length*/ 1784 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1785 } 1786 1787 /* post receives: */ 1788 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1789 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1790 for (i=0; i<nrecvs; i++) { 1791 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1792 } 1793 1794 /* do sends: 1795 1) starts[i] gives the starting index in svalues for stuff going to 1796 the ith processor 1797 */ 1798 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1799 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1800 ierr = 
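/*
   Worked example (illustration only, not from the original source) of the packing performed below in
   MatZeroRows_MPIBAIJ(): with 3 processes and nprocs[2*j] = {2,0,1} rows destined for ranks 0, 1, 2,
   the prefix sum gives starts[] = {0,2,2}; the two rows owned by rank 0 are packed into svalues[0..1]
   and the row owned by rank 2 into svalues[2], and one MPI_Isend() is posted only for destinations
   whose flag nprocs[2*j+1] is set.
*/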
PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1801 starts[0] = 0; 1802 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1803 for (i=0; i<N; i++) { 1804 svalues[starts[owner[i]]++] = rows[i]; 1805 } 1806 1807 starts[0] = 0; 1808 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1809 count = 0; 1810 for (i=0; i<size; i++) { 1811 if (nprocs[2*i+1]) { 1812 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1813 } 1814 } 1815 ierr = PetscFree(starts);CHKERRQ(ierr); 1816 1817 base = owners[rank]; 1818 1819 /* wait on receives */ 1820 ierr = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr); 1821 count = nrecvs; 1822 slen = 0; 1823 while (count) { 1824 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1825 /* unpack receives into our local space */ 1826 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1827 source[imdex] = recv_status.MPI_SOURCE; 1828 lens[imdex] = n; 1829 slen += n; 1830 count--; 1831 } 1832 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1833 1834 /* move the data into the send scatter */ 1835 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1836 count = 0; 1837 for (i=0; i<nrecvs; i++) { 1838 values = rvalues + i*nmax; 1839 for (j=0; j<lens[i]; j++) { 1840 lrows[count++] = values[j] - base; 1841 } 1842 } 1843 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1844 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1845 ierr = PetscFree(owner);CHKERRQ(ierr); 1846 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1847 1848 /* fix right hand side if needed */ 1849 if (x && b) { 1850 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1851 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1852 for (i=0; i<slen; i++) { 1853 bb[lrows[i]] = diag*xx[lrows[i]]; 1854 } 1855 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1856 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1857 } 1858 1859 /* actually zap the local rows */ 1860 /* 1861 Zero the required rows. If the "diagonal block" of the matrix 1862 is square and the user wishes to set the diagonal we use separate 1863 code so that MatSetValues() is not called for each diagonal allocating 1864 new memory, thus calling lots of mallocs and slowing things down. 
1865 1866 */ 1867 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1868 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1869 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1870 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 1871 } else if (diag != 0.0) { 1872 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1873 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1874 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1875 for (i=0; i<slen; i++) { 1876 row = lrows[i] + rstart_bs; 1877 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1878 } 1879 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1880 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1881 } else { 1882 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1883 } 1884 1885 ierr = PetscFree(lrows);CHKERRQ(ierr); 1886 1887 /* wait on sends */ 1888 if (nsends) { 1889 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1890 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1891 ierr = PetscFree(send_status);CHKERRQ(ierr); 1892 } 1893 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1894 ierr = PetscFree(svalues);CHKERRQ(ierr); 1895 1896 PetscFunctionReturn(0); 1897 } 1898 1899 #undef __FUNCT__ 1900 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1901 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1902 { 1903 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1904 PetscErrorCode ierr; 1905 1906 PetscFunctionBegin; 1907 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1908 PetscFunctionReturn(0); 1909 } 1910 1911 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1912 1913 #undef __FUNCT__ 1914 #define __FUNCT__ "MatEqual_MPIBAIJ" 1915 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1916 { 1917 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1918 Mat a,b,c,d; 1919 PetscBool flg; 1920 PetscErrorCode ierr; 1921 1922 PetscFunctionBegin; 1923 a = matA->A; b = matA->B; 1924 c = matB->A; d = matB->B; 1925 1926 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1927 if (flg) { 1928 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1929 } 1930 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1931 PetscFunctionReturn(0); 1932 } 1933 1934 #undef __FUNCT__ 1935 #define __FUNCT__ "MatCopy_MPIBAIJ" 1936 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1937 { 1938 PetscErrorCode ierr; 1939 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1940 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1941 1942 PetscFunctionBegin; 1943 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 1944 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1945 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1946 } else { 1947 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1948 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1949 } 1950 PetscFunctionReturn(0); 1951 } 1952 1953 #undef __FUNCT__ 1954 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1955 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1956 { 1957 PetscErrorCode ierr; 1958 1959 PetscFunctionBegin; 1960 ierr = MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1961 PetscFunctionReturn(0); 1962 } 1963 1964 #undef __FUNCT__ 1965 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1966 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1967 { 1968 PetscErrorCode ierr; 1969 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1970 PetscBLASInt bnz,one=1; 1971 Mat_SeqBAIJ *x,*y; 1972 1973 PetscFunctionBegin; 1974 if (str == SAME_NONZERO_PATTERN) { 1975 PetscScalar alpha = a; 1976 x = (Mat_SeqBAIJ *)xx->A->data; 1977 y = (Mat_SeqBAIJ *)yy->A->data; 1978 bnz = PetscBLASIntCast(x->nz); 1979 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1980 x = (Mat_SeqBAIJ *)xx->B->data; 1981 y = (Mat_SeqBAIJ *)yy->B->data; 1982 bnz = PetscBLASIntCast(x->nz); 1983 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1984 } else { 1985 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1986 } 1987 PetscFunctionReturn(0); 1988 } 1989 1990 #undef __FUNCT__ 1991 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ" 1992 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs) 1993 { 1994 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1995 PetscInt rbs,cbs; 1996 PetscErrorCode ierr; 1997 1998 PetscFunctionBegin; 1999 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 2000 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 2001 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 2002 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 2003 if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs); 2004 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs); 2005 PetscFunctionReturn(0); 2006 } 2007 2008 #undef __FUNCT__ 2009 #define __FUNCT__ "MatRealPart_MPIBAIJ" 2010 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 2011 { 2012 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2013 PetscErrorCode ierr; 2014 2015 PetscFunctionBegin; 2016 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2017 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2018 PetscFunctionReturn(0); 2019 } 2020 2021 #undef __FUNCT__ 2022 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 2023 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 2024 { 2025 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2026 PetscErrorCode ierr; 2027 2028 PetscFunctionBegin; 2029 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2030 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2031 PetscFunctionReturn(0); 2032 } 2033 2034 #undef __FUNCT__ 2035 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 2036 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 2037 { 2038 PetscErrorCode ierr; 2039 IS iscol_local; 2040 PetscInt csize; 2041 2042 PetscFunctionBegin; 2043 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2044 if (call == MAT_REUSE_MATRIX) { 2045 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 2046 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used 
before, cannot reuse"); 2047 } else { 2048 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2049 } 2050 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 2051 if (call == MAT_INITIAL_MATRIX) { 2052 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 2053 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 2054 } 2055 PetscFunctionReturn(0); 2056 } 2057 2058 #undef __FUNCT__ 2059 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private" 2060 /* 2061 Not great since it makes two copies of the submatrix, first an SeqBAIJ 2062 in local and then by concatenating the local matrices the end result. 2063 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ() 2064 */ 2065 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2066 { 2067 PetscErrorCode ierr; 2068 PetscMPIInt rank,size; 2069 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 2070 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2071 Mat *local,M,Mreuse; 2072 MatScalar *vwork,*aa; 2073 MPI_Comm comm = ((PetscObject)mat)->comm; 2074 Mat_SeqBAIJ *aij; 2075 2076 2077 PetscFunctionBegin; 2078 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2079 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2080 2081 if (call == MAT_REUSE_MATRIX) { 2082 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2083 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2084 local = &Mreuse; 2085 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2086 } else { 2087 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2088 Mreuse = *local; 2089 ierr = PetscFree(local);CHKERRQ(ierr); 2090 } 2091 2092 /* 2093 m - number of local rows 2094 n - number of columns (same on all processors) 2095 rstart - first row in new global matrix generated 2096 */ 2097 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2098 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2099 m = m/bs; 2100 n = n/bs; 2101 2102 if (call == MAT_INITIAL_MATRIX) { 2103 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2104 ii = aij->i; 2105 jj = aij->j; 2106 2107 /* 2108 Determine the number of non-zeros in the diagonal and off-diagonal 2109 portions of the matrix in order to do correct preallocation 2110 */ 2111 2112 /* first get start and end of "diagonal" columns */ 2113 if (csize == PETSC_DECIDE) { 2114 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2115 if (mglobal == n*bs) { /* square matrix */ 2116 nlocal = m; 2117 } else { 2118 nlocal = n/size + ((n % size) > rank); 2119 } 2120 } else { 2121 nlocal = csize/bs; 2122 } 2123 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2124 rstart = rend - nlocal; 2125 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2126 2127 /* next, compute all the lengths */ 2128 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2129 olens = dlens + m; 2130 for (i=0; i<m; i++) { 2131 jend = ii[i+1] - ii[i]; 2132 olen = 0; 2133 dlen = 0; 2134 for (j=0; j<jend; j++) { 2135 if (*jj < rstart || *jj >= rend) olen++; 2136 else dlen++; 2137 jj++; 2138 } 2139 olens[i] = olen; 2140 dlens[i] = dlen; 2141 } 2142 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2143 ierr = 
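/*
   Hedged usage sketch (not from the original source; S, isrow, iscol are placeholder names): the
   gathered column IS cached in MatGetSubMatrix_MPIBAIJ() and the sequential "SubMatrix" composed onto
   the result below are what make the reuse path work:

     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
     ... change numerical values of A, keeping the same nonzero pattern ...
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&S);CHKERRQ(ierr);

   The second call must use the same index sets and layout as the first.
*/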
MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 2144 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2145 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2146 ierr = PetscFree(dlens);CHKERRQ(ierr); 2147 } else { 2148 PetscInt ml,nl; 2149 2150 M = *newmat; 2151 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2152 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2153 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2154 /* 2155 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2156 rather than the slower MatSetValues(). 2157 */ 2158 M->was_assembled = PETSC_TRUE; 2159 M->assembled = PETSC_FALSE; 2160 } 2161 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2162 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2163 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2164 ii = aij->i; 2165 jj = aij->j; 2166 aa = aij->a; 2167 for (i=0; i<m; i++) { 2168 row = rstart/bs + i; 2169 nz = ii[i+1] - ii[i]; 2170 cwork = jj; jj += nz; 2171 vwork = aa; aa += nz; 2172 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2173 } 2174 2175 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2176 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2177 *newmat = M; 2178 2179 /* save submatrix used in processor for next request */ 2180 if (call == MAT_INITIAL_MATRIX) { 2181 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2182 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2183 } 2184 2185 PetscFunctionReturn(0); 2186 } 2187 2188 #undef __FUNCT__ 2189 #define __FUNCT__ "MatPermute_MPIBAIJ" 2190 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2191 { 2192 MPI_Comm comm,pcomm; 2193 PetscInt first,local_size,nrows; 2194 const PetscInt *rows; 2195 PetscMPIInt size; 2196 IS crowp,growp,irowp,lrowp,lcolp,icolp; 2197 PetscErrorCode ierr; 2198 2199 PetscFunctionBegin; 2200 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2201 /* make a collective version of 'rowp' */ 2202 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2203 if (pcomm==comm) { 2204 crowp = rowp; 2205 } else { 2206 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2207 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2208 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2209 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2210 } 2211 /* collect the global row permutation and invert it */ 2212 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 2213 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 2214 if (pcomm!=comm) { 2215 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 2216 } 2217 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 2218 /* get the local target indices */ 2219 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 2220 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 2221 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 2222 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 2223 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 2224 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 2225 /* the column permutation is so much easier; 2226 make a local version of 'colp' and invert it */ 2227 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2228 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2229 if (size==1) { 2230 
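/*
   Illustration (added for clarity, not in the original source): ISInvertPermutation() used in this
   routine produces the inverse permutation q of p, defined by q[p[i]] = i; for example p = {2,0,1}
   gives q = {1,2,0}.
*/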
lcolp = colp; 2231 } else { 2232 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 2233 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 2234 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr); 2235 } 2236 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2237 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 2238 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 2239 if (size>1) { 2240 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 2241 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 2242 } 2243 /* now we just get the submatrix */ 2244 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2245 /* clean up */ 2246 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 2247 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 2248 PetscFunctionReturn(0); 2249 } 2250 2251 #undef __FUNCT__ 2252 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2253 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2254 { 2255 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2256 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2257 2258 PetscFunctionBegin; 2259 if (nghosts) { *nghosts = B->nbs;} 2260 if (ghosts) {*ghosts = baij->garray;} 2261 PetscFunctionReturn(0); 2262 } 2263 2264 extern PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat); 2265 2266 #undef __FUNCT__ 2267 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ" 2268 /* 2269 This routine is almost identical to MatFDColoringCreate_MPIBAIJ()! 2270 */ 2271 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 2272 { 2273 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 2274 PetscErrorCode ierr; 2275 PetscMPIInt size,*ncolsonproc,*disp,nn; 2276 PetscInt bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col; 2277 const PetscInt *is; 2278 PetscInt nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj; 2279 PetscInt *rowhit,M,cstart,cend,colb; 2280 PetscInt *columnsforrow,l; 2281 IS *isa; 2282 PetscBool done,flg; 2283 ISLocalToGlobalMapping map = mat->cbmapping; 2284 PetscInt *ltog = (map ? 
map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype; 2285 2286 PetscFunctionBegin; 2287 if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();"); 2288 if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock"); 2289 2290 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 2291 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2292 M = mat->rmap->n/bs; 2293 cstart = mat->cmap->rstart/bs; 2294 cend = mat->cmap->rend/bs; 2295 c->M = mat->rmap->N/bs; /* set the global rows and columns and local rows */ 2296 c->N = mat->cmap->N/bs; 2297 c->m = mat->rmap->n/bs; 2298 c->rstart = mat->rmap->rstart/bs; 2299 2300 c->ncolors = nis; 2301 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 2302 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 2303 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 2304 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 2305 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 2306 ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr); 2307 2308 /* Allow access to data structures of local part of matrix */ 2309 if (!baij->colmap) { 2310 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 2311 } 2312 ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2313 ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2314 2315 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 2316 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 2317 2318 for (i=0; i<nis; i++) { 2319 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 2320 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 2321 c->ncolumns[i] = n; 2322 if (n) { 2323 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 2324 ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr); 2325 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 2326 } else { 2327 c->columns[i] = 0; 2328 } 2329 2330 if (ctype == IS_COLORING_GLOBAL){ 2331 /* Determine the total (parallel) number of columns of this color */ 2332 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 2333 ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr); 2334 2335 nn = PetscMPIIntCast(n); 2336 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2337 nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];} 2338 if (!nctot) { 2339 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 2340 } 2341 2342 disp[0] = 0; 2343 for (j=1; j<size; j++) { 2344 disp[j] = disp[j-1] + ncolsonproc[j-1]; 2345 } 2346 2347 /* Get complete list of columns for color on each processor */ 2348 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2349 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2350 ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr); 2351 } else if (ctype == IS_COLORING_GHOSTED){ 2352 /* Determine local number of columns of this color on this process, including ghost points */ 2353 nctot = n; 2354 ierr = 
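/*
   Hedged sketch of a typical driver for this routine (illustrative only; J, x, ctx, flag, FormFunction,
   fdcoloring and iscoloring are placeholder names, and the call sequence is assumed from the PETSc API
   of this release):

     ierr = MatGetColoring(J,MATCOLORINGSL,&iscoloring);CHKERRQ(ierr);
     ierr = MatFDColoringCreate(J,iscoloring,&fdcoloring);CHKERRQ(ierr);
     ierr = MatFDColoringSetFunction(fdcoloring,(PetscErrorCode (*)(void))FormFunction,ctx);CHKERRQ(ierr);
     ierr = ISColoringDestroy(&iscoloring);CHKERRQ(ierr);
     ierr = MatFDColoringApply(J,fdcoloring,x,&flag,ctx);CHKERRQ(ierr);

   For BAIJ matrices MatFDColoringApply() dispatches to MatFDColoringApply_BAIJ(), declared later in
   this file.
*/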
PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2355 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 2356 } else { 2357 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 2358 } 2359 2360 /* 2361 Mark all rows affect by these columns 2362 */ 2363 /* Temporary option to allow for debugging/testing */ 2364 flg = PETSC_FALSE; 2365 ierr = PetscOptionsGetBool(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr); 2366 if (!flg) {/*-----------------------------------------------------------------------------*/ 2367 /* crude, fast version */ 2368 ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr); 2369 /* loop over columns*/ 2370 for (j=0; j<nctot; j++) { 2371 if (ctype == IS_COLORING_GHOSTED) { 2372 col = ltog[cols[j]]; 2373 } else { 2374 col = cols[j]; 2375 } 2376 if (col >= cstart && col < cend) { 2377 /* column is in diagonal block of matrix */ 2378 rows = A_cj + A_ci[col-cstart]; 2379 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2380 } else { 2381 #if defined (PETSC_USE_CTABLE) 2382 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2383 colb --; 2384 #else 2385 colb = baij->colmap[col] - 1; 2386 #endif 2387 if (colb == -1) { 2388 m = 0; 2389 } else { 2390 colb = colb/bs; 2391 rows = B_cj + B_ci[colb]; 2392 m = B_ci[colb+1] - B_ci[colb]; 2393 } 2394 } 2395 /* loop over columns marking them in rowhit */ 2396 for (k=0; k<m; k++) { 2397 rowhit[*rows++] = col + 1; 2398 } 2399 } 2400 2401 /* count the number of hits */ 2402 nrows = 0; 2403 for (j=0; j<M; j++) { 2404 if (rowhit[j]) nrows++; 2405 } 2406 c->nrows[i] = nrows; 2407 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2408 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2409 ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2410 nrows = 0; 2411 for (j=0; j<M; j++) { 2412 if (rowhit[j]) { 2413 c->rows[i][nrows] = j; 2414 c->columnsforrow[i][nrows] = rowhit[j] - 1; 2415 nrows++; 2416 } 2417 } 2418 } else {/*-------------------------------------------------------------------------------*/ 2419 /* slow version, using rowhit as a linked list */ 2420 PetscInt currentcol,fm,mfm; 2421 rowhit[M] = M; 2422 nrows = 0; 2423 /* loop over columns*/ 2424 for (j=0; j<nctot; j++) { 2425 if (ctype == IS_COLORING_GHOSTED) { 2426 col = ltog[cols[j]]; 2427 } else { 2428 col = cols[j]; 2429 } 2430 if (col >= cstart && col < cend) { 2431 /* column is in diagonal block of matrix */ 2432 rows = A_cj + A_ci[col-cstart]; 2433 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2434 } else { 2435 #if defined (PETSC_USE_CTABLE) 2436 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2437 colb --; 2438 #else 2439 colb = baij->colmap[col] - 1; 2440 #endif 2441 if (colb == -1) { 2442 m = 0; 2443 } else { 2444 colb = colb/bs; 2445 rows = B_cj + B_ci[colb]; 2446 m = B_ci[colb+1] - B_ci[colb]; 2447 } 2448 } 2449 2450 /* loop over columns marking them in rowhit */ 2451 fm = M; /* fm points to first entry in linked list */ 2452 for (k=0; k<m; k++) { 2453 currentcol = *rows++; 2454 /* is it already in the list? 
*/ 2455 do { 2456 mfm = fm; 2457 fm = rowhit[fm]; 2458 } while (fm < currentcol); 2459 /* not in list so add it */ 2460 if (fm != currentcol) { 2461 nrows++; 2462 columnsforrow[currentcol] = col; 2463 /* next three lines insert new entry into linked list */ 2464 rowhit[mfm] = currentcol; 2465 rowhit[currentcol] = fm; 2466 fm = currentcol; 2467 /* fm points to present position in list since we know the columns are sorted */ 2468 } else { 2469 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected"); 2470 } 2471 } 2472 } 2473 c->nrows[i] = nrows; 2474 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2475 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2476 ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2477 /* now store the linked list of rows into c->rows[i] */ 2478 nrows = 0; 2479 fm = rowhit[M]; 2480 do { 2481 c->rows[i][nrows] = fm; 2482 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 2483 fm = rowhit[fm]; 2484 } while (fm < M); 2485 } /* ---------------------------------------------------------------------------------------*/ 2486 ierr = PetscFree(cols);CHKERRQ(ierr); 2487 } 2488 2489 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 2490 /* 2491 vscale will contain the "diagonal" on processor scalings followed by the off processor 2492 */ 2493 if (ctype == IS_COLORING_GLOBAL) { 2494 PetscInt *garray; 2495 ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2496 for (i=0; i<baij->B->cmap->n/bs; i++) { 2497 for (j=0; j<bs; j++) { 2498 garray[i*bs+j] = bs*baij->garray[i]+j; 2499 } 2500 } 2501 ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr); 2502 ierr = PetscFree(garray);CHKERRQ(ierr); 2503 CHKMEMQ; 2504 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2505 for (k=0; k<c->ncolors; k++) { 2506 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2507 for (l=0; l<c->nrows[k]; l++) { 2508 col = c->columnsforrow[k][l]; 2509 if (col >= cstart && col < cend) { 2510 /* column is in diagonal block of matrix */ 2511 colb = col - cstart; 2512 } else { 2513 /* column is in "off-processor" part */ 2514 #if defined (PETSC_USE_CTABLE) 2515 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2516 colb --; 2517 #else 2518 colb = baij->colmap[col] - 1; 2519 #endif 2520 colb = colb/bs; 2521 colb += cend - cstart; 2522 } 2523 c->vscaleforrow[k][l] = colb; 2524 } 2525 } 2526 } else if (ctype == IS_COLORING_GHOSTED) { 2527 /* Get gtol mapping */ 2528 PetscInt N = mat->cmap->N, *gtol; 2529 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr); 2530 for (i=0; i<N; i++) gtol[i] = -1; 2531 for (i=0; i<map->n; i++) gtol[ltog[i]] = i; 2532 2533 c->vscale = 0; /* will be created in MatFDColoringApply() */ 2534 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2535 for (k=0; k<c->ncolors; k++) { 2536 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2537 for (l=0; l<c->nrows[k]; l++) { 2538 col = c->columnsforrow[k][l]; /* global column index */ 2539 c->vscaleforrow[k][l] = gtol[col]; /* local column index */ 2540 } 2541 } 2542 ierr = PetscFree(gtol);CHKERRQ(ierr); 2543 } 2544 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 2545 2546 ierr = PetscFree(rowhit);CHKERRQ(ierr); 2547 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 2548 
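/*
   Example of the gtol mapping built above for IS_COLORING_GHOSTED (illustration only, not from the
   original source): if the block local-to-global map is ltog = {5,7,9}, then gtol[5] = 0, gtol[7] = 1,
   gtol[9] = 2 and every other entry stays -1, so a global block column can be translated back to its
   local (ghosted) position when filling vscaleforrow[][].
*/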
ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2549 ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2550 CHKMEMQ; 2551 PetscFunctionReturn(0); 2552 } 2553 2554 #undef __FUNCT__ 2555 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ" 2556 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat) 2557 { 2558 Mat B; 2559 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2560 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2561 Mat_SeqAIJ *b; 2562 PetscErrorCode ierr; 2563 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2564 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2565 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2566 2567 PetscFunctionBegin; 2568 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2569 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2570 2571 /* ---------------------------------------------------------------- 2572 Tell every processor the number of nonzeros per row 2573 */ 2574 ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 2575 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2576 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2577 } 2578 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2579 ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr); 2580 displs = recvcounts + size; 2581 for (i=0; i<size; i++) { 2582 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2583 displs[i] = A->rmap->range[i]/bs; 2584 } 2585 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2586 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2587 #else 2588 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2589 #endif 2590 /* --------------------------------------------------------------- 2591 Create the sequential matrix of the same type as the local block diagonal 2592 */ 2593 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2594 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2595 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2596 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2597 b = (Mat_SeqAIJ *)B->data; 2598 2599 /*-------------------------------------------------------------------- 2600 Copy my part of matrix column indices over 2601 */ 2602 sendcount = ad->nz + bd->nz; 2603 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2604 a_jsendbuf = ad->j; 2605 b_jsendbuf = bd->j; 2606 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2607 cnt = 0; 2608 for (i=0; i<n; i++) { 2609 2610 /* put in lower diagonal portion */ 2611 m = bd->i[i+1] - bd->i[i]; 2612 while (m > 0) { 2613 /* is it above diagonal (in bd (compressed) numbering) */ 2614 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2615 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2616 m--; 2617 } 2618 2619 /* put in diagonal portion */ 2620 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2621 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2622 } 2623 2624 /* put in upper diagonal portion */ 2625 while (m-- > 0) { 2626 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2627 } 2628 } 2629 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D 
actual nz %D",sendcount,cnt); 2630 2631 /*-------------------------------------------------------------------- 2632 Gather all column indices to all processors 2633 */ 2634 for (i=0; i<size; i++) { 2635 recvcounts[i] = 0; 2636 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2637 recvcounts[i] += lens[j]; 2638 } 2639 } 2640 displs[0] = 0; 2641 for (i=1; i<size; i++) { 2642 displs[i] = displs[i-1] + recvcounts[i-1]; 2643 } 2644 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2645 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2646 #else 2647 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2648 #endif 2649 /*-------------------------------------------------------------------- 2650 Assemble the matrix into useable form (note numerical values not yet set) 2651 */ 2652 /* set the b->ilen (length of each row) values */ 2653 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2654 /* set the b->i indices */ 2655 b->i[0] = 0; 2656 for (i=1; i<=A->rmap->N/bs; i++) { 2657 b->i[i] = b->i[i-1] + lens[i-1]; 2658 } 2659 ierr = PetscFree(lens);CHKERRQ(ierr); 2660 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2661 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2662 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2663 2664 if (A->symmetric){ 2665 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2666 } else if (A->hermitian) { 2667 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2668 } else if (A->structurally_symmetric) { 2669 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2670 } 2671 *newmat = B; 2672 PetscFunctionReturn(0); 2673 } 2674 2675 #undef __FUNCT__ 2676 #define __FUNCT__ "MatSOR_MPIBAIJ" 2677 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2678 { 2679 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2680 PetscErrorCode ierr; 2681 Vec bb1 = 0; 2682 2683 PetscFunctionBegin; 2684 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2685 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2686 } 2687 2688 if (flag == SOR_APPLY_UPPER) { 2689 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2690 PetscFunctionReturn(0); 2691 } 2692 2693 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2694 if (flag & SOR_ZERO_INITIAL_GUESS) { 2695 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2696 its--; 2697 } 2698 2699 while (its--) { 2700 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2701 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2702 2703 /* update rhs: bb1 = bb - B*x */ 2704 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2705 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2706 2707 /* local sweep */ 2708 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2709 } 2710 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 2711 if (flag & SOR_ZERO_INITIAL_GUESS) { 2712 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2713 its--; 2714 } 2715 while (its--) { 2716 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2717 ierr = 
VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2718 2719 /* update rhs: bb1 = bb - B*x */ 2720 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2721 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2722 2723 /* local sweep */ 2724 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2725 } 2726 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 2727 if (flag & SOR_ZERO_INITIAL_GUESS) { 2728 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2729 its--; 2730 } 2731 while (its--) { 2732 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2733 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2734 2735 /* update rhs: bb1 = bb - B*x */ 2736 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2737 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2738 2739 /* local sweep */ 2740 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2741 } 2742 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2743 2744 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2745 PetscFunctionReturn(0); 2746 } 2747 2748 extern PetscErrorCode MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2749 2750 2751 /* -------------------------------------------------------------------*/ 2752 static struct _MatOps MatOps_Values = { 2753 MatSetValues_MPIBAIJ, 2754 MatGetRow_MPIBAIJ, 2755 MatRestoreRow_MPIBAIJ, 2756 MatMult_MPIBAIJ, 2757 /* 4*/ MatMultAdd_MPIBAIJ, 2758 MatMultTranspose_MPIBAIJ, 2759 MatMultTransposeAdd_MPIBAIJ, 2760 0, 2761 0, 2762 0, 2763 /*10*/ 0, 2764 0, 2765 0, 2766 MatSOR_MPIBAIJ, 2767 MatTranspose_MPIBAIJ, 2768 /*15*/ MatGetInfo_MPIBAIJ, 2769 MatEqual_MPIBAIJ, 2770 MatGetDiagonal_MPIBAIJ, 2771 MatDiagonalScale_MPIBAIJ, 2772 MatNorm_MPIBAIJ, 2773 /*20*/ MatAssemblyBegin_MPIBAIJ, 2774 MatAssemblyEnd_MPIBAIJ, 2775 MatSetOption_MPIBAIJ, 2776 MatZeroEntries_MPIBAIJ, 2777 /*24*/ MatZeroRows_MPIBAIJ, 2778 0, 2779 0, 2780 0, 2781 0, 2782 /*29*/ MatSetUpPreallocation_MPIBAIJ, 2783 0, 2784 0, 2785 0, 2786 0, 2787 /*34*/ MatDuplicate_MPIBAIJ, 2788 0, 2789 0, 2790 0, 2791 0, 2792 /*39*/ MatAXPY_MPIBAIJ, 2793 MatGetSubMatrices_MPIBAIJ, 2794 MatIncreaseOverlap_MPIBAIJ, 2795 MatGetValues_MPIBAIJ, 2796 MatCopy_MPIBAIJ, 2797 /*44*/ 0, 2798 MatScale_MPIBAIJ, 2799 0, 2800 0, 2801 0, 2802 /*49*/ MatSetBlockSize_MPIBAIJ, 2803 0, 2804 0, 2805 0, 2806 0, 2807 /*54*/ MatFDColoringCreate_MPIBAIJ, 2808 0, 2809 MatSetUnfactored_MPIBAIJ, 2810 MatPermute_MPIBAIJ, 2811 MatSetValuesBlocked_MPIBAIJ, 2812 /*59*/ MatGetSubMatrix_MPIBAIJ, 2813 MatDestroy_MPIBAIJ, 2814 MatView_MPIBAIJ, 2815 0, 2816 0, 2817 /*64*/ 0, 2818 0, 2819 0, 2820 0, 2821 0, 2822 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2823 0, 2824 0, 2825 0, 2826 0, 2827 /*74*/ 0, 2828 MatFDColoringApply_BAIJ, 2829 0, 2830 0, 2831 0, 2832 /*79*/ 0, 2833 0, 2834 0, 2835 0, 2836 MatLoad_MPIBAIJ, 2837 /*84*/ 0, 2838 0, 2839 0, 2840 0, 2841 0, 2842 /*89*/ 0, 2843 0, 2844 0, 2845 0, 2846 0, 2847 /*94*/ 0, 2848 0, 2849 0, 2850 0, 2851 0, 2852 /*99*/ 0, 2853 0, 2854 0, 2855 0, 2856 0, 2857 /*104*/0, 2858 MatRealPart_MPIBAIJ, 2859 MatImaginaryPart_MPIBAIJ, 2860 0, 2861 0, 2862 /*109*/0, 2863 0, 2864 0, 2865 0, 2866 0, 2867 /*114*/MatGetSeqNonzerostructure_MPIBAIJ, 2868 0, 2869 MatGetGhosts_MPIBAIJ, 2870 0, 2871 0, 2872 /*119*/0, 2873 0, 2874 0, 2875 0 2876 }; 2877 2878 
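/*
   Note added for clarity (not in the original source): PETSc dispatches through the MatOps table
   above, so for an MPIBAIJ matrix a call such as

     ierr = MatMult(A,x,y);CHKERRQ(ierr);

   ends up invoking (*A->ops->mult)(A,x,y), i.e. MatMult_MPIBAIJ() from the table; the zero entries
   mark operations this matrix type does not implement.
*/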
EXTERN_C_BEGIN 2879 #undef __FUNCT__ 2880 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2881 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscBool *iscopy,MatReuse reuse,Mat *a) 2882 { 2883 PetscFunctionBegin; 2884 *a = ((Mat_MPIBAIJ *)A->data)->A; 2885 *iscopy = PETSC_FALSE; 2886 PetscFunctionReturn(0); 2887 } 2888 EXTERN_C_END 2889 2890 EXTERN_C_BEGIN 2891 extern PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2892 EXTERN_C_END 2893 2894 EXTERN_C_BEGIN 2895 #undef __FUNCT__ 2896 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2897 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2898 { 2899 PetscInt m,rstart,cstart,cend; 2900 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2901 const PetscInt *JJ=0; 2902 PetscScalar *values=0; 2903 PetscErrorCode ierr; 2904 2905 PetscFunctionBegin; 2906 2907 if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2908 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2909 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2910 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2911 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2912 m = B->rmap->n/bs; 2913 rstart = B->rmap->rstart/bs; 2914 cstart = B->cmap->rstart/bs; 2915 cend = B->cmap->rend/bs; 2916 2917 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2918 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 2919 for (i=0; i<m; i++) { 2920 nz = ii[i+1] - ii[i]; 2921 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2922 nz_max = PetscMax(nz_max,nz); 2923 JJ = jj + ii[i]; 2924 for (j=0; j<nz; j++) { 2925 if (*JJ >= cstart) break; 2926 JJ++; 2927 } 2928 d = 0; 2929 for (; j<nz; j++) { 2930 if (*JJ++ >= cend) break; 2931 d++; 2932 } 2933 d_nnz[i] = d; 2934 o_nnz[i] = nz - d; 2935 } 2936 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2937 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2938 2939 values = (PetscScalar*)V; 2940 if (!values) { 2941 ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2942 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2943 } 2944 for (i=0; i<m; i++) { 2945 PetscInt row = i + rstart; 2946 PetscInt ncols = ii[i+1] - ii[i]; 2947 const PetscInt *icols = jj + ii[i]; 2948 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2949 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2950 } 2951 2952 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2953 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2954 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2955 2956 PetscFunctionReturn(0); 2957 } 2958 EXTERN_C_END 2959 2960 #undef __FUNCT__ 2961 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2962 /*@C 2963 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2964 (the default parallel PETSc format). 2965 2966 Collective on MPI_Comm 2967 2968 Input Parameters: 2969 + A - the matrix 2970 . bs - the block size 2971 . i - the indices into j for the start of each local row (starts with zero) 2972 . 
j - the column indices for each local row (starts with zero) these must be sorted for each row 2973 - v - optional values in the matrix 2974 2975 Level: developer 2976 2977 .keywords: matrix, aij, compressed row, sparse, parallel 2978 2979 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2980 @*/ 2981 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2982 { 2983 PetscErrorCode ierr; 2984 2985 PetscFunctionBegin; 2986 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2987 PetscFunctionReturn(0); 2988 } 2989 2990 EXTERN_C_BEGIN 2991 #undef __FUNCT__ 2992 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2993 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2994 { 2995 Mat_MPIBAIJ *b; 2996 PetscErrorCode ierr; 2997 PetscInt i, newbs = PetscAbs(bs); 2998 2999 PetscFunctionBegin; 3000 if (bs < 0) { 3001 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr); 3002 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 3003 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3004 bs = PetscAbs(bs); 3005 } 3006 if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz"); 3007 bs = newbs; 3008 3009 3010 if (bs < 1) SETERRQ(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 3011 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3012 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3013 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3014 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3015 3016 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3017 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3018 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3019 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3020 3021 if (d_nnz) { 3022 for (i=0; i<B->rmap->n/bs; i++) { 3023 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 3024 } 3025 } 3026 if (o_nnz) { 3027 for (i=0; i<B->rmap->n/bs; i++) { 3028 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 3029 } 3030 } 3031 3032 b = (Mat_MPIBAIJ*)B->data; 3033 b->bs2 = bs*bs; 3034 b->mbs = B->rmap->n/bs; 3035 b->nbs = B->cmap->n/bs; 3036 b->Mbs = B->rmap->N/bs; 3037 b->Nbs = B->cmap->N/bs; 3038 3039 for (i=0; i<=b->size; i++) { 3040 b->rangebs[i] = B->rmap->range[i]/bs; 3041 } 3042 b->rstartbs = B->rmap->rstart/bs; 3043 b->rendbs = B->rmap->rend/bs; 3044 b->cstartbs = B->cmap->rstart/bs; 3045 b->cendbs = B->cmap->rend/bs; 3046 3047 if (!B->preallocated) { 3048 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3049 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3050 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 3051 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3052 ierr = 
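/*
   Hedged usage sketch (not from the original source; comm, A, m, n, M, N, bs, d_nnz, o_nnz are
   placeholder names): the standard calling sequence that reaches this routine is

     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetPreallocation(A,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);

   where d_nnz[]/o_nnz[] give per-block-row counts for the diagonal block b->A and the off-diagonal
   block b->B created below.
*/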
MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3053 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3054 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 3055 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3056 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 3057 } 3058 3059 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3060 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 3061 B->preallocated = PETSC_TRUE; 3062 PetscFunctionReturn(0); 3063 } 3064 EXTERN_C_END 3065 3066 EXTERN_C_BEGIN 3067 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 3068 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 3069 EXTERN_C_END 3070 3071 3072 EXTERN_C_BEGIN 3073 #undef __FUNCT__ 3074 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 3075 PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj) 3076 { 3077 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 3078 PetscErrorCode ierr; 3079 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 3080 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 3081 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 3082 3083 PetscFunctionBegin; 3084 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr); 3085 ii[0] = 0; 3086 CHKMEMQ; 3087 for (i=0; i<M; i++) { 3088 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 3089 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 3090 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 3091 /* remove one from count of matrix has diagonal */ 3092 for (j=id[i]; j<id[i+1]; j++) { 3093 if (jd[j] == i) {ii[i+1]--;break;} 3094 } 3095 CHKMEMQ; 3096 } 3097 ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3098 cnt = 0; 3099 for (i=0; i<M; i++) { 3100 for (j=io[i]; j<io[i+1]; j++) { 3101 if (garray[jo[j]] > rstart) break; 3102 jj[cnt++] = garray[jo[j]]; 3103 CHKMEMQ; 3104 } 3105 for (k=id[i]; k<id[i+1]; k++) { 3106 if (jd[k] != i) { 3107 jj[cnt++] = rstart + jd[k]; 3108 CHKMEMQ; 3109 } 3110 } 3111 for (;j<io[i+1]; j++) { 3112 jj[cnt++] = garray[jo[j]]; 3113 CHKMEMQ; 3114 } 3115 } 3116 ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr); 3117 PetscFunctionReturn(0); 3118 } 3119 EXTERN_C_END 3120 3121 #include <../src/mat/impls/aij/mpi/mpiaij.h> 3122 EXTERN_C_BEGIN 3123 PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*); 3124 EXTERN_C_END 3125 3126 EXTERN_C_BEGIN 3127 #undef __FUNCT__ 3128 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ" 3129 PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,const MatType newtype,MatReuse reuse,Mat *newmat) 3130 { 3131 PetscErrorCode ierr; 3132 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3133 Mat B; 3134 Mat_MPIAIJ *b; 3135 3136 PetscFunctionBegin; 3137 if (!A->assembled) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Matrix must be assembled"); 3138 3139 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 3140 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3141 ierr = MatSetType(B,newtype);CHKERRQ(ierr); 3142 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 3143 b = (Mat_MPIAIJ*) B->data; 3144 3145 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 3146 ierr = 
MatDestroy(&b->B);CHKERRQ(ierr); 3147 ierr = DisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 3148 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 3149 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 3150 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3151 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3152 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3153 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3154 if (reuse == MAT_REUSE_MATRIX) { 3155 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 3156 } else { 3157 *newmat = B; 3158 } 3159 PetscFunctionReturn(0); 3160 } 3161 EXTERN_C_END 3162 3163 EXTERN_C_BEGIN 3164 #if defined(PETSC_HAVE_MUMPS) 3165 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3166 #endif 3167 EXTERN_C_END 3168 3169 /*MC 3170 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 3171 3172 Options Database Keys: 3173 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 3174 . -mat_block_size <bs> - set the blocksize used to store the matrix 3175 - -mat_use_hash_table <fact> 3176 3177 Level: beginner 3178 3179 .seealso: MatCreateMPIBAIJ 3180 M*/ 3181 3182 EXTERN_C_BEGIN 3183 #undef __FUNCT__ 3184 #define __FUNCT__ "MatCreate_MPIBAIJ" 3185 PetscErrorCode MatCreate_MPIBAIJ(Mat B) 3186 { 3187 Mat_MPIBAIJ *b; 3188 PetscErrorCode ierr; 3189 PetscBool flg; 3190 3191 PetscFunctionBegin; 3192 ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr); 3193 B->data = (void*)b; 3194 3195 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3196 B->assembled = PETSC_FALSE; 3197 3198 B->insertmode = NOT_SET_VALUES; 3199 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 3200 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 3201 3202 /* build local table of row and column ownerships */ 3203 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 3204 3205 /* build cache for off array entries formed */ 3206 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 3207 b->donotstash = PETSC_FALSE; 3208 b->colmap = PETSC_NULL; 3209 b->garray = PETSC_NULL; 3210 b->roworiented = PETSC_TRUE; 3211 3212 /* stuff used in block assembly */ 3213 b->barray = 0; 3214 3215 /* stuff used for matrix vector multiply */ 3216 b->lvec = 0; 3217 b->Mvctx = 0; 3218 3219 /* stuff for MatGetRow() */ 3220 b->rowindices = 0; 3221 b->rowvalues = 0; 3222 b->getrowactive = PETSC_FALSE; 3223 3224 /* hash table stuff */ 3225 b->ht = 0; 3226 b->hd = 0; 3227 b->ht_size = 0; 3228 b->ht_flag = PETSC_FALSE; 3229 b->ht_fact = 0; 3230 b->ht_total_ct = 0; 3231 b->ht_insert_ct = 0; 3232 3233 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 3234 b->ijonly = PETSC_FALSE; 3235 3236 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 3237 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 3238 if (flg) { 3239 PetscReal fact = 1.39; 3240 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 3241 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 3242 if (fact <= 1.0) fact = 1.39; 3243 ierr = 
MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 3244 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 3245 } 3246 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3247 3248 #if defined(PETSC_HAVE_MUMPS) 3249 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr); 3250 #endif 3251 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C", 3252 "MatConvert_MPIBAIJ_MPIAdj", 3253 MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3254 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiaij_C", 3255 "MatConvert_MPIBAIJ_MPIAIJ", 3256 MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 3257 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C", 3258 "MatConvert_MPIBAIJ_MPISBAIJ", 3259 MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr); 3260 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3261 "MatStoreValues_MPIBAIJ", 3262 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3263 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3264 "MatRetrieveValues_MPIBAIJ", 3265 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3266 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3267 "MatGetDiagonalBlock_MPIBAIJ", 3268 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3269 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 3270 "MatMPIBAIJSetPreallocation_MPIBAIJ", 3271 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3272 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 3273 "MatMPIBAIJSetPreallocationCSR_MPIBAIJ", 3274 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3275 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3276 "MatDiagonalScaleLocal_MPIBAIJ", 3277 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3278 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 3279 "MatSetHashTableFactor_MPIBAIJ", 3280 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3281 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3282 PetscFunctionReturn(0); 3283 } 3284 EXTERN_C_END 3285 3286 /*MC 3287 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3288 3289 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3290 and MATMPIBAIJ otherwise. 3291 3292 Options Database Keys: 3293 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3294 3295 Level: beginner 3296 3297 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3298 M*/ 3299 3300 EXTERN_C_BEGIN 3301 #undef __FUNCT__ 3302 #define __FUNCT__ "MatCreate_BAIJ" 3303 PetscErrorCode MatCreate_BAIJ(Mat A) 3304 { 3305 PetscErrorCode ierr; 3306 PetscMPIInt size; 3307 3308 PetscFunctionBegin; 3309 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 3310 if (size == 1) { 3311 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 3312 } else { 3313 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 3314 } 3315 PetscFunctionReturn(0); 3316 } 3317 EXTERN_C_END 3318 3319 #undef __FUNCT__ 3320 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 3321 /*@C 3322 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3323 (block compressed row). 
For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  A - the matrix
.  bs    - size of block
.  d_nz  - number of block nonzeros per block row in diagonal portion of local
           submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of block nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - use hash table to save memory in constructing matrix

   Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  |  o o o d d d o o o o o o
   row 4  |  o o o d d d o o o o o o
   row 5  |  o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.
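   Example usage -- a minimal sketch only; comm, m, n, M, N, d_nnz, and o_nnz are placeholders for the
   caller's communicator, sizes, and per-block-row counts, and the block size (2) and per-row counts
   (5 and 2) are purely illustrative (the local sizes m and n must be multiples of the block size):

.vb
   Mat A;
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetPreallocation(A,2,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
.ve

   When the block rows differ widely in length, pass d_nnz and o_nnz (one entry per local block row);
   the scalar d_nz and o_nz values are then ignored:

.vb
   ierr = MatMPIBAIJSetPreallocation(A,2,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve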
   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
@*/
PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIBAIJ"
/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzero blocks per block row in diagonal portion of local
           submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - use hash table to save memory in constructing matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   A nonzero block is any block that has 1 or more nonzeros in it

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
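   Example -- a minimal sketch only; comm, m, n, M, and N are placeholders for the caller's
   communicator and sizes, and the block size (2) and per-block-row counts (5 and 2) are
   illustrative, not recommendations:

.vb
   Mat A;
   ierr = MatCreateMPIBAIJ(comm,2,m,n,M,N,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
.ve

   As recommended above, the MatCreate(), MatSetType() and MatMPIBAIJSetPreallocation()
   sequence achieves the same thing and is the preferred way to create the matrix.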
3464 3465 Storage Information: 3466 For a square global matrix we define each processor's diagonal portion 3467 to be its local rows and the corresponding columns (a square submatrix); 3468 each processor's off-diagonal portion encompasses the remainder of the 3469 local matrix (a rectangular submatrix). 3470 3471 The user can specify preallocated storage for the diagonal part of 3472 the local submatrix with either d_nz or d_nnz (not both). Set 3473 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3474 memory allocation. Likewise, specify preallocated storage for the 3475 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3476 3477 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3478 the figure below we depict these three local rows and all columns (0-11). 3479 3480 .vb 3481 0 1 2 3 4 5 6 7 8 9 10 11 3482 ------------------- 3483 row 3 | o o o d d d o o o o o o 3484 row 4 | o o o d d d o o o o o o 3485 row 5 | o o o d d d o o o o o o 3486 ------------------- 3487 .ve 3488 3489 Thus, any entries in the d locations are stored in the d (diagonal) 3490 submatrix, and any entries in the o locations are stored in the 3491 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3492 stored simply in the MATSEQBAIJ format for compressed row storage. 3493 3494 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3495 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3496 In general, for PDE problems in which most nonzeros are near the diagonal, 3497 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3498 or you will get TERRIBLE performance; see the users' manual chapter on 3499 matrices. 3500 3501 Level: intermediate 3502 3503 .keywords: matrix, block, aij, compressed row, sparse, parallel 3504 3505 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3506 @*/ 3507 PetscErrorCode MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3508 { 3509 PetscErrorCode ierr; 3510 PetscMPIInt size; 3511 3512 PetscFunctionBegin; 3513 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3514 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3515 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3516 if (size > 1) { 3517 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3518 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3519 } else { 3520 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3521 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3522 } 3523 PetscFunctionReturn(0); 3524 } 3525 3526 #undef __FUNCT__ 3527 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3528 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3529 { 3530 Mat mat; 3531 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3532 PetscErrorCode ierr; 3533 PetscInt len=0; 3534 3535 PetscFunctionBegin; 3536 *newmat = 0; 3537 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3538 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3539 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3540 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3541 3542 mat->factortype = matin->factortype; 3543 mat->preallocated = PETSC_TRUE; 
3544 mat->assembled = PETSC_TRUE; 3545 mat->insertmode = NOT_SET_VALUES; 3546 3547 a = (Mat_MPIBAIJ*)mat->data; 3548 mat->rmap->bs = matin->rmap->bs; 3549 a->bs2 = oldmat->bs2; 3550 a->mbs = oldmat->mbs; 3551 a->nbs = oldmat->nbs; 3552 a->Mbs = oldmat->Mbs; 3553 a->Nbs = oldmat->Nbs; 3554 3555 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3556 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3557 3558 a->size = oldmat->size; 3559 a->rank = oldmat->rank; 3560 a->donotstash = oldmat->donotstash; 3561 a->roworiented = oldmat->roworiented; 3562 a->rowindices = 0; 3563 a->rowvalues = 0; 3564 a->getrowactive = PETSC_FALSE; 3565 a->barray = 0; 3566 a->rstartbs = oldmat->rstartbs; 3567 a->rendbs = oldmat->rendbs; 3568 a->cstartbs = oldmat->cstartbs; 3569 a->cendbs = oldmat->cendbs; 3570 3571 /* hash table stuff */ 3572 a->ht = 0; 3573 a->hd = 0; 3574 a->ht_size = 0; 3575 a->ht_flag = oldmat->ht_flag; 3576 a->ht_fact = oldmat->ht_fact; 3577 a->ht_total_ct = 0; 3578 a->ht_insert_ct = 0; 3579 3580 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3581 if (oldmat->colmap) { 3582 #if defined (PETSC_USE_CTABLE) 3583 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3584 #else 3585 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3586 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3587 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3588 #endif 3589 } else a->colmap = 0; 3590 3591 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3592 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3593 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3594 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3595 } else a->garray = 0; 3596 3597 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3598 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3599 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3600 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3601 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3602 3603 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3604 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3605 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3606 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3607 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3608 *newmat = mat; 3609 3610 PetscFunctionReturn(0); 3611 } 3612 3613 #undef __FUNCT__ 3614 #define __FUNCT__ "MatLoad_MPIBAIJ" 3615 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer) 3616 { 3617 PetscErrorCode ierr; 3618 int fd; 3619 PetscInt i,nz,j,rstart,rend; 3620 PetscScalar *vals,*buf; 3621 MPI_Comm comm = ((PetscObject)viewer)->comm; 3622 MPI_Status status; 3623 PetscMPIInt rank,size,maxnz; 3624 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3625 PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 3626 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 3627 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3628 PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 3629 PetscInt dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols; 3630 3631 PetscFunctionBegin; 3632 ierr = 
PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3633 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3634 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3635 3636 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3637 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3638 if (!rank) { 3639 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3640 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3641 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3642 } 3643 3644 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 3645 3646 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3647 M = header[1]; N = header[2]; 3648 3649 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3650 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 3651 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 3652 3653 /* If global sizes are set, check if they are consistent with that given in the file */ 3654 if (sizesset) { 3655 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 3656 } 3657 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3658 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3659 3660 if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices"); 3661 3662 /* 3663 This code adds extra rows to make sure the number of rows is 3664 divisible by the blocksize 3665 */ 3666 Mbs = M/bs; 3667 extra_rows = bs - M + bs*Mbs; 3668 if (extra_rows == bs) extra_rows = 0; 3669 else Mbs++; 3670 if (extra_rows && !rank) { 3671 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3672 } 3673 3674 /* determine ownership of all rows */ 3675 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 3676 mbs = Mbs/size + ((Mbs % size) > rank); 3677 m = mbs*bs; 3678 } else { /* User set */ 3679 m = newmat->rmap->n; 3680 mbs = m/bs; 3681 } 3682 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 3683 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3684 3685 /* process 0 needs enough room for process with most rows */ 3686 if (!rank) { 3687 mmax = rowners[1]; 3688 for (i=2; i<size; i++) { 3689 mmax = PetscMax(mmax,rowners[i]); 3690 } 3691 mmax*=bs; 3692 } else mmax = m; 3693 3694 rowners[0] = 0; 3695 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3696 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3697 rstart = rowners[rank]; 3698 rend = rowners[rank+1]; 3699 3700 /* distribute row lengths to all processors */ 3701 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 3702 if (!rank) { 3703 mend = m; 3704 if (size == 1) mend = mend - extra_rows; 3705 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3706 for (j=mend; j<m; j++) locrowlens[j] = 1; 3707 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3708 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3709 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3710 for (j=0; j<m; j++) { 
3711 procsnz[0] += locrowlens[j]; 3712 } 3713 for (i=1; i<size; i++) { 3714 mend = browners[i+1] - browners[i]; 3715 if (i == size-1) mend = mend - extra_rows; 3716 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3717 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3718 /* calculate the number of nonzeros on each processor */ 3719 for (j=0; j<browners[i+1]-browners[i]; j++) { 3720 procsnz[i] += rowlengths[j]; 3721 } 3722 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3723 } 3724 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3725 } else { 3726 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3727 } 3728 3729 if (!rank) { 3730 /* determine max buffer needed and allocate it */ 3731 maxnz = procsnz[0]; 3732 for (i=1; i<size; i++) { 3733 maxnz = PetscMax(maxnz,procsnz[i]); 3734 } 3735 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3736 3737 /* read in my part of the matrix column indices */ 3738 nz = procsnz[0]; 3739 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3740 mycols = ibuf; 3741 if (size == 1) nz -= extra_rows; 3742 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3743 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 3744 3745 /* read in every ones (except the last) and ship off */ 3746 for (i=1; i<size-1; i++) { 3747 nz = procsnz[i]; 3748 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3749 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3750 } 3751 /* read in the stuff for the last proc */ 3752 if (size != 1) { 3753 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 3754 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3755 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3756 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3757 } 3758 ierr = PetscFree(cols);CHKERRQ(ierr); 3759 } else { 3760 /* determine buffer space needed for message */ 3761 nz = 0; 3762 for (i=0; i<m; i++) { 3763 nz += locrowlens[i]; 3764 } 3765 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3766 mycols = ibuf; 3767 /* receive message of column indices*/ 3768 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3769 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3770 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3771 } 3772 3773 /* loop over local rows, determining number of off diagonal entries */ 3774 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 3775 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 3776 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3777 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3778 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3779 rowcount = 0; nzcount = 0; 3780 for (i=0; i<mbs; i++) { 3781 dcount = 0; 3782 odcount = 0; 3783 for (j=0; j<bs; j++) { 3784 kmax = locrowlens[rowcount]; 3785 for (k=0; k<kmax; k++) { 3786 tmp = mycols[nzcount++]/bs; 3787 if (!mask[tmp]) { 3788 mask[tmp] = 1; 3789 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3790 else masked1[dcount++] = tmp; 3791 } 3792 } 3793 rowcount++; 3794 } 3795 3796 dlens[i] = dcount; 3797 odlens[i] = odcount; 3798 3799 /* zero out the mask elements we set */ 3800 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3801 for 
(j=0; j<odcount; j++) mask[masked2[j]] = 0; 3802 } 3803 3804 3805 if (!sizesset) { 3806 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3807 } 3808 ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3809 3810 if (!rank) { 3811 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3812 /* read in my part of the matrix numerical values */ 3813 nz = procsnz[0]; 3814 vals = buf; 3815 mycols = ibuf; 3816 if (size == 1) nz -= extra_rows; 3817 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3818 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 3819 3820 /* insert into matrix */ 3821 jj = rstart*bs; 3822 for (i=0; i<m; i++) { 3823 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3824 mycols += locrowlens[i]; 3825 vals += locrowlens[i]; 3826 jj++; 3827 } 3828 /* read in other processors (except the last one) and ship out */ 3829 for (i=1; i<size-1; i++) { 3830 nz = procsnz[i]; 3831 vals = buf; 3832 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3833 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3834 } 3835 /* the last proc */ 3836 if (size != 1){ 3837 nz = procsnz[i] - extra_rows; 3838 vals = buf; 3839 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3840 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3841 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3842 } 3843 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3844 } else { 3845 /* receive numeric values */ 3846 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3847 3848 /* receive message of values*/ 3849 vals = buf; 3850 mycols = ibuf; 3851 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 3852 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 3853 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3854 3855 /* insert into matrix */ 3856 jj = rstart*bs; 3857 for (i=0; i<m; i++) { 3858 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3859 mycols += locrowlens[i]; 3860 vals += locrowlens[i]; 3861 jj++; 3862 } 3863 } 3864 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3865 ierr = PetscFree(buf);CHKERRQ(ierr); 3866 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3867 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 3868 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 3869 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 3870 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3871 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3872 3873 PetscFunctionReturn(0); 3874 } 3875 3876 #undef __FUNCT__ 3877 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 3878 /*@ 3879 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3880 3881 Input Parameters: 3882 . mat - the matrix 3883 . 
fact - factor

   Not Collective, each process can use a different factor

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table <fact>

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@*/
PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
{
  Mat_MPIBAIJ *baij;

  PetscFunctionBegin;
  baij          = (Mat_MPIBAIJ*)mat->data;
  baij->ht_fact = fact;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran (to eliminate two function call overheads)
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
#endif

#undef __FUNCT__
#define __FUNCT__ "matmpibaijsetvaluesblocked"
/*@C
  MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace a call to MatSetValuesBlocked()

  Collective on Mat

  Input Parameters:
+ mat - the matrix
. min - number of input rows
. im - input rows
. nin - number of input columns
. in - input columns
. v - numerical values input
- addvin - INSERT_VALUES or ADD_VALUES

  Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
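  From C (and normally from Fortran as well) the equivalent call is simply the following
  sketch, where m, n, and addv denote the values pointed to by min, nin, and addvin:

.vb
  ierr = MatSetValuesBlocked(mat,m,im,n,in,v,addv);CHKERRQ(ierr);
.ve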
3957 3958 Level: advanced 3959 3960 .seealso: MatSetValuesBlocked() 3961 @*/ 3962 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3963 { 3964 /* convert input arguments to C version */ 3965 Mat mat = *matin; 3966 PetscInt m = *min, n = *nin; 3967 InsertMode addv = *addvin; 3968 3969 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3970 const MatScalar *value; 3971 MatScalar *barray=baij->barray; 3972 PetscBool roworiented = baij->roworiented; 3973 PetscErrorCode ierr; 3974 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3975 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3976 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3977 3978 PetscFunctionBegin; 3979 /* tasks normally handled by MatSetValuesBlocked() */ 3980 if (mat->insertmode == NOT_SET_VALUES) { 3981 mat->insertmode = addv; 3982 } 3983 #if defined(PETSC_USE_DEBUG) 3984 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3985 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3986 #endif 3987 if (mat->assembled) { 3988 mat->was_assembled = PETSC_TRUE; 3989 mat->assembled = PETSC_FALSE; 3990 } 3991 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3992 3993 3994 if(!barray) { 3995 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 3996 baij->barray = barray; 3997 } 3998 3999 if (roworiented) { 4000 stepval = (n-1)*bs; 4001 } else { 4002 stepval = (m-1)*bs; 4003 } 4004 for (i=0; i<m; i++) { 4005 if (im[i] < 0) continue; 4006 #if defined(PETSC_USE_DEBUG) 4007 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 4008 #endif 4009 if (im[i] >= rstart && im[i] < rend) { 4010 row = im[i] - rstart; 4011 for (j=0; j<n; j++) { 4012 /* If NumCol = 1 then a copy is not required */ 4013 if ((roworiented) && (n == 1)) { 4014 barray = (MatScalar*)v + i*bs2; 4015 } else if((!roworiented) && (m == 1)) { 4016 barray = (MatScalar*)v + j*bs2; 4017 } else { /* Here a copy is required */ 4018 if (roworiented) { 4019 value = v + i*(stepval+bs)*bs + j*bs; 4020 } else { 4021 value = v + j*(stepval+bs)*bs + i*bs; 4022 } 4023 for (ii=0; ii<bs; ii++,value+=stepval) { 4024 for (jj=0; jj<bs; jj++) { 4025 *barray++ = *value++; 4026 } 4027 } 4028 barray -=bs2; 4029 } 4030 4031 if (in[j] >= cstart && in[j] < cend){ 4032 col = in[j] - cstart; 4033 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 4034 } 4035 else if (in[j] < 0) continue; 4036 #if defined(PETSC_USE_DEBUG) 4037 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 4038 #endif 4039 else { 4040 if (mat->was_assembled) { 4041 if (!baij->colmap) { 4042 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 4043 } 4044 4045 #if defined(PETSC_USE_DEBUG) 4046 #if defined (PETSC_USE_CTABLE) 4047 { PetscInt data; 4048 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 4049 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4050 } 4051 #else 4052 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4053 #endif 4054 #endif 4055 #if defined (PETSC_USE_CTABLE) 4056 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 4057 col = (col - 1)/bs; 4058 #else 
          col = (baij->colmap[in[j]] - 1)/bs;
#endif
          if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
            ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
            col  = in[j];
          }
        }
        else col = in[j];
        ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
      }
    }
  } else {
    if (!baij->donotstash) {
      if (roworiented) {
        ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
      }
    }
  }
  }

  /* task normally handled by MatSetValuesBlocked() */
  ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIBAIJWithArrays"
/*@
     MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain in standard
         CSR format the local rows.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

     The i and j indices are 0 based, and the i array holds the offsets into the local j (and a)
     arrays at which each local row starts.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  }
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
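/*
   Usage sketch for MatCreateMPIBAIJWithArrays() -- the CSR data below is illustrative only
   (two local rows with two sorted column indices each and block size 1, the only supported
   block size here); comm stands for the caller's communicator and ierr for a PetscErrorCode:

     PetscInt    i[] = {0,2,4};
     PetscInt    j[] = {0,1,0,1};
     PetscScalar a[] = {2.0,-1.0,-1.0,2.0};
     Mat         A;

     ierr = MatCreateMPIBAIJWithArrays(comm,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);CHKERRQ(ierr);
*/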