#include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
#include <petscblaslapack.h>

extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat);
extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++) {
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
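/*
   Illustrative sketch (not part of the library): how the colmap built above is
   typically consulted, mirroring the lookups in MatSetValues_MPIBAIJ() further
   below.  Given a hypothetical global column index gcol and block size bs, the
   owning block column is gcol/bs and the local point column in the off-diagonal
   part B is recovered roughly as

     #if defined (PETSC_USE_CTABLE)
       ierr = PetscTableFind(baij->colmap,gcol/bs + 1,&lcol);CHKERRQ(ierr);
       lcol--;                              (0 from the table means "not present")
     #else
       lcol = baij->colmap[gcol/bs] - 1;    (-1 means "not present")
     #endif
       if (lcol >= 0) lcol += gcol % bs;
*/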

#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
a_noinsert:; \
    ailen[brow] = nrow; \
}

#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    CHKMEMQ;\
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
b_noinsert:; \
    bilen[brow] = nrow; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat         A     = baij->A;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   =b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
            else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
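/*
   Caller-side sketch (illustrative only, not part of this file): how the routine
   above is normally reached through the public API.  All lowercase sizes (m,n,M,N),
   comm, bs, d_nz and o_nz are placeholders.  Entries whose rows live on another
   process are stashed here and communicated during assembly.

     Mat         C;
     PetscInt    i = 0, j = 0;      (hypothetical global indices)
     PetscScalar val = 1.0;

     ierr = MatCreate(comm,&C);CHKERRQ(ierr);
     ierr = MatSetSizes(C,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(C,MATMPIBAIJ);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetPreallocation(C,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatSetValues(C,1,&i,1,&j,&val,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/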

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray=baij->barray;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) {
              barray[jj] = value[jj];
            }
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]);
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
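/*
   Worked example of the multiplicative hash above (illustrative numbers only):
   with size = 11 and key = 7,
       tmp  = 7*0.6180339887          = 4.3262379209
       frac = tmp - (PetscInt)tmp     = 0.3262379209
       slot = (PetscInt)(11*frac)     = 3
   Collisions are resolved by the linear probing loops in the routines below.
*/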
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        /* Look up into the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode
MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 467 { 468 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 469 PetscBool roworiented = baij->roworiented; 470 PetscErrorCode ierr; 471 PetscInt i,j,ii,jj,row,col; 472 PetscInt rstart=baij->rstartbs; 473 PetscInt rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 474 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 475 PetscReal tmp; 476 MatScalar **HD = baij->hd,*baij_a; 477 const PetscScalar *v_t,*value; 478 #if defined(PETSC_USE_DEBUG) 479 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 480 #endif 481 482 PetscFunctionBegin; 483 484 if (roworiented) { 485 stepval = (n-1)*bs; 486 } else { 487 stepval = (m-1)*bs; 488 } 489 for (i=0; i<m; i++) { 490 #if defined(PETSC_USE_DEBUG) 491 if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 492 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 493 #endif 494 row = im[i]; 495 v_t = v + i*nbs2; 496 if (row >= rstart && row < rend) { 497 for (j=0; j<n; j++) { 498 col = in[j]; 499 500 /* Look up into the Hash Table */ 501 key = row*Nbs+col+1; 502 h1 = HASH(size,key,tmp); 503 504 idx = h1; 505 #if defined(PETSC_USE_DEBUG) 506 total_ct++; 507 insert_ct++; 508 if (HT[idx] != key) { 509 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 510 if (idx == size) { 511 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 512 if (idx == h1) { 513 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 514 } 515 } 516 } 517 #else 518 if (HT[idx] != key) { 519 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 520 if (idx == size) { 521 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 522 if (idx == h1) { 523 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 524 } 525 } 526 } 527 #endif 528 baij_a = HD[idx]; 529 if (roworiented) { 530 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 531 /* value = v + (i*(stepval+bs)+j)*bs; */ 532 value = v_t; 533 v_t += bs; 534 if (addv == ADD_VALUES) { 535 for (ii=0; ii<bs; ii++,value+=stepval) { 536 for (jj=ii; jj<bs2; jj+=bs) { 537 baij_a[jj] += *value++; 538 } 539 } 540 } else { 541 for (ii=0; ii<bs; ii++,value+=stepval) { 542 for (jj=ii; jj<bs2; jj+=bs) { 543 baij_a[jj] = *value++; 544 } 545 } 546 } 547 } else { 548 value = v + j*(stepval+bs)*bs + i*bs; 549 if (addv == ADD_VALUES) { 550 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 551 for (jj=0; jj<bs; jj++) { 552 baij_a[jj] += *value++; 553 } 554 } 555 } else { 556 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 557 for (jj=0; jj<bs; jj++) { 558 baij_a[jj] = *value++; 559 } 560 } 561 } 562 } 563 } 564 } else { 565 if (!baij->donotstash) { 566 if (roworiented) { 567 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 568 } else { 569 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 570 } 571 } 572 } 573 } 574 #if defined(PETSC_USE_DEBUG) 575 baij->ht_total_ct = total_ct; 576 baij->ht_insert_ct = insert_ct; 577 #endif 578 PetscFunctionReturn(0); 579 } 580 581 #undef __FUNCT__ 582 #define __FUNCT__ "MatGetValues_MPIBAIJ" 583 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 584 { 585 Mat_MPIBAIJ *baij = 
(Mat_MPIBAIJ*)mat->data; 586 PetscErrorCode ierr; 587 PetscInt bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 588 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 589 590 PetscFunctionBegin; 591 for (i=0; i<m; i++) { 592 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 593 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 594 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 595 row = idxm[i] - bsrstart; 596 for (j=0; j<n; j++) { 597 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 598 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 599 if (idxn[j] >= bscstart && idxn[j] < bscend) { 600 col = idxn[j] - bscstart; 601 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 602 } else { 603 if (!baij->colmap) { 604 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 605 } 606 #if defined (PETSC_USE_CTABLE) 607 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 608 data --; 609 #else 610 data = baij->colmap[idxn[j]/bs]-1; 611 #endif 612 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 613 else { 614 col = data + idxn[j]%bs; 615 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 616 } 617 } 618 } 619 } else { 620 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 621 } 622 } 623 PetscFunctionReturn(0); 624 } 625 626 #undef __FUNCT__ 627 #define __FUNCT__ "MatNorm_MPIBAIJ" 628 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 629 { 630 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 631 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 632 PetscErrorCode ierr; 633 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 634 PetscReal sum = 0.0; 635 MatScalar *v; 636 637 PetscFunctionBegin; 638 if (baij->size == 1) { 639 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 640 } else { 641 if (type == NORM_FROBENIUS) { 642 v = amat->a; 643 nz = amat->nz*bs2; 644 for (i=0; i<nz; i++) { 645 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 646 } 647 v = bmat->a; 648 nz = bmat->nz*bs2; 649 for (i=0; i<nz; i++) { 650 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 651 } 652 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 653 *nrm = PetscSqrtReal(*nrm); 654 } else if (type == NORM_1) { /* max column sum */ 655 PetscReal *tmp,*tmp2; 656 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 657 ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr); 658 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 659 v = amat->a; jj = amat->j; 660 for (i=0; i<amat->nz; i++) { 661 for (j=0; j<bs; j++) { 662 col = bs*(cstart + *jj) + j; /* column index */ 663 for (row=0; row<bs; row++) { 664 tmp[col] += PetscAbsScalar(*v); v++; 665 } 666 } 667 jj++; 668 } 669 v = bmat->a; jj = bmat->j; 670 for (i=0; i<bmat->nz; i++) { 671 for (j=0; j<bs; j++) { 672 col = bs*garray[*jj] + j; 673 for (row=0; row<bs; row++) { 674 tmp[col] += PetscAbsScalar(*v); v++; 675 } 676 } 677 jj++; 678 } 679 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 680 *nrm = 0.0; 681 
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
      sum  = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

/*
  Creates the hash table, and populates it.
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt       ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
  ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }

  /* Print
Summary */ 807 #if defined(PETSC_USE_INFO) 808 for (i=0,j=0; i<ht_size; i++) { 809 if (HT[i]) {j++;} 810 } 811 ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 812 #endif 813 PetscFunctionReturn(0); 814 } 815 816 #undef __FUNCT__ 817 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 818 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 819 { 820 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 821 PetscErrorCode ierr; 822 PetscInt nstash,reallocs; 823 InsertMode addv; 824 825 PetscFunctionBegin; 826 if (baij->donotstash || mat->nooffprocentries) { 827 PetscFunctionReturn(0); 828 } 829 830 /* make sure all processors are either in INSERTMODE or ADDMODE */ 831 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 832 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 833 mat->insertmode = addv; /* in case this processor had no cache */ 834 835 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 836 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 837 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 838 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 839 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 840 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 841 PetscFunctionReturn(0); 842 } 843 844 #undef __FUNCT__ 845 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 846 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 847 { 848 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 849 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data; 850 PetscErrorCode ierr; 851 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 852 PetscInt *row,*col; 853 PetscBool r1,r2,r3,other_disassembled; 854 MatScalar *val; 855 InsertMode addv = mat->insertmode; 856 PetscMPIInt n; 857 858 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 859 PetscFunctionBegin; 860 if (!baij->donotstash && !mat->nooffprocentries) { 861 while (1) { 862 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 863 if (!flg) break; 864 865 for (i=0; i<n;) { 866 /* Now identify the consecutive vals belonging to the same row */ 867 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 868 if (j < n) ncols = j-i; 869 else ncols = n-i; 870 /* Now assemble all these values with a single function call */ 871 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 872 i = j; 873 } 874 } 875 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 876 /* Now process the block-stash. 
Since the values are stashed column-oriented,
       set the roworiented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format ==
PETSC_VIEWER_ASCII_INFO_DETAIL) { 963 MatInfo info; 964 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 965 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 966 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 967 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 968 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 969 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 970 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 971 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 972 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 973 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 974 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 975 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 976 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 977 PetscFunctionReturn(0); 978 } else if (format == PETSC_VIEWER_ASCII_INFO) { 979 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 980 PetscFunctionReturn(0); 981 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 982 PetscFunctionReturn(0); 983 } 984 } 985 986 if (isdraw) { 987 PetscDraw draw; 988 PetscBool isnull; 989 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 990 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 991 } 992 993 if (size == 1) { 994 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 995 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 996 } else { 997 /* assemble the entire matrix onto first processor. */ 998 Mat A; 999 Mat_SeqBAIJ *Aloc; 1000 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1001 MatScalar *a; 1002 1003 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1004 /* Perhaps this should be the type of mat? 
*/ 1005 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1006 if (!rank) { 1007 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1008 } else { 1009 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1010 } 1011 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1012 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1013 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1014 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1015 1016 /* copy over the A part */ 1017 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1018 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1019 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1020 1021 for (i=0; i<mbs; i++) { 1022 rvals[0] = bs*(baij->rstartbs + i); 1023 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1024 for (j=ai[i]; j<ai[i+1]; j++) { 1025 col = (baij->cstartbs+aj[j])*bs; 1026 for (k=0; k<bs; k++) { 1027 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1028 col++; a += bs; 1029 } 1030 } 1031 } 1032 /* copy over the B part */ 1033 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1034 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1035 for (i=0; i<mbs; i++) { 1036 rvals[0] = bs*(baij->rstartbs + i); 1037 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1038 for (j=ai[i]; j<ai[i+1]; j++) { 1039 col = baij->garray[aj[j]]*bs; 1040 for (k=0; k<bs; k++) { 1041 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1042 col++; a += bs; 1043 } 1044 } 1045 } 1046 ierr = PetscFree(rvals);CHKERRQ(ierr); 1047 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1048 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1049 /* 1050 Everyone has to call to draw the matrix since the graphics waits are 1051 synchronized across all processors that share the PetscDraw object 1052 */ 1053 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1054 if (!rank) { 1055 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1056 /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/ 1057 PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ); 1058 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1059 } 1060 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1061 ierr = MatDestroy(&A);CHKERRQ(ierr); 1062 } 1063 PetscFunctionReturn(0); 1064 } 1065 1066 #undef __FUNCT__ 1067 #define __FUNCT__ "MatView_MPIBAIJ_Binary" 1068 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 1069 { 1070 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)mat->data; 1071 Mat_SeqBAIJ* A = (Mat_SeqBAIJ*)a->A->data; 1072 Mat_SeqBAIJ* B = (Mat_SeqBAIJ*)a->B->data; 1073 PetscErrorCode ierr; 1074 PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen; 1075 PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll; 1076 int fd; 1077 PetscScalar *column_values; 1078 FILE *file; 1079 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1080 PetscInt message_count,flowcontrolcount; 1081 1082 PetscFunctionBegin; 1083 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1084 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1085 nz = bs2*(A->nz + B->nz); 1086 rlen = mat->rmap->n; 1087 if (!rank) { 1088 header[0] = MAT_FILE_CLASSID; 1089 header[1] = mat->rmap->N; 
1090 header[2] = mat->cmap->N; 1091 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1092 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1093 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1094 /* get largest number of rows any processor has */ 1095 range = mat->rmap->range; 1096 for (i=1; i<size; i++) { 1097 rlen = PetscMax(rlen,range[i+1] - range[i]); 1098 } 1099 } else { 1100 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1101 } 1102 1103 ierr = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr); 1104 /* compute lengths of each row */ 1105 for (i=0; i<a->mbs; i++) { 1106 crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1107 } 1108 /* store the row lengths to the file */ 1109 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1110 if (!rank) { 1111 MPI_Status status; 1112 ierr = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr); 1113 rlen = (range[1] - range[0])/bs; 1114 for (i=0; i<rlen; i++) { 1115 for (j=0; j<bs; j++) { 1116 row_lens[i*bs+j] = bs*crow_lens[i]; 1117 } 1118 } 1119 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1120 for (i=1; i<size; i++) { 1121 rlen = (range[i+1] - range[i])/bs; 1122 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1123 ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1124 for (k=0; k<rlen; k++) { 1125 for (j=0; j<bs; j++) { 1126 row_lens[k*bs+j] = bs*crow_lens[k]; 1127 } 1128 } 1129 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1130 } 1131 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1132 ierr = PetscFree(row_lens);CHKERRQ(ierr); 1133 } else { 1134 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1135 ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1136 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1137 } 1138 ierr = PetscFree(crow_lens);CHKERRQ(ierr); 1139 1140 /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the 1141 information needed to make it for each row from a block row. 
This does require more communication but still not more than 1142 the communication needed for the nonzero values */ 1143 nzmax = nz; /* space a largest processor needs */ 1144 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1145 ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1146 cnt = 0; 1147 for (i=0; i<a->mbs; i++) { 1148 pcnt = cnt; 1149 for (j=B->i[i]; j<B->i[i+1]; j++) { 1150 if ( (col = garray[B->j[j]]) > cstart) break; 1151 for (l=0; l<bs; l++) { 1152 column_indices[cnt++] = bs*col+l; 1153 } 1154 } 1155 for (k=A->i[i]; k<A->i[i+1]; k++) { 1156 for (l=0; l<bs; l++) { 1157 column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 1158 } 1159 } 1160 for (; j<B->i[i+1]; j++) { 1161 for (l=0; l<bs; l++) { 1162 column_indices[cnt++] = bs*garray[B->j[j]]+l; 1163 } 1164 } 1165 len = cnt - pcnt; 1166 for (k=1; k<bs; k++) { 1167 ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr); 1168 cnt += len; 1169 } 1170 } 1171 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1172 1173 /* store the columns to the file */ 1174 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1175 if (!rank) { 1176 MPI_Status status; 1177 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1178 for (i=1; i<size; i++) { 1179 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1180 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1181 ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1182 ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1183 } 1184 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1185 } else { 1186 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1187 ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1188 ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1189 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1190 } 1191 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1192 1193 /* load up the numerical values */ 1194 ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1195 cnt = 0; 1196 for (i=0; i<a->mbs; i++) { 1197 rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1198 for (j=B->i[i]; j<B->i[i+1]; j++) { 1199 if ( garray[B->j[j]] > cstart) break; 1200 for (l=0; l<bs; l++) { 1201 for (ll=0; ll<bs; ll++) { 1202 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1203 } 1204 } 1205 cnt += bs; 1206 } 1207 for (k=A->i[i]; k<A->i[i+1]; k++) { 1208 for (l=0; l<bs; l++) { 1209 for (ll=0; ll<bs; ll++) { 1210 column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1211 } 1212 } 1213 cnt += bs; 1214 } 1215 for (; j<B->i[i+1]; j++) { 1216 for (l=0; l<bs; l++) { 1217 for (ll=0; ll<bs; ll++) { 1218 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1219 } 1220 } 1221 cnt += bs; 1222 } 1223 cnt += (bs-1)*rlen; 1224 } 1225 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1226 1227 /* store the column values to the file */ 1228 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1229 if (!rank) { 1230 MPI_Status status; 1231 ierr = 
PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1232 for (i=1; i<size; i++) { 1233 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1234 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1235 ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1236 ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1237 } 1238 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1239 } else { 1240 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1241 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1242 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1243 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1244 } 1245 ierr = PetscFree(column_values);CHKERRQ(ierr); 1246 1247 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1248 if (file) { 1249 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1250 } 1251 PetscFunctionReturn(0); 1252 } 1253 1254 #undef __FUNCT__ 1255 #define __FUNCT__ "MatView_MPIBAIJ" 1256 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1257 { 1258 PetscErrorCode ierr; 1259 PetscBool iascii,isdraw,issocket,isbinary; 1260 1261 PetscFunctionBegin; 1262 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1263 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1264 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1265 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1266 if (iascii || isdraw || issocket) { 1267 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1268 } else if (isbinary) { 1269 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1270 } else { 1271 SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1272 } 1273 PetscFunctionReturn(0); 1274 } 1275 1276 #undef __FUNCT__ 1277 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1278 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1279 { 1280 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1281 PetscErrorCode ierr; 1282 1283 PetscFunctionBegin; 1284 #if defined(PETSC_USE_LOG) 1285 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1286 #endif 1287 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1288 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1289 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 1290 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1291 #if defined (PETSC_USE_CTABLE) 1292 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 1293 #else 1294 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1295 #endif 1296 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1297 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 1298 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1299 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1300 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1301 ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr); 1302 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1303 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1304 1305 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1306 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr =
(*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1376 /* send it on its way */ 1377 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1378 /* values actually were received in the Begin() but we need to call this nop */ 1379 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1380 } 1381 PetscFunctionReturn(0); 1382 } 1383 1384 #undef __FUNCT__ 1385 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1386 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1387 { 1388 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1389 PetscErrorCode ierr; 1390 1391 PetscFunctionBegin; 1392 /* do nondiagonal part */ 1393 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1394 /* send it on its way */ 1395 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1396 /* do local part */ 1397 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1398 /* receive remote parts: note this assumes the values are not actually */ 1399 /* inserted in yy until the next line, which is true for my implementation*/ 1400 /* but is not perhaps always true. */ 1401 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1402 PetscFunctionReturn(0); 1403 } 1404 1405 /* 1406 This only works correctly for square matrices where the subblock A->A is the 1407 diagonal block 1408 */ 1409 #undef __FUNCT__ 1410 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1411 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1412 { 1413 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1414 PetscErrorCode ierr; 1415 1416 PetscFunctionBegin; 1417 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1418 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1419 PetscFunctionReturn(0); 1420 } 1421 1422 #undef __FUNCT__ 1423 #define __FUNCT__ "MatScale_MPIBAIJ" 1424 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1425 { 1426 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1427 PetscErrorCode ierr; 1428 1429 PetscFunctionBegin; 1430 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1431 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1432 PetscFunctionReturn(0); 1433 } 1434 1435 #undef __FUNCT__ 1436 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1437 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1438 { 1439 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1440 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1441 PetscErrorCode ierr; 1442 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1443 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1444 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1445 1446 PetscFunctionBegin; 1447 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1448 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1449 mat->getrowactive = PETSC_TRUE; 1450 1451 if (!mat->rowvalues && (idx || v)) { 1452 /* 1453 allocate enough space to hold information from the longest row. 
1454 */ 1455 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1456 PetscInt max = 1,mbs = mat->mbs,tmp; 1457 for (i=0; i<mbs; i++) { 1458 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1459 if (max < tmp) { max = tmp; } 1460 } 1461 ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1462 } 1463 lrow = row - brstart; 1464 1465 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1466 if (!v) {pvA = 0; pvB = 0;} 1467 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1468 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1469 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1470 nztot = nzA + nzB; 1471 1472 cmap = mat->garray; 1473 if (v || idx) { 1474 if (nztot) { 1475 /* Sort by increasing column numbers, assuming A and B already sorted */ 1476 PetscInt imark = -1; 1477 if (v) { 1478 *v = v_p = mat->rowvalues; 1479 for (i=0; i<nzB; i++) { 1480 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1481 else break; 1482 } 1483 imark = i; 1484 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1485 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1486 } 1487 if (idx) { 1488 *idx = idx_p = mat->rowindices; 1489 if (imark > -1) { 1490 for (i=0; i<imark; i++) { 1491 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1492 } 1493 } else { 1494 for (i=0; i<nzB; i++) { 1495 if (cmap[cworkB[i]/bs] < cstart) 1496 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1497 else break; 1498 } 1499 imark = i; 1500 } 1501 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1502 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1503 } 1504 } else { 1505 if (idx) *idx = 0; 1506 if (v) *v = 0; 1507 } 1508 } 1509 *nz = nztot; 1510 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1511 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1512 PetscFunctionReturn(0); 1513 } 1514 1515 #undef __FUNCT__ 1516 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1517 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1518 { 1519 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1520 1521 PetscFunctionBegin; 1522 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1523 baij->getrowactive = PETSC_FALSE; 1524 PetscFunctionReturn(0); 1525 } 1526 1527 #undef __FUNCT__ 1528 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1529 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1530 { 1531 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1532 PetscErrorCode ierr; 1533 1534 PetscFunctionBegin; 1535 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1536 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1537 PetscFunctionReturn(0); 1538 } 1539 1540 #undef __FUNCT__ 1541 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1542 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1543 { 1544 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1545 Mat A = a->A,B = a->B; 1546 PetscErrorCode ierr; 1547 PetscReal isend[5],irecv[5]; 1548 1549 PetscFunctionBegin; 1550 info->block_size = (PetscReal)matin->rmap->bs; 1551 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1552 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1553 isend[3] = info->memory; isend[4] = info->mallocs; 1554 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1555 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1556 isend[3] += 
info->memory; isend[4] += info->mallocs; 1557 if (flag == MAT_LOCAL) { 1558 info->nz_used = isend[0]; 1559 info->nz_allocated = isend[1]; 1560 info->nz_unneeded = isend[2]; 1561 info->memory = isend[3]; 1562 info->mallocs = isend[4]; 1563 } else if (flag == MAT_GLOBAL_MAX) { 1564 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1565 info->nz_used = irecv[0]; 1566 info->nz_allocated = irecv[1]; 1567 info->nz_unneeded = irecv[2]; 1568 info->memory = irecv[3]; 1569 info->mallocs = irecv[4]; 1570 } else if (flag == MAT_GLOBAL_SUM) { 1571 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1572 info->nz_used = irecv[0]; 1573 info->nz_allocated = irecv[1]; 1574 info->nz_unneeded = irecv[2]; 1575 info->memory = irecv[3]; 1576 info->mallocs = irecv[4]; 1577 } else { 1578 SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1579 } 1580 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1581 info->fill_ratio_needed = 0; 1582 info->factor_mallocs = 0; 1583 PetscFunctionReturn(0); 1584 } 1585 1586 #undef __FUNCT__ 1587 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1588 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1589 { 1590 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1591 PetscErrorCode ierr; 1592 1593 PetscFunctionBegin; 1594 switch (op) { 1595 case MAT_NEW_NONZERO_LOCATIONS: 1596 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1597 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1598 case MAT_KEEP_NONZERO_PATTERN: 1599 case MAT_NEW_NONZERO_LOCATION_ERR: 1600 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1601 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1602 break; 1603 case MAT_ROW_ORIENTED: 1604 a->roworiented = flg; 1605 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1606 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1607 break; 1608 case MAT_NEW_DIAGONALS: 1609 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1610 break; 1611 case MAT_IGNORE_OFF_PROC_ENTRIES: 1612 a->donotstash = flg; 1613 break; 1614 case MAT_USE_HASH_TABLE: 1615 a->ht_flag = flg; 1616 break; 1617 case MAT_SYMMETRIC: 1618 case MAT_STRUCTURALLY_SYMMETRIC: 1619 case MAT_HERMITIAN: 1620 case MAT_SYMMETRY_ETERNAL: 1621 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1622 break; 1623 default: 1624 SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op); 1625 } 1626 PetscFunctionReturn(0); 1627 } 1628 1629 #undef __FUNCT__ 1630 #define __FUNCT__ "MatTranspose_MPIBAIJ" 1631 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1632 { 1633 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1634 Mat_SeqBAIJ *Aloc; 1635 Mat B; 1636 PetscErrorCode ierr; 1637 PetscInt M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1638 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1639 MatScalar *a; 1640 1641 PetscFunctionBegin; 1642 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1643 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1644 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1645 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1646 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1647 /* Do not know preallocation information, but must set block size */ 1648 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,PETSC_NULL,PETSC_DECIDE,PETSC_NULL);CHKERRQ(ierr); 1649 } else { 1650 B = *matout; 1651 } 1652 1653 
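  /* The transpose is assembled by re-inserting every bs x bs block with its row and column roles exchanged:
     rvals[] holds the bs global point rows of the current source block row, and below they are passed to
     MatSetValues_MPIBAIJ() as the column indices of B, while col (the global column in A) is the target row. */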
/* copy over the A part */ 1654 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1655 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1656 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1657 1658 for (i=0; i<mbs; i++) { 1659 rvals[0] = bs*(baij->rstartbs + i); 1660 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1661 for (j=ai[i]; j<ai[i+1]; j++) { 1662 col = (baij->cstartbs+aj[j])*bs; 1663 for (k=0; k<bs; k++) { 1664 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1665 col++; a += bs; 1666 } 1667 } 1668 } 1669 /* copy over the B part */ 1670 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1671 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1672 for (i=0; i<mbs; i++) { 1673 rvals[0] = bs*(baij->rstartbs + i); 1674 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1675 for (j=ai[i]; j<ai[i+1]; j++) { 1676 col = baij->garray[aj[j]]*bs; 1677 for (k=0; k<bs; k++) { 1678 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1679 col++; a += bs; 1680 } 1681 } 1682 } 1683 ierr = PetscFree(rvals);CHKERRQ(ierr); 1684 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1685 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1686 1687 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1688 *matout = B; 1689 } else { 1690 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1691 } 1692 PetscFunctionReturn(0); 1693 } 1694 1695 #undef __FUNCT__ 1696 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1697 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1698 { 1699 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1700 Mat a = baij->A,b = baij->B; 1701 PetscErrorCode ierr; 1702 PetscInt s1,s2,s3; 1703 1704 PetscFunctionBegin; 1705 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1706 if (rr) { 1707 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1708 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1709 /* Overlap communication with computation. 
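       The forward scatter of rr into lvec is only started here; it is completed further below, after the
       diagonal block has been scaled, and only then is the off-diagonal block right-scaled with lvec.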
*/ 1710 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1711 } 1712 if (ll) { 1713 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1714 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1715 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1716 } 1717 /* scale the diagonal block */ 1718 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1719 1720 if (rr) { 1721 /* Do a scatter end and then right scale the off-diagonal block */ 1722 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1723 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1724 } 1725 1726 PetscFunctionReturn(0); 1727 } 1728 1729 #undef __FUNCT__ 1730 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1731 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1732 { 1733 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1734 PetscErrorCode ierr; 1735 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1736 PetscInt i,*owners = A->rmap->range; 1737 PetscInt *nprocs,j,idx,nsends,row; 1738 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1739 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1; 1740 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap->rstart; 1741 MPI_Comm comm = ((PetscObject)A)->comm; 1742 MPI_Request *send_waits,*recv_waits; 1743 MPI_Status recv_status,*send_status; 1744 const PetscScalar *xx; 1745 PetscScalar *bb; 1746 #if defined(PETSC_DEBUG) 1747 PetscBool found = PETSC_FALSE; 1748 #endif 1749 1750 PetscFunctionBegin; 1751 /* first count number of contributors to each processor */ 1752 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1753 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1754 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1755 j = 0; 1756 for (i=0; i<N; i++) { 1757 if (lastidx > (idx = rows[i])) j = 0; 1758 lastidx = idx; 1759 for (; j<size; j++) { 1760 if (idx >= owners[j] && idx < owners[j+1]) { 1761 nprocs[2*j]++; 1762 nprocs[2*j+1] = 1; 1763 owner[i] = j; 1764 #if defined(PETSC_DEBUG) 1765 found = PETSC_TRUE; 1766 #endif 1767 break; 1768 } 1769 } 1770 #if defined(PETSC_DEBUG) 1771 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1772 found = PETSC_FALSE; 1773 #endif 1774 } 1775 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1776 1777 if (A->nooffproczerorows) { 1778 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 1779 nrecvs = nsends; 1780 nmax = N; 1781 } else { 1782 /* inform other processors of number of messages and max length*/ 1783 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1784 } 1785 1786 /* post receives: */ 1787 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1788 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1789 for (i=0; i<nrecvs; i++) { 1790 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1791 } 1792 1793 /* do sends: 1794 1) starts[i] gives the starting index in svalues for stuff going to 1795 the ith processor 1796 */ 1797 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1798 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1799 ierr = 
PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1800 starts[0] = 0; 1801 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1802 for (i=0; i<N; i++) { 1803 svalues[starts[owner[i]]++] = rows[i]; 1804 } 1805 1806 starts[0] = 0; 1807 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1808 count = 0; 1809 for (i=0; i<size; i++) { 1810 if (nprocs[2*i+1]) { 1811 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1812 } 1813 } 1814 ierr = PetscFree(starts);CHKERRQ(ierr); 1815 1816 base = owners[rank]; 1817 1818 /* wait on receives */ 1819 ierr = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr); 1820 count = nrecvs; 1821 slen = 0; 1822 while (count) { 1823 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1824 /* unpack receives into our local space */ 1825 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1826 source[imdex] = recv_status.MPI_SOURCE; 1827 lens[imdex] = n; 1828 slen += n; 1829 count--; 1830 } 1831 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1832 1833 /* move the data into the send scatter */ 1834 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1835 count = 0; 1836 for (i=0; i<nrecvs; i++) { 1837 values = rvalues + i*nmax; 1838 for (j=0; j<lens[i]; j++) { 1839 lrows[count++] = values[j] - base; 1840 } 1841 } 1842 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1843 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1844 ierr = PetscFree(owner);CHKERRQ(ierr); 1845 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1846 1847 /* fix right hand side if needed */ 1848 if (x && b) { 1849 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1850 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1851 for (i=0; i<slen; i++) { 1852 bb[lrows[i]] = diag*xx[lrows[i]]; 1853 } 1854 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1855 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1856 } 1857 1858 /* actually zap the local rows */ 1859 /* 1860 Zero the required rows. If the "diagonal block" of the matrix 1861 is square and the user wishes to set the diagonal we use separate 1862 code so that MatSetValues() is not called for each diagonal allocating 1863 new memory, thus calling lots of mallocs and slowing things down. 
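       When the diagonal block is rectangular and diag is nonzero this shortcut is not available: the rows are
       zeroed and the diagonal entries are then inserted one at a time with MatSetValues(), followed by a final
       assembly.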
1864 1865 */ 1866 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1867 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1868 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1869 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 1870 } else if (diag != 0.0) { 1871 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1872 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1873 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1874 for (i=0; i<slen; i++) { 1875 row = lrows[i] + rstart_bs; 1876 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1877 } 1878 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1879 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1880 } else { 1881 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1882 } 1883 1884 ierr = PetscFree(lrows);CHKERRQ(ierr); 1885 1886 /* wait on sends */ 1887 if (nsends) { 1888 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1889 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1890 ierr = PetscFree(send_status);CHKERRQ(ierr); 1891 } 1892 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1893 ierr = PetscFree(svalues);CHKERRQ(ierr); 1894 1895 PetscFunctionReturn(0); 1896 } 1897 1898 #undef __FUNCT__ 1899 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1900 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1901 { 1902 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1903 PetscErrorCode ierr; 1904 1905 PetscFunctionBegin; 1906 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1907 PetscFunctionReturn(0); 1908 } 1909 1910 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1911 1912 #undef __FUNCT__ 1913 #define __FUNCT__ "MatEqual_MPIBAIJ" 1914 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1915 { 1916 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1917 Mat a,b,c,d; 1918 PetscBool flg; 1919 PetscErrorCode ierr; 1920 1921 PetscFunctionBegin; 1922 a = matA->A; b = matA->B; 1923 c = matB->A; d = matB->B; 1924 1925 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1926 if (flg) { 1927 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1928 } 1929 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1930 PetscFunctionReturn(0); 1931 } 1932 1933 #undef __FUNCT__ 1934 #define __FUNCT__ "MatCopy_MPIBAIJ" 1935 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1936 { 1937 PetscErrorCode ierr; 1938 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1939 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1940 1941 PetscFunctionBegin; 1942 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 1943 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1944 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1945 } else { 1946 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1947 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1948 } 1949 PetscFunctionReturn(0); 1950 } 1951 1952 #undef __FUNCT__ 1953 #define __FUNCT__ "MatSetUp_MPIBAIJ" 1954 PetscErrorCode MatSetUp_MPIBAIJ(Mat A) 1955 { 1956 PetscErrorCode ierr; 1957 1958 PetscFunctionBegin; 1959 ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1960 PetscFunctionReturn(0); 1961 } 1962 1963 #undef __FUNCT__ 1964 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1965 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1966 { 1967 PetscErrorCode ierr; 1968 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1969 PetscBLASInt bnz,one=1; 1970 Mat_SeqBAIJ *x,*y; 1971 1972 PetscFunctionBegin; 1973 if (str == SAME_NONZERO_PATTERN) { 1974 PetscScalar alpha = a; 1975 x = (Mat_SeqBAIJ *)xx->A->data; 1976 y = (Mat_SeqBAIJ *)yy->A->data; 1977 bnz = PetscBLASIntCast(x->nz); 1978 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1979 x = (Mat_SeqBAIJ *)xx->B->data; 1980 y = (Mat_SeqBAIJ *)yy->B->data; 1981 bnz = PetscBLASIntCast(x->nz); 1982 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1983 } else { 1984 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1985 } 1986 PetscFunctionReturn(0); 1987 } 1988 1989 #undef __FUNCT__ 1990 #define __FUNCT__ "MatRealPart_MPIBAIJ" 1991 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1992 { 1993 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1994 PetscErrorCode ierr; 1995 1996 PetscFunctionBegin; 1997 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1998 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1999 PetscFunctionReturn(0); 2000 } 2001 2002 #undef __FUNCT__ 2003 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 2004 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 2005 { 2006 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2007 PetscErrorCode ierr; 2008 2009 PetscFunctionBegin; 2010 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2011 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2012 PetscFunctionReturn(0); 2013 } 2014 2015 #undef __FUNCT__ 2016 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 2017 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 2018 { 2019 PetscErrorCode ierr; 2020 IS iscol_local; 2021 PetscInt csize; 2022 2023 PetscFunctionBegin; 2024 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2025 if (call == MAT_REUSE_MATRIX) { 2026 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 2027 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2028 } else { 2029 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2030 } 2031 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 2032 if (call == MAT_INITIAL_MATRIX) { 2033 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 2034 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 2035 } 2036 PetscFunctionReturn(0); 2037 } 2038 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*); 2039 #undef __FUNCT__ 2040 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private" 2041 /* 2042 Not great since it makes two copies of the submatrix, first an SeqBAIJ 2043 in local and then by concatenating the local matrices the end result. 
2044 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ() 2045 */ 2046 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2047 { 2048 PetscErrorCode ierr; 2049 PetscMPIInt rank,size; 2050 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 2051 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow; 2052 Mat M,Mreuse; 2053 MatScalar *vwork,*aa; 2054 MPI_Comm comm = ((PetscObject)mat)->comm; 2055 IS isrow_new, iscol_new; 2056 PetscBool idflag,allrows, allcols; 2057 Mat_SeqBAIJ *aij; 2058 2059 2060 PetscFunctionBegin; 2061 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2062 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2063 /* The compression and expansion should be avoided. Doesn't point 2064 out errors, might change the indices, hence buggey */ 2065 ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr); 2066 ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr); 2067 2068 /* Check for special case: each processor gets entire matrix columns */ 2069 ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr); 2070 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 2071 if (idflag && ncol == mat->cmap->N) { 2072 allcols = PETSC_TRUE; 2073 } else { 2074 allcols = PETSC_FALSE; 2075 } 2076 2077 ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr); 2078 ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr); 2079 if (idflag && nrow == mat->rmap->N) { 2080 allrows = PETSC_TRUE; 2081 } else { 2082 allrows = PETSC_FALSE; 2083 } 2084 if (call == MAT_REUSE_MATRIX) { 2085 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2086 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2087 ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr); 2088 } else { 2089 ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr); 2090 } 2091 ierr = ISDestroy(&isrow_new);CHKERRQ(ierr); 2092 ierr = ISDestroy(&iscol_new);CHKERRQ(ierr); 2093 /* 2094 m - number of local rows 2095 n - number of columns (same on all processors) 2096 rstart - first row in new global matrix generated 2097 */ 2098 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2099 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2100 m = m/bs; 2101 n = n/bs; 2102 2103 if (call == MAT_INITIAL_MATRIX) { 2104 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2105 ii = aij->i; 2106 jj = aij->j; 2107 2108 /* 2109 Determine the number of non-zeros in the diagonal and off-diagonal 2110 portions of the matrix in order to do correct preallocation 2111 */ 2112 2113 /* first get start and end of "diagonal" columns */ 2114 if (csize == PETSC_DECIDE) { 2115 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2116 if (mglobal == n*bs) { /* square matrix */ 2117 nlocal = m; 2118 } else { 2119 nlocal = n/size + ((n % size) > rank); 2120 } 2121 } else { 2122 nlocal = csize/bs; 2123 } 2124 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2125 rstart = rend - nlocal; 2126 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2127 2128 /* next, compute all the lengths */ 2129 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2130 olens = dlens + 
m; 2131 for (i=0; i<m; i++) { 2132 jend = ii[i+1] - ii[i]; 2133 olen = 0; 2134 dlen = 0; 2135 for (j=0; j<jend; j++) { 2136 if (*jj < rstart || *jj >= rend) olen++; 2137 else dlen++; 2138 jj++; 2139 } 2140 olens[i] = olen; 2141 dlens[i] = dlen; 2142 } 2143 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2144 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 2145 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2146 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2147 ierr = PetscFree(dlens);CHKERRQ(ierr); 2148 } else { 2149 PetscInt ml,nl; 2150 2151 M = *newmat; 2152 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2153 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2154 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2155 /* 2156 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2157 rather than the slower MatSetValues(). 2158 */ 2159 M->was_assembled = PETSC_TRUE; 2160 M->assembled = PETSC_FALSE; 2161 } 2162 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2163 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2164 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2165 ii = aij->i; 2166 jj = aij->j; 2167 aa = aij->a; 2168 for (i=0; i<m; i++) { 2169 row = rstart/bs + i; 2170 nz = ii[i+1] - ii[i]; 2171 cwork = jj; jj += nz; 2172 vwork = aa; aa += nz*bs*bs; 2173 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2174 } 2175 2176 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2177 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2178 *newmat = M; 2179 2180 /* save submatrix used in processor for next request */ 2181 if (call == MAT_INITIAL_MATRIX) { 2182 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2183 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2184 } 2185 2186 PetscFunctionReturn(0); 2187 } 2188 2189 #undef __FUNCT__ 2190 #define __FUNCT__ "MatPermute_MPIBAIJ" 2191 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2192 { 2193 MPI_Comm comm,pcomm; 2194 PetscInt first,rlocal_size,clocal_size,nrows; 2195 const PetscInt *rows; 2196 PetscMPIInt size; 2197 IS crowp,growp,irowp,lrowp,lcolp; 2198 PetscErrorCode ierr; 2199 2200 PetscFunctionBegin; 2201 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2202 /* make a collective version of 'rowp' */ 2203 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2204 if (pcomm==comm) { 2205 crowp = rowp; 2206 } else { 2207 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2208 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2209 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2210 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2211 } 2212 /* collect the global row permutation and invert it */ 2213 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 2214 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 2215 if (pcomm!=comm) { 2216 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 2217 } 2218 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 2219 ierr = ISDestroy(&growp);CHKERRQ(ierr); 2220 /* get the local target indices */ 2221 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 2222 ierr = MatGetLocalSize(A,&rlocal_size,&clocal_size);CHKERRQ(ierr); 2223 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 2224 ierr = 
ISCreateGeneral(MPI_COMM_SELF,rlocal_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 2225 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 2226 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 2227 /* the column permutation is much easier; 2228 make a local version of 'colp' */ 2229 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2230 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2231 if (size==1) { 2232 lcolp = colp; 2233 } else { 2234 ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr); 2235 } 2236 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2237 /* now we just get the submatrix */ 2238 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2239 if (size>1) { 2240 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 2241 } 2242 /* clean up */ 2243 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 2244 PetscFunctionReturn(0); 2245 } 2246 2247 #undef __FUNCT__ 2248 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2249 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2250 { 2251 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2252 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2253 2254 PetscFunctionBegin; 2255 if (nghosts) { *nghosts = B->nbs;} 2256 if (ghosts) {*ghosts = baij->garray;} 2257 PetscFunctionReturn(0); 2258 } 2259 2260 extern PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat); 2261 2262 #undef __FUNCT__ 2263 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ" 2264 /* 2265 This routine is almost identical to MatFDColoringCreate_MPIAIJ()! 2266 */ 2267 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 2268 { 2269 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 2270 PetscErrorCode ierr; 2271 PetscMPIInt size,*ncolsonproc,*disp,nn; 2272 PetscInt bs,i,n,nrows,j,k,m,ncols,col; 2273 const PetscInt *is,*rows = 0,*A_ci,*A_cj,*B_ci,*B_cj; 2274 PetscInt nis = iscoloring->n,nctot,*cols; 2275 PetscInt *rowhit,M,cstart,cend,colb; 2276 PetscInt *columnsforrow,l; 2277 IS *isa; 2278 PetscBool done,flg; 2279 ISLocalToGlobalMapping map = mat->cmap->bmapping; 2280 PetscInt *ltog = (map ?
map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype; 2281 2282 PetscFunctionBegin; 2283 if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();"); 2284 if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock"); 2285 2286 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 2287 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2288 M = mat->rmap->n/bs; 2289 cstart = mat->cmap->rstart/bs; 2290 cend = mat->cmap->rend/bs; 2291 c->M = mat->rmap->N/bs; /* set the global rows and columns and local rows */ 2292 c->N = mat->cmap->N/bs; 2293 c->m = mat->rmap->n/bs; 2294 c->rstart = mat->rmap->rstart/bs; 2295 2296 c->ncolors = nis; 2297 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 2298 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 2299 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 2300 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 2301 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 2302 ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr); 2303 2304 /* Allow access to data structures of local part of matrix */ 2305 if (!baij->colmap) { 2306 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 2307 } 2308 ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2309 ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2310 2311 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 2312 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 2313 2314 for (i=0; i<nis; i++) { 2315 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 2316 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 2317 c->ncolumns[i] = n; 2318 if (n) { 2319 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 2320 ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr); 2321 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 2322 } else { 2323 c->columns[i] = 0; 2324 } 2325 2326 if (ctype == IS_COLORING_GLOBAL) { 2327 /* Determine the total (parallel) number of columns of this color */ 2328 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 2329 ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr); 2330 2331 nn = PetscMPIIntCast(n); 2332 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2333 nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];} 2334 if (!nctot) { 2335 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 2336 } 2337 2338 disp[0] = 0; 2339 for (j=1; j<size; j++) { 2340 disp[j] = disp[j-1] + ncolsonproc[j-1]; 2341 } 2342 2343 /* Get complete list of columns for color on each processor */ 2344 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2345 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2346 ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr); 2347 } else if (ctype == IS_COLORING_GHOSTED) { 2348 /* Determine local number of columns of this color on this process, including ghost points */ 2349 nctot = n; 2350 ierr = 
PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2351 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 2352 } else { 2353 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 2354 } 2355 2356 /* 2357 Mark all rows affect by these columns 2358 */ 2359 /* Temporary option to allow for debugging/testing */ 2360 flg = PETSC_FALSE; 2361 ierr = PetscOptionsGetBool(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr); 2362 if (!flg) {/*-----------------------------------------------------------------------------*/ 2363 /* crude, fast version */ 2364 ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr); 2365 /* loop over columns*/ 2366 for (j=0; j<nctot; j++) { 2367 if (ctype == IS_COLORING_GHOSTED) { 2368 col = ltog[cols[j]]; 2369 } else { 2370 col = cols[j]; 2371 } 2372 if (col >= cstart && col < cend) { 2373 /* column is in diagonal block of matrix */ 2374 rows = A_cj + A_ci[col-cstart]; 2375 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2376 } else { 2377 #if defined (PETSC_USE_CTABLE) 2378 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2379 colb --; 2380 #else 2381 colb = baij->colmap[col] - 1; 2382 #endif 2383 if (colb == -1) { 2384 m = 0; 2385 } else { 2386 colb = colb/bs; 2387 rows = B_cj + B_ci[colb]; 2388 m = B_ci[colb+1] - B_ci[colb]; 2389 } 2390 } 2391 /* loop over columns marking them in rowhit */ 2392 for (k=0; k<m; k++) { 2393 rowhit[*rows++] = col + 1; 2394 } 2395 } 2396 2397 /* count the number of hits */ 2398 nrows = 0; 2399 for (j=0; j<M; j++) { 2400 if (rowhit[j]) nrows++; 2401 } 2402 c->nrows[i] = nrows; 2403 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2404 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2405 ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2406 nrows = 0; 2407 for (j=0; j<M; j++) { 2408 if (rowhit[j]) { 2409 c->rows[i][nrows] = j; 2410 c->columnsforrow[i][nrows] = rowhit[j] - 1; 2411 nrows++; 2412 } 2413 } 2414 } else {/*-------------------------------------------------------------------------------*/ 2415 /* slow version, using rowhit as a linked list */ 2416 PetscInt currentcol,fm,mfm; 2417 rowhit[M] = M; 2418 nrows = 0; 2419 /* loop over columns*/ 2420 for (j=0; j<nctot; j++) { 2421 if (ctype == IS_COLORING_GHOSTED) { 2422 col = ltog[cols[j]]; 2423 } else { 2424 col = cols[j]; 2425 } 2426 if (col >= cstart && col < cend) { 2427 /* column is in diagonal block of matrix */ 2428 rows = A_cj + A_ci[col-cstart]; 2429 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2430 } else { 2431 #if defined (PETSC_USE_CTABLE) 2432 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2433 colb --; 2434 #else 2435 colb = baij->colmap[col] - 1; 2436 #endif 2437 if (colb == -1) { 2438 m = 0; 2439 } else { 2440 colb = colb/bs; 2441 rows = B_cj + B_ci[colb]; 2442 m = B_ci[colb+1] - B_ci[colb]; 2443 } 2444 } 2445 2446 /* loop over columns marking them in rowhit */ 2447 fm = M; /* fm points to first entry in linked list */ 2448 for (k=0; k<m; k++) { 2449 currentcol = *rows++; 2450 /* is it already in the list? 
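          walk the linked list until an entry >= currentcol is reached; mfm trails fm by one step so that a
          new entry can be spliced in after it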
*/ 2451 do { 2452 mfm = fm; 2453 fm = rowhit[fm]; 2454 } while (fm < currentcol); 2455 /* not in list so add it */ 2456 if (fm != currentcol) { 2457 nrows++; 2458 columnsforrow[currentcol] = col; 2459 /* next three lines insert new entry into linked list */ 2460 rowhit[mfm] = currentcol; 2461 rowhit[currentcol] = fm; 2462 fm = currentcol; 2463 /* fm points to present position in list since we know the columns are sorted */ 2464 } else { 2465 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected"); 2466 } 2467 } 2468 } 2469 c->nrows[i] = nrows; 2470 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2471 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2472 ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2473 /* now store the linked list of rows into c->rows[i] */ 2474 nrows = 0; 2475 fm = rowhit[M]; 2476 do { 2477 c->rows[i][nrows] = fm; 2478 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 2479 fm = rowhit[fm]; 2480 } while (fm < M); 2481 } /* ---------------------------------------------------------------------------------------*/ 2482 ierr = PetscFree(cols);CHKERRQ(ierr); 2483 } 2484 2485 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 2486 /* 2487 vscale will contain the "diagonal" on-processor scalings followed by the off-processor scalings 2488 */ 2489 if (ctype == IS_COLORING_GLOBAL) { 2490 PetscInt *garray; 2491 ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2492 for (i=0; i<baij->B->cmap->n/bs; i++) { 2493 for (j=0; j<bs; j++) { 2494 garray[i*bs+j] = bs*baij->garray[i]+j; 2495 } 2496 } 2497 ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr); 2498 ierr = PetscFree(garray);CHKERRQ(ierr); 2499 CHKMEMQ; 2500 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2501 for (k=0; k<c->ncolors; k++) { 2502 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2503 for (l=0; l<c->nrows[k]; l++) { 2504 col = c->columnsforrow[k][l]; 2505 if (col >= cstart && col < cend) { 2506 /* column is in diagonal block of matrix */ 2507 colb = col - cstart; 2508 } else { 2509 /* column is in "off-processor" part */ 2510 #if defined (PETSC_USE_CTABLE) 2511 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2512 colb --; 2513 #else 2514 colb = baij->colmap[col] - 1; 2515 #endif 2516 colb = colb/bs; 2517 colb += cend - cstart; 2518 } 2519 c->vscaleforrow[k][l] = colb; 2520 } 2521 } 2522 } else if (ctype == IS_COLORING_GHOSTED) { 2523 /* Get gtol mapping */ 2524 PetscInt N = mat->cmap->N, *gtol; 2525 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr); 2526 for (i=0; i<N; i++) gtol[i] = -1; 2527 for (i=0; i<map->n; i++) gtol[ltog[i]] = i; 2528 2529 c->vscale = 0; /* will be created in MatFDColoringApply() */ 2530 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2531 for (k=0; k<c->ncolors; k++) { 2532 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2533 for (l=0; l<c->nrows[k]; l++) { 2534 col = c->columnsforrow[k][l]; /* global column index */ 2535 c->vscaleforrow[k][l] = gtol[col]; /* local column index */ 2536 } 2537 } 2538 ierr = PetscFree(gtol);CHKERRQ(ierr); 2539 } 2540 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 2541 2542 ierr = PetscFree(rowhit);CHKERRQ(ierr); 2543 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 2544
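  /* give back the compressed column index arrays of the diagonal (A) and off-diagonal (B) parts obtained with MatGetColumnIJ() above */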
ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2545 ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2546 CHKMEMQ; 2547 PetscFunctionReturn(0); 2548 } 2549 2550 #undef __FUNCT__ 2551 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ" 2552 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat) 2553 { 2554 Mat B; 2555 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2556 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2557 Mat_SeqAIJ *b; 2558 PetscErrorCode ierr; 2559 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2560 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2561 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2562 2563 PetscFunctionBegin; 2564 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2565 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2566 2567 /* ---------------------------------------------------------------- 2568 Tell every processor the number of nonzeros per row 2569 */ 2570 ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 2571 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2572 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2573 } 2574 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2575 ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr); 2576 displs = recvcounts + size; 2577 for (i=0; i<size; i++) { 2578 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2579 displs[i] = A->rmap->range[i]/bs; 2580 } 2581 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2582 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2583 #else 2584 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2585 #endif 2586 /* --------------------------------------------------------------- 2587 Create the sequential matrix of the same type as the local block diagonal 2588 */ 2589 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2590 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2591 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2592 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2593 b = (Mat_SeqAIJ *)B->data; 2594 2595 /*-------------------------------------------------------------------- 2596 Copy my part of matrix column indices over 2597 */ 2598 sendcount = ad->nz + bd->nz; 2599 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2600 a_jsendbuf = ad->j; 2601 b_jsendbuf = bd->j; 2602 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2603 cnt = 0; 2604 for (i=0; i<n; i++) { 2605 2606 /* put in lower diagonal portion */ 2607 m = bd->i[i+1] - bd->i[i]; 2608 while (m > 0) { 2609 /* is it above diagonal (in bd (compressed) numbering) */ 2610 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2611 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2612 m--; 2613 } 2614 2615 /* put in diagonal portion */ 2616 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2617 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2618 } 2619 2620 /* put in upper diagonal portion */ 2621 while (m-- > 0) { 2622 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2623 } 2624 } 2625 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D 
actual nz %D",sendcount,cnt); 2626 2627 /*-------------------------------------------------------------------- 2628 Gather all column indices to all processors 2629 */ 2630 for (i=0; i<size; i++) { 2631 recvcounts[i] = 0; 2632 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2633 recvcounts[i] += lens[j]; 2634 } 2635 } 2636 displs[0] = 0; 2637 for (i=1; i<size; i++) { 2638 displs[i] = displs[i-1] + recvcounts[i-1]; 2639 } 2640 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2641 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2642 #else 2643 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2644 #endif 2645 /*-------------------------------------------------------------------- 2646 Assemble the matrix into useable form (note numerical values not yet set) 2647 */ 2648 /* set the b->ilen (length of each row) values */ 2649 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2650 /* set the b->i indices */ 2651 b->i[0] = 0; 2652 for (i=1; i<=A->rmap->N/bs; i++) { 2653 b->i[i] = b->i[i-1] + lens[i-1]; 2654 } 2655 ierr = PetscFree(lens);CHKERRQ(ierr); 2656 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2657 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2658 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2659 2660 if (A->symmetric) { 2661 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2662 } else if (A->hermitian) { 2663 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2664 } else if (A->structurally_symmetric) { 2665 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2666 } 2667 *newmat = B; 2668 PetscFunctionReturn(0); 2669 } 2670 2671 #undef __FUNCT__ 2672 #define __FUNCT__ "MatSOR_MPIBAIJ" 2673 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2674 { 2675 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2676 PetscErrorCode ierr; 2677 Vec bb1 = 0; 2678 2679 PetscFunctionBegin; 2680 if (flag == SOR_APPLY_UPPER) { 2681 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2682 PetscFunctionReturn(0); 2683 } 2684 2685 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2686 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2687 } 2688 2689 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2690 if (flag & SOR_ZERO_INITIAL_GUESS) { 2691 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2692 its--; 2693 } 2694 2695 while (its--) { 2696 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2697 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2698 2699 /* update rhs: bb1 = bb - B*x */ 2700 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2701 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2702 2703 /* local sweep */ 2704 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2705 } 2706 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2707 if (flag & SOR_ZERO_INITIAL_GUESS) { 2708 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2709 its--; 2710 } 2711 while (its--) { 2712 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2713 ierr = 
VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2714 2715 /* update rhs: bb1 = bb - B*x */ 2716 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2717 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2718 2719 /* local sweep */ 2720 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2721 } 2722 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2723 if (flag & SOR_ZERO_INITIAL_GUESS) { 2724 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2725 its--; 2726 } 2727 while (its--) { 2728 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2729 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2730 2731 /* update rhs: bb1 = bb - B*x */ 2732 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2733 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2734 2735 /* local sweep */ 2736 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2737 } 2738 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2739 2740 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2741 PetscFunctionReturn(0); 2742 } 2743 2744 extern PetscErrorCode MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2745 2746 #undef __FUNCT__ 2747 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ" 2748 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values) 2749 { 2750 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 2751 PetscErrorCode ierr; 2752 2753 PetscFunctionBegin; 2754 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2755 PetscFunctionReturn(0); 2756 } 2757 2758 2759 /* -------------------------------------------------------------------*/ 2760 static struct _MatOps MatOps_Values = { 2761 MatSetValues_MPIBAIJ, 2762 MatGetRow_MPIBAIJ, 2763 MatRestoreRow_MPIBAIJ, 2764 MatMult_MPIBAIJ, 2765 /* 4*/ MatMultAdd_MPIBAIJ, 2766 MatMultTranspose_MPIBAIJ, 2767 MatMultTransposeAdd_MPIBAIJ, 2768 0, 2769 0, 2770 0, 2771 /*10*/ 0, 2772 0, 2773 0, 2774 MatSOR_MPIBAIJ, 2775 MatTranspose_MPIBAIJ, 2776 /*15*/ MatGetInfo_MPIBAIJ, 2777 MatEqual_MPIBAIJ, 2778 MatGetDiagonal_MPIBAIJ, 2779 MatDiagonalScale_MPIBAIJ, 2780 MatNorm_MPIBAIJ, 2781 /*20*/ MatAssemblyBegin_MPIBAIJ, 2782 MatAssemblyEnd_MPIBAIJ, 2783 MatSetOption_MPIBAIJ, 2784 MatZeroEntries_MPIBAIJ, 2785 /*24*/ MatZeroRows_MPIBAIJ, 2786 0, 2787 0, 2788 0, 2789 0, 2790 /*29*/ MatSetUp_MPIBAIJ, 2791 0, 2792 0, 2793 0, 2794 0, 2795 /*34*/ MatDuplicate_MPIBAIJ, 2796 0, 2797 0, 2798 0, 2799 0, 2800 /*39*/ MatAXPY_MPIBAIJ, 2801 MatGetSubMatrices_MPIBAIJ, 2802 MatIncreaseOverlap_MPIBAIJ, 2803 MatGetValues_MPIBAIJ, 2804 MatCopy_MPIBAIJ, 2805 /*44*/ 0, 2806 MatScale_MPIBAIJ, 2807 0, 2808 0, 2809 0, 2810 /*49*/ 0, 2811 0, 2812 0, 2813 0, 2814 0, 2815 /*54*/ MatFDColoringCreate_MPIBAIJ, 2816 0, 2817 MatSetUnfactored_MPIBAIJ, 2818 MatPermute_MPIBAIJ, 2819 MatSetValuesBlocked_MPIBAIJ, 2820 /*59*/ MatGetSubMatrix_MPIBAIJ, 2821 MatDestroy_MPIBAIJ, 2822 MatView_MPIBAIJ, 2823 0, 2824 0, 2825 /*64*/ 0, 2826 0, 2827 0, 2828 0, 2829 0, 2830 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2831 0, 2832 0, 2833 0, 2834 0, 2835 /*74*/ 0, 2836 MatFDColoringApply_BAIJ, 2837 0, 2838 0, 2839 0, 2840 /*79*/ 0, 2841 0, 2842 0, 2843 0, 2844 MatLoad_MPIBAIJ, 2845 /*84*/ 0, 2846 0, 2847 0, 2848 0, 2849 0, 2850 /*89*/ 0, 2851 0, 2852 0, 2853 0, 2854 0, 2855 /*94*/ 0, 2856 0, 2857 0, 2858 0, 2859 0, 2860 
/*99*/ 0, 2861 0, 2862 0, 2863 0, 2864 0, 2865 /*104*/0, 2866 MatRealPart_MPIBAIJ, 2867 MatImaginaryPart_MPIBAIJ, 2868 0, 2869 0, 2870 /*109*/0, 2871 0, 2872 0, 2873 0, 2874 0, 2875 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ, 2876 0, 2877 MatGetGhosts_MPIBAIJ, 2878 0, 2879 0, 2880 /*119*/0, 2881 0, 2882 0, 2883 0, 2884 0, 2885 /*124*/0, 2886 0, 2887 MatInvertBlockDiagonal_MPIBAIJ 2888 }; 2889 2890 EXTERN_C_BEGIN 2891 #undef __FUNCT__ 2892 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2893 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 2894 { 2895 PetscFunctionBegin; 2896 *a = ((Mat_MPIBAIJ *)A->data)->A; 2897 PetscFunctionReturn(0); 2898 } 2899 EXTERN_C_END 2900 2901 EXTERN_C_BEGIN 2902 extern PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2903 EXTERN_C_END 2904 2905 EXTERN_C_BEGIN 2906 #undef __FUNCT__ 2907 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2908 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2909 { 2910 PetscInt m,rstart,cstart,cend; 2911 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2912 const PetscInt *JJ=0; 2913 PetscScalar *values=0; 2914 PetscErrorCode ierr; 2915 2916 PetscFunctionBegin; 2917 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2918 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2919 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2920 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2921 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2922 m = B->rmap->n/bs; 2923 rstart = B->rmap->rstart/bs; 2924 cstart = B->cmap->rstart/bs; 2925 cend = B->cmap->rend/bs; 2926 2927 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2928 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 2929 for (i=0; i<m; i++) { 2930 nz = ii[i+1] - ii[i]; 2931 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2932 nz_max = PetscMax(nz_max,nz); 2933 JJ = jj + ii[i]; 2934 for (j=0; j<nz; j++) { 2935 if (*JJ >= cstart) break; 2936 JJ++; 2937 } 2938 d = 0; 2939 for (; j<nz; j++) { 2940 if (*JJ++ >= cend) break; 2941 d++; 2942 } 2943 d_nnz[i] = d; 2944 o_nnz[i] = nz - d; 2945 } 2946 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2947 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2948 2949 values = (PetscScalar*)V; 2950 if (!values) { 2951 ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2952 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2953 } 2954 for (i=0; i<m; i++) { 2955 PetscInt row = i + rstart; 2956 PetscInt ncols = ii[i+1] - ii[i]; 2957 const PetscInt *icols = jj + ii[i]; 2958 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2959 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2960 } 2961 2962 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2963 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2964 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2965 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2966 PetscFunctionReturn(0); 2967 } 2968 EXTERN_C_END 2969 2970 #undef __FUNCT__ 2971 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2972 /*@C 2973 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2974 (the default parallel PETSc format). 
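
   Example (a minimal sketch; the layout and the index values below are purely illustrative): a process that
   owns two block rows with block size bs = 2, where block row 0 has blocks in block columns 0 and 1 and
   block row 1 has a single block in block column 1, could set up its local part with
.vb
      PetscInt bs  = 2;
      PetscInt i[] = {0,2,3};
      PetscInt j[] = {0,1,1};
      ierr = MatMPIBAIJSetPreallocationCSR(A,bs,i,j,PETSC_NULL);CHKERRQ(ierr);
.ve
   Passing PETSC_NULL for v preallocates the blocks and fills them with zeros; the numerical values can be
   set afterwards with MatSetValuesBlocked().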
2975 2976 Collective on MPI_Comm 2977 2978 Input Parameters: 2979 + A - the matrix 2980 . bs - the block size 2981 . i - the indices into j for the start of each local row (starts with zero) 2982 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2983 - v - optional values in the matrix 2984 2985 Level: developer 2986 2987 .keywords: matrix, aij, compressed row, sparse, parallel 2988 2989 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ 2990 @*/ 2991 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2992 { 2993 PetscErrorCode ierr; 2994 2995 PetscFunctionBegin; 2996 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2997 PetscValidType(B,1); 2998 PetscValidLogicalCollectiveInt(B,bs,2); 2999 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3000 PetscFunctionReturn(0); 3001 } 3002 3003 EXTERN_C_BEGIN 3004 #undef __FUNCT__ 3005 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 3006 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 3007 { 3008 Mat_MPIBAIJ *b; 3009 PetscErrorCode ierr; 3010 PetscInt i; 3011 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3012 3013 PetscFunctionBegin; 3014 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3015 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3016 3017 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3018 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3019 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3020 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3021 3022 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3023 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3024 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3025 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3026 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 3027 3028 if (d_nnz) { 3029 for (i=0; i<B->rmap->n/bs; i++) { 3030 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 3031 } 3032 } 3033 if (o_nnz) { 3034 for (i=0; i<B->rmap->n/bs; i++) { 3035 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 3036 } 3037 } 3038 3039 b = (Mat_MPIBAIJ*)B->data; 3040 b->bs2 = bs*bs; 3041 b->mbs = B->rmap->n/bs; 3042 b->nbs = B->cmap->n/bs; 3043 b->Mbs = B->rmap->N/bs; 3044 b->Nbs = B->cmap->N/bs; 3045 3046 for (i=0; i<=b->size; i++) { 3047 b->rangebs[i] = B->rmap->range[i]/bs; 3048 } 3049 b->rstartbs = B->rmap->rstart/bs; 3050 b->rendbs = B->rmap->rend/bs; 3051 b->cstartbs = B->cmap->rstart/bs; 3052 b->cendbs = B->cmap->rend/bs; 3053 3054 if (!B->preallocated) { 3055 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3056 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3057 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 3058 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3059 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3060 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3061 ierr = 
MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 3062 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3063 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 3064 } 3065 3066 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3067 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 3068 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */ 3069 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3070 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3071 B->preallocated = PETSC_TRUE; 3072 PetscFunctionReturn(0); 3073 } 3074 EXTERN_C_END 3075 3076 EXTERN_C_BEGIN 3077 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 3078 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 3079 EXTERN_C_END 3080 3081 3082 EXTERN_C_BEGIN 3083 #undef __FUNCT__ 3084 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 3085 PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj) 3086 { 3087 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 3088 PetscErrorCode ierr; 3089 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 3090 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 3091 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 3092 3093 PetscFunctionBegin; 3094 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr); 3095 ii[0] = 0; 3096 CHKMEMQ; 3097 for (i=0; i<M; i++) { 3098 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 3099 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 3100 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 3101 /* remove one from count of matrix has diagonal */ 3102 for (j=id[i]; j<id[i+1]; j++) { 3103 if (jd[j] == i) {ii[i+1]--;break;} 3104 } 3105 CHKMEMQ; 3106 } 3107 ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3108 cnt = 0; 3109 for (i=0; i<M; i++) { 3110 for (j=io[i]; j<io[i+1]; j++) { 3111 if (garray[jo[j]] > rstart) break; 3112 jj[cnt++] = garray[jo[j]]; 3113 CHKMEMQ; 3114 } 3115 for (k=id[i]; k<id[i+1]; k++) { 3116 if (jd[k] != i) { 3117 jj[cnt++] = rstart + jd[k]; 3118 CHKMEMQ; 3119 } 3120 } 3121 for (;j<io[i+1]; j++) { 3122 jj[cnt++] = garray[jo[j]]; 3123 CHKMEMQ; 3124 } 3125 } 3126 ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr); 3127 PetscFunctionReturn(0); 3128 } 3129 EXTERN_C_END 3130 3131 #include <../src/mat/impls/aij/mpi/mpiaij.h> 3132 EXTERN_C_BEGIN 3133 PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*); 3134 EXTERN_C_END 3135 3136 EXTERN_C_BEGIN 3137 #undef __FUNCT__ 3138 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ" 3139 PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 3140 { 3141 PetscErrorCode ierr; 3142 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3143 Mat B; 3144 Mat_MPIAIJ *b; 3145 3146 PetscFunctionBegin; 3147 if (!A->assembled) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Matrix must be assembled"); 3148 3149 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 3150 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3151 ierr = 
MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 3152 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3153 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 3154 b = (Mat_MPIAIJ*) B->data; 3155 3156 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 3157 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 3158 ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 3159 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 3160 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 3161 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3162 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3163 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3164 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3165 if (reuse == MAT_REUSE_MATRIX) { 3166 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 3167 } else { 3168 *newmat = B; 3169 } 3170 PetscFunctionReturn(0); 3171 } 3172 EXTERN_C_END 3173 3174 EXTERN_C_BEGIN 3175 #if defined(PETSC_HAVE_MUMPS) 3176 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3177 #endif 3178 EXTERN_C_END 3179 3180 /*MC 3181 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 3182 3183 Options Database Keys: 3184 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 3185 . -mat_block_size <bs> - set the blocksize used to store the matrix 3186 - -mat_use_hash_table <fact> 3187 3188 Level: beginner 3189 3190 .seealso: MatCreateMPIBAIJ 3191 M*/ 3192 3193 EXTERN_C_BEGIN 3194 extern PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*); 3195 EXTERN_C_END 3196 3197 EXTERN_C_BEGIN 3198 #undef __FUNCT__ 3199 #define __FUNCT__ "MatCreate_MPIBAIJ" 3200 PetscErrorCode MatCreate_MPIBAIJ(Mat B) 3201 { 3202 Mat_MPIBAIJ *b; 3203 PetscErrorCode ierr; 3204 PetscBool flg; 3205 3206 PetscFunctionBegin; 3207 ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr); 3208 B->data = (void*)b; 3209 3210 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3211 B->assembled = PETSC_FALSE; 3212 3213 B->insertmode = NOT_SET_VALUES; 3214 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 3215 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 3216 3217 /* build local table of row and column ownerships */ 3218 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 3219 3220 /* build cache for off array entries formed */ 3221 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 3222 b->donotstash = PETSC_FALSE; 3223 b->colmap = PETSC_NULL; 3224 b->garray = PETSC_NULL; 3225 b->roworiented = PETSC_TRUE; 3226 3227 /* stuff used in block assembly */ 3228 b->barray = 0; 3229 3230 /* stuff used for matrix vector multiply */ 3231 b->lvec = 0; 3232 b->Mvctx = 0; 3233 3234 /* stuff for MatGetRow() */ 3235 b->rowindices = 0; 3236 b->rowvalues = 0; 3237 b->getrowactive = PETSC_FALSE; 3238 3239 /* hash table stuff */ 3240 b->ht = 0; 3241 b->hd = 0; 3242 b->ht_size = 0; 3243 b->ht_flag = PETSC_FALSE; 3244 b->ht_fact = 0; 3245 b->ht_total_ct = 0; 3246 b->ht_insert_ct = 0; 3247 3248 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 3249 b->ijonly = PETSC_FALSE; 3250 3251 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 3252 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in 
constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 3253 if (flg) { 3254 PetscReal fact = 1.39; 3255 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 3256 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 3257 if (fact <= 1.0) fact = 1.39; 3258 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 3259 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 3260 } 3261 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3262 3263 #if defined(PETSC_HAVE_MUMPS) 3264 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr); 3265 #endif 3266 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C", 3267 "MatConvert_MPIBAIJ_MPIAdj", 3268 MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3269 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiaij_C", 3270 "MatConvert_MPIBAIJ_MPIAIJ", 3271 MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 3272 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C", 3273 "MatConvert_MPIBAIJ_MPISBAIJ", 3274 MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr); 3275 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3276 "MatStoreValues_MPIBAIJ", 3277 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3278 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3279 "MatRetrieveValues_MPIBAIJ", 3280 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3281 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3282 "MatGetDiagonalBlock_MPIBAIJ", 3283 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3284 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 3285 "MatMPIBAIJSetPreallocation_MPIBAIJ", 3286 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3287 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 3288 "MatMPIBAIJSetPreallocationCSR_MPIBAIJ", 3289 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3290 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3291 "MatDiagonalScaleLocal_MPIBAIJ", 3292 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3293 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 3294 "MatSetHashTableFactor_MPIBAIJ", 3295 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3296 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C", 3297 "MatConvert_MPIBAIJ_MPIBSTRM", 3298 MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr); 3299 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3300 PetscFunctionReturn(0); 3301 } 3302 EXTERN_C_END 3303 3304 /*MC 3305 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3306 3307 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3308 and MATMPIBAIJ otherwise. 3309 3310 Options Database Keys: 3311 . 
-mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3312
3313    Level: beginner
3314
3315 .seealso: MatCreateBAIJ(), MATSEQBAIJ, MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3316 M*/
3317
3318 #undef __FUNCT__
3319 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3320 /*@C
3321    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3322    (block compressed row).  For good matrix assembly performance
3323    the user should preallocate the matrix storage by setting the parameters
3324    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3325    performance can be increased by more than a factor of 50.
3326
3327    Collective on Mat
3328
3329    Input Parameters:
3330 +  A - the matrix
3331 .  bs - size of block
3332 .  d_nz - number of block nonzeros per block row in diagonal portion of local
3333            submatrix (same for all local rows)
3334 .  d_nnz - array containing the number of block nonzeros in the various block rows
3335            of the diagonal portion of the local submatrix (possibly different for each block
3336            row) or PETSC_NULL. If you plan to factor the matrix you must leave room for the diagonal entry and
3337            set it even if it is zero.
3338 .  o_nz - number of block nonzeros per block row in the off-diagonal portion of local
3339            submatrix (same for all local rows).
3340 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
3341            off-diagonal portion of the local submatrix (possibly different for
3342            each block row) or PETSC_NULL.
3343
3344    If the *_nnz parameter is given then the *_nz parameter is ignored.
3345
3346    Options Database Keys:
3347 +   -mat_block_size - size of the blocks to use
3348 -   -mat_use_hash_table <fact> - use a hash table (with size factor fact) during matrix assembly
3349
3350    Notes:
3351    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3352    then it must be used on all processors that share the object for that argument.
3353
3354    Storage Information:
3355    For a square global matrix we define each processor's diagonal portion
3356    to be its local rows and the corresponding columns (a square submatrix);
3357    each processor's off-diagonal portion encompasses the remainder of the
3358    local matrix (a rectangular submatrix).
3359
3360    The user can specify preallocated storage for the diagonal part of
3361    the local submatrix with either d_nz or d_nnz (not both).  Set
3362    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3363    memory allocation.  Likewise, specify preallocated storage for the
3364    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3365
3366    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3367    the figure below we depict these three local rows and all columns (0-11).
3368
3369 .vb
3370            0 1 2 3 4 5 6 7 8 9 10 11
3371           -------------------
3372    row 3  | o o o d d d o o o o o o
3373    row 4  | o o o d d d o o o o o o
3374    row 5  | o o o d d d o o o o o o
3375           -------------------
3376 .ve
3377
3378    Thus, any entries in the d locations are stored in the d (diagonal)
3379    submatrix, and any entries in the o locations are stored in the
3380    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3381    stored simply in the MATSEQBAIJ format for compressed row storage.
3382
3383    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3384    and o_nz should indicate the number of block nonzeros per row in the o matrix.
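   As a rough usage sketch (the sizes and nonzero counts below are made up for illustration;
   error checking with CHKERRQ() is omitted):

.vb
   Mat      A;
   PetscInt bs = 2, mlocal = 10, nb = mlocal/bs, r;
   PetscInt d_nnz[5], o_nnz[5];
   for (r=0; r<nb; r++) { d_nnz[r] = 3; o_nnz[r] = 1; }   /* block nonzeros per block row */
   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetSizes(A,mlocal,mlocal,PETSC_DETERMINE,PETSC_DETERMINE);
   MatSetType(A,MATMPIBAIJ);
   MatMPIBAIJSetPreallocation(A,bs,0,d_nnz,0,o_nnz);
.ve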
3385    In general, for PDE problems in which most nonzeros are near the diagonal,
3386    one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
3387    or you will get TERRIBLE performance; see the users' manual chapter on
3388    matrices.
3389
3390    You can call MatGetInfo() to get information on how effective the preallocation was;
3391    for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3392    You can also run with the option -info and look for messages with the string
3393    malloc in them to see if additional memory allocation was needed.
3394
3395    Level: intermediate
3396
3397 .keywords: matrix, block, aij, compressed row, sparse, parallel
3398
3399 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3400 @*/
3401 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3402 {
3403   PetscErrorCode ierr;
3404
3405   PetscFunctionBegin;
3406   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3407   PetscValidType(B,1);
3408   PetscValidLogicalCollectiveInt(B,bs,2);
3409   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3410   PetscFunctionReturn(0);
3411 }
3412
3413 #undef __FUNCT__
3414 #define __FUNCT__ "MatCreateBAIJ"
3415 /*@C
3416    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3417    (block compressed row).  For good matrix assembly performance
3418    the user should preallocate the matrix storage by setting the parameters
3419    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3420    performance can be increased by more than a factor of 50.
3421
3422    Collective on MPI_Comm
3423
3424    Input Parameters:
3425 +  comm - MPI communicator
3426 .  bs - size of block
3427 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3428        This value should be the same as the local size used in creating the
3429        y vector for the matrix-vector product y = Ax.
3430 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3431        This value should be the same as the local size used in creating the
3432        x vector for the matrix-vector product y = Ax.
3433 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3434 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3435 .  d_nz - number of nonzero blocks per block row in diagonal portion of local
3436            submatrix (same for all local rows)
3437 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3438            of the diagonal portion of the local submatrix (possibly different for each block
3439            row) or PETSC_NULL. If you plan to factor the matrix you must leave room for the diagonal entry
3440            and set it even if it is zero.
3441 .  o_nz - number of nonzero blocks per block row in the off-diagonal portion of local
3442            submatrix (same for all local rows).
3443 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3444            off-diagonal portion of the local submatrix (possibly different for
3445            each block row) or PETSC_NULL.
3446
3447    Output Parameter:
3448 .  A - the matrix
3449
3450    Options Database Keys:
3451 +   -mat_block_size - size of the blocks to use
3452 -   -mat_use_hash_table <fact> - use a hash table (with size factor fact) during matrix assembly
3453
3454    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3455    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3456    [MatXXXXSetPreallocation() is, for example, MatSeqBAIJSetPreallocation()]
3457
3458    Notes:
3459    If the *_nnz parameter is given then the *_nz parameter is ignored.
3460
3461    A nonzero block is any block that has 1 or more nonzeros in it.
3462
3463    The user MUST specify either the local or global matrix dimensions
3464    (possibly both).
3465
3466    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3467    then it must be used on all processors that share the object for that argument.
3468
3469    Storage Information:
3470    For a square global matrix we define each processor's diagonal portion
3471    to be its local rows and the corresponding columns (a square submatrix);
3472    each processor's off-diagonal portion encompasses the remainder of the
3473    local matrix (a rectangular submatrix).
3474
3475    The user can specify preallocated storage for the diagonal part of
3476    the local submatrix with either d_nz or d_nnz (not both).  Set
3477    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3478    memory allocation.  Likewise, specify preallocated storage for the
3479    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3480
3481    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3482    the figure below we depict these three local rows and all columns (0-11).
3483
3484 .vb
3485            0 1 2 3 4 5 6 7 8 9 10 11
3486           -------------------
3487    row 3  | o o o d d d o o o o o o
3488    row 4  | o o o d d d o o o o o o
3489    row 5  | o o o d d d o o o o o o
3490           -------------------
3491 .ve
3492
3493    Thus, any entries in the d locations are stored in the d (diagonal)
3494    submatrix, and any entries in the o locations are stored in the
3495    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3496    stored simply in the MATSEQBAIJ format for compressed row storage.
3497
3498    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3499    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3500    In general, for PDE problems in which most nonzeros are near the diagonal,
3501    one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
3502    or you will get TERRIBLE performance; see the users' manual chapter on
3503    matrices.
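   As a rough illustration of calling this routine directly (the sizes and nonzero counts
   below are made up for the sketch; error checking with CHKERRQ() is omitted):

.vb
   Mat      A;
   PetscInt bs = 3, mb = 4;                           /* 4 block rows of size 3 on this process */
   PetscInt d_nnz[4] = {3,3,3,3}, o_nnz[4] = {1,1,1,1};
   MatCreateBAIJ(PETSC_COMM_WORLD,bs,mb*bs,mb*bs,PETSC_DETERMINE,PETSC_DETERMINE,
                 0,d_nnz,0,o_nnz,&A);
.ve
   The recommended MatCreate()/MatSetType()/MatMPIBAIJSetPreallocation() sequence mentioned
   above produces the same matrix.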
3504 3505 Level: intermediate 3506 3507 .keywords: matrix, block, aij, compressed row, sparse, parallel 3508 3509 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3510 @*/ 3511 PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3512 { 3513 PetscErrorCode ierr; 3514 PetscMPIInt size; 3515 3516 PetscFunctionBegin; 3517 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3518 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3519 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3520 if (size > 1) { 3521 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3522 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3523 } else { 3524 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3525 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3526 } 3527 PetscFunctionReturn(0); 3528 } 3529 3530 #undef __FUNCT__ 3531 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3532 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3533 { 3534 Mat mat; 3535 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3536 PetscErrorCode ierr; 3537 PetscInt len=0; 3538 3539 PetscFunctionBegin; 3540 *newmat = 0; 3541 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3542 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3543 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3544 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3545 3546 mat->factortype = matin->factortype; 3547 mat->preallocated = PETSC_TRUE; 3548 mat->assembled = PETSC_TRUE; 3549 mat->insertmode = NOT_SET_VALUES; 3550 3551 a = (Mat_MPIBAIJ*)mat->data; 3552 mat->rmap->bs = matin->rmap->bs; 3553 a->bs2 = oldmat->bs2; 3554 a->mbs = oldmat->mbs; 3555 a->nbs = oldmat->nbs; 3556 a->Mbs = oldmat->Mbs; 3557 a->Nbs = oldmat->Nbs; 3558 3559 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3560 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3561 3562 a->size = oldmat->size; 3563 a->rank = oldmat->rank; 3564 a->donotstash = oldmat->donotstash; 3565 a->roworiented = oldmat->roworiented; 3566 a->rowindices = 0; 3567 a->rowvalues = 0; 3568 a->getrowactive = PETSC_FALSE; 3569 a->barray = 0; 3570 a->rstartbs = oldmat->rstartbs; 3571 a->rendbs = oldmat->rendbs; 3572 a->cstartbs = oldmat->cstartbs; 3573 a->cendbs = oldmat->cendbs; 3574 3575 /* hash table stuff */ 3576 a->ht = 0; 3577 a->hd = 0; 3578 a->ht_size = 0; 3579 a->ht_flag = oldmat->ht_flag; 3580 a->ht_fact = oldmat->ht_fact; 3581 a->ht_total_ct = 0; 3582 a->ht_insert_ct = 0; 3583 3584 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3585 if (oldmat->colmap) { 3586 #if defined (PETSC_USE_CTABLE) 3587 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3588 #else 3589 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3590 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3591 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3592 #endif 3593 } else a->colmap = 0; 3594 3595 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3596 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3597 ierr = 
PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3598 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3599 } else a->garray = 0; 3600 3601 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3602 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3603 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3604 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3605 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3606 3607 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3608 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3609 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3610 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3611 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3612 *newmat = mat; 3613 3614 PetscFunctionReturn(0); 3615 } 3616 3617 #undef __FUNCT__ 3618 #define __FUNCT__ "MatLoad_MPIBAIJ" 3619 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer) 3620 { 3621 PetscErrorCode ierr; 3622 int fd; 3623 PetscInt i,nz,j,rstart,rend; 3624 PetscScalar *vals,*buf; 3625 MPI_Comm comm = ((PetscObject)viewer)->comm; 3626 MPI_Status status; 3627 PetscMPIInt rank,size,maxnz; 3628 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3629 PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 3630 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 3631 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3632 PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 3633 PetscInt dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols; 3634 3635 PetscFunctionBegin; 3636 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3637 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3638 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3639 3640 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3641 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3642 if (!rank) { 3643 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3644 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3645 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3646 } 3647 3648 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 3649 3650 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3651 M = header[1]; N = header[2]; 3652 3653 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3654 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 3655 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 3656 3657 /* If global sizes are set, check if they are consistent with that given in the file */ 3658 if (sizesset) { 3659 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 3660 } 3661 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3662 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3663 3664 if (M != N) 
SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices"); 3665 3666 /* 3667 This code adds extra rows to make sure the number of rows is 3668 divisible by the blocksize 3669 */ 3670 Mbs = M/bs; 3671 extra_rows = bs - M + bs*Mbs; 3672 if (extra_rows == bs) extra_rows = 0; 3673 else Mbs++; 3674 if (extra_rows && !rank) { 3675 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3676 } 3677 3678 /* determine ownership of all rows */ 3679 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 3680 mbs = Mbs/size + ((Mbs % size) > rank); 3681 m = mbs*bs; 3682 } else { /* User set */ 3683 m = newmat->rmap->n; 3684 mbs = m/bs; 3685 } 3686 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 3687 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3688 3689 /* process 0 needs enough room for process with most rows */ 3690 if (!rank) { 3691 mmax = rowners[1]; 3692 for (i=2; i<=size; i++) { 3693 mmax = PetscMax(mmax,rowners[i]); 3694 } 3695 mmax*=bs; 3696 } else mmax = m; 3697 3698 rowners[0] = 0; 3699 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3700 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3701 rstart = rowners[rank]; 3702 rend = rowners[rank+1]; 3703 3704 /* distribute row lengths to all processors */ 3705 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 3706 if (!rank) { 3707 mend = m; 3708 if (size == 1) mend = mend - extra_rows; 3709 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3710 for (j=mend; j<m; j++) locrowlens[j] = 1; 3711 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3712 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3713 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3714 for (j=0; j<m; j++) { 3715 procsnz[0] += locrowlens[j]; 3716 } 3717 for (i=1; i<size; i++) { 3718 mend = browners[i+1] - browners[i]; 3719 if (i == size-1) mend = mend - extra_rows; 3720 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3721 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3722 /* calculate the number of nonzeros on each processor */ 3723 for (j=0; j<browners[i+1]-browners[i]; j++) { 3724 procsnz[i] += rowlengths[j]; 3725 } 3726 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3727 } 3728 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3729 } else { 3730 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3731 } 3732 3733 if (!rank) { 3734 /* determine max buffer needed and allocate it */ 3735 maxnz = procsnz[0]; 3736 for (i=1; i<size; i++) { 3737 maxnz = PetscMax(maxnz,procsnz[i]); 3738 } 3739 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3740 3741 /* read in my part of the matrix column indices */ 3742 nz = procsnz[0]; 3743 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3744 mycols = ibuf; 3745 if (size == 1) nz -= extra_rows; 3746 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3747 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 3748 3749 /* read in every ones (except the last) and ship off */ 3750 for (i=1; i<size-1; i++) { 3751 nz = procsnz[i]; 3752 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3753 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3754 } 3755 /* read in the stuff for the last proc */ 3756 if (size != 1) { 3757 nz = procsnz[size-1] - extra_rows; /* the extra rows 
are not on the disk */ 3758 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3759 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3760 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3761 } 3762 ierr = PetscFree(cols);CHKERRQ(ierr); 3763 } else { 3764 /* determine buffer space needed for message */ 3765 nz = 0; 3766 for (i=0; i<m; i++) { 3767 nz += locrowlens[i]; 3768 } 3769 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3770 mycols = ibuf; 3771 /* receive message of column indices*/ 3772 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3773 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3774 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3775 } 3776 3777 /* loop over local rows, determining number of off diagonal entries */ 3778 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 3779 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 3780 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3781 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3782 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3783 rowcount = 0; nzcount = 0; 3784 for (i=0; i<mbs; i++) { 3785 dcount = 0; 3786 odcount = 0; 3787 for (j=0; j<bs; j++) { 3788 kmax = locrowlens[rowcount]; 3789 for (k=0; k<kmax; k++) { 3790 tmp = mycols[nzcount++]/bs; 3791 if (!mask[tmp]) { 3792 mask[tmp] = 1; 3793 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3794 else masked1[dcount++] = tmp; 3795 } 3796 } 3797 rowcount++; 3798 } 3799 3800 dlens[i] = dcount; 3801 odlens[i] = odcount; 3802 3803 /* zero out the mask elements we set */ 3804 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3805 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 3806 } 3807 3808 3809 if (!sizesset) { 3810 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3811 } 3812 ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3813 3814 if (!rank) { 3815 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3816 /* read in my part of the matrix numerical values */ 3817 nz = procsnz[0]; 3818 vals = buf; 3819 mycols = ibuf; 3820 if (size == 1) nz -= extra_rows; 3821 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3822 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 3823 3824 /* insert into matrix */ 3825 jj = rstart*bs; 3826 for (i=0; i<m; i++) { 3827 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3828 mycols += locrowlens[i]; 3829 vals += locrowlens[i]; 3830 jj++; 3831 } 3832 /* read in other processors (except the last one) and ship out */ 3833 for (i=1; i<size-1; i++) { 3834 nz = procsnz[i]; 3835 vals = buf; 3836 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3837 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3838 } 3839 /* the last proc */ 3840 if (size != 1) { 3841 nz = procsnz[i] - extra_rows; 3842 vals = buf; 3843 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3844 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3845 ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3846 } 3847 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3848 } else { 3849 /* receive numeric values */ 3850 ierr = 
PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3851
3852     /* receive message of values */
3853     vals   = buf;
3854     mycols = ibuf;
3855     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3856
3857     /* insert into matrix */
3858     jj = rstart*bs;
3859     for (i=0; i<m; i++) {
3860       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3861       mycols += locrowlens[i];
3862       vals   += locrowlens[i];
3863       jj++;
3864     }
3865   }
3866   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3867   ierr = PetscFree(buf);CHKERRQ(ierr);
3868   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3869   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3870   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3871   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3872   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3873   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3874
3875   PetscFunctionReturn(0);
3876 }
3877
3878 #undef __FUNCT__
3879 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3880 /*@
3881    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3882
3883    Input Parameters:
3884 +  mat - the matrix
3885 -  fact - the hash table size factor
3886
3887    Not Collective, each process can use a different factor
3888
3889    Level: advanced
3890
3891    Notes:
3892    This can also be set by the command line option: -mat_use_hash_table <fact>
3893
3894 .keywords: matrix, hashtable, factor, HT
3895
3896 .seealso: MatSetOption()
3897 @*/
3898 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3899 {
3900   PetscErrorCode ierr;
3901
3902   PetscFunctionBegin;
3903   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3904   PetscFunctionReturn(0);
3905 }
3906
3907 EXTERN_C_BEGIN
3908 #undef __FUNCT__
3909 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3910 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3911 {
3912   Mat_MPIBAIJ *baij;
3913
3914   PetscFunctionBegin;
3915   baij          = (Mat_MPIBAIJ*)mat->data;
3916   baij->ht_fact = fact;
3917   PetscFunctionReturn(0);
3918 }
3919 EXTERN_C_END
3920
3921 #undef __FUNCT__
3922 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3923 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3924 {
3925   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
3926   PetscFunctionBegin;
3927   *Ad     = a->A;
3928   *Ao     = a->B;
3929   *colmap = a->garray;
3930   PetscFunctionReturn(0);
3931 }
3932
3933 /*
3934     Special version for direct calls from Fortran (to eliminate two function call overheads)
3935 */
3936 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3937 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3938 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3939 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3940 #endif
3941
3942 #undef __FUNCT__
3943 #define __FUNCT__ "matmpibaijsetvaluesblocked"
3944 /*@C
3945    MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace a call to MatSetValuesBlocked()
3946
3947    Collective on Mat
3948
3949    Input Parameters:
3950 +  mat - the matrix
3951 .  min - number of input rows
3952 .  im - input rows
3953 .  nin - number of input columns
3954 .  in - input columns
3955 .  v - numerical values input
3956 -  addvin - INSERT_VALUES or ADD_VALUES
3957
3958    Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3959 3960 Level: advanced 3961 3962 .seealso: MatSetValuesBlocked() 3963 @*/ 3964 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3965 { 3966 /* convert input arguments to C version */ 3967 Mat mat = *matin; 3968 PetscInt m = *min, n = *nin; 3969 InsertMode addv = *addvin; 3970 3971 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3972 const MatScalar *value; 3973 MatScalar *barray=baij->barray; 3974 PetscBool roworiented = baij->roworiented; 3975 PetscErrorCode ierr; 3976 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3977 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3978 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3979 3980 PetscFunctionBegin; 3981 /* tasks normally handled by MatSetValuesBlocked() */ 3982 if (mat->insertmode == NOT_SET_VALUES) { 3983 mat->insertmode = addv; 3984 } 3985 #if defined(PETSC_USE_DEBUG) 3986 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3987 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3988 #endif 3989 if (mat->assembled) { 3990 mat->was_assembled = PETSC_TRUE; 3991 mat->assembled = PETSC_FALSE; 3992 } 3993 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3994 3995 3996 if (!barray) { 3997 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 3998 baij->barray = barray; 3999 } 4000 4001 if (roworiented) { 4002 stepval = (n-1)*bs; 4003 } else { 4004 stepval = (m-1)*bs; 4005 } 4006 for (i=0; i<m; i++) { 4007 if (im[i] < 0) continue; 4008 #if defined(PETSC_USE_DEBUG) 4009 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 4010 #endif 4011 if (im[i] >= rstart && im[i] < rend) { 4012 row = im[i] - rstart; 4013 for (j=0; j<n; j++) { 4014 /* If NumCol = 1 then a copy is not required */ 4015 if ((roworiented) && (n == 1)) { 4016 barray = (MatScalar*)v + i*bs2; 4017 } else if ((!roworiented) && (m == 1)) { 4018 barray = (MatScalar*)v + j*bs2; 4019 } else { /* Here a copy is required */ 4020 if (roworiented) { 4021 value = v + i*(stepval+bs)*bs + j*bs; 4022 } else { 4023 value = v + j*(stepval+bs)*bs + i*bs; 4024 } 4025 for (ii=0; ii<bs; ii++,value+=stepval) { 4026 for (jj=0; jj<bs; jj++) { 4027 *barray++ = *value++; 4028 } 4029 } 4030 barray -=bs2; 4031 } 4032 4033 if (in[j] >= cstart && in[j] < cend) { 4034 col = in[j] - cstart; 4035 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 4036 } 4037 else if (in[j] < 0) continue; 4038 #if defined(PETSC_USE_DEBUG) 4039 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 4040 #endif 4041 else { 4042 if (mat->was_assembled) { 4043 if (!baij->colmap) { 4044 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 4045 } 4046 4047 #if defined(PETSC_USE_DEBUG) 4048 #if defined (PETSC_USE_CTABLE) 4049 { PetscInt data; 4050 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 4051 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4052 } 4053 #else 4054 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4055 #endif 4056 #endif 4057 #if defined (PETSC_USE_CTABLE) 4058 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 4059 col = (col - 1)/bs; 4060 
#else
4061           col = (baij->colmap[in[j]] - 1)/bs;
4062 #endif
4063           if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
4064             ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
4065             col  = in[j];
4066           }
4067         }
4068         else col = in[j];
4069         ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4070       }
4071     }
4072   } else {
4073     if (!baij->donotstash) {
4074       if (roworiented) {
4075         ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4076       } else {
4077         ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4078       }
4079     }
4080   }
4081   }
4082
4083   /* task normally handled by MatSetValuesBlocked() */
4084   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
4085   PetscFunctionReturn(0);
4086 }
4087
4088 #undef __FUNCT__
4089 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
4090 /*@
4091    MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local rows
4092    in standard CSR format.
4093
4094    Collective on MPI_Comm
4095
4096    Input Parameters:
4097 +  comm - MPI communicator
4098 .  bs - the block size, only a block size of 1 is supported
4099 .  m - number of local rows (Cannot be PETSC_DECIDE)
4100 .  n - This value should be the same as the local size used in creating the
4101        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4102        calculated if N is given) For square matrices n is almost always m.
4103 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4104 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4105 .  i - the indices into j for the start of each local row (starts with zero)
4106 .  j - the column indices for each local row (starts with zero)
4107 -  a - matrix values
4108
4109    Output Parameter:
4110 .  mat - the matrix
4111
4112    Level: intermediate
4113
4114    Notes:
4115    The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4116    thus you CANNOT change the matrix entries by changing the values of a[] after you have
4117    called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4118
4119    The i and j indices are 0 based, and the entries of i are offsets into the local j array.
4120
4121 .keywords: matrix, aij, compressed row, sparse, parallel
4122
4123 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4124           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4125 @*/
4126 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4127 {
4128   PetscErrorCode ierr;
4129
4130
4131   PetscFunctionBegin;
4132   if (i[0]) {
4133     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4134   }
4135   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4136   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4137   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4138   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
4139   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
4140   PetscFunctionReturn(0);
4141 }
4142
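
/*
   Illustrative usage sketch for MatCreateMPIBAIJWithArrays(), kept here as a comment.
   The array contents below are made-up sample data for two local rows, and error
   checking is omitted:

      PetscInt    ii[3] = {0,2,3};          row offsets into jj (i[0] must be 0)
      PetscInt    jj[3] = {0,1,1};          global column indices, sorted within each row
      PetscScalar aa[3] = {1.0,2.0,3.0};    one value per column index
      Mat         A;
      MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,ii,jj,aa,&A);

   Here bs is 1, matching the restriction documented above, and the arrays are copied,
   so they may be freed or reused once the routine returns.  After the matrix has been
   assembled, its diagonal and off-diagonal SEQBAIJ pieces can be examined with, e.g.,

      Mat Ad,Ao; const PetscInt *colmap;
      MatMPIBAIJGetSeqBAIJ(A,&Ad,&Ao,&colmap);
*/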