#include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I "petscmat.h" I*/
#include <petscblaslapack.h>

extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat);
extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non scalable way since the
   length of colmap equals the global matrix length.
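
     As a sketch of the convention used below (the non-CTABLE branch): for
   block size bs, the i-th off-diagonal block column stored locally, whose
   global block column is garray[i], is recorded as

       colmap[garray[i]] = i*bs + 1;

   so that MatSetValues_MPIBAIJ() can recover the local point column as

       col = colmap[in[j]/bs] - 1 + in[j]%bs;

   where a stored value of 0 (hence col < 0) means the block column is not
   yet present in the off-diagonal part B.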
82 */ 83 #undef __FUNCT__ 84 #define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private" 85 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) 86 { 87 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 88 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 89 PetscErrorCode ierr; 90 PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 91 92 PetscFunctionBegin; 93 #if defined (PETSC_USE_CTABLE) 94 ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr); 95 for (i=0; i<nbs; i++) { 96 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr); 97 } 98 #else 99 ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr); 100 ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 101 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 102 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 103 #endif 104 PetscFunctionReturn(0); 105 } 106 107 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 108 { \ 109 \ 110 brow = row/bs; \ 111 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 112 rmax = aimax[brow]; nrow = ailen[brow]; \ 113 bcol = col/bs; \ 114 ridx = row % bs; cidx = col % bs; \ 115 low = 0; high = nrow; \ 116 while (high-low > 3) { \ 117 t = (low+high)/2; \ 118 if (rp[t] > bcol) high = t; \ 119 else low = t; \ 120 } \ 121 for (_i=low; _i<high; _i++) { \ 122 if (rp[_i] > bcol) break; \ 123 if (rp[_i] == bcol) { \ 124 bap = ap + bs2*_i + bs*cidx + ridx; \ 125 if (addv == ADD_VALUES) *bap += value; \ 126 else *bap = value; \ 127 goto a_noinsert; \ 128 } \ 129 } \ 130 if (a->nonew == 1) goto a_noinsert; \ 131 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 132 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 133 N = nrow++ - 1; \ 134 /* shift up all the later entries in this row */ \ 135 for (ii=N; ii>=_i; ii--) { \ 136 rp[ii+1] = rp[ii]; \ 137 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 138 } \ 139 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 140 rp[_i] = bcol; \ 141 ap[bs2*_i + bs*cidx + ridx] = value; \ 142 a_noinsert:; \ 143 ailen[brow] = nrow; \ 144 } 145 146 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 147 { \ 148 brow = row/bs; \ 149 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 150 rmax = bimax[brow]; nrow = bilen[brow]; \ 151 bcol = col/bs; \ 152 ridx = row % bs; cidx = col % bs; \ 153 low = 0; high = nrow; \ 154 while (high-low > 3) { \ 155 t = (low+high)/2; \ 156 if (rp[t] > bcol) high = t; \ 157 else low = t; \ 158 } \ 159 for (_i=low; _i<high; _i++) { \ 160 if (rp[_i] > bcol) break; \ 161 if (rp[_i] == bcol) { \ 162 bap = ap + bs2*_i + bs*cidx + ridx; \ 163 if (addv == ADD_VALUES) *bap += value; \ 164 else *bap = value; \ 165 goto b_noinsert; \ 166 } \ 167 } \ 168 if (b->nonew == 1) goto b_noinsert; \ 169 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 170 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 171 CHKMEMQ;\ 172 N = nrow++ - 1; \ 173 /* shift up all the later entries in this row */ \ 174 for (ii=N; ii>=_i; ii--) { \ 175 rp[ii+1] = rp[ii]; \ 176 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 177 } \ 178 if (N>=_i) { ierr = 
PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 179 rp[_i] = bcol; \ 180 ap[bs2*_i + bs*cidx + ridx] = value; \ 181 b_noinsert:; \ 182 bilen[brow] = nrow; \ 183 } 184 185 #undef __FUNCT__ 186 #define __FUNCT__ "MatSetValues_MPIBAIJ" 187 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 188 { 189 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 190 MatScalar value; 191 PetscBool roworiented = baij->roworiented; 192 PetscErrorCode ierr; 193 PetscInt i,j,row,col; 194 PetscInt rstart_orig=mat->rmap->rstart; 195 PetscInt rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart; 196 PetscInt cend_orig=mat->cmap->rend,bs=mat->rmap->bs; 197 198 /* Some Variables required in the macro */ 199 Mat A = baij->A; 200 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 201 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 202 MatScalar *aa=a->a; 203 204 Mat B = baij->B; 205 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 206 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 207 MatScalar *ba=b->a; 208 209 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 210 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 211 MatScalar *ap,*bap; 212 213 PetscFunctionBegin; 214 if (v) PetscValidScalarPointer(v,6); 215 for (i=0; i<m; i++) { 216 if (im[i] < 0) continue; 217 #if defined(PETSC_USE_DEBUG) 218 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 219 #endif 220 if (im[i] >= rstart_orig && im[i] < rend_orig) { 221 row = im[i] - rstart_orig; 222 for (j=0; j<n; j++) { 223 if (in[j] >= cstart_orig && in[j] < cend_orig) { 224 col = in[j] - cstart_orig; 225 if (roworiented) value = v[i*n+j]; 226 else value = v[i+j*m]; 227 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 228 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 229 } else if (in[j] < 0) continue; 230 #if defined(PETSC_USE_DEBUG) 231 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 232 #endif 233 else { 234 if (mat->was_assembled) { 235 if (!baij->colmap) { 236 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 237 } 238 #if defined (PETSC_USE_CTABLE) 239 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 240 col = col - 1; 241 #else 242 col = baij->colmap[in[j]/bs] - 1; 243 #endif 244 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 245 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 246 col = in[j]; 247 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 248 B = baij->B; 249 b = (Mat_SeqBAIJ*)(B)->data; 250 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 251 ba=b->a; 252 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 253 else col += in[j]%bs; 254 } else col = in[j]; 255 if (roworiented) value = v[i*n+j]; 256 else value = v[i+j*m]; 257 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 258 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 259 } 260 } 261 } else { 262 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 263 if (!baij->donotstash) { 264 mat->assembled = PETSC_FALSE; 265 if (roworiented) { 266 ierr = 
MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 267 } else { 268 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 269 } 270 } 271 } 272 } 273 PetscFunctionReturn(0); 274 } 275 276 #undef __FUNCT__ 277 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 278 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 279 { 280 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 281 const PetscScalar *value; 282 MatScalar *barray=baij->barray; 283 PetscBool roworiented = baij->roworiented; 284 PetscErrorCode ierr; 285 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 286 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 287 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 288 289 PetscFunctionBegin; 290 if (!barray) { 291 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 292 baij->barray = barray; 293 } 294 295 if (roworiented) { 296 stepval = (n-1)*bs; 297 } else { 298 stepval = (m-1)*bs; 299 } 300 for (i=0; i<m; i++) { 301 if (im[i] < 0) continue; 302 #if defined(PETSC_USE_DEBUG) 303 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 304 #endif 305 if (im[i] >= rstart && im[i] < rend) { 306 row = im[i] - rstart; 307 for (j=0; j<n; j++) { 308 /* If NumCol = 1 then a copy is not required */ 309 if ((roworiented) && (n == 1)) { 310 barray = (MatScalar*)v + i*bs2; 311 } else if ((!roworiented) && (m == 1)) { 312 barray = (MatScalar*)v + j*bs2; 313 } else { /* Here a copy is required */ 314 if (roworiented) { 315 value = v + (i*(stepval+bs) + j)*bs; 316 } else { 317 value = v + (j*(stepval+bs) + i)*bs; 318 } 319 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 320 for (jj=0; jj<bs; jj++) { 321 barray[jj] = value[jj]; 322 } 323 barray += bs; 324 } 325 barray -= bs2; 326 } 327 328 if (in[j] >= cstart && in[j] < cend) { 329 col = in[j] - cstart; 330 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 331 } 332 else if (in[j] < 0) continue; 333 #if defined(PETSC_USE_DEBUG) 334 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 335 #endif 336 else { 337 if (mat->was_assembled) { 338 if (!baij->colmap) { 339 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 340 } 341 342 #if defined(PETSC_USE_DEBUG) 343 #if defined (PETSC_USE_CTABLE) 344 { PetscInt data; 345 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 346 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 347 } 348 #else 349 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 350 #endif 351 #endif 352 #if defined (PETSC_USE_CTABLE) 353 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 354 col = (col - 1)/bs; 355 #else 356 col = (baij->colmap[in[j]] - 1)/bs; 357 #endif 358 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 359 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 360 col = in[j]; 361 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]); 362 } else col = in[j]; 363 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 364 } 365 } 366 } else { 367 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process 
row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        /* Look up into the Hash Table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode
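/*
   Roughly speaking, the hash-table counterpart of MatSetValuesBlocked_MPIBAIJ(): once
   MAT_USE_HASH_TABLE is in effect, MatAssemblyEnd_MPIBAIJ() builds the table via
   MatCreateHashTable_MPIBAIJ_Private() and installs this routine, so each block (row,col)
   is located by the HASH() probe into baij->ht/baij->hd rather than by searching the row;
   only blocks already present in the assembled nonzero structure can be set this way.
*/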
MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 467 { 468 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 469 PetscBool roworiented = baij->roworiented; 470 PetscErrorCode ierr; 471 PetscInt i,j,ii,jj,row,col; 472 PetscInt rstart=baij->rstartbs; 473 PetscInt rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 474 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 475 PetscReal tmp; 476 MatScalar **HD = baij->hd,*baij_a; 477 const PetscScalar *v_t,*value; 478 #if defined(PETSC_USE_DEBUG) 479 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 480 #endif 481 482 PetscFunctionBegin; 483 if (roworiented) { 484 stepval = (n-1)*bs; 485 } else { 486 stepval = (m-1)*bs; 487 } 488 for (i=0; i<m; i++) { 489 #if defined(PETSC_USE_DEBUG) 490 if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 491 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 492 #endif 493 row = im[i]; 494 v_t = v + i*nbs2; 495 if (row >= rstart && row < rend) { 496 for (j=0; j<n; j++) { 497 col = in[j]; 498 499 /* Look up into the Hash Table */ 500 key = row*Nbs+col+1; 501 h1 = HASH(size,key,tmp); 502 503 idx = h1; 504 #if defined(PETSC_USE_DEBUG) 505 total_ct++; 506 insert_ct++; 507 if (HT[idx] != key) { 508 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 509 if (idx == size) { 510 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 511 if (idx == h1) { 512 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 513 } 514 } 515 } 516 #else 517 if (HT[idx] != key) { 518 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 519 if (idx == size) { 520 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 521 if (idx == h1) { 522 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 523 } 524 } 525 } 526 #endif 527 baij_a = HD[idx]; 528 if (roworiented) { 529 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 530 /* value = v + (i*(stepval+bs)+j)*bs; */ 531 value = v_t; 532 v_t += bs; 533 if (addv == ADD_VALUES) { 534 for (ii=0; ii<bs; ii++,value+=stepval) { 535 for (jj=ii; jj<bs2; jj+=bs) { 536 baij_a[jj] += *value++; 537 } 538 } 539 } else { 540 for (ii=0; ii<bs; ii++,value+=stepval) { 541 for (jj=ii; jj<bs2; jj+=bs) { 542 baij_a[jj] = *value++; 543 } 544 } 545 } 546 } else { 547 value = v + j*(stepval+bs)*bs + i*bs; 548 if (addv == ADD_VALUES) { 549 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 550 for (jj=0; jj<bs; jj++) { 551 baij_a[jj] += *value++; 552 } 553 } 554 } else { 555 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 556 for (jj=0; jj<bs; jj++) { 557 baij_a[jj] = *value++; 558 } 559 } 560 } 561 } 562 } 563 } else { 564 if (!baij->donotstash) { 565 if (roworiented) { 566 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 567 } else { 568 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 569 } 570 } 571 } 572 } 573 #if defined(PETSC_USE_DEBUG) 574 baij->ht_total_ct = total_ct; 575 baij->ht_insert_ct = insert_ct; 576 #endif 577 PetscFunctionReturn(0); 578 } 579 580 #undef __FUNCT__ 581 #define __FUNCT__ "MatGetValues_MPIBAIJ" 582 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 583 { 584 Mat_MPIBAIJ *baij = 
(Mat_MPIBAIJ*)mat->data; 585 PetscErrorCode ierr; 586 PetscInt bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 587 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 588 589 PetscFunctionBegin; 590 for (i=0; i<m; i++) { 591 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 592 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 593 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 594 row = idxm[i] - bsrstart; 595 for (j=0; j<n; j++) { 596 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 597 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 598 if (idxn[j] >= bscstart && idxn[j] < bscend) { 599 col = idxn[j] - bscstart; 600 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 601 } else { 602 if (!baij->colmap) { 603 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 604 } 605 #if defined (PETSC_USE_CTABLE) 606 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 607 data --; 608 #else 609 data = baij->colmap[idxn[j]/bs]-1; 610 #endif 611 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 612 else { 613 col = data + idxn[j]%bs; 614 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 615 } 616 } 617 } 618 } else { 619 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 620 } 621 } 622 PetscFunctionReturn(0); 623 } 624 625 #undef __FUNCT__ 626 #define __FUNCT__ "MatNorm_MPIBAIJ" 627 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 628 { 629 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 630 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 631 PetscErrorCode ierr; 632 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 633 PetscReal sum = 0.0; 634 MatScalar *v; 635 636 PetscFunctionBegin; 637 if (baij->size == 1) { 638 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 639 } else { 640 if (type == NORM_FROBENIUS) { 641 v = amat->a; 642 nz = amat->nz*bs2; 643 for (i=0; i<nz; i++) { 644 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 645 } 646 v = bmat->a; 647 nz = bmat->nz*bs2; 648 for (i=0; i<nz; i++) { 649 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 650 } 651 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 652 *nrm = PetscSqrtReal(*nrm); 653 } else if (type == NORM_1) { /* max column sum */ 654 PetscReal *tmp,*tmp2; 655 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 656 ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr); 657 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 658 v = amat->a; jj = amat->j; 659 for (i=0; i<amat->nz; i++) { 660 for (j=0; j<bs; j++) { 661 col = bs*(cstart + *jj) + j; /* column index */ 662 for (row=0; row<bs; row++) { 663 tmp[col] += PetscAbsScalar(*v); v++; 664 } 665 } 666 jj++; 667 } 668 v = bmat->a; jj = bmat->j; 669 for (i=0; i<bmat->nz; i++) { 670 for (j=0; j<bs; j++) { 671 col = bs*garray[*jj] + j; 672 for (row=0; row<bs; row++) { 673 tmp[col] += PetscAbsScalar(*v); v++; 674 } 675 } 676 jj++; 677 } 678 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 679 *nrm = 0.0; 680 
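      /* the 1-norm is the maximum over the global column sums gathered into tmp2[] by the Allreduce above */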
for (j=0; j<mat->cmap->N; j++) { 681 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 682 } 683 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 684 } else if (type == NORM_INFINITY) { /* max row sum */ 685 PetscReal *sums; 686 ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr); 687 sum = 0.0; 688 for (j=0; j<amat->mbs; j++) { 689 for (row=0; row<bs; row++) sums[row] = 0.0; 690 v = amat->a + bs2*amat->i[j]; 691 nz = amat->i[j+1]-amat->i[j]; 692 for (i=0; i<nz; i++) { 693 for (col=0; col<bs; col++) { 694 for (row=0; row<bs; row++) { 695 sums[row] += PetscAbsScalar(*v); v++; 696 } 697 } 698 } 699 v = bmat->a + bs2*bmat->i[j]; 700 nz = bmat->i[j+1]-bmat->i[j]; 701 for (i=0; i<nz; i++) { 702 for (col=0; col<bs; col++) { 703 for (row=0; row<bs; row++) { 704 sums[row] += PetscAbsScalar(*v); v++; 705 } 706 } 707 } 708 for (row=0; row<bs; row++) { 709 if (sums[row] > sum) sum = sums[row]; 710 } 711 } 712 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 713 ierr = PetscFree(sums);CHKERRQ(ierr); 714 } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet"); 715 } 716 PetscFunctionReturn(0); 717 } 718 719 /* 720 Creates the hash table, and sets the table 721 This table is created only once. 722 If new entried need to be added to the matrix 723 then the hash table has to be destroyed and 724 recreated. 725 */ 726 #undef __FUNCT__ 727 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 728 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 729 { 730 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 731 Mat A = baij->A,B=baij->B; 732 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 733 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 734 PetscErrorCode ierr; 735 PetscInt ht_size,bs2=baij->bs2,rstart=baij->rstartbs; 736 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 737 PetscInt *HT,key; 738 MatScalar **HD; 739 PetscReal tmp; 740 #if defined(PETSC_USE_INFO) 741 PetscInt ct=0,max=0; 742 #endif 743 744 PetscFunctionBegin; 745 if (baij->ht) PetscFunctionReturn(0); 746 747 baij->ht_size = (PetscInt)(factor*nz); 748 ht_size = baij->ht_size; 749 750 /* Allocate Memory for Hash Table */ 751 ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr); 752 ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr); 753 ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr); 754 HD = baij->hd; 755 HT = baij->ht; 756 757 /* Loop Over A */ 758 for (i=0; i<a->mbs; i++) { 759 for (j=ai[i]; j<ai[i+1]; j++) { 760 row = i+rstart; 761 col = aj[j]+cstart; 762 763 key = row*Nbs + col + 1; 764 h1 = HASH(ht_size,key,tmp); 765 for (k=0; k<ht_size; k++) { 766 if (!HT[(h1+k)%ht_size]) { 767 HT[(h1+k)%ht_size] = key; 768 HD[(h1+k)%ht_size] = a->a + j*bs2; 769 break; 770 #if defined(PETSC_USE_INFO) 771 } else { 772 ct++; 773 #endif 774 } 775 } 776 #if defined(PETSC_USE_INFO) 777 if (k> max) max = k; 778 #endif 779 } 780 } 781 /* Loop Over B */ 782 for (i=0; i<b->mbs; i++) { 783 for (j=bi[i]; j<bi[i+1]; j++) { 784 row = i+rstart; 785 col = garray[bj[j]]; 786 key = row*Nbs + col + 1; 787 h1 = HASH(ht_size,key,tmp); 788 for (k=0; k<ht_size; k++) { 789 if (!HT[(h1+k)%ht_size]) { 790 HT[(h1+k)%ht_size] = key; 791 HD[(h1+k)%ht_size] = b->a + j*bs2; 792 break; 793 #if defined(PETSC_USE_INFO) 794 } else { 795 ct++; 796 #endif 797 } 798 } 799 #if defined(PETSC_USE_INFO) 800 if (k> max) max = k; 801 #endif 802 } 803 } 804 805 /* Print 
Summary */ 806 #if defined(PETSC_USE_INFO) 807 for (i=0,j=0; i<ht_size; i++) { 808 if (HT[i]) {j++;} 809 } 810 ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 811 #endif 812 PetscFunctionReturn(0); 813 } 814 815 #undef __FUNCT__ 816 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 817 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 818 { 819 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 820 PetscErrorCode ierr; 821 PetscInt nstash,reallocs; 822 InsertMode addv; 823 824 PetscFunctionBegin; 825 if (baij->donotstash || mat->nooffprocentries) { 826 PetscFunctionReturn(0); 827 } 828 829 /* make sure all processors are either in INSERTMODE or ADDMODE */ 830 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 831 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 832 mat->insertmode = addv; /* in case this processor had no cache */ 833 834 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 835 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 836 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 837 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 838 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 839 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 840 PetscFunctionReturn(0); 841 } 842 843 #undef __FUNCT__ 844 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 845 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 846 { 847 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 848 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data; 849 PetscErrorCode ierr; 850 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 851 PetscInt *row,*col; 852 PetscBool r1,r2,r3,other_disassembled; 853 MatScalar *val; 854 InsertMode addv = mat->insertmode; 855 PetscMPIInt n; 856 857 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 858 PetscFunctionBegin; 859 if (!baij->donotstash && !mat->nooffprocentries) { 860 while (1) { 861 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 862 if (!flg) break; 863 864 for (i=0; i<n;) { 865 /* Now identify the consecutive vals belonging to the same row */ 866 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 867 if (j < n) ncols = j-i; 868 else ncols = n-i; 869 /* Now assemble all these values with a single function call */ 870 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 871 i = j; 872 } 873 } 874 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 875 /* Now process the block-stash. 
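     Each message delivers whole bs-by-bs blocks, so the value pointer below is advanced in strides of bs2 scalars (val+i*bs2).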
     Since the values are stashed column-oriented,
     set the roworiented flag to column oriented, and after MatSetValues()
     restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format ==
PETSC_VIEWER_ASCII_INFO_DETAIL) { 962 MatInfo info; 963 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 964 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 965 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 966 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 967 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 968 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 969 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 970 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 971 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 972 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 973 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 974 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 975 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 976 PetscFunctionReturn(0); 977 } else if (format == PETSC_VIEWER_ASCII_INFO) { 978 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 979 PetscFunctionReturn(0); 980 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 981 PetscFunctionReturn(0); 982 } 983 } 984 985 if (isdraw) { 986 PetscDraw draw; 987 PetscBool isnull; 988 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 989 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 990 } 991 992 if (size == 1) { 993 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 994 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 995 } else { 996 /* assemble the entire matrix onto first processor. */ 997 Mat A; 998 Mat_SeqBAIJ *Aloc; 999 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1000 MatScalar *a; 1001 1002 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1003 /* Perhaps this should be the type of mat? 
*/ 1004 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1005 if (!rank) { 1006 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1007 } else { 1008 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1009 } 1010 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1011 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1012 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1013 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1014 1015 /* copy over the A part */ 1016 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1017 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1018 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1019 1020 for (i=0; i<mbs; i++) { 1021 rvals[0] = bs*(baij->rstartbs + i); 1022 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1023 for (j=ai[i]; j<ai[i+1]; j++) { 1024 col = (baij->cstartbs+aj[j])*bs; 1025 for (k=0; k<bs; k++) { 1026 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1027 col++; a += bs; 1028 } 1029 } 1030 } 1031 /* copy over the B part */ 1032 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1033 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1034 for (i=0; i<mbs; i++) { 1035 rvals[0] = bs*(baij->rstartbs + i); 1036 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1037 for (j=ai[i]; j<ai[i+1]; j++) { 1038 col = baij->garray[aj[j]]*bs; 1039 for (k=0; k<bs; k++) { 1040 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1041 col++; a += bs; 1042 } 1043 } 1044 } 1045 ierr = PetscFree(rvals);CHKERRQ(ierr); 1046 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1047 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1048 /* 1049 Everyone has to call to draw the matrix since the graphics waits are 1050 synchronized across all processors that share the PetscDraw object 1051 */ 1052 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1053 if (!rank) { 1054 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1055 /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/ 1056 PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ); 1057 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1058 } 1059 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1060 ierr = MatDestroy(&A);CHKERRQ(ierr); 1061 } 1062 PetscFunctionReturn(0); 1063 } 1064 1065 #undef __FUNCT__ 1066 #define __FUNCT__ "MatView_MPIBAIJ_Binary" 1067 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 1068 { 1069 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)mat->data; 1070 Mat_SeqBAIJ* A = (Mat_SeqBAIJ*)a->A->data; 1071 Mat_SeqBAIJ* B = (Mat_SeqBAIJ*)a->B->data; 1072 PetscErrorCode ierr; 1073 PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen; 1074 PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll; 1075 int fd; 1076 PetscScalar *column_values; 1077 FILE *file; 1078 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1079 PetscInt message_count,flowcontrolcount; 1080 1081 PetscFunctionBegin; 1082 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1083 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1084 nz = bs2*(A->nz + B->nz); 1085 rlen = mat->rmap->n; 1086 if (!rank) { 1087 header[0] = MAT_FILE_CLASSID; 1088 header[1] = mat->rmap->N; 
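    /* binary header layout: [MAT_FILE_CLASSID, global rows, global columns, global nonzero count]; header[3] is filled by the MPI_Reduce of nz below */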
1089 header[2] = mat->cmap->N; 1090 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1091 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1092 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1093 /* get largest number of rows any processor has */ 1094 range = mat->rmap->range; 1095 for (i=1; i<size; i++) { 1096 rlen = PetscMax(rlen,range[i+1] - range[i]); 1097 } 1098 } else { 1099 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1100 } 1101 1102 ierr = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr); 1103 /* compute lengths of each row */ 1104 for (i=0; i<a->mbs; i++) { 1105 crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1106 } 1107 /* store the row lengths to the file */ 1108 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1109 if (!rank) { 1110 MPI_Status status; 1111 ierr = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr); 1112 rlen = (range[1] - range[0])/bs; 1113 for (i=0; i<rlen; i++) { 1114 for (j=0; j<bs; j++) { 1115 row_lens[i*bs+j] = bs*crow_lens[i]; 1116 } 1117 } 1118 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1119 for (i=1; i<size; i++) { 1120 rlen = (range[i+1] - range[i])/bs; 1121 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1122 ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1123 for (k=0; k<rlen; k++) { 1124 for (j=0; j<bs; j++) { 1125 row_lens[k*bs+j] = bs*crow_lens[k]; 1126 } 1127 } 1128 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1129 } 1130 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1131 ierr = PetscFree(row_lens);CHKERRQ(ierr); 1132 } else { 1133 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1134 ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1135 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1136 } 1137 ierr = PetscFree(crow_lens);CHKERRQ(ierr); 1138 1139 /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the 1140 information needed to make it for each row from a block row. 
This does require more communication but still not more than 1141 the communication needed for the nonzero values */ 1142 nzmax = nz; /* space a largest processor needs */ 1143 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1144 ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1145 cnt = 0; 1146 for (i=0; i<a->mbs; i++) { 1147 pcnt = cnt; 1148 for (j=B->i[i]; j<B->i[i+1]; j++) { 1149 if ((col = garray[B->j[j]]) > cstart) break; 1150 for (l=0; l<bs; l++) { 1151 column_indices[cnt++] = bs*col+l; 1152 } 1153 } 1154 for (k=A->i[i]; k<A->i[i+1]; k++) { 1155 for (l=0; l<bs; l++) { 1156 column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 1157 } 1158 } 1159 for (; j<B->i[i+1]; j++) { 1160 for (l=0; l<bs; l++) { 1161 column_indices[cnt++] = bs*garray[B->j[j]]+l; 1162 } 1163 } 1164 len = cnt - pcnt; 1165 for (k=1; k<bs; k++) { 1166 ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr); 1167 cnt += len; 1168 } 1169 } 1170 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1171 1172 /* store the columns to the file */ 1173 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1174 if (!rank) { 1175 MPI_Status status; 1176 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1177 for (i=1; i<size; i++) { 1178 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1179 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1180 ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1181 ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1182 } 1183 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1184 } else { 1185 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1186 ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1187 ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1188 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1189 } 1190 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1191 1192 /* load up the numerical values */ 1193 ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1194 cnt = 0; 1195 for (i=0; i<a->mbs; i++) { 1196 rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1197 for (j=B->i[i]; j<B->i[i+1]; j++) { 1198 if (garray[B->j[j]] > cstart) break; 1199 for (l=0; l<bs; l++) { 1200 for (ll=0; ll<bs; ll++) { 1201 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1202 } 1203 } 1204 cnt += bs; 1205 } 1206 for (k=A->i[i]; k<A->i[i+1]; k++) { 1207 for (l=0; l<bs; l++) { 1208 for (ll=0; ll<bs; ll++) { 1209 column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1210 } 1211 } 1212 cnt += bs; 1213 } 1214 for (; j<B->i[i+1]; j++) { 1215 for (l=0; l<bs; l++) { 1216 for (ll=0; ll<bs; ll++) { 1217 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1218 } 1219 } 1220 cnt += bs; 1221 } 1222 cnt += (bs-1)*rlen; 1223 } 1224 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1225 1226 /* store the column values to the file */ 1227 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1228 if (!rank) { 1229 MPI_Status status; 1230 ierr = 
PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1231 for (i=1; i<size; i++) { 1232 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1233 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1234 ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1235 ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1236 } 1237 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1238 } else { 1239 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1240 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1241 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1242 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1243 } 1244 ierr = PetscFree(column_values);CHKERRQ(ierr); 1245 1246 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1247 if (file) { 1248 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1249 } 1250 PetscFunctionReturn(0); 1251 } 1252 1253 #undef __FUNCT__ 1254 #define __FUNCT__ "MatView_MPIBAIJ" 1255 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1256 { 1257 PetscErrorCode ierr; 1258 PetscBool iascii,isdraw,issocket,isbinary; 1259 1260 PetscFunctionBegin; 1261 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1262 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1263 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1264 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1265 if (iascii || isdraw || issocket) { 1266 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1267 } else if (isbinary) { 1268 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1269 } else { 1270 SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1271 } 1272 PetscFunctionReturn(0); 1273 } 1274 1275 #undef __FUNCT__ 1276 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1277 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1278 { 1279 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1280 PetscErrorCode ierr; 1281 1282 PetscFunctionBegin; 1283 #if defined(PETSC_USE_LOG) 1284 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1285 #endif 1286 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1287 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1288 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 1289 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1290 #if defined (PETSC_USE_CTABLE) 1291 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 1292 #else 1293 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1294 #endif 1295 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1296 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 1297 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1298 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1299 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1300 ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr); 1301 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1302 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1303 1304 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1305 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr =
(*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1375 /* send it on its way */ 1376 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1377 /* values actually were received in the Begin() but we need to call this nop */ 1378 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1379 } 1380 PetscFunctionReturn(0); 1381 } 1382 1383 #undef __FUNCT__ 1384 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1385 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1386 { 1387 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1388 PetscErrorCode ierr; 1389 1390 PetscFunctionBegin; 1391 /* do nondiagonal part */ 1392 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1393 /* send it on its way */ 1394 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1395 /* do local part */ 1396 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1397 /* receive remote parts: note this assumes the values are not actually */ 1398 /* inserted in yy until the next line, which is true for my implementation*/ 1399 /* but is not perhaps always true. */ 1400 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1401 PetscFunctionReturn(0); 1402 } 1403 1404 /* 1405 This only works correctly for square matrices where the subblock A->A is the 1406 diagonal block 1407 */ 1408 #undef __FUNCT__ 1409 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1410 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1411 { 1412 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1413 PetscErrorCode ierr; 1414 1415 PetscFunctionBegin; 1416 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1417 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1418 PetscFunctionReturn(0); 1419 } 1420 1421 #undef __FUNCT__ 1422 #define __FUNCT__ "MatScale_MPIBAIJ" 1423 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1424 { 1425 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1426 PetscErrorCode ierr; 1427 1428 PetscFunctionBegin; 1429 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1430 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1431 PetscFunctionReturn(0); 1432 } 1433 1434 #undef __FUNCT__ 1435 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1436 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1437 { 1438 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1439 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1440 PetscErrorCode ierr; 1441 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1442 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1443 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1444 1445 PetscFunctionBegin; 1446 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1447 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1448 mat->getrowactive = PETSC_TRUE; 1449 1450 if (!mat->rowvalues && (idx || v)) { 1451 /* 1452 allocate enough space to hold information from the longest row. 
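       For BAIJ that is the largest number of nonzero blocks in any block row of A plus B,
       and the PetscMalloc2() below provides bs2 scalars and bs2 indices for each such block.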
1453 */ 1454 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1455 PetscInt max = 1,mbs = mat->mbs,tmp; 1456 for (i=0; i<mbs; i++) { 1457 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1458 if (max < tmp) { max = tmp; } 1459 } 1460 ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1461 } 1462 lrow = row - brstart; 1463 1464 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1465 if (!v) {pvA = 0; pvB = 0;} 1466 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1467 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1468 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1469 nztot = nzA + nzB; 1470 1471 cmap = mat->garray; 1472 if (v || idx) { 1473 if (nztot) { 1474 /* Sort by increasing column numbers, assuming A and B already sorted */ 1475 PetscInt imark = -1; 1476 if (v) { 1477 *v = v_p = mat->rowvalues; 1478 for (i=0; i<nzB; i++) { 1479 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1480 else break; 1481 } 1482 imark = i; 1483 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1484 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1485 } 1486 if (idx) { 1487 *idx = idx_p = mat->rowindices; 1488 if (imark > -1) { 1489 for (i=0; i<imark; i++) { 1490 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1491 } 1492 } else { 1493 for (i=0; i<nzB; i++) { 1494 if (cmap[cworkB[i]/bs] < cstart) 1495 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1496 else break; 1497 } 1498 imark = i; 1499 } 1500 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1501 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1502 } 1503 } else { 1504 if (idx) *idx = 0; 1505 if (v) *v = 0; 1506 } 1507 } 1508 *nz = nztot; 1509 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1510 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1511 PetscFunctionReturn(0); 1512 } 1513 1514 #undef __FUNCT__ 1515 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1516 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1517 { 1518 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1519 1520 PetscFunctionBegin; 1521 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1522 baij->getrowactive = PETSC_FALSE; 1523 PetscFunctionReturn(0); 1524 } 1525 1526 #undef __FUNCT__ 1527 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1528 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1529 { 1530 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1531 PetscErrorCode ierr; 1532 1533 PetscFunctionBegin; 1534 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1535 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1536 PetscFunctionReturn(0); 1537 } 1538 1539 #undef __FUNCT__ 1540 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1541 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1542 { 1543 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1544 Mat A = a->A,B = a->B; 1545 PetscErrorCode ierr; 1546 PetscReal isend[5],irecv[5]; 1547 1548 PetscFunctionBegin; 1549 info->block_size = (PetscReal)matin->rmap->bs; 1550 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1551 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1552 isend[3] = info->memory; isend[4] = info->mallocs; 1553 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1554 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1555 isend[3] += 
info->memory; isend[4] += info->mallocs; 1556 if (flag == MAT_LOCAL) { 1557 info->nz_used = isend[0]; 1558 info->nz_allocated = isend[1]; 1559 info->nz_unneeded = isend[2]; 1560 info->memory = isend[3]; 1561 info->mallocs = isend[4]; 1562 } else if (flag == MAT_GLOBAL_MAX) { 1563 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1564 info->nz_used = irecv[0]; 1565 info->nz_allocated = irecv[1]; 1566 info->nz_unneeded = irecv[2]; 1567 info->memory = irecv[3]; 1568 info->mallocs = irecv[4]; 1569 } else if (flag == MAT_GLOBAL_SUM) { 1570 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1571 info->nz_used = irecv[0]; 1572 info->nz_allocated = irecv[1]; 1573 info->nz_unneeded = irecv[2]; 1574 info->memory = irecv[3]; 1575 info->mallocs = irecv[4]; 1576 } else { 1577 SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1578 } 1579 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1580 info->fill_ratio_needed = 0; 1581 info->factor_mallocs = 0; 1582 PetscFunctionReturn(0); 1583 } 1584 1585 #undef __FUNCT__ 1586 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1587 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1588 { 1589 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1590 PetscErrorCode ierr; 1591 1592 PetscFunctionBegin; 1593 switch (op) { 1594 case MAT_NEW_NONZERO_LOCATIONS: 1595 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1596 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1597 case MAT_KEEP_NONZERO_PATTERN: 1598 case MAT_NEW_NONZERO_LOCATION_ERR: 1599 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1600 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1601 break; 1602 case MAT_ROW_ORIENTED: 1603 a->roworiented = flg; 1604 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1605 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1606 break; 1607 case MAT_NEW_DIAGONALS: 1608 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1609 break; 1610 case MAT_IGNORE_OFF_PROC_ENTRIES: 1611 a->donotstash = flg; 1612 break; 1613 case MAT_USE_HASH_TABLE: 1614 a->ht_flag = flg; 1615 break; 1616 case MAT_SYMMETRIC: 1617 case MAT_STRUCTURALLY_SYMMETRIC: 1618 case MAT_HERMITIAN: 1619 case MAT_SYMMETRY_ETERNAL: 1620 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1621 break; 1622 default: 1623 SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op); 1624 } 1625 PetscFunctionReturn(0); 1626 } 1627 1628 #undef __FUNCT__ 1629 #define __FUNCT__ "MatTranspose_MPIBAIJ" 1630 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1631 { 1632 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1633 Mat_SeqBAIJ *Aloc; 1634 Mat B; 1635 PetscErrorCode ierr; 1636 PetscInt M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1637 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1638 MatScalar *a; 1639 1640 PetscFunctionBegin; 1641 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1642 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1643 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1644 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1645 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1646 /* Do not know preallocation information, but must set block size */ 1647 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,PETSC_NULL,PETSC_DECIDE,PETSC_NULL);CHKERRQ(ierr); 1648 } else { 1649 B = *matout; 1650 } 1651 1652 
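/* Insert the local blocks into B transposed: SeqBAIJ stores each bs x bs block column-major, so each consecutive slice of bs values taken from a below is one column of the original block, which is set as one row fragment of the transpose. */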
/* copy over the A part */ 1653 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1654 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1655 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1656 1657 for (i=0; i<mbs; i++) { 1658 rvals[0] = bs*(baij->rstartbs + i); 1659 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1660 for (j=ai[i]; j<ai[i+1]; j++) { 1661 col = (baij->cstartbs+aj[j])*bs; 1662 for (k=0; k<bs; k++) { 1663 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1664 col++; a += bs; 1665 } 1666 } 1667 } 1668 /* copy over the B part */ 1669 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1670 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1671 for (i=0; i<mbs; i++) { 1672 rvals[0] = bs*(baij->rstartbs + i); 1673 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1674 for (j=ai[i]; j<ai[i+1]; j++) { 1675 col = baij->garray[aj[j]]*bs; 1676 for (k=0; k<bs; k++) { 1677 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1678 col++; a += bs; 1679 } 1680 } 1681 } 1682 ierr = PetscFree(rvals);CHKERRQ(ierr); 1683 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1684 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1685 1686 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1687 *matout = B; 1688 } else { 1689 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1690 } 1691 PetscFunctionReturn(0); 1692 } 1693 1694 #undef __FUNCT__ 1695 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1696 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1697 { 1698 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1699 Mat a = baij->A,b = baij->B; 1700 PetscErrorCode ierr; 1701 PetscInt s1,s2,s3; 1702 1703 PetscFunctionBegin; 1704 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1705 if (rr) { 1706 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1707 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1708 /* Overlap communication with computation. 
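The forward scatter of rr into baij->lvec is started here and completed further below, after the diagonal block has been scaled, so that the off-diagonal block can then be right-scaled with the gathered values.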
*/ 1709 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1710 } 1711 if (ll) { 1712 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1713 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1714 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1715 } 1716 /* scale the diagonal block */ 1717 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1718 1719 if (rr) { 1720 /* Do a scatter end and then right scale the off-diagonal block */ 1721 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1722 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1723 } 1724 1725 PetscFunctionReturn(0); 1726 } 1727 1728 #undef __FUNCT__ 1729 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1730 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1731 { 1732 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1733 PetscErrorCode ierr; 1734 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1735 PetscInt i,*owners = A->rmap->range; 1736 PetscInt *nprocs,j,idx,nsends,row; 1737 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1738 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1; 1739 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap->rstart; 1740 MPI_Comm comm = ((PetscObject)A)->comm; 1741 MPI_Request *send_waits,*recv_waits; 1742 MPI_Status recv_status,*send_status; 1743 const PetscScalar *xx; 1744 PetscScalar *bb; 1745 #if defined(PETSC_DEBUG) 1746 PetscBool found = PETSC_FALSE; 1747 #endif 1748 1749 PetscFunctionBegin; 1750 /* first count number of contributors to each processor */ 1751 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1752 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1753 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1754 j = 0; 1755 for (i=0; i<N; i++) { 1756 if (lastidx > (idx = rows[i])) j = 0; 1757 lastidx = idx; 1758 for (; j<size; j++) { 1759 if (idx >= owners[j] && idx < owners[j+1]) { 1760 nprocs[2*j]++; 1761 nprocs[2*j+1] = 1; 1762 owner[i] = j; 1763 #if defined(PETSC_DEBUG) 1764 found = PETSC_TRUE; 1765 #endif 1766 break; 1767 } 1768 } 1769 #if defined(PETSC_DEBUG) 1770 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1771 found = PETSC_FALSE; 1772 #endif 1773 } 1774 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1775 1776 if (A->nooffproczerorows) { 1777 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 1778 nrecvs = nsends; 1779 nmax = N; 1780 } else { 1781 /* inform other processors of number of messages and max length*/ 1782 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1783 } 1784 1785 /* post receives: */ 1786 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1787 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1788 for (i=0; i<nrecvs; i++) { 1789 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1790 } 1791 1792 /* do sends: 1793 1) starts[i] gives the starting index in svalues for stuff going to 1794 the ith processor 1795 */ 1796 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1797 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1798 ierr = 
PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1799 starts[0] = 0; 1800 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1801 for (i=0; i<N; i++) { 1802 svalues[starts[owner[i]]++] = rows[i]; 1803 } 1804 1805 starts[0] = 0; 1806 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1807 count = 0; 1808 for (i=0; i<size; i++) { 1809 if (nprocs[2*i+1]) { 1810 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1811 } 1812 } 1813 ierr = PetscFree(starts);CHKERRQ(ierr); 1814 1815 base = owners[rank]; 1816 1817 /* wait on receives */ 1818 ierr = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr); 1819 count = nrecvs; 1820 slen = 0; 1821 while (count) { 1822 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1823 /* unpack receives into our local space */ 1824 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1825 source[imdex] = recv_status.MPI_SOURCE; 1826 lens[imdex] = n; 1827 slen += n; 1828 count--; 1829 } 1830 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1831 1832 /* move the data into the send scatter */ 1833 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1834 count = 0; 1835 for (i=0; i<nrecvs; i++) { 1836 values = rvalues + i*nmax; 1837 for (j=0; j<lens[i]; j++) { 1838 lrows[count++] = values[j] - base; 1839 } 1840 } 1841 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1842 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1843 ierr = PetscFree(owner);CHKERRQ(ierr); 1844 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1845 1846 /* fix right hand side if needed */ 1847 if (x && b) { 1848 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1849 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1850 for (i=0; i<slen; i++) { 1851 bb[lrows[i]] = diag*xx[lrows[i]]; 1852 } 1853 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1854 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1855 } 1856 1857 /* actually zap the local rows */ 1858 /* 1859 Zero the required rows. If the "diagonal block" of the matrix 1860 is square and the user wishes to set the diagonal we use separate 1861 code so that MatSetValues() is not called for each diagonal allocating 1862 new memory, thus calling lots of mallocs and slowing things down. 
1863 1864 */ 1865 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1866 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1867 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1868 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 1869 } else if (diag != 0.0) { 1870 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1871 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1872 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1873 for (i=0; i<slen; i++) { 1874 row = lrows[i] + rstart_bs; 1875 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1876 } 1877 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1878 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1879 } else { 1880 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1881 } 1882 1883 ierr = PetscFree(lrows);CHKERRQ(ierr); 1884 1885 /* wait on sends */ 1886 if (nsends) { 1887 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1888 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1889 ierr = PetscFree(send_status);CHKERRQ(ierr); 1890 } 1891 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1892 ierr = PetscFree(svalues);CHKERRQ(ierr); 1893 1894 PetscFunctionReturn(0); 1895 } 1896 1897 #undef __FUNCT__ 1898 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1899 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1900 { 1901 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1902 PetscErrorCode ierr; 1903 1904 PetscFunctionBegin; 1905 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1906 PetscFunctionReturn(0); 1907 } 1908 1909 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1910 1911 #undef __FUNCT__ 1912 #define __FUNCT__ "MatEqual_MPIBAIJ" 1913 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1914 { 1915 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1916 Mat a,b,c,d; 1917 PetscBool flg; 1918 PetscErrorCode ierr; 1919 1920 PetscFunctionBegin; 1921 a = matA->A; b = matA->B; 1922 c = matB->A; d = matB->B; 1923 1924 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1925 if (flg) { 1926 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1927 } 1928 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1929 PetscFunctionReturn(0); 1930 } 1931 1932 #undef __FUNCT__ 1933 #define __FUNCT__ "MatCopy_MPIBAIJ" 1934 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1935 { 1936 PetscErrorCode ierr; 1937 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1938 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1939 1940 PetscFunctionBegin; 1941 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
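In that case fall back to MatCopy_Basic(), which copies the values generically instead of copying the diagonal and off-diagonal blocks directly.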
*/ 1942 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1943 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1944 } else { 1945 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1946 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1947 } 1948 PetscFunctionReturn(0); 1949 } 1950 1951 #undef __FUNCT__ 1952 #define __FUNCT__ "MatSetUp_MPIBAIJ" 1953 PetscErrorCode MatSetUp_MPIBAIJ(Mat A) 1954 { 1955 PetscErrorCode ierr; 1956 1957 PetscFunctionBegin; 1958 ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1959 PetscFunctionReturn(0); 1960 } 1961 1962 #undef __FUNCT__ 1963 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1964 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1965 { 1966 PetscErrorCode ierr; 1967 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1968 PetscBLASInt bnz,one=1; 1969 Mat_SeqBAIJ *x,*y; 1970 1971 PetscFunctionBegin; 1972 if (str == SAME_NONZERO_PATTERN) { 1973 PetscScalar alpha = a; 1974 x = (Mat_SeqBAIJ *)xx->A->data; 1975 y = (Mat_SeqBAIJ *)yy->A->data; 1976 bnz = PetscBLASIntCast(x->nz); 1977 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1978 x = (Mat_SeqBAIJ *)xx->B->data; 1979 y = (Mat_SeqBAIJ *)yy->B->data; 1980 bnz = PetscBLASIntCast(x->nz); 1981 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1982 } else { 1983 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1984 } 1985 PetscFunctionReturn(0); 1986 } 1987 1988 #undef __FUNCT__ 1989 #define __FUNCT__ "MatRealPart_MPIBAIJ" 1990 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1991 { 1992 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1993 PetscErrorCode ierr; 1994 1995 PetscFunctionBegin; 1996 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1997 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1998 PetscFunctionReturn(0); 1999 } 2000 2001 #undef __FUNCT__ 2002 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 2003 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 2004 { 2005 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2006 PetscErrorCode ierr; 2007 2008 PetscFunctionBegin; 2009 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2010 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2011 PetscFunctionReturn(0); 2012 } 2013 2014 #undef __FUNCT__ 2015 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 2016 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 2017 { 2018 PetscErrorCode ierr; 2019 IS iscol_local; 2020 PetscInt csize; 2021 2022 PetscFunctionBegin; 2023 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2024 if (call == MAT_REUSE_MATRIX) { 2025 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 2026 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2027 } else { 2028 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2029 } 2030 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 2031 if (call == MAT_INITIAL_MATRIX) { 2032 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 2033 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 2034 } 2035 PetscFunctionReturn(0); 2036 } 2037 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*); 2038 #undef __FUNCT__ 2039 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private" 2040 /* 2041 Not great since it makes two copies of the submatrix, first an SeqBAIJ 2042 in local and then by concatenating the local matrices the end result. 
2043 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ() 2044 */ 2045 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2046 { 2047 PetscErrorCode ierr; 2048 PetscMPIInt rank,size; 2049 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 2050 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow; 2051 Mat M,Mreuse; 2052 MatScalar *vwork,*aa; 2053 MPI_Comm comm = ((PetscObject)mat)->comm; 2054 IS isrow_new, iscol_new; 2055 PetscBool idflag,allrows, allcols; 2056 Mat_SeqBAIJ *aij; 2057 2058 2059 PetscFunctionBegin; 2060 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2061 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2062 /* The compression and expansion should be avoided. Doesn't point 2063 out errors, might change the indices, hence buggey */ 2064 ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr); 2065 ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr); 2066 2067 /* Check for special case: each processor gets entire matrix columns */ 2068 ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr); 2069 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 2070 if (idflag && ncol == mat->cmap->N) { 2071 allcols = PETSC_TRUE; 2072 } else { 2073 allcols = PETSC_FALSE; 2074 } 2075 2076 ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr); 2077 ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr); 2078 if (idflag && nrow == mat->rmap->N) { 2079 allrows = PETSC_TRUE; 2080 } else { 2081 allrows = PETSC_FALSE; 2082 } 2083 if (call == MAT_REUSE_MATRIX) { 2084 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2085 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2086 ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr); 2087 } else { 2088 ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr); 2089 } 2090 ierr = ISDestroy(&isrow_new);CHKERRQ(ierr); 2091 ierr = ISDestroy(&iscol_new);CHKERRQ(ierr); 2092 /* 2093 m - number of local rows 2094 n - number of columns (same on all processors) 2095 rstart - first row in new global matrix generated 2096 */ 2097 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2098 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2099 m = m/bs; 2100 n = n/bs; 2101 2102 if (call == MAT_INITIAL_MATRIX) { 2103 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2104 ii = aij->i; 2105 jj = aij->j; 2106 2107 /* 2108 Determine the number of non-zeros in the diagonal and off-diagonal 2109 portions of the matrix in order to do correct preallocation 2110 */ 2111 2112 /* first get start and end of "diagonal" columns */ 2113 if (csize == PETSC_DECIDE) { 2114 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2115 if (mglobal == n*bs) { /* square matrix */ 2116 nlocal = m; 2117 } else { 2118 nlocal = n/size + ((n % size) > rank); 2119 } 2120 } else { 2121 nlocal = csize/bs; 2122 } 2123 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2124 rstart = rend - nlocal; 2125 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2126 2127 /* next, compute all the lengths */ 2128 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2129 olens = dlens + 
m; 2130 for (i=0; i<m; i++) { 2131 jend = ii[i+1] - ii[i]; 2132 olen = 0; 2133 dlen = 0; 2134 for (j=0; j<jend; j++) { 2135 if (*jj < rstart || *jj >= rend) olen++; 2136 else dlen++; 2137 jj++; 2138 } 2139 olens[i] = olen; 2140 dlens[i] = dlen; 2141 } 2142 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2143 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 2144 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2145 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2146 ierr = PetscFree(dlens);CHKERRQ(ierr); 2147 } else { 2148 PetscInt ml,nl; 2149 2150 M = *newmat; 2151 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2152 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2153 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2154 /* 2155 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2156 rather than the slower MatSetValues(). 2157 */ 2158 M->was_assembled = PETSC_TRUE; 2159 M->assembled = PETSC_FALSE; 2160 } 2161 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2162 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2163 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2164 ii = aij->i; 2165 jj = aij->j; 2166 aa = aij->a; 2167 for (i=0; i<m; i++) { 2168 row = rstart/bs + i; 2169 nz = ii[i+1] - ii[i]; 2170 cwork = jj; jj += nz; 2171 vwork = aa; aa += nz*bs*bs; 2172 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2173 } 2174 2175 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2176 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2177 *newmat = M; 2178 2179 /* save submatrix used in processor for next request */ 2180 if (call == MAT_INITIAL_MATRIX) { 2181 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2182 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2183 } 2184 2185 PetscFunctionReturn(0); 2186 } 2187 2188 #undef __FUNCT__ 2189 #define __FUNCT__ "MatPermute_MPIBAIJ" 2190 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2191 { 2192 MPI_Comm comm,pcomm; 2193 PetscInt first,rlocal_size,clocal_size,nrows; 2194 const PetscInt *rows; 2195 PetscMPIInt size; 2196 IS crowp,growp,irowp,lrowp,lcolp; 2197 PetscErrorCode ierr; 2198 2199 PetscFunctionBegin; 2200 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2201 /* make a collective version of 'rowp' */ 2202 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2203 if (pcomm==comm) { 2204 crowp = rowp; 2205 } else { 2206 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2207 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2208 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2209 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2210 } 2211 /* collect the global row permutation and invert it */ 2212 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 2213 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 2214 if (pcomm!=comm) { 2215 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 2216 } 2217 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 2218 ierr = ISDestroy(&growp);CHKERRQ(ierr); 2219 /* get the local target indices */ 2220 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 2221 ierr = MatGetLocalSize(A,&rlocal_size,&clocal_size);CHKERRQ(ierr); 2222 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 2223 ierr = 
ISCreateGeneral(MPI_COMM_SELF,rlocal_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 2224 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 2225 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 2226 /* the column permutation is so much easier; 2227 make a local version of 'colp' and invert it */ 2228 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2229 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2230 if (size==1) { 2231 lcolp = colp; 2232 } else { 2233 ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr); 2234 } 2235 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2236 /* now we just get the submatrix */ 2237 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2238 if (size>1) { 2239 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 2240 } 2241 /* clean up */ 2242 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 2243 PetscFunctionReturn(0); 2244 } 2245 2246 #undef __FUNCT__ 2247 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2248 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2249 { 2250 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2251 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2252 2253 PetscFunctionBegin; 2254 if (nghosts) { *nghosts = B->nbs;} 2255 if (ghosts) {*ghosts = baij->garray;} 2256 PetscFunctionReturn(0); 2257 } 2258 2259 extern PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat); 2260 2261 #undef __FUNCT__ 2262 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ" 2263 /* 2264 This routine is almost identical to MatFDColoringCreate_MPIAIJ()! 2265 */ 2266 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 2267 { 2268 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 2269 PetscErrorCode ierr; 2270 PetscMPIInt size,*ncolsonproc,*disp,nn; 2271 PetscInt bs,i,n,nrows,j,k,m,ncols,col; 2272 const PetscInt *is,*rows = 0,*A_ci,*A_cj,*B_ci,*B_cj; 2273 PetscInt nis = iscoloring->n,nctot,*cols; 2274 PetscInt *rowhit,M,cstart,cend,colb; 2275 PetscInt *columnsforrow,l; 2276 IS *isa; 2277 PetscBool done,flg; 2278 ISLocalToGlobalMapping map = mat->cmap->bmapping; 2279 PetscInt *ltog = (map ?
map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype; 2280 2281 PetscFunctionBegin; 2282 if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();"); 2283 if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock"); 2284 2285 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 2286 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2287 M = mat->rmap->n/bs; 2288 cstart = mat->cmap->rstart/bs; 2289 cend = mat->cmap->rend/bs; 2290 c->M = mat->rmap->N/bs; /* set the global rows and columns and local rows */ 2291 c->N = mat->cmap->N/bs; 2292 c->m = mat->rmap->n/bs; 2293 c->rstart = mat->rmap->rstart/bs; 2294 2295 c->ncolors = nis; 2296 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 2297 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 2298 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 2299 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 2300 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 2301 ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr); 2302 2303 /* Allow access to data structures of local part of matrix */ 2304 if (!baij->colmap) { 2305 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 2306 } 2307 ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2308 ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2309 2310 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 2311 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 2312 2313 for (i=0; i<nis; i++) { 2314 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 2315 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 2316 c->ncolumns[i] = n; 2317 if (n) { 2318 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 2319 ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr); 2320 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 2321 } else { 2322 c->columns[i] = 0; 2323 } 2324 2325 if (ctype == IS_COLORING_GLOBAL) { 2326 /* Determine the total (parallel) number of columns of this color */ 2327 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 2328 ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr); 2329 2330 nn = PetscMPIIntCast(n); 2331 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2332 nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];} 2333 if (!nctot) { 2334 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 2335 } 2336 2337 disp[0] = 0; 2338 for (j=1; j<size; j++) { 2339 disp[j] = disp[j-1] + ncolsonproc[j-1]; 2340 } 2341 2342 /* Get complete list of columns for color on each processor */ 2343 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2344 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2345 ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr); 2346 } else if (ctype == IS_COLORING_GHOSTED) { 2347 /* Determine local number of columns of this color on this process, including ghost points */ 2348 nctot = n; 2349 ierr = 
PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2350 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 2351 } else { 2352 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 2353 } 2354 2355 /* 2356 Mark all rows affect by these columns 2357 */ 2358 /* Temporary option to allow for debugging/testing */ 2359 flg = PETSC_FALSE; 2360 ierr = PetscOptionsGetBool(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr); 2361 if (!flg) {/*-----------------------------------------------------------------------------*/ 2362 /* crude, fast version */ 2363 ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr); 2364 /* loop over columns*/ 2365 for (j=0; j<nctot; j++) { 2366 if (ctype == IS_COLORING_GHOSTED) { 2367 col = ltog[cols[j]]; 2368 } else { 2369 col = cols[j]; 2370 } 2371 if (col >= cstart && col < cend) { 2372 /* column is in diagonal block of matrix */ 2373 rows = A_cj + A_ci[col-cstart]; 2374 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2375 } else { 2376 #if defined (PETSC_USE_CTABLE) 2377 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2378 colb --; 2379 #else 2380 colb = baij->colmap[col] - 1; 2381 #endif 2382 if (colb == -1) { 2383 m = 0; 2384 } else { 2385 colb = colb/bs; 2386 rows = B_cj + B_ci[colb]; 2387 m = B_ci[colb+1] - B_ci[colb]; 2388 } 2389 } 2390 /* loop over columns marking them in rowhit */ 2391 for (k=0; k<m; k++) { 2392 rowhit[*rows++] = col + 1; 2393 } 2394 } 2395 2396 /* count the number of hits */ 2397 nrows = 0; 2398 for (j=0; j<M; j++) { 2399 if (rowhit[j]) nrows++; 2400 } 2401 c->nrows[i] = nrows; 2402 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2403 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2404 ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2405 nrows = 0; 2406 for (j=0; j<M; j++) { 2407 if (rowhit[j]) { 2408 c->rows[i][nrows] = j; 2409 c->columnsforrow[i][nrows] = rowhit[j] - 1; 2410 nrows++; 2411 } 2412 } 2413 } else {/*-------------------------------------------------------------------------------*/ 2414 /* slow version, using rowhit as a linked list */ 2415 PetscInt currentcol,fm,mfm; 2416 rowhit[M] = M; 2417 nrows = 0; 2418 /* loop over columns*/ 2419 for (j=0; j<nctot; j++) { 2420 if (ctype == IS_COLORING_GHOSTED) { 2421 col = ltog[cols[j]]; 2422 } else { 2423 col = cols[j]; 2424 } 2425 if (col >= cstart && col < cend) { 2426 /* column is in diagonal block of matrix */ 2427 rows = A_cj + A_ci[col-cstart]; 2428 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2429 } else { 2430 #if defined (PETSC_USE_CTABLE) 2431 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2432 colb --; 2433 #else 2434 colb = baij->colmap[col] - 1; 2435 #endif 2436 if (colb == -1) { 2437 m = 0; 2438 } else { 2439 colb = colb/bs; 2440 rows = B_cj + B_ci[colb]; 2441 m = B_ci[colb+1] - B_ci[colb]; 2442 } 2443 } 2444 2445 /* loop over columns marking them in rowhit */ 2446 fm = M; /* fm points to first entry in linked list */ 2447 for (k=0; k<m; k++) { 2448 currentcol = *rows++; 2449 /* is it already in the list? 
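rowhit[] doubles as a sorted singly linked list of the rows hit so far, with index M serving as both the list head and the end sentinel, so membership is checked by walking the list until an entry that is >= currentcol is reached.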
*/ 2450 do { 2451 mfm = fm; 2452 fm = rowhit[fm]; 2453 } while (fm < currentcol); 2454 /* not in list so add it */ 2455 if (fm != currentcol) { 2456 nrows++; 2457 columnsforrow[currentcol] = col; 2458 /* next three lines insert new entry into linked list */ 2459 rowhit[mfm] = currentcol; 2460 rowhit[currentcol] = fm; 2461 fm = currentcol; 2462 /* fm points to present position in list since we know the columns are sorted */ 2463 } else { 2464 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected"); 2465 } 2466 } 2467 } 2468 c->nrows[i] = nrows; 2469 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2470 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2471 ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2472 /* now store the linked list of rows into c->rows[i] */ 2473 nrows = 0; 2474 fm = rowhit[M]; 2475 do { 2476 c->rows[i][nrows] = fm; 2477 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 2478 fm = rowhit[fm]; 2479 } while (fm < M); 2480 } /* ---------------------------------------------------------------------------------------*/ 2481 ierr = PetscFree(cols);CHKERRQ(ierr); 2482 } 2483 2484 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 2485 /* 2486 vscale will contain the "diagonal" on processor scalings followed by the off processor 2487 */ 2488 if (ctype == IS_COLORING_GLOBAL) { 2489 PetscInt *garray; 2490 ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2491 for (i=0; i<baij->B->cmap->n/bs; i++) { 2492 for (j=0; j<bs; j++) { 2493 garray[i*bs+j] = bs*baij->garray[i]+j; 2494 } 2495 } 2496 ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr); 2497 ierr = PetscFree(garray);CHKERRQ(ierr); 2498 CHKMEMQ; 2499 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2500 for (k=0; k<c->ncolors; k++) { 2501 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2502 for (l=0; l<c->nrows[k]; l++) { 2503 col = c->columnsforrow[k][l]; 2504 if (col >= cstart && col < cend) { 2505 /* column is in diagonal block of matrix */ 2506 colb = col - cstart; 2507 } else { 2508 /* column is in "off-processor" part */ 2509 #if defined (PETSC_USE_CTABLE) 2510 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2511 colb --; 2512 #else 2513 colb = baij->colmap[col] - 1; 2514 #endif 2515 colb = colb/bs; 2516 colb += cend - cstart; 2517 } 2518 c->vscaleforrow[k][l] = colb; 2519 } 2520 } 2521 } else if (ctype == IS_COLORING_GHOSTED) { 2522 /* Get gtol mapping */ 2523 PetscInt N = mat->cmap->N, *gtol; 2524 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr); 2525 for (i=0; i<N; i++) gtol[i] = -1; 2526 for (i=0; i<map->n; i++) gtol[ltog[i]] = i; 2527 2528 c->vscale = 0; /* will be created in MatFDColoringApply() */ 2529 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2530 for (k=0; k<c->ncolors; k++) { 2531 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2532 for (l=0; l<c->nrows[k]; l++) { 2533 col = c->columnsforrow[k][l]; /* global column index */ 2534 c->vscaleforrow[k][l] = gtol[col]; /* local column index */ 2535 } 2536 } 2537 ierr = PetscFree(gtol);CHKERRQ(ierr); 2538 } 2539 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 2540 2541 ierr = PetscFree(rowhit);CHKERRQ(ierr); 2542 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 2543
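/* release the compressed-column views of the diagonal and off-diagonal blocks obtained with MatGetColumnIJ() above */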
ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2544 ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2545 CHKMEMQ; 2546 PetscFunctionReturn(0); 2547 } 2548 2549 #undef __FUNCT__ 2550 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ" 2551 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat) 2552 { 2553 Mat B; 2554 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2555 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2556 Mat_SeqAIJ *b; 2557 PetscErrorCode ierr; 2558 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2559 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2560 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2561 2562 PetscFunctionBegin; 2563 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2564 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2565 2566 /* ---------------------------------------------------------------- 2567 Tell every processor the number of nonzeros per row 2568 */ 2569 ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 2570 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2571 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2572 } 2573 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2574 ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr); 2575 displs = recvcounts + size; 2576 for (i=0; i<size; i++) { 2577 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2578 displs[i] = A->rmap->range[i]/bs; 2579 } 2580 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2581 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2582 #else 2583 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2584 #endif 2585 /* --------------------------------------------------------------- 2586 Create the sequential matrix of the same type as the local block diagonal 2587 */ 2588 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2589 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2590 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2591 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2592 b = (Mat_SeqAIJ *)B->data; 2593 2594 /*-------------------------------------------------------------------- 2595 Copy my part of matrix column indices over 2596 */ 2597 sendcount = ad->nz + bd->nz; 2598 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2599 a_jsendbuf = ad->j; 2600 b_jsendbuf = bd->j; 2601 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2602 cnt = 0; 2603 for (i=0; i<n; i++) { 2604 2605 /* put in lower diagonal portion */ 2606 m = bd->i[i+1] - bd->i[i]; 2607 while (m > 0) { 2608 /* is it above diagonal (in bd (compressed) numbering) */ 2609 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2610 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2611 m--; 2612 } 2613 2614 /* put in diagonal portion */ 2615 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2616 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2617 } 2618 2619 /* put in upper diagonal portion */ 2620 while (m-- > 0) { 2621 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2622 } 2623 } 2624 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D 
actual nz %D",sendcount,cnt); 2625 2626 /*-------------------------------------------------------------------- 2627 Gather all column indices to all processors 2628 */ 2629 for (i=0; i<size; i++) { 2630 recvcounts[i] = 0; 2631 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2632 recvcounts[i] += lens[j]; 2633 } 2634 } 2635 displs[0] = 0; 2636 for (i=1; i<size; i++) { 2637 displs[i] = displs[i-1] + recvcounts[i-1]; 2638 } 2639 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2640 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2641 #else 2642 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2643 #endif 2644 /*-------------------------------------------------------------------- 2645 Assemble the matrix into useable form (note numerical values not yet set) 2646 */ 2647 /* set the b->ilen (length of each row) values */ 2648 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2649 /* set the b->i indices */ 2650 b->i[0] = 0; 2651 for (i=1; i<=A->rmap->N/bs; i++) { 2652 b->i[i] = b->i[i-1] + lens[i-1]; 2653 } 2654 ierr = PetscFree(lens);CHKERRQ(ierr); 2655 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2656 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2657 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2658 2659 if (A->symmetric) { 2660 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2661 } else if (A->hermitian) { 2662 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2663 } else if (A->structurally_symmetric) { 2664 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2665 } 2666 *newmat = B; 2667 PetscFunctionReturn(0); 2668 } 2669 2670 #undef __FUNCT__ 2671 #define __FUNCT__ "MatSOR_MPIBAIJ" 2672 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2673 { 2674 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2675 PetscErrorCode ierr; 2676 Vec bb1 = 0; 2677 2678 PetscFunctionBegin; 2679 if (flag == SOR_APPLY_UPPER) { 2680 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2681 PetscFunctionReturn(0); 2682 } 2683 2684 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2685 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2686 } 2687 2688 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2689 if (flag & SOR_ZERO_INITIAL_GUESS) { 2690 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2691 its--; 2692 } 2693 2694 while (its--) { 2695 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2696 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2697 2698 /* update rhs: bb1 = bb - B*x */ 2699 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2700 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2701 2702 /* local sweep */ 2703 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2704 } 2705 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2706 if (flag & SOR_ZERO_INITIAL_GUESS) { 2707 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2708 its--; 2709 } 2710 while (its--) { 2711 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2712 ierr = 
VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2713 2714 /* update rhs: bb1 = bb - B*x */ 2715 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2716 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2717 2718 /* local sweep */ 2719 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2720 } 2721 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2722 if (flag & SOR_ZERO_INITIAL_GUESS) { 2723 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2724 its--; 2725 } 2726 while (its--) { 2727 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2728 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2729 2730 /* update rhs: bb1 = bb - B*x */ 2731 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2732 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2733 2734 /* local sweep */ 2735 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2736 } 2737 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2738 2739 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2740 PetscFunctionReturn(0); 2741 } 2742 2743 extern PetscErrorCode MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2744 2745 #undef __FUNCT__ 2746 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ" 2747 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values) 2748 { 2749 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 2750 PetscErrorCode ierr; 2751 2752 PetscFunctionBegin; 2753 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2754 PetscFunctionReturn(0); 2755 } 2756 2757 2758 /* -------------------------------------------------------------------*/ 2759 static struct _MatOps MatOps_Values = { 2760 MatSetValues_MPIBAIJ, 2761 MatGetRow_MPIBAIJ, 2762 MatRestoreRow_MPIBAIJ, 2763 MatMult_MPIBAIJ, 2764 /* 4*/ MatMultAdd_MPIBAIJ, 2765 MatMultTranspose_MPIBAIJ, 2766 MatMultTransposeAdd_MPIBAIJ, 2767 0, 2768 0, 2769 0, 2770 /*10*/ 0, 2771 0, 2772 0, 2773 MatSOR_MPIBAIJ, 2774 MatTranspose_MPIBAIJ, 2775 /*15*/ MatGetInfo_MPIBAIJ, 2776 MatEqual_MPIBAIJ, 2777 MatGetDiagonal_MPIBAIJ, 2778 MatDiagonalScale_MPIBAIJ, 2779 MatNorm_MPIBAIJ, 2780 /*20*/ MatAssemblyBegin_MPIBAIJ, 2781 MatAssemblyEnd_MPIBAIJ, 2782 MatSetOption_MPIBAIJ, 2783 MatZeroEntries_MPIBAIJ, 2784 /*24*/ MatZeroRows_MPIBAIJ, 2785 0, 2786 0, 2787 0, 2788 0, 2789 /*29*/ MatSetUp_MPIBAIJ, 2790 0, 2791 0, 2792 0, 2793 0, 2794 /*34*/ MatDuplicate_MPIBAIJ, 2795 0, 2796 0, 2797 0, 2798 0, 2799 /*39*/ MatAXPY_MPIBAIJ, 2800 MatGetSubMatrices_MPIBAIJ, 2801 MatIncreaseOverlap_MPIBAIJ, 2802 MatGetValues_MPIBAIJ, 2803 MatCopy_MPIBAIJ, 2804 /*44*/ 0, 2805 MatScale_MPIBAIJ, 2806 0, 2807 0, 2808 0, 2809 /*49*/ 0, 2810 0, 2811 0, 2812 0, 2813 0, 2814 /*54*/ MatFDColoringCreate_MPIBAIJ, 2815 0, 2816 MatSetUnfactored_MPIBAIJ, 2817 MatPermute_MPIBAIJ, 2818 MatSetValuesBlocked_MPIBAIJ, 2819 /*59*/ MatGetSubMatrix_MPIBAIJ, 2820 MatDestroy_MPIBAIJ, 2821 MatView_MPIBAIJ, 2822 0, 2823 0, 2824 /*64*/ 0, 2825 0, 2826 0, 2827 0, 2828 0, 2829 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2830 0, 2831 0, 2832 0, 2833 0, 2834 /*74*/ 0, 2835 MatFDColoringApply_BAIJ, 2836 0, 2837 0, 2838 0, 2839 /*79*/ 0, 2840 0, 2841 0, 2842 0, 2843 MatLoad_MPIBAIJ, 2844 /*84*/ 0, 2845 0, 2846 0, 2847 0, 2848 0, 2849 /*89*/ 0, 2850 0, 2851 0, 2852 0, 2853 0, 2854 /*94*/ 0, 2855 0, 2856 0, 2857 0, 2858 0, 2859 
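/* a zero entry below means MATMPIBAIJ does not supply the corresponding MatOps slot */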
/*99*/ 0, 2860 0, 2861 0, 2862 0, 2863 0, 2864 /*104*/0, 2865 MatRealPart_MPIBAIJ, 2866 MatImaginaryPart_MPIBAIJ, 2867 0, 2868 0, 2869 /*109*/0, 2870 0, 2871 0, 2872 0, 2873 0, 2874 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ, 2875 0, 2876 MatGetGhosts_MPIBAIJ, 2877 0, 2878 0, 2879 /*119*/0, 2880 0, 2881 0, 2882 0, 2883 0, 2884 /*124*/0, 2885 0, 2886 MatInvertBlockDiagonal_MPIBAIJ 2887 }; 2888 2889 EXTERN_C_BEGIN 2890 #undef __FUNCT__ 2891 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2892 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 2893 { 2894 PetscFunctionBegin; 2895 *a = ((Mat_MPIBAIJ *)A->data)->A; 2896 PetscFunctionReturn(0); 2897 } 2898 EXTERN_C_END 2899 2900 EXTERN_C_BEGIN 2901 extern PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2902 EXTERN_C_END 2903 2904 EXTERN_C_BEGIN 2905 #undef __FUNCT__ 2906 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2907 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2908 { 2909 PetscInt m,rstart,cstart,cend; 2910 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2911 const PetscInt *JJ=0; 2912 PetscScalar *values=0; 2913 PetscErrorCode ierr; 2914 2915 PetscFunctionBegin; 2916 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2917 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2918 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2919 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2920 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2921 m = B->rmap->n/bs; 2922 rstart = B->rmap->rstart/bs; 2923 cstart = B->cmap->rstart/bs; 2924 cend = B->cmap->rend/bs; 2925 2926 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2927 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 2928 for (i=0; i<m; i++) { 2929 nz = ii[i+1] - ii[i]; 2930 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2931 nz_max = PetscMax(nz_max,nz); 2932 JJ = jj + ii[i]; 2933 for (j=0; j<nz; j++) { 2934 if (*JJ >= cstart) break; 2935 JJ++; 2936 } 2937 d = 0; 2938 for (; j<nz; j++) { 2939 if (*JJ++ >= cend) break; 2940 d++; 2941 } 2942 d_nnz[i] = d; 2943 o_nnz[i] = nz - d; 2944 } 2945 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2946 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2947 2948 values = (PetscScalar*)V; 2949 if (!values) { 2950 ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2951 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2952 } 2953 for (i=0; i<m; i++) { 2954 PetscInt row = i + rstart; 2955 PetscInt ncols = ii[i+1] - ii[i]; 2956 const PetscInt *icols = jj + ii[i]; 2957 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2958 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2959 } 2960 2961 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2962 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2963 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2964 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2965 PetscFunctionReturn(0); 2966 } 2967 EXTERN_C_END 2968 2969 #undef __FUNCT__ 2970 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2971 /*@C 2972 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2973 (the default parallel PETSc format). 
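   The i, j, and v arrays describe the local rows in block-compressed-row form: i and j index block rows and block columns, and v, when provided, holds bs*bs values for each block listed in j.

   A minimal illustrative sketch (hypothetical values; A is assumed to be an existing MATMPIBAIJ matrix with block size 2 and a single local block row holding two blocks):
.vb
      PetscInt    ii[2] = {0,2};               /* one local block row containing 2 blocks */
      PetscInt    jj[2] = {0,3};               /* block columns 0 and 3                   */
      PetscScalar vv[8] = {1,2,3,4,5,6,7,8};   /* bs*bs = 4 values for each block         */
      ierr = MatMPIBAIJSetPreallocationCSR(A,2,ii,jj,vv);CHKERRQ(ierr);
.ve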
2974 2975 Collective on MPI_Comm 2976 2977 Input Parameters: 2978 + A - the matrix 2979 . bs - the block size 2980 . i - the indices into j for the start of each local row (starts with zero) 2981 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2982 - v - optional values in the matrix 2983 2984 Level: developer 2985 2986 .keywords: matrix, aij, compressed row, sparse, parallel 2987 2988 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ 2989 @*/ 2990 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2991 { 2992 PetscErrorCode ierr; 2993 2994 PetscFunctionBegin; 2995 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2996 PetscValidType(B,1); 2997 PetscValidLogicalCollectiveInt(B,bs,2); 2998 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2999 PetscFunctionReturn(0); 3000 } 3001 3002 EXTERN_C_BEGIN 3003 #undef __FUNCT__ 3004 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 3005 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 3006 { 3007 Mat_MPIBAIJ *b; 3008 PetscErrorCode ierr; 3009 PetscInt i; 3010 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3011 3012 PetscFunctionBegin; 3013 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3014 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3015 3016 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3017 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3018 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3019 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3020 3021 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3022 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3023 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3024 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3025 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 3026 3027 if (d_nnz) { 3028 for (i=0; i<B->rmap->n/bs; i++) { 3029 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 3030 } 3031 } 3032 if (o_nnz) { 3033 for (i=0; i<B->rmap->n/bs; i++) { 3034 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 3035 } 3036 } 3037 3038 b = (Mat_MPIBAIJ*)B->data; 3039 b->bs2 = bs*bs; 3040 b->mbs = B->rmap->n/bs; 3041 b->nbs = B->cmap->n/bs; 3042 b->Mbs = B->rmap->N/bs; 3043 b->Nbs = B->cmap->N/bs; 3044 3045 for (i=0; i<=b->size; i++) { 3046 b->rangebs[i] = B->rmap->range[i]/bs; 3047 } 3048 b->rstartbs = B->rmap->rstart/bs; 3049 b->rendbs = B->rmap->rend/bs; 3050 b->cstartbs = B->cmap->rstart/bs; 3051 b->cendbs = B->cmap->rend/bs; 3052 3053 if (!B->preallocated) { 3054 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3055 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3056 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 3057 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3058 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3059 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3060 ierr = 
MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 3061 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3062 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 3063 } 3064 3065 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3066 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 3067 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */ 3068 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3069 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3070 B->preallocated = PETSC_TRUE; 3071 PetscFunctionReturn(0); 3072 } 3073 EXTERN_C_END 3074 3075 EXTERN_C_BEGIN 3076 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 3077 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 3078 EXTERN_C_END 3079 3080 3081 EXTERN_C_BEGIN 3082 #undef __FUNCT__ 3083 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 3084 PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj) 3085 { 3086 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 3087 PetscErrorCode ierr; 3088 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 3089 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 3090 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 3091 3092 PetscFunctionBegin; 3093 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr); 3094 ii[0] = 0; 3095 CHKMEMQ; 3096 for (i=0; i<M; i++) { 3097 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 3098 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 3099 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 3100 /* remove one from count of matrix has diagonal */ 3101 for (j=id[i]; j<id[i+1]; j++) { 3102 if (jd[j] == i) {ii[i+1]--;break;} 3103 } 3104 CHKMEMQ; 3105 } 3106 ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3107 cnt = 0; 3108 for (i=0; i<M; i++) { 3109 for (j=io[i]; j<io[i+1]; j++) { 3110 if (garray[jo[j]] > rstart) break; 3111 jj[cnt++] = garray[jo[j]]; 3112 CHKMEMQ; 3113 } 3114 for (k=id[i]; k<id[i+1]; k++) { 3115 if (jd[k] != i) { 3116 jj[cnt++] = rstart + jd[k]; 3117 CHKMEMQ; 3118 } 3119 } 3120 for (;j<io[i+1]; j++) { 3121 jj[cnt++] = garray[jo[j]]; 3122 CHKMEMQ; 3123 } 3124 } 3125 ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr); 3126 PetscFunctionReturn(0); 3127 } 3128 EXTERN_C_END 3129 3130 #include <../src/mat/impls/aij/mpi/mpiaij.h> 3131 EXTERN_C_BEGIN 3132 PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*); 3133 EXTERN_C_END 3134 3135 EXTERN_C_BEGIN 3136 #undef __FUNCT__ 3137 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ" 3138 PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 3139 { 3140 PetscErrorCode ierr; 3141 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3142 Mat B; 3143 Mat_MPIAIJ *b; 3144 3145 PetscFunctionBegin; 3146 if (!A->assembled) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Matrix must be assembled"); 3147 3148 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 3149 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3150 ierr = 
MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 3151 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3152 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 3153 b = (Mat_MPIAIJ*) B->data; 3154 3155 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 3156 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 3157 ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 3158 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 3159 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 3160 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3161 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3162 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3163 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3164 if (reuse == MAT_REUSE_MATRIX) { 3165 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 3166 } else { 3167 *newmat = B; 3168 } 3169 PetscFunctionReturn(0); 3170 } 3171 EXTERN_C_END 3172 3173 EXTERN_C_BEGIN 3174 #if defined(PETSC_HAVE_MUMPS) 3175 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3176 #endif 3177 EXTERN_C_END 3178 3179 /*MC 3180 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 3181 3182 Options Database Keys: 3183 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 3184 . -mat_block_size <bs> - set the blocksize used to store the matrix 3185 - -mat_use_hash_table <fact> 3186 3187 Level: beginner 3188 3189 .seealso: MatCreateMPIBAIJ 3190 M*/ 3191 3192 EXTERN_C_BEGIN 3193 extern PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*); 3194 EXTERN_C_END 3195 3196 EXTERN_C_BEGIN 3197 #undef __FUNCT__ 3198 #define __FUNCT__ "MatCreate_MPIBAIJ" 3199 PetscErrorCode MatCreate_MPIBAIJ(Mat B) 3200 { 3201 Mat_MPIBAIJ *b; 3202 PetscErrorCode ierr; 3203 PetscBool flg; 3204 3205 PetscFunctionBegin; 3206 ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr); 3207 B->data = (void*)b; 3208 3209 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3210 B->assembled = PETSC_FALSE; 3211 3212 B->insertmode = NOT_SET_VALUES; 3213 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 3214 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 3215 3216 /* build local table of row and column ownerships */ 3217 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 3218 3219 /* build cache for off array entries formed */ 3220 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 3221 b->donotstash = PETSC_FALSE; 3222 b->colmap = PETSC_NULL; 3223 b->garray = PETSC_NULL; 3224 b->roworiented = PETSC_TRUE; 3225 3226 /* stuff used in block assembly */ 3227 b->barray = 0; 3228 3229 /* stuff used for matrix vector multiply */ 3230 b->lvec = 0; 3231 b->Mvctx = 0; 3232 3233 /* stuff for MatGetRow() */ 3234 b->rowindices = 0; 3235 b->rowvalues = 0; 3236 b->getrowactive = PETSC_FALSE; 3237 3238 /* hash table stuff */ 3239 b->ht = 0; 3240 b->hd = 0; 3241 b->ht_size = 0; 3242 b->ht_flag = PETSC_FALSE; 3243 b->ht_fact = 0; 3244 b->ht_total_ct = 0; 3245 b->ht_insert_ct = 0; 3246 3247 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 3248 b->ijonly = PETSC_FALSE; 3249 3250 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 3251 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in 
constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 3252 if (flg) { 3253 PetscReal fact = 1.39; 3254 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 3255 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 3256 if (fact <= 1.0) fact = 1.39; 3257 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 3258 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 3259 } 3260 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3261 3262 #if defined(PETSC_HAVE_MUMPS) 3263 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr); 3264 #endif 3265 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C", 3266 "MatConvert_MPIBAIJ_MPIAdj", 3267 MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3268 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiaij_C", 3269 "MatConvert_MPIBAIJ_MPIAIJ", 3270 MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 3271 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C", 3272 "MatConvert_MPIBAIJ_MPISBAIJ", 3273 MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr); 3274 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3275 "MatStoreValues_MPIBAIJ", 3276 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3277 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3278 "MatRetrieveValues_MPIBAIJ", 3279 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3280 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3281 "MatGetDiagonalBlock_MPIBAIJ", 3282 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3283 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 3284 "MatMPIBAIJSetPreallocation_MPIBAIJ", 3285 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3286 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 3287 "MatMPIBAIJSetPreallocationCSR_MPIBAIJ", 3288 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3289 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3290 "MatDiagonalScaleLocal_MPIBAIJ", 3291 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3292 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 3293 "MatSetHashTableFactor_MPIBAIJ", 3294 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3295 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C", 3296 "MatConvert_MPIBAIJ_MPIBSTRM", 3297 MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr); 3298 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3299 PetscFunctionReturn(0); 3300 } 3301 EXTERN_C_END 3302 3303 /*MC 3304 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3305 3306 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3307 and MATMPIBAIJ otherwise. 3308 3309 Options Database Keys: 3310 . 
-mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateBAIJ(), MATSEQBAIJ, MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
M*/

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocation"
/*@C
   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
   (block compressed row). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  A - the matrix
.  bs - size of block
.  d_nz - number of block nonzeros per block row in the diagonal portion of the local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL. If you plan to factor the matrix you must leave room for the diagonal entry and
           set it even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of the local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of block nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   If an *_nnz parameter is given then the corresponding *_nz parameter is ignored.

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact>

   Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  | o o o d d d o o o o o o
   row 4  | o o o d d d o o o o o o
   row 5  | o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
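   For example (the block size, sizes, and fill estimates below are illustrative only, not taken
   from an actual application), a process that owns two block rows of a bs=2 matrix and expects at
   most 3 diagonal-part and 1 off-diagonal-part block nonzeros per block row could preallocate with
.vb
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,4,4,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);  /* 2 block rows, bs = 2 */
      ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
      ierr = MatMPIBAIJSetPreallocation(A,2,3,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
.ve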
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateBAIJ"
/*@C
   MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
      This value should be the same as the local size used in creating the
      y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
      This value should be the same as the local size used in creating the
      x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of nonzero blocks per block row in the diagonal portion of the local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL. If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
.  o_nz - number of nonzero blocks per block row in the off-diagonal portion of the local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact>

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]

   Notes:
   If an *_nnz parameter is given then the corresponding *_nz parameter is ignored.

   A nonzero block is any block that has 1 or more nonzeros in it.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  | o o o d d d o o o o o o
   row 4  | o o o d d d o o o o o o
   row 5  | o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
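   For example (the block size, global size, and fill estimates below are illustrative only), a
   block size 3 matrix with roughly 5 diagonal-part and 2 off-diagonal-part block nonzeros per
   block row could be created with
.vb
      Mat A;
      ierr = MatCreateBAIJ(PETSC_COMM_WORLD,3,PETSC_DECIDE,PETSC_DECIDE,300,300,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
      /* set entries with MatSetValuesBlocked(), then assemble */
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve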
3503 3504 Level: intermediate 3505 3506 .keywords: matrix, block, aij, compressed row, sparse, parallel 3507 3508 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3509 @*/ 3510 PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3511 { 3512 PetscErrorCode ierr; 3513 PetscMPIInt size; 3514 3515 PetscFunctionBegin; 3516 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3517 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3518 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3519 if (size > 1) { 3520 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3521 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3522 } else { 3523 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3524 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3525 } 3526 PetscFunctionReturn(0); 3527 } 3528 3529 #undef __FUNCT__ 3530 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3531 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3532 { 3533 Mat mat; 3534 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3535 PetscErrorCode ierr; 3536 PetscInt len=0; 3537 3538 PetscFunctionBegin; 3539 *newmat = 0; 3540 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3541 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3542 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3543 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3544 3545 mat->factortype = matin->factortype; 3546 mat->preallocated = PETSC_TRUE; 3547 mat->assembled = PETSC_TRUE; 3548 mat->insertmode = NOT_SET_VALUES; 3549 3550 a = (Mat_MPIBAIJ*)mat->data; 3551 mat->rmap->bs = matin->rmap->bs; 3552 a->bs2 = oldmat->bs2; 3553 a->mbs = oldmat->mbs; 3554 a->nbs = oldmat->nbs; 3555 a->Mbs = oldmat->Mbs; 3556 a->Nbs = oldmat->Nbs; 3557 3558 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3559 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3560 3561 a->size = oldmat->size; 3562 a->rank = oldmat->rank; 3563 a->donotstash = oldmat->donotstash; 3564 a->roworiented = oldmat->roworiented; 3565 a->rowindices = 0; 3566 a->rowvalues = 0; 3567 a->getrowactive = PETSC_FALSE; 3568 a->barray = 0; 3569 a->rstartbs = oldmat->rstartbs; 3570 a->rendbs = oldmat->rendbs; 3571 a->cstartbs = oldmat->cstartbs; 3572 a->cendbs = oldmat->cendbs; 3573 3574 /* hash table stuff */ 3575 a->ht = 0; 3576 a->hd = 0; 3577 a->ht_size = 0; 3578 a->ht_flag = oldmat->ht_flag; 3579 a->ht_fact = oldmat->ht_fact; 3580 a->ht_total_ct = 0; 3581 a->ht_insert_ct = 0; 3582 3583 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3584 if (oldmat->colmap) { 3585 #if defined (PETSC_USE_CTABLE) 3586 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3587 #else 3588 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3589 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3590 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3591 #endif 3592 } else a->colmap = 0; 3593 3594 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3595 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3596 ierr = 
PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3597 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3598 } else a->garray = 0; 3599 3600 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3601 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3602 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3603 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3604 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3605 3606 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3607 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3608 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3609 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3610 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3611 *newmat = mat; 3612 3613 PetscFunctionReturn(0); 3614 } 3615 3616 #undef __FUNCT__ 3617 #define __FUNCT__ "MatLoad_MPIBAIJ" 3618 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer) 3619 { 3620 PetscErrorCode ierr; 3621 int fd; 3622 PetscInt i,nz,j,rstart,rend; 3623 PetscScalar *vals,*buf; 3624 MPI_Comm comm = ((PetscObject)viewer)->comm; 3625 MPI_Status status; 3626 PetscMPIInt rank,size,maxnz; 3627 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3628 PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 3629 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 3630 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3631 PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 3632 PetscInt dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols; 3633 3634 PetscFunctionBegin; 3635 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3636 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3637 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3638 3639 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3640 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3641 if (!rank) { 3642 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3643 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3644 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3645 } 3646 3647 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 3648 3649 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3650 M = header[1]; N = header[2]; 3651 3652 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3653 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 3654 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 3655 3656 /* If global sizes are set, check if they are consistent with that given in the file */ 3657 if (sizesset) { 3658 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 3659 } 3660 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3661 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3662 3663 if (M != N) 
SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices"); 3664 3665 /* 3666 This code adds extra rows to make sure the number of rows is 3667 divisible by the blocksize 3668 */ 3669 Mbs = M/bs; 3670 extra_rows = bs - M + bs*Mbs; 3671 if (extra_rows == bs) extra_rows = 0; 3672 else Mbs++; 3673 if (extra_rows && !rank) { 3674 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3675 } 3676 3677 /* determine ownership of all rows */ 3678 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 3679 mbs = Mbs/size + ((Mbs % size) > rank); 3680 m = mbs*bs; 3681 } else { /* User set */ 3682 m = newmat->rmap->n; 3683 mbs = m/bs; 3684 } 3685 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 3686 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3687 3688 /* process 0 needs enough room for process with most rows */ 3689 if (!rank) { 3690 mmax = rowners[1]; 3691 for (i=2; i<=size; i++) { 3692 mmax = PetscMax(mmax,rowners[i]); 3693 } 3694 mmax*=bs; 3695 } else mmax = m; 3696 3697 rowners[0] = 0; 3698 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3699 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3700 rstart = rowners[rank]; 3701 rend = rowners[rank+1]; 3702 3703 /* distribute row lengths to all processors */ 3704 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 3705 if (!rank) { 3706 mend = m; 3707 if (size == 1) mend = mend - extra_rows; 3708 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3709 for (j=mend; j<m; j++) locrowlens[j] = 1; 3710 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3711 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3712 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3713 for (j=0; j<m; j++) { 3714 procsnz[0] += locrowlens[j]; 3715 } 3716 for (i=1; i<size; i++) { 3717 mend = browners[i+1] - browners[i]; 3718 if (i == size-1) mend = mend - extra_rows; 3719 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3720 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3721 /* calculate the number of nonzeros on each processor */ 3722 for (j=0; j<browners[i+1]-browners[i]; j++) { 3723 procsnz[i] += rowlengths[j]; 3724 } 3725 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3726 } 3727 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3728 } else { 3729 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3730 } 3731 3732 if (!rank) { 3733 /* determine max buffer needed and allocate it */ 3734 maxnz = procsnz[0]; 3735 for (i=1; i<size; i++) { 3736 maxnz = PetscMax(maxnz,procsnz[i]); 3737 } 3738 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3739 3740 /* read in my part of the matrix column indices */ 3741 nz = procsnz[0]; 3742 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3743 mycols = ibuf; 3744 if (size == 1) nz -= extra_rows; 3745 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3746 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 3747 3748 /* read in every ones (except the last) and ship off */ 3749 for (i=1; i<size-1; i++) { 3750 nz = procsnz[i]; 3751 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3752 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3753 } 3754 /* read in the stuff for the last proc */ 3755 if (size != 1) { 3756 nz = procsnz[size-1] - extra_rows; /* the extra rows 
are not on the disk */ 3757 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3758 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3759 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3760 } 3761 ierr = PetscFree(cols);CHKERRQ(ierr); 3762 } else { 3763 /* determine buffer space needed for message */ 3764 nz = 0; 3765 for (i=0; i<m; i++) { 3766 nz += locrowlens[i]; 3767 } 3768 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3769 mycols = ibuf; 3770 /* receive message of column indices*/ 3771 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3772 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3773 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3774 } 3775 3776 /* loop over local rows, determining number of off diagonal entries */ 3777 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 3778 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 3779 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3780 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3781 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3782 rowcount = 0; nzcount = 0; 3783 for (i=0; i<mbs; i++) { 3784 dcount = 0; 3785 odcount = 0; 3786 for (j=0; j<bs; j++) { 3787 kmax = locrowlens[rowcount]; 3788 for (k=0; k<kmax; k++) { 3789 tmp = mycols[nzcount++]/bs; 3790 if (!mask[tmp]) { 3791 mask[tmp] = 1; 3792 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3793 else masked1[dcount++] = tmp; 3794 } 3795 } 3796 rowcount++; 3797 } 3798 3799 dlens[i] = dcount; 3800 odlens[i] = odcount; 3801 3802 /* zero out the mask elements we set */ 3803 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3804 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 3805 } 3806 3807 3808 if (!sizesset) { 3809 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3810 } 3811 ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3812 3813 if (!rank) { 3814 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3815 /* read in my part of the matrix numerical values */ 3816 nz = procsnz[0]; 3817 vals = buf; 3818 mycols = ibuf; 3819 if (size == 1) nz -= extra_rows; 3820 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3821 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 3822 3823 /* insert into matrix */ 3824 jj = rstart*bs; 3825 for (i=0; i<m; i++) { 3826 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3827 mycols += locrowlens[i]; 3828 vals += locrowlens[i]; 3829 jj++; 3830 } 3831 /* read in other processors (except the last one) and ship out */ 3832 for (i=1; i<size-1; i++) { 3833 nz = procsnz[i]; 3834 vals = buf; 3835 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3836 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3837 } 3838 /* the last proc */ 3839 if (size != 1) { 3840 nz = procsnz[i] - extra_rows; 3841 vals = buf; 3842 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3843 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3844 ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3845 } 3846 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3847 } else { 3848 /* receive numeric values */ 3849 ierr = 
PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);

    /* receive message of values */
    vals   = buf;
    mycols = ibuf;
    ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
  ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
  ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
/*@
   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the hash table.

   Input Parameters:
+  mat - the matrix
-  fact - factor

   Not Collective, each process can use a different factor

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table <fact>

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@*/
PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
{
  Mat_MPIBAIJ *baij;

  PetscFunctionBegin;
  baij          = (Mat_MPIBAIJ*)mat->data;
  baij->ht_fact = fact;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran (to eliminate two function call overheads)
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
#endif

#undef __FUNCT__
#define __FUNCT__ "matmpibaijsetvaluesblocked"
/*@C
   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace a call to MatSetValuesBlocked()

   Collective on Mat

   Input Parameters:
+  mat - the matrix
.  min - number of input rows
.  im - input rows
.  nin - number of input columns
.  in - input columns
.  v - numerical values input
-  addvin - INSERT_VALUES or ADD_VALUES

   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ(), which is terrible code duplication.
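   From C, the standard interface MatSetValuesBlocked() should be used instead. As a minimal
   sketch (the row, column, and values variables here are only illustrative), inserting a single
   bs-by-bs block at block row i and block column j looks like
.vb
      ierr = MatSetValuesBlocked(mat,1,&i,1,&j,values,INSERT_VALUES);CHKERRQ(ierr);
.ve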
3958 3959 Level: advanced 3960 3961 .seealso: MatSetValuesBlocked() 3962 @*/ 3963 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3964 { 3965 /* convert input arguments to C version */ 3966 Mat mat = *matin; 3967 PetscInt m = *min, n = *nin; 3968 InsertMode addv = *addvin; 3969 3970 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3971 const MatScalar *value; 3972 MatScalar *barray=baij->barray; 3973 PetscBool roworiented = baij->roworiented; 3974 PetscErrorCode ierr; 3975 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3976 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3977 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3978 3979 PetscFunctionBegin; 3980 /* tasks normally handled by MatSetValuesBlocked() */ 3981 if (mat->insertmode == NOT_SET_VALUES) { 3982 mat->insertmode = addv; 3983 } 3984 #if defined(PETSC_USE_DEBUG) 3985 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3986 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3987 #endif 3988 if (mat->assembled) { 3989 mat->was_assembled = PETSC_TRUE; 3990 mat->assembled = PETSC_FALSE; 3991 } 3992 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3993 3994 3995 if (!barray) { 3996 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 3997 baij->barray = barray; 3998 } 3999 4000 if (roworiented) { 4001 stepval = (n-1)*bs; 4002 } else { 4003 stepval = (m-1)*bs; 4004 } 4005 for (i=0; i<m; i++) { 4006 if (im[i] < 0) continue; 4007 #if defined(PETSC_USE_DEBUG) 4008 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 4009 #endif 4010 if (im[i] >= rstart && im[i] < rend) { 4011 row = im[i] - rstart; 4012 for (j=0; j<n; j++) { 4013 /* If NumCol = 1 then a copy is not required */ 4014 if ((roworiented) && (n == 1)) { 4015 barray = (MatScalar*)v + i*bs2; 4016 } else if ((!roworiented) && (m == 1)) { 4017 barray = (MatScalar*)v + j*bs2; 4018 } else { /* Here a copy is required */ 4019 if (roworiented) { 4020 value = v + i*(stepval+bs)*bs + j*bs; 4021 } else { 4022 value = v + j*(stepval+bs)*bs + i*bs; 4023 } 4024 for (ii=0; ii<bs; ii++,value+=stepval) { 4025 for (jj=0; jj<bs; jj++) { 4026 *barray++ = *value++; 4027 } 4028 } 4029 barray -=bs2; 4030 } 4031 4032 if (in[j] >= cstart && in[j] < cend) { 4033 col = in[j] - cstart; 4034 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 4035 } 4036 else if (in[j] < 0) continue; 4037 #if defined(PETSC_USE_DEBUG) 4038 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 4039 #endif 4040 else { 4041 if (mat->was_assembled) { 4042 if (!baij->colmap) { 4043 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 4044 } 4045 4046 #if defined(PETSC_USE_DEBUG) 4047 #if defined (PETSC_USE_CTABLE) 4048 { PetscInt data; 4049 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 4050 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4051 } 4052 #else 4053 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4054 #endif 4055 #endif 4056 #if defined (PETSC_USE_CTABLE) 4057 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 4058 col = (col - 1)/bs; 4059 
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIBAIJWithArrays"
/*@
   MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local
   rows in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based, and the i indices are offsets into the local j array.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  }
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
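/*
   Illustrative usage sketch for MatCreateMPIBAIJWithArrays() (not part of the original source;
   the CSR data below is invented). Each process passes its own local rows: i[] holds the row
   offsets into j[], j[] holds 0-based global column indices, and a[] holds the corresponding values.

      PetscInt       i[] = {0,2,4};                two local rows with two nonzeros each
      PetscInt       j[] = {0,1,0,1};              global column indices
      PetscScalar    a[] = {1.0,2.0,3.0,4.0};
      Mat            A;
      PetscErrorCode ierr;

      ierr = MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);CHKERRQ(ierr);
*/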