#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/inline/spops.h"

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable
  (at a slightly higher hash-table lookup cost); without it, it is not
  scalable (each processor stores an order-N integer array), but lookups
  are fast.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  int            n = aij->B->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(int));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
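/*
  A minimal sketch (not part of the library) of how the colmap built above is
  meant to be used: translating a global column number into a local index in
  the off-diagonal block B.  The 1-based shift lets 0 mean "not present".
*/
#if 0
static int GlobalToLocalOffDiag(Mat_MPIAIJ *aij,int gcol)
{
  int lcol;
#if defined (PETSC_USE_CTABLE)
  PetscTableFind(aij->colmap,gcol+1,&lcol); /* leaves 0 when gcol is not found */
  lcol--;                                   /* back to 0-based; -1 if absent */
#else
  lcol = aij->colmap[gcol] - 1;             /* -1 if gcol is not a column of B */
#endif
  return lcol;
}
#endif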
#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
  rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
  rmax = aimax[row]; nrow = ailen[row]; \
  col1 = col - shift; \
 \
  low = 0; high = nrow; \
  while (high-low > 5) { \
    t = (low+high)/2; \
    if (rp[t] > col) high = t; \
    else             low  = t; \
  } \
  for (_i=low; _i<high; _i++) { \
    if (rp[_i] > col1) break; \
    if (rp[_i] == col1) { \
      if (addv == ADD_VALUES) ap[_i] += value; \
      else                    ap[_i] = value; \
      goto a_noinsert; \
    } \
  } \
  if (nonew == 1) goto a_noinsert; \
  else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
  if (nrow >= rmax) { \
    /* there is no extra room in row, therefore enlarge */ \
    int         new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
    PetscScalar *new_a; \
 \
    if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
 \
    /* malloc new storage space */ \
    len   = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
    ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
    new_j = (int*)(new_a + new_nz); \
    new_i = new_j + new_nz; \
 \
    /* copy over old data into new slots */ \
    for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
    for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
    ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
    len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
    ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow,len*sizeof(int));CHKERRQ(ierr); \
    ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
    ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow,len*sizeof(PetscScalar));CHKERRQ(ierr); \
    /* free up old matrix storage */ \
 \
    ierr = PetscFree(a->a);CHKERRQ(ierr); \
    if (!a->singlemalloc) { \
      ierr = PetscFree(a->i);CHKERRQ(ierr); \
      ierr = PetscFree(a->j);CHKERRQ(ierr); \
    } \
    aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
    a->singlemalloc = PETSC_TRUE; \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
    PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
    a->maxnz += CHUNKSIZE; \
    a->reallocs++; \
  } \
  N = nrow++ - 1; a->nz++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp[ii+1] = rp[ii]; \
    ap[ii+1] = ap[ii]; \
  } \
  rp[_i] = col1; \
  ap[_i] = value; \
  a_noinsert: ; \
  ailen[row] = nrow; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
  rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
  rmax = bimax[row]; nrow = bilen[row]; \
  col1 = col - shift; \
 \
  low = 0; high = nrow; \
  while (high-low > 5) { \
    t = (low+high)/2; \
    if (rp[t] > col) high = t; \
    else             low  = t; \
  } \
  for (_i=low; _i<high; _i++) { \
    if (rp[_i] > col1) break; \
    if (rp[_i] == col1) { \
      if (addv == ADD_VALUES) ap[_i] += value; \
      else                    ap[_i] = value; \
      goto b_noinsert; \
    } \
  } \
  if (nonew == 1) goto b_noinsert; \
  else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
  if (nrow >= rmax) { \
    /* there is no extra room in row, therefore enlarge */ \
    int         new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
    PetscScalar *new_a; \
 \
    if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
 \
    /* malloc new storage space */ \
    len   = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
    ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
    new_j = (int*)(new_a + new_nz); \
    new_i = new_j + new_nz; \
 \
    /* copy over old data into new slots */ \
    for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
    for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
    ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
    len  = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
    ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow,len*sizeof(int));CHKERRQ(ierr); \
    ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
    ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow,len*sizeof(PetscScalar));CHKERRQ(ierr); \
    /* free up old matrix storage */ \
 \
    ierr = PetscFree(b->a);CHKERRQ(ierr); \
    if (!b->singlemalloc) { \
      ierr = PetscFree(b->i);CHKERRQ(ierr); \
      ierr = PetscFree(b->j);CHKERRQ(ierr); \
    } \
    ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
    b->singlemalloc = PETSC_TRUE; \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
    PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
    b->maxnz += CHUNKSIZE; \
    b->reallocs++; \
  } \
  N = nrow++ - 1; b->nz++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp[ii+1] = rp[ii]; \
    ap[ii+1] = ap[ii]; \
  } \
  rp[_i] = col1; \
  ap[_i] = value; \
  b_noinsert: ; \
  bilen[row] = nrow; \
}
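/*
  An illustrative, self-contained miniature (not used by the library) of the
  insertion logic the two macros above share: a binary search narrows the
  window, a linear scan finds the slot, and later entries are shifted up.
  The reallocation and "nonew" error paths are omitted here.
*/
#if 0
static void InsertSorted(int *cols,PetscScalar *vals,int *nrow,int col,PetscScalar v,int add)
{
  int low = 0, high = *nrow, t, i;
  while (high - low > 5) {                   /* binary search on the sorted row */
    t = (low + high)/2;
    if (cols[t] > col) high = t; else low = t;
  }
  for (i=low; i<high; i++) {
    if (cols[i] > col) break;                /* insertion point found */
    if (cols[i] == col) {                    /* already present: combine */
      if (add) vals[i] += v; else vals[i] = v;
      return;
    }
  }
  for (t=*nrow-1; t>=i; t--) {               /* shift tail up one slot */
    cols[t+1] = cols[t]; vals[t+1] = vals[t];
  }
  cols[i] = col; vals[i] = v; (*nrow)++;
}
#endif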
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
  PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some variables required in the macros */
  Mat            A = aij->A;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar    *aa = a->a;
  PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat            B = aij->B;
  Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
  PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
  PetscScalar    *ba = b->a;

  PetscInt       *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
  PetscInt       nonew = a->nonew,shift = 0;
  PetscScalar    *ap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
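/*
  A usage sketch (assumes a created and preallocated MPIAIJ matrix "mat").
  Indices are global; rows owned by another process are not inserted
  directly but stashed and shipped at assembly time, as the else-branch
  above shows.
*/
#if 0
{
  int         row = 0, cols[2] = {0, 1};
  PetscScalar vals[2] = {2.0, -1.0};
  ierr = MatSetValues(mat,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
  /* nothing is final until MatAssemblyBegin/End(mat,MAT_FINAL_ASSEMBLY) */
}
#endif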
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  int            i,j,rstart = aij->rstart,rend = aij->rend;
  int            cstart = aij->cstart,cend = aij->cend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
    if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]);
        if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  int            nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}
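/*
  Why the MPI_BOR reduction above detects mixed modes: a sketch assuming the
  InsertMode values behave as distinct bit patterns (NOT_SET_VALUES = 0,
  INSERT_VALUES = 1, ADD_VALUES = 2).  OR-ing all ranks' modes yields
  ADD_VALUES|INSERT_VALUES = 3 exactly when both modes are present, while a
  rank that set nothing contributes 0 and is ignored.  "comm" is a
  placeholder communicator here.
*/
#if 0
{
  InsertMode mine = INSERT_VALUES, all;
  MPI_Allreduce(&mine,&all,1,MPI_INT,MPI_BOR,comm);
  if (all == (ADD_VALUES|INSERT_VALUES)) { /* some rank added while another inserted */ }
}
#endif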
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)aij->A->data,*b = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  int            i,j,rstart,ncols,n,flg;
  int            *row,*col,other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, so we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
      /* reaccess b because aij->B was changed */
      b = (Mat_SeqAIJ*)aij->B->data;
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }

  /* used by MatAXPY() */
  a->xtoy = 0; b->xtoy = 0;
  a->XtoY = 0; b->XtoY = 0;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
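/*
  The standard assembly lifecycle, as a usage sketch (assumes an MPIAIJ
  matrix "mat" whose values have been set with MatSetValues()):
  MatAssemblyBegin() ships stashed off-process entries; MatAssemblyEnd()
  receives them, re-inserts row by row (see the stash loop in
  MatAssemblyEnd_MPIAIJ() above), and finalizes both blocks.
*/
#if 0
ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
/* unrelated local work may be overlapped with the stash communication here */
ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
#endif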
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  int            i,N,*rows,*owners = l->rowners,size = l->size;
  int            *nprocs,j,idx,nsends,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,rstart = l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
  PetscTruth     found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal, we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory and thus trigger many mallocs,
     slowing things down.

     Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
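/*
  Usage sketch: zero two rows named by global index and place 1.0 on the
  diagonal of each zeroed row (assumes an assembled MPIAIJ matrix "A").
  Each process may list rows it does not own; the routine above routes
  them to their owners.
*/
#if 0
{
  IS          is;
  int         rows[2] = {0, 5};
  PetscScalar one = 1.0;
  ierr = ISCreateGeneral(PETSC_COMM_SELF,2,rows,&is);CHKERRQ(ierr);
  ierr = MatZeroRows(A,is,&one);CHKERRQ(ierr);
  ierr = ISDestroy(is);CHKERRQ(ierr);
}
#endif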
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  int            nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in yy until the next line, which is true for my implementation
     but is perhaps not always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
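/*
  The multiply routines above all follow the same overlap pattern, sketched
  here in generic form (placeholder names x, ghost, ctx): start the scatter
  of the needed off-process entries of x, do the computation that uses only
  local data, finish the scatter, then do the computation that needs the
  ghosted values.
*/
#if 0
ierr = VecScatterBegin(x,ghost,INSERT_VALUES,SCATTER_FORWARD,ctx);CHKERRQ(ierr);
/* ... local work: y = A_diag * x_local ... */
ierr = VecScatterEnd(x,ghost,INSERT_VALUES,SCATTER_FORWARD,ctx);CHKERRQ(ierr);
/* ... remote work: y += A_offdiag * ghost ... */
#endif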
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)Amat->data,*Bij;
  Mat            Adia = Aij->A,Bdia,Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  int            M,N,first,last,*notme,ntids,i;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*)Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&ntids);CHKERRQ(ierr);
  if (ntids==1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(int),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in zz until the next line, which is true for my implementation
     but is perhaps not always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
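/*
  Usage sketch for the transpose test above (hypothetical matrices A and B;
  tol is the tolerance on entry differences): flg comes back PETSC_TRUE
  when B equals A^T to within tol.
*/
#if 0
{
  PetscTruth flg;
  ierr = MatIsTranspose(A,B,1.e-10,&flg);CHKERRQ(ierr);
}
#endif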
/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
#endif
  if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
  PetscScalar    *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->M;
    header[2] = mat->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen = mat->m;
    ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->m;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->m; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(1,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
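/*
  Usage sketch (hypothetical file name; the binary-open mode constant varies
  across PETSc versions, PETSC_FILE_CREATE is assumed here): a binary viewer
  drives the routine above; rank 0 gathers and writes, the other ranks send
  their rows.
*/
#if 0
{
  PetscViewer bview;
  ierr = PetscViewerBinaryOpen(mat->comm,"matrix.dat",PETSC_FILE_CREATE,&bview);CHKERRQ(ierr);
  ierr = MatView(mat,bview);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(bview);CHKERRQ(ierr);
}
#endif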
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,flg,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
      if (flg) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
                                                  rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
                                                  rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }
  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    if (!rank) {
      ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = aij->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}
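/*
  Usage sketch: print per-process nonzero statistics from the
  PETSC_VIEWER_ASCII_INFO_DETAIL branch above, using the standard stdout
  viewer on the matrix communicator.
*/
#if 0
ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr);
ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
#endif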
#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1;
  PetscScalar    mone = -1.0;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->M;
  info->columns_global    = (double)matin->N;
  info->rows_local        = (double)matin->m;
  info->columns_local     = (double)matin->N;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_HERMITIAN:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}
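/*
  Usage sketch for the option handling above: options set on the parallel
  matrix are forwarded to the sequential diagonal and off-diagonal blocks.
*/
#if 0
ierr = MatSetOption(mat,MAT_IGNORE_OFF_PROC_ENTRIES);CHKERRQ(ierr);  /* skip the stash entirely */
ierr = MatSetOption(mat,MAT_NO_NEW_NONZERO_LOCATIONS);CHKERRQ(ierr); /* silently drop new nonzeros */
#endif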
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  int            i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  int            nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  int            *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
       allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    int        max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}
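/*
  Usage sketch: examine the nonzeros of one locally owned row (global index
  "grow"); each MatGetRow() must be paired with MatRestoreRow(), and only
  one row may be active at a time, as the getrowactive flag above enforces.
*/
#if 0
{
  int         grow,ncols,*cols,k;
  PetscScalar *vals;
  ierr = MatGetRow(mat,grow,&ncols,&cols,&vals);CHKERRQ(ierr);
  for (k=0; k<ncols; k++) { /* cols[k] is a global column, vals[k] its value */ }
  ierr = MatRestoreRow(mat,grow,&ncols,&cols,&vals);CHKERRQ(ierr);
}
#endif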
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  int            i,j,cstart = aij->cstart;
  PetscReal      sum = 0.0;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      int       *jj,*garray = aij->garray;
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->m; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
  PetscErrorCode ierr;
  int            M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat            B;
  PetscScalar    *array;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  }

  ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
  ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*)a->A->data;
  m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for (i=0; i<ai[m]; i++) {aj[i] += a->cstart;}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*)a->B->data;
  m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row  = a->rstart;
  ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr);
  ct   = cols;
  for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  ierr = PetscFree(ct);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
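/*
  Usage sketch for the routine above: pass a Mat* for an out-of-place
  transpose, or PETSC_NULL to transpose a square matrix in place.
*/
#if 0
{
  Mat At;
  ierr = MatTranspose(A,&At);CHKERRQ(ierr);        /* At = A^T */
  ierr = MatTranspose(A,PETSC_NULL);CHKERRQ(ierr); /* A = A^T, square only */
}
#endif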
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a = aij->A,b = aij->B;
  PetscErrorCode ierr;
  int            s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPrintHelp_MPIAIJ"
PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBlockSize_MPIAIJ"
PetscErrorCode MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPrintHelp_MPIAIJ"
PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBlockSize_MPIAIJ"
PetscErrorCode MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPIAIJ"
PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* Because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       MatCopy() directly on the two parts.  If need be, a copy more efficient than
       MatCopy_Basic() could be provided by first uncompressing the a->B matrices
       and then copying the submatrices. */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIAIJ"
PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
{
  PetscErrorCode ierr;
  int            i;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqAIJ     *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    x   = (Mat_SeqAIJ *)xx->A->data;
    y   = (Mat_SeqAIJ *)yy->A->data;
    bnz = (PetscBLASInt)x->nz;
    BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
    x   = (Mat_SeqAIJ *)xx->B->data;
    y   = (Mat_SeqAIJ *)yy->B->data;
    bnz = (PetscBLASInt)x->nz;
    BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
  } else if (str == SUBSET_NONZERO_PATTERN) {
    ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);

    x = (Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    if (y->xtoy && y->XtoY != xx->B) {
      ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
      ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
    }
    if (!y->xtoy) { /* get xtoy */
      ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
      y->XtoY = xx->B;
    }
    for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
  } else {
    ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
/* 4*/ MatMultAdd_MPIAIJ,
       MatMultTranspose_MPIAIJ,
       MatMultTransposeAdd_MPIAIJ,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
/*15*/ MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
/*20*/ MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
/*25*/ MatZeroRows_MPIAIJ,
#if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
       MatLUFactorSymbolic_MPIAIJ_TFS,
#else
       0,
#endif
       0,
       0,
       0,
/*30*/ MatSetUpPreallocation_MPIAIJ,
       0,
       0,
       0,
       0,
/*35*/ MatDuplicate_MPIAIJ,
       0,
       0,
       0,
       0,
/*40*/ MatAXPY_MPIAIJ,
       MatGetSubMatrices_MPIAIJ,
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       MatCopy_MPIAIJ,
/*45*/ MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,
       0,
       0,
       0,
/*50*/ MatGetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
/*55*/ MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       0,
       0,
/*60*/ MatGetSubMatrix_MPIAIJ,
       MatDestroy_MPIAIJ,
       MatView_MPIAIJ,
       MatGetPetscMaps_Petsc,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ 0,
       0,
       MatSetColoring_MPIAIJ,
       MatSetValuesAdic_MPIAIJ,
       MatSetValuesAdifor_MPIAIJ,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
/*84*/ MatLoad_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
/*90*/ MatMatMult_MPIAIJ_MPIAIJ,
       MatMatMultSymbolic_MPIAIJ_MPIAIJ,
       MatMatMultNumeric_MPIAIJ_MPIAIJ,
       MatPtAP_MPIAIJ_MPIAIJ,
       MatPtAPSymbolic_MPIAIJ_MPIAIJ,
/*95*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
       0,
       0,
       0};

/* ----------------------------------------------------------------------------------------*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIAIJ"
PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIAIJ"
PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#include "petscpc.h"
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  int            i;

  PetscFunctionBegin;
  B->preallocated = PETSC_TRUE;
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
  if (d_nnz) {
    for (i=0; i<B->m; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->m; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
    }
  }
  b = (Mat_MPIAIJ*)B->data;
  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJ
M*/
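/*
   Editor's note: a sketch (not in the original source) of selecting this type from the
   options database, as described in the manual page above; ExampleCreateMPIAIJ is a
   hypothetical helper name.
*/
#if 0
static PetscErrorCode ExampleCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
  ierr = MatSetFromOptions(*A);CHKERRQ(ierr); /* run with -mat_type mpiaij */
  ierr = MatMPIAIJSetPreallocation(*A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif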
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  int            i,size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);

  ierr    = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->factor    = 0;
  B->assembled = PETSC_FALSE;
  B->mapping   = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);

  /* the information in the maps duplicates the information computed below;
     eventually we should remove the duplicate information that is not contained in the maps */
  ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
  PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  /* build cache for off-processor entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* Explicitly create 2 MATSEQAIJ matrices. */
  ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
  ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->A);
  ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
  ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->B);

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                           "MatStoreValues_MPIAIJ",
                                           MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                           "MatRetrieveValues_MPIAIJ",
                                           MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIAIJ",
                                           MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
                                           "MatIsTranspose_MPIAIJ",
                                           MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                           "MatMPIAIJSetPreallocation_MPIAIJ",
                                           MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                           "MatDiagonalScaleLocal_MPIAIJ",
                                           MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process
   communicator, and MATMPIAIJ otherwise.  As a result, for single process communicators
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is
   supported for communicators controlling multiple processes.  It is recommended that you
   call both of the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ
M*/
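/*
   Editor's note: a sketch (not in the original source) of the recommendation above: call
   both preallocation routines so the same code works for any communicator size; the one
   that does not match the actual type is a no-op, since its composed function is absent.
*/
#if 0
  ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);CHKERRQ(ierr);
  ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);              /* used when size == 1 */
  ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr); /* used otherwise */
#endif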
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_AIJ"
PetscErrorCode MatCreate_AIJ(Mat A)
{
  PetscErrorCode ierr;
  int            size;

  PetscFunctionBegin;
  ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr);
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIAIJ"
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  a    = (Mat_MPIAIJ*)mat->data;

  mat->factor       = matin->factor;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    int len;
    len  = oldmat->B->n;
    ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,len*sizeof(int));
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->Mvctx);
  ierr = MatDestroy(a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  ierr = MatDestroy(a->B);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->B);
  ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIAIJ"
PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
{
  Mat         A;
  PetscScalar *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            i,nz,j,rstart,rend,fd;
  int            header[4],*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int            *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int            cend,cstart,n;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's parts and ship them off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    n      = N/size + ((N % size) > rank);
    ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
  ierr = MatSetType(A,type);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);

  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the values for all other processors and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = A;
  PetscFunctionReturn(0);
}
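/*
   Editor's note: the on-disk layout implied by the reader above (a summary, not part of
   the original source): header[0] = MAT_FILE_COOKIE, header[1] = M, header[2] = N,
   header[3] = number of nonzeros (negative flags a special format this loader rejects);
   followed by the M row lengths, then all column indices, then all numerical values.
   Below is a hedged sketch of driving this loader through the public interface;
   PETSC_FILE_RDONLY is assumed to be the read-mode counterpart of the PETSC_FILE_CREATE
   flag used elsewhere in this file.
*/
#if 0
  PetscViewer viewer;
  Mat         A;
  ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",PETSC_FILE_RDONLY,&viewer);CHKERRQ(ierr);
  ierr = MatLoad(viewer,MATMPIAIJ,&A);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
#endif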

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix: first a SeqAIJ on each process,
  and then the final result obtained by concatenating the local pieces.  Writing it
  directly would look much like MatGetSubMatrices_MPIAIJ().
*/
PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  int            i,m,n,rstart,row,rend,nz,*cwork,j;
  int            *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            *local,M,Mreuse;
  PetscScalar    *vwork,*aa;
  MPI_Comm       comm = mat->comm;
  Mat_SeqAIJ     *aij;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /* 
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal 
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n);
    }

    /* next, compute all the lengths */
    ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr);
    ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    int ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
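/*
   Editor's note: a hedged sketch (not in the original source) of the reuse path implemented
   above.  The first call composes the intermediate sequential matrix on the result under the
   key "SubMatrix"; later calls with MAT_REUSE_MATRIX and the same index sets refill it
   without reallocating.
*/
#if 0
  Mat sub;
  ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
  /* ... mat changes values but keeps its nonzero pattern ... */
  ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
#endif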

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates storage for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters 
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix 
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the 
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure. 
           The size of this array is equal to the number of local rows, i.e. 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero 
           structure. The size of this array is equal to the number 
           of local rows, i.e. 'm'. 

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to 
   process 0, the next m1 rows belong to process 1, the next m2 rows belong 
   to process 2, etc., where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion of
   the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode  - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:
  
   Consider the following 8x8 matrix with 34 non-zero values, that is 
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0 
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices, e.g. proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store 
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
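/*
   Editor's note: the per-process d_nnz/o_nnz values from the example above, written out
   as code (a sketch; the arrays are exactly the values listed in the manual page).
*/
#if 0
  int d_nnz0[3] = {2,2,2}, o_nnz0[3] = {2,2,2};  /* proc0 owns 3 rows */
  int d_nnz1[3] = {3,3,2}, o_nnz1[3] = {2,1,1};  /* proc1 owns 3 rows */
  int d_nnz2[2] = {1,1},   o_nnz2[2] = {4,4};    /* proc2 owns 2 rows */
  /* each process passes only its own pair, e.g. on proc1: */
  ierr = MatMPIAIJSetPreallocation(A,0,d_nnz1,0,o_nnz1);CHKERRQ(ierr);
#endif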
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
{
  PetscErrorCode ierr,(*f)(Mat,int,const int[],int,const int[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters 
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given).
       This value should be the same as the local size used in creating the 
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given).
       This value should be the same as the local size used in creating the 
       x vector for the matrix-vector product y = Ax.  For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the 
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure. 
           The size of this array is equal to the number of local rows, i.e. 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero 
           structure. The size of this array is equal to the number 
           of local rows, i.e. 'm'. 

   Output Parameter:
.  A - the matrix 

   Notes:
   The m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 
   processor then it must be used on all processors that share the object for 
   that argument.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.
   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to 
   process 0, the next m1 rows belong to process 1, the next m2 rows belong 
   to process 2, etc., where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion of
   the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism:
     MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode  - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is 
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0 
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices, e.g. proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store 
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
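/*
   Editor's note: a minimal creation sketch (not in the original source) matching the
   manual page above: global sizes are given and the local decomposition is left to PETSc.
*/
#if 0
  Mat A;
  ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,8,8,
                         3,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
#endif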
PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  int            size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetSeqAIJ"
PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  PetscErrorCode ierr;
  int            i;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_LOCAL) {
    ISColoringValue *allcolors,*colors;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    ISColoringValue *colors;
    int             *larray;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->n; i++) {
      larray[i] = i + a->cstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"No support for ISColoringType %d",coloring->ctype);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge"
/*@C
      MatMerge - Creates a single large PETSc matrix by concatenating the sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    inmat - the input sequential matrix on each process
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    outmat - the parallel matrix generated

    Level: advanced

   Notes: The number of columns of the matrix on EACH processor MUST be the same.
   The input matrix inmat is destroyed once the parallel matrix has been formed.

@*/
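/*
   Editor's note: a hedged usage sketch (not in the original source).  Each process
   contributes one sequential matrix with the same number of columns; note that, as the
   code below shows, MatMerge() destroys the input matrix.
*/
#if 0
  Mat Aseq,Ampi;
  /* ... assemble the local sequential matrix Aseq ... */
  ierr = MatMerge(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Ampi);CHKERRQ(ierr);
#endif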
PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode    ierr;
  int               m,N,i,rstart,nnz,I,*dnz,*onz;
  const int         *indx;
  const PetscScalar *values;
  PetscMap          columnmap,rowmap;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  /*
  PetscMPIInt rank;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N);
  */
  if (scall == MAT_INITIAL_MATRIX){
    /* count nonzeros in each row, for the diagonal and off-diagonal portions of the matrix */
    if (n == PETSC_DECIDE){
      ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
      ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr);
      ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
      ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
      ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
    }

    ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
    ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
    ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
    ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
    ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);

    ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
    for (i=0;i<m;i++) {
      ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
      ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    }
    /* This routine will ONLY return an MPIAIJ type matrix */
    ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  } else if (scall == MAT_REUSE_MATRIX){
    ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
  }

  for (i=0;i<m;i++) {
    ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    I    = i + rstart;
    ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatDestroy(inmat);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFileSplit"
PetscErrorCode MatFileSplit(Mat A,char *outfile)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  int               m,N,i,rstart,nnz;
  size_t            len;
  const int         *indx;
  PetscViewer       out;
  char              *name;
  Mat               B;
  const PetscScalar *values;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
  ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
  /* Should this be the type of the diagonal block of A? */
  ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
  for (i=0;i<m;i++) {
    ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
    ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
  ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
  ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
  sprintf(name,"%s.%d",outfile,rank);
  ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr);
  ierr = PetscFree(name);CHKERRQ(ierr);
  ierr = MatView(B,out);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
  ierr = MatDestroy(B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
{
  PetscErrorCode       ierr;
  Mat_Merge_SeqsToMPI  *merge;
  PetscObjectContainer container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
    ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->bi);CHKERRQ(ierr);
    ierr = PetscFree(merge->bj);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
    ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr);
    ierr = MatDestroy(merge->C_seq);CHKERRQ(ierr);

    ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
    /* free merge only when it was actually retrieved; freeing it unconditionally
       would pass an uninitialized pointer to PetscFree() */
    ierr = PetscFree(merge);CHKERRQ(ierr);
  }

  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include "src/mat/utils/freespace.h"
#include "petscbt.h"
#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPI"
/*@C
      MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix on each process
.    m - number of local rows (or PETSC_DECIDE)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    mpimat - the parallel matrix generated

    Level: advanced

   Notes: 
     The dimensions of the sequential matrix on each processor MUST be the same.
     The input seqmat is kept in the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
@*/
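/*
   Editor's note: a hedged sketch (not in the original source) of the symbolic/numeric
   split implemented below: the symbolic phase builds the parallel nonzero structure once,
   and the numeric phase can then be repeated whenever the values of seqmat change.
*/
#if 0
  Mat C;
  ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,&C);CHKERRQ(ierr); /* structure; keeps seqmat in C's container */
  ierr = MatMerge_SeqsToMPINumeric(seqmat,C);CHKERRQ(ierr);            /* fill/refresh the values */
#endif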
PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
{
  PetscErrorCode       ierr;
  MPI_Comm             comm=mpimat->comm;
  Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt          size,rank;
  int                  N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j;
  int                  *len_s,proc,m;
  int                  taga,**buf_ri,**buf_rj;
  int                  k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  int                  nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request          *s_waits,*r_waits;
  MPI_Status           *status;
  MatScalar            *aa=a->a,**abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI  *merge;
  PetscObjectContainer container;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
  /* merge is only defined when the container exists, so fail cleanly otherwise */
  if (!container) SETERRQ(PETSC_ERR_PLIB,"Matrix was not created by MatMerge_SeqsToMPISymbolic()");
  ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr  = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  ierr  = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
  len_s = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*-----------------------------*/
  ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(int**),&buf_ri_k);CHKERRQ(ierr);
  nextrow = buf_ri_k + merge->nrecv;
  nextai  = nextrow + merge->nrecv;

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;             /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  /* set values of ba */
  ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a->a + ai[arow];
    nextaj = 0;
    for (j=0; nextaj<anzi; j++){
      if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++){
          if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  PetscErrorCode       ierr;
  Mat                  B_seq,B_mpi;
  Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt          size,rank;
  int                  M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j;
  int                  len,*len_s,proc;
  int                  tagi,tagj,*len_si,*len_ri,**buf_ri,**buf_rj;
  int                  k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  int                  nrows,*buf_s,*buf_si,*buf_si_i,**buf_ri_k,**nextrow,**nextai;
  MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status           *status;
  MatScalar            *ba;
  FreeSpaceList        free_space=PETSC_NULL,current_space=PETSC_NULL;
  PetscBT              lnkbt;
  Mat_Merge_SeqsToMPI  *merge;
  PetscObjectContainer container;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr);
  if (m == PETSC_DECIDE) {
    ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr);
  } else {
    ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
  }
  ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(int),&len_si);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(int),&merge->len_s);CHKERRQ(ierr);

  if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr);}
  ierr  = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
  len_s = merge->len_s;

  len = 0;  /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++){
    len_si[proc] = 0;
    if (proc == rank){
      len_si[proc] = len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* number of nonzeros to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      for (i=owners[proc]; i<owners[proc+1]; i++){
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len         += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*--------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*-------------------------------*/
  ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
  sj_waits = si_waits + merge->nsend;

  for (proc=0, k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(aj+ai[i],len_s[proc],MPI_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((len+1)*sizeof(int),&buf_s);CHKERRQ(ierr);
  buf_si = buf_s; /* points to the beginning of the k-th message to be sent */
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:           nrows to be sent
         [1:nrows]:           row index (global)
         [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++){
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc];         /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPI_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);

  ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++){
    ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree(si_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);

  /* create the seq matrix B_seq on each process */
  /*---------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr = PetscMalloc((m+1)*sizeof(int),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is (nproc)*(number of local nonzeros of seqmat) */
  len = 0;
  for (i=owners[rank]; i<owners[rank+1]; i++) len += ai[i+1] - ai[i];
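
  /*
     Note: size*len is only a heuristic first guess for the merged row storage
     (as if every process contributed as many nonzeros as this one, all in
     distinct columns).  If a row overflows the remaining space, GetMoreSpace()
     below appends a chunk equal to the total allocated so far (counted in
     nspacedouble), so a low guess costs extra reallocation, not correctness.
  */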
  ierr = GetMoreSpace((int)(size*len+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each row of B_seq */
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(int**),&buf_ri_k);CHKERRQ(ierr);
  nextrow = buf_ri_k + merge->nrecv;
  nextai  = nextrow + merge->nrecv;
  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k];                 /* beginning of k-th received i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add the local nonzero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }
  ierr = PetscMalloc((bi[m]+1)*sizeof(int),&bj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);

  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* allocate space for ba */
  ierr = PetscMalloc((bi[m]+1)*sizeof(MatScalar),&ba);CHKERRQ(ierr);
  ierr = PetscMemzero(ba,(bi[m]+1)*sizeof(MatScalar));CHKERRQ(ierr);

  /* put together B_seq */
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,bi,bj,ba,&B_seq);CHKERRQ(ierr);
  ierr = PetscFree(ba);CHKERRQ(ierr); /* bi and bj are saved for reuse */

  /* create the symbolic parallel matrix B_mpi by concatenating B_seq */
  /*------------------------------------------------------------------*/
  ierr = MPI_Barrier(comm);CHKERRQ(ierr);
  ierr = MatMerge(comm,B_seq,n,MAT_INITIAL_MATRIX,&B_mpi);CHKERRQ(ierr);
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi     = bi;
  merge->bj     = bj;
  merge->buf_ri = buf_ri;
  merge->buf_rj = buf_rj;
  merge->C_seq  = seqmat;

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscFree(status);CHKERRQ(ierr);
  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
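
/*
   Usage sketch (hypothetical driver code): the symbolic routine above is
   normally reached through MatMerge_SeqsToMPI() below, which pairs it with
   the numeric phase and relies on the merge context cached on the result:

      Mat C;
      ierr = MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,
                                MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
      (change the numerical values of seqmat, keeping its nonzero pattern)
      ierr = MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,
                                MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
*/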

#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPI"
PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMat"
/*@C
   MatGetLocalMat - Creates a SeqAIJ matrix from the local rows of a parallel AIJ matrix

   Collective on Mat

   Input Parameters:
+  A - the matrix
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-  row, col - index sets of rows and columns to extract (or PETSC_NULL)

   Output Parameter:
.  A_loc - the local sequential matrix generated

   Level: developer

@*/
PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  int            *idx,i,start,end,ncols,nzA,nzB,*cmap,imark;
  IS             isrowa,iscola;
  Mat            *aloc;

  PetscFunctionBegin;
  if (!row){
    start = a->rstart; end = a->rend;
    ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col){
    start = a->cstart;
    cmap  = a->garray;
    nzA   = a->A->n;
    nzB   = a->B->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(int),&idx);CHKERRQ(ierr);
    ncols = 0;
    /* assemble the column indices in sorted global order: off-diagonal
       columns below the diagonal block, the diagonal block, then the rest */
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++)     idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX){
    ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr = PetscFree(aloc);CHKERRQ(ierr);
  if (!row){
    ierr = ISDestroy(isrowa);CHKERRQ(ierr);
  }
  if (!col){
    ierr = ISDestroy(iscola);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
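
/*
   Usage sketch for MatGetLocalMat() (hypothetical caller): with row and col
   passed as PETSC_NULL the routine extracts all locally owned rows, with the
   columns of the diagonal block plus the nonzero off-diagonal columns:

      Mat A_loc;
      ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
      (modify values of A, keeping its nonzero pattern)
      ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
      ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/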

#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
   MatGetBrowsOfAcols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns of the local part of A

   Collective on Mat

   Input Parameters:
+  A,B - the matrices
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-  rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)

   Output Parameters:
+  rowb, colb - index sets of rows and columns of B to extract
.  brstart - row index of B_seq from which the next B->m rows are taken from B's local rows
-  B_seq - the sequential matrix generated

   Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
  PetscErrorCode ierr;
  int            *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq;

  PetscFunctionBegin;
  if (a->cstart != b->rstart || a->cend != b->rend){
    SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend);
  }

  if (scall == MAT_INITIAL_MATRIX){
    start = a->cstart;
    cmap  = a->garray;
    nzA   = a->A->n;
    nzB   = a->B->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(int),&idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) { /* off-diagonal columns of A below the diagonal block */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++)     idx[ncols++] = start + i; /* diagonal block columns */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];   /* remaining off-diagonal columns */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
    *brstart = imark;
    ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb; iscolb = *colb;
    ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb){
    ierr = ISDestroy(isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb){
    ierr = ISDestroy(iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }

  PetscFunctionReturn(0);
}
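
/*
   Usage sketch for MatGetBrowsOfAcols() (hypothetical caller, e.g. as part of
   a parallel matrix-matrix product where each process needs the rows of B
   matching the nonzero columns of its local part of A):

      Mat      B_seq;
      IS       rowb,colb;
      PetscInt brstart;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
      (update values of B, keeping its nonzero pattern)
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
      ierr = ISDestroy(rowb);CHKERRQ(ierr);
      ierr = ISDestroy(colb);CHKERRQ(ierr);
      ierr = MatDestroy(B_seq);CHKERRQ(ierr);
*/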