1 2 #include "src/mat/impls/aij/mpi/mpiaij.h" 3 #include "src/inline/spops.h" 4 5 /* 6 Local utility routine that creates a mapping from the global column 7 number to the local number in the off-diagonal part of the local 8 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 9 a slightly higher hash table cost; without it it is not scalable (each processor 10 has an order N integer array but is fast to acess. 11 */ 12 #undef __FUNCT__ 13 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 14 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 15 { 16 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 17 PetscErrorCode ierr; 18 int n = aij->B->n,i; 19 20 PetscFunctionBegin; 21 #if defined (PETSC_USE_CTABLE) 22 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 23 for (i=0; i<n; i++){ 24 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 25 } 26 #else 27 ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr); 28 PetscLogObjectMemory(mat,mat->N*sizeof(int)); 29 ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr); 30 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 31 #endif 32 PetscFunctionReturn(0); 33 } 34 35 #define CHUNKSIZE 15 36 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 37 { \ 38 \ 39 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 40 rmax = aimax[row]; nrow = ailen[row]; \ 41 col1 = col - shift; \ 42 \ 43 low = 0; high = nrow; \ 44 while (high-low > 5) { \ 45 t = (low+high)/2; \ 46 if (rp[t] > col) high = t; \ 47 else low = t; \ 48 } \ 49 for (_i=low; _i<high; _i++) { \ 50 if (rp[_i] > col1) break; \ 51 if (rp[_i] == col1) { \ 52 if (addv == ADD_VALUES) ap[_i] += value; \ 53 else ap[_i] = value; \ 54 goto a_noinsert; \ 55 } \ 56 } \ 57 if (nonew == 1) goto a_noinsert; \ 58 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 59 if (nrow >= rmax) { \ 60 /* there is no extra room in row, therefore enlarge */ \ 61 int new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \ 62 PetscScalar *new_a; \ 63 \ 64 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 65 \ 66 /* malloc new storage space */ \ 67 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \ 68 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 69 new_j = (int*)(new_a + new_nz); \ 70 new_i = new_j + new_nz; \ 71 \ 72 /* copy over old data into new slots */ \ 73 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \ 74 for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 75 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 76 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 77 ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 78 len*sizeof(int));CHKERRQ(ierr); \ 79 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 80 ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 81 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 82 /* free up old matrix storage */ \ 83 \ 84 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 85 if (!a->singlemalloc) { \ 86 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 87 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 88 } \ 89 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 90 a->singlemalloc = PETSC_TRUE; \ 91 \ 92 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 93 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 94 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 95 a->maxnz += CHUNKSIZE; \ 96 a->reallocs++; \ 97 } \ 98 N = nrow++ - 1; a->nz++; \ 99 /* shift up all the later entries in this row */ \ 100 for (ii=N; ii>=_i; ii--) { \ 101 rp[ii+1] = rp[ii]; \ 102 ap[ii+1] = ap[ii]; \ 103 } \ 104 rp[_i] = col1; \ 105 ap[_i] = value; \ 106 a_noinsert: ; \ 107 ailen[row] = nrow; \ 108 } 109 110 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 111 { \ 112 \ 113 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 114 rmax = bimax[row]; nrow = bilen[row]; \ 115 col1 = col - shift; \ 116 \ 117 low = 0; high = nrow; \ 118 while (high-low > 5) { \ 119 t = (low+high)/2; \ 120 if (rp[t] > col) high = t; \ 121 else low = t; \ 122 } \ 123 for (_i=low; _i<high; _i++) { \ 124 if (rp[_i] > col1) break; \ 125 if (rp[_i] == col1) { \ 126 if (addv == ADD_VALUES) ap[_i] += value; \ 127 else ap[_i] = value; \ 128 goto b_noinsert; \ 129 } \ 130 } \ 131 if (nonew == 1) goto b_noinsert; \ 132 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 133 if (nrow >= rmax) { \ 134 /* there is no extra room in row, therefore enlarge */ \ 135 int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 136 PetscScalar *new_a; \ 137 \ 138 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 139 \ 140 /* malloc new storage space */ \ 141 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \ 142 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 143 new_j = (int*)(new_a + new_nz); \ 144 new_i = new_j + new_nz; \ 145 \ 146 /* copy over old data into new slots */ \ 147 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 148 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 149 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 150 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 151 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 152 len*sizeof(int));CHKERRQ(ierr); \ 153 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 154 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 155 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 156 /* free up old matrix storage */ \ 157 \ 158 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 159 if (!b->singlemalloc) { \ 160 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 161 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 162 } \ 163 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 164 b->singlemalloc = PETSC_TRUE; \ 165 \ 166 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 167 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 168 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 169 b->maxnz += CHUNKSIZE; \ 170 b->reallocs++; \ 171 } \ 172 N = nrow++ - 1; b->nz++; \ 173 /* shift up all the later entries in this row */ \ 174 for (ii=N; ii>=_i; ii--) { \ 175 rp[ii+1] = rp[ii]; \ 176 ap[ii+1] = ap[ii]; \ 177 } \ 178 rp[_i] = col1; \ 179 ap[_i] = value; \ 180 b_noinsert: ; \ 181 bilen[row] = nrow; \ 182 } 183 184 #undef __FUNCT__ 185 #define __FUNCT__ "MatSetValues_MPIAIJ" 186 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 187 { 188 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 189 PetscScalar value; 190 PetscErrorCode ierr; 191 PetscInt i,j,rstart = aij->rstart,rend = aij->rend; 192 PetscInt cstart = aij->cstart,cend = aij->cend,row,col; 193 PetscTruth roworiented = aij->roworiented; 194 195 /* Some Variables required in the macro */ 196 Mat A = aij->A; 197 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 198 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 199 PetscScalar *aa = a->a; 200 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 201 Mat B = aij->B; 202 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 203 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 204 PetscScalar *ba = b->a; 205 206 PetscInt *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 207 PetscInt nonew = a->nonew,shift=0; 208 PetscScalar *ap; 209 210 PetscFunctionBegin; 211 for (i=0; i<m; i++) { 212 if (im[i] < 0) continue; 213 #if defined(PETSC_USE_BOPT_g) 214 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1); 215 #endif 216 if (im[i] >= rstart && im[i] < rend) { 217 row = im[i] - rstart; 218 for (j=0; j<n; j++) { 219 if (in[j] >= cstart && in[j] < cend){ 220 col = in[j] - cstart; 221 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 222 if (ignorezeroentries && value == 0.0) continue; 223 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 224 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 225 } else if (in[j] < 0) continue; 226 #if defined(PETSC_USE_BOPT_g) 227 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);} 228 #endif 229 else { 230 if (mat->was_assembled) { 231 if (!aij->colmap) { 232 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 233 } 234 #if defined (PETSC_USE_CTABLE) 235 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 236 col--; 237 #else 238 col = aij->colmap[in[j]] - 1; 239 #endif 240 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 241 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 242 col = in[j]; 243 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 244 B = aij->B; 245 b = (Mat_SeqAIJ*)B->data; 246 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 247 ba = b->a; 248 } 249 } else col = in[j]; 250 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 251 if (ignorezeroentries && value == 0.0) continue; 252 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 253 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 254 } 255 } 256 } else { 257 if (!aij->donotstash) { 258 if (roworiented) { 259 if (ignorezeroentries && v[i*n] == 0.0) continue; 260 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 261 } else { 262 if (ignorezeroentries && v[i] == 0.0) continue; 263 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 264 } 265 } 266 } 267 } 268 PetscFunctionReturn(0); 269 } 270 271 #undef __FUNCT__ 272 #define __FUNCT__ "MatGetValues_MPIAIJ" 273 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[]) 274 { 275 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 276 PetscErrorCode ierr; 277 int i,j,rstart = aij->rstart,rend = aij->rend; 278 int cstart = aij->cstart,cend = aij->cend,row,col; 279 280 PetscFunctionBegin; 281 for (i=0; i<m; i++) { 282 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]); 283 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1); 284 if (idxm[i] >= rstart && idxm[i] < rend) { 285 row = idxm[i] - rstart; 286 for (j=0; j<n; j++) { 287 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]); 288 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1); 289 if (idxn[j] >= cstart && idxn[j] < cend){ 290 col = idxn[j] - cstart; 291 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 292 } else { 293 if (!aij->colmap) { 294 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 295 } 296 #if defined (PETSC_USE_CTABLE) 297 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 298 col --; 299 #else 300 col = aij->colmap[idxn[j]] - 1; 301 #endif 302 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 303 else { 304 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 305 } 306 } 307 } 308 } else { 309 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 310 } 311 } 312 PetscFunctionReturn(0); 313 } 314 315 #undef __FUNCT__ 316 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 317 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 318 { 319 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 320 PetscErrorCode ierr; 321 int nstash,reallocs; 322 InsertMode addv; 323 324 PetscFunctionBegin; 325 if (aij->donotstash) { 326 PetscFunctionReturn(0); 327 } 328 329 /* make sure all processors are either in INSERTMODE or ADDMODE */ 330 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 331 if (addv == (ADD_VALUES|INSERT_VALUES)) { 332 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 333 } 334 mat->insertmode = addv; /* in case this processor had no cache */ 335 336 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 337 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 338 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 339 PetscFunctionReturn(0); 340 } 341 342 343 #undef __FUNCT__ 344 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 345 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 346 { 347 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 348 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 349 PetscErrorCode ierr; 350 int i,j,rstart,ncols,n,flg; 351 int *row,*col,other_disassembled; 352 PetscScalar *val; 353 InsertMode addv = mat->insertmode; 354 355 PetscFunctionBegin; 356 if (!aij->donotstash) { 357 while (1) { 358 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 359 if (!flg) break; 360 361 for (i=0; i<n;) { 362 /* Now identify the consecutive vals belonging to the same row */ 363 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 364 if (j < n) ncols = j-i; 365 else ncols = n-i; 366 /* Now assemble all these values with a single function call */ 367 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 368 i = j; 369 } 370 } 371 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 372 } 373 374 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 375 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 376 377 /* determine if any processor has disassembled, if so we must 378 also disassemble ourselfs, in order that we may reassemble. */ 379 /* 380 if nonzero structure of submatrix B cannot change then we know that 381 no processor disassembled thus we can skip this stuff 382 */ 383 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 384 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 385 if (mat->was_assembled && !other_disassembled) { 386 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 387 /* reaccess the b because aij->B was changed */ 388 b = (Mat_SeqAIJ *)aij->B->data; 389 } 390 } 391 392 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 393 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 394 } 395 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 396 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 397 398 if (aij->rowvalues) { 399 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 400 aij->rowvalues = 0; 401 } 402 403 /* used by MatAXPY() */ 404 a->xtoy = 0; b->xtoy = 0; 405 a->XtoY = 0; b->XtoY = 0; 406 407 PetscFunctionReturn(0); 408 } 409 410 #undef __FUNCT__ 411 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 412 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 413 { 414 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 415 PetscErrorCode ierr; 416 417 PetscFunctionBegin; 418 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 419 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 420 PetscFunctionReturn(0); 421 } 422 423 #undef __FUNCT__ 424 #define __FUNCT__ "MatZeroRows_MPIAIJ" 425 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag) 426 { 427 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 428 PetscErrorCode ierr; 429 int i,N,*rows,*owners = l->rowners,size = l->size; 430 int *nprocs,j,idx,nsends,row; 431 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 432 int *rvalues,tag = A->tag,count,base,slen,n,*source; 433 int *lens,imdex,*lrows,*values,rstart=l->rstart; 434 MPI_Comm comm = A->comm; 435 MPI_Request *send_waits,*recv_waits; 436 MPI_Status recv_status,*send_status; 437 IS istmp; 438 PetscTruth found; 439 440 PetscFunctionBegin; 441 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 442 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 443 444 /* first count number of contributors to each processor */ 445 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 446 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 447 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 448 for (i=0; i<N; i++) { 449 idx = rows[i]; 450 found = PETSC_FALSE; 451 for (j=0; j<size; j++) { 452 if (idx >= owners[j] && idx < owners[j+1]) { 453 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 454 } 455 } 456 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 457 } 458 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 459 460 /* inform other processors of number of messages and max length*/ 461 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 462 463 /* post receives: */ 464 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 465 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 466 for (i=0; i<nrecvs; i++) { 467 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 468 } 469 470 /* do sends: 471 1) starts[i] gives the starting index in svalues for stuff going to 472 the ith processor 473 */ 474 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 475 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 476 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 477 starts[0] = 0; 478 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 479 for (i=0; i<N; i++) { 480 svalues[starts[owner[i]]++] = rows[i]; 481 } 482 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 483 484 starts[0] = 0; 485 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 486 count = 0; 487 for (i=0; i<size; i++) { 488 if (nprocs[2*i+1]) { 489 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 490 } 491 } 492 ierr = PetscFree(starts);CHKERRQ(ierr); 493 494 base = owners[rank]; 495 496 /* wait on receives */ 497 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr); 498 source = lens + nrecvs; 499 count = nrecvs; slen = 0; 500 while (count) { 501 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 502 /* unpack receives into our local space */ 503 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 504 source[imdex] = recv_status.MPI_SOURCE; 505 lens[imdex] = n; 506 slen += n; 507 count--; 508 } 509 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 510 511 /* move the data into the send scatter */ 512 ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr); 513 count = 0; 514 for (i=0; i<nrecvs; i++) { 515 values = rvalues + i*nmax; 516 for (j=0; j<lens[i]; j++) { 517 lrows[count++] = values[j] - base; 518 } 519 } 520 ierr = PetscFree(rvalues);CHKERRQ(ierr); 521 ierr = PetscFree(lens);CHKERRQ(ierr); 522 ierr = PetscFree(owner);CHKERRQ(ierr); 523 ierr = PetscFree(nprocs);CHKERRQ(ierr); 524 525 /* actually zap the local rows */ 526 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 527 PetscLogObjectParent(A,istmp); 528 529 /* 530 Zero the required rows. If the "diagonal block" of the matrix 531 is square and the user wishes to set the diagonal we use seperate 532 code so that MatSetValues() is not called for each diagonal allocating 533 new memory, thus calling lots of mallocs and slowing things down. 534 535 Contributed by: Mathew Knepley 536 */ 537 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 538 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 539 if (diag && (l->A->M == l->A->N)) { 540 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 541 } else if (diag) { 542 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 543 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 544 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 545 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 546 } 547 for (i = 0; i < slen; i++) { 548 row = lrows[i] + rstart; 549 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 550 } 551 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 552 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 553 } else { 554 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 555 } 556 ierr = ISDestroy(istmp);CHKERRQ(ierr); 557 ierr = PetscFree(lrows);CHKERRQ(ierr); 558 559 /* wait on sends */ 560 if (nsends) { 561 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 562 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 563 ierr = PetscFree(send_status);CHKERRQ(ierr); 564 } 565 ierr = PetscFree(send_waits);CHKERRQ(ierr); 566 ierr = PetscFree(svalues);CHKERRQ(ierr); 567 568 PetscFunctionReturn(0); 569 } 570 571 #undef __FUNCT__ 572 #define __FUNCT__ "MatMult_MPIAIJ" 573 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 574 { 575 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 576 PetscErrorCode ierr; 577 int nt; 578 579 PetscFunctionBegin; 580 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 581 if (nt != A->n) { 582 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt); 583 } 584 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 585 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 586 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 587 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 588 PetscFunctionReturn(0); 589 } 590 591 #undef __FUNCT__ 592 #define __FUNCT__ "MatMultAdd_MPIAIJ" 593 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 594 { 595 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 596 PetscErrorCode ierr; 597 598 PetscFunctionBegin; 599 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 600 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 601 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 602 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 603 PetscFunctionReturn(0); 604 } 605 606 #undef __FUNCT__ 607 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 608 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 609 { 610 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 611 PetscErrorCode ierr; 612 613 PetscFunctionBegin; 614 /* do nondiagonal part */ 615 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 616 /* send it on its way */ 617 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 618 /* do local part */ 619 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 620 /* receive remote parts: note this assumes the values are not actually */ 621 /* inserted in yy until the next line, which is true for my implementation*/ 622 /* but is not perhaps always true. */ 623 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 624 PetscFunctionReturn(0); 625 } 626 627 EXTERN_C_BEGIN 628 #undef __FUNCT__ 629 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 630 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth tol,PetscTruth *f) 631 { 632 MPI_Comm comm; 633 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 634 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 635 IS Me,Notme; 636 PetscErrorCode ierr; 637 int M,N,first,last,*notme,ntids,i; 638 639 PetscFunctionBegin; 640 641 /* Easy test: symmetric diagonal block */ 642 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 643 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 644 if (!*f) PetscFunctionReturn(0); 645 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 646 ierr = MPI_Comm_size(comm,&ntids);CHKERRQ(ierr); 647 if (ntids==1) PetscFunctionReturn(0); 648 649 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 650 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 651 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 652 ierr = PetscMalloc((N-last+first)*sizeof(int),¬me);CHKERRQ(ierr); 653 for (i=0; i<first; i++) notme[i] = i; 654 for (i=last; i<M; i++) notme[i-last+first] = i; 655 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 656 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 657 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 658 Aoff = Aoffs[0]; 659 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 660 Boff = Boffs[0]; 661 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 662 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 663 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 664 ierr = ISDestroy(Me);CHKERRQ(ierr); 665 ierr = ISDestroy(Notme);CHKERRQ(ierr); 666 667 PetscFunctionReturn(0); 668 } 669 EXTERN_C_END 670 671 #undef __FUNCT__ 672 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 673 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 674 { 675 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 676 PetscErrorCode ierr; 677 678 PetscFunctionBegin; 679 /* do nondiagonal part */ 680 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 681 /* send it on its way */ 682 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 683 /* do local part */ 684 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 685 /* receive remote parts: note this assumes the values are not actually */ 686 /* inserted in yy until the next line, which is true for my implementation*/ 687 /* but is not perhaps always true. */ 688 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 689 PetscFunctionReturn(0); 690 } 691 692 /* 693 This only works correctly for square matrices where the subblock A->A is the 694 diagonal block 695 */ 696 #undef __FUNCT__ 697 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 698 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 699 { 700 PetscErrorCode ierr; 701 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 702 703 PetscFunctionBegin; 704 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 705 if (a->rstart != a->cstart || a->rend != a->cend) { 706 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 707 } 708 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 709 PetscFunctionReturn(0); 710 } 711 712 #undef __FUNCT__ 713 #define __FUNCT__ "MatScale_MPIAIJ" 714 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A) 715 { 716 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 717 PetscErrorCode ierr; 718 719 PetscFunctionBegin; 720 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 721 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 722 PetscFunctionReturn(0); 723 } 724 725 #undef __FUNCT__ 726 #define __FUNCT__ "MatDestroy_MPIAIJ" 727 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 728 { 729 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 730 PetscErrorCode ierr; 731 732 PetscFunctionBegin; 733 #if defined(PETSC_USE_LOG) 734 PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 735 #endif 736 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 737 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 738 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 739 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 740 #if defined (PETSC_USE_CTABLE) 741 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 742 #else 743 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 744 #endif 745 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 746 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 747 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 748 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 749 ierr = PetscFree(aij);CHKERRQ(ierr); 750 751 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 752 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 753 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 754 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 755 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 756 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 757 PetscFunctionReturn(0); 758 } 759 760 #undef __FUNCT__ 761 #define __FUNCT__ "MatView_MPIAIJ_Binary" 762 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 763 { 764 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 765 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 766 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 767 PetscErrorCode ierr; 768 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 769 int fd; 770 PetscInt nz,header[4],*row_lengths,*range,rlen,i; 771 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz; 772 PetscScalar *column_values; 773 774 PetscFunctionBegin; 775 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 776 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 777 nz = A->nz + B->nz; 778 if (!rank) { 779 header[0] = MAT_FILE_COOKIE; 780 header[1] = mat->M; 781 header[2] = mat->N; 782 ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 783 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 784 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 785 /* get largest number of rows any processor has */ 786 rlen = mat->m; 787 ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr); 788 for (i=1; i<size; i++) { 789 rlen = PetscMax(rlen,range[i+1] - range[i]); 790 } 791 } else { 792 ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 793 rlen = mat->m; 794 } 795 796 /* load up the local row counts */ 797 ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr); 798 for (i=0; i<mat->m; i++) { 799 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 800 } 801 802 /* store the row lengths to the file */ 803 if (!rank) { 804 MPI_Status status; 805 ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 806 for (i=1; i<size; i++) { 807 rlen = range[i+1] - range[i]; 808 ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 809 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 810 } 811 } else { 812 ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 813 } 814 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 815 816 /* load up the local column indices */ 817 nzmax = nz; /* )th processor needs space a largest processor needs */ 818 ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr); 819 ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr); 820 cnt = 0; 821 for (i=0; i<mat->m; i++) { 822 for (j=B->i[i]; j<B->i[i+1]; j++) { 823 if ( (col = garray[B->j[j]]) > cstart) break; 824 column_indices[cnt++] = col; 825 } 826 for (k=A->i[i]; k<A->i[i+1]; k++) { 827 column_indices[cnt++] = A->j[k] + cstart; 828 } 829 for (; j<B->i[i+1]; j++) { 830 column_indices[cnt++] = garray[B->j[j]]; 831 } 832 } 833 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz); 834 835 /* store the column indices to the file */ 836 if (!rank) { 837 MPI_Status status; 838 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 839 for (i=1; i<size; i++) { 840 ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 841 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax); 842 ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 843 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 844 } 845 } else { 846 ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 847 ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 848 } 849 ierr = PetscFree(column_indices);CHKERRQ(ierr); 850 851 /* load up the local column values */ 852 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 853 cnt = 0; 854 for (i=0; i<mat->m; i++) { 855 for (j=B->i[i]; j<B->i[i+1]; j++) { 856 if ( garray[B->j[j]] > cstart) break; 857 column_values[cnt++] = B->a[j]; 858 } 859 for (k=A->i[i]; k<A->i[i+1]; k++) { 860 column_values[cnt++] = A->a[k]; 861 } 862 for (; j<B->i[i+1]; j++) { 863 column_values[cnt++] = B->a[j]; 864 } 865 } 866 if (cnt != A->nz + B->nz) SETERRQ2(1,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz); 867 868 /* store the column values to the file */ 869 if (!rank) { 870 MPI_Status status; 871 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 872 for (i=1; i<size; i++) { 873 ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 874 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax); 875 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 876 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 877 } 878 } else { 879 ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 880 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 881 } 882 ierr = PetscFree(column_values);CHKERRQ(ierr); 883 PetscFunctionReturn(0); 884 } 885 886 #undef __FUNCT__ 887 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 888 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 889 { 890 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 891 PetscErrorCode ierr; 892 PetscMPIInt rank = aij->rank,size = aij->size; 893 PetscTruth isdraw,iascii,flg,isbinary; 894 PetscViewer sviewer; 895 PetscViewerFormat format; 896 897 PetscFunctionBegin; 898 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 899 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 900 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 901 if (iascii) { 902 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 903 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 904 MatInfo info; 905 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 906 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 907 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 908 if (flg) { 909 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 910 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 911 } else { 912 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 913 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 914 } 915 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 916 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 917 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 918 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 919 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 920 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 921 PetscFunctionReturn(0); 922 } else if (format == PETSC_VIEWER_ASCII_INFO) { 923 PetscFunctionReturn(0); 924 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 925 PetscFunctionReturn(0); 926 } 927 } else if (isbinary) { 928 if (size == 1) { 929 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 930 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 931 } else { 932 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 933 } 934 PetscFunctionReturn(0); 935 } else if (isdraw) { 936 PetscDraw draw; 937 PetscTruth isnull; 938 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 939 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 940 } 941 942 if (size == 1) { 943 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 944 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 945 } else { 946 /* assemble the entire matrix onto first processor. */ 947 Mat A; 948 Mat_SeqAIJ *Aloc; 949 int M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 950 PetscScalar *a; 951 952 if (!rank) { 953 ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr); 954 } else { 955 ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr); 956 } 957 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 958 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 959 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 960 PetscLogObjectParent(mat,A); 961 962 /* copy over the A part */ 963 Aloc = (Mat_SeqAIJ*)aij->A->data; 964 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 965 row = aij->rstart; 966 for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;} 967 for (i=0; i<m; i++) { 968 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 969 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 970 } 971 aj = Aloc->j; 972 for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;} 973 974 /* copy over the B part */ 975 Aloc = (Mat_SeqAIJ*)aij->B->data; 976 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 977 row = aij->rstart; 978 ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr); 979 ct = cols; 980 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 981 for (i=0; i<m; i++) { 982 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 983 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 984 } 985 ierr = PetscFree(ct);CHKERRQ(ierr); 986 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 987 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 988 /* 989 Everyone has to call to draw the matrix since the graphics waits are 990 synchronized across all processors that share the PetscDraw object 991 */ 992 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 993 if (!rank) { 994 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 995 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 996 } 997 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 998 ierr = MatDestroy(A);CHKERRQ(ierr); 999 } 1000 PetscFunctionReturn(0); 1001 } 1002 1003 #undef __FUNCT__ 1004 #define __FUNCT__ "MatView_MPIAIJ" 1005 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1006 { 1007 PetscErrorCode ierr; 1008 PetscTruth iascii,isdraw,issocket,isbinary; 1009 1010 PetscFunctionBegin; 1011 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1012 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1013 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1014 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1015 if (iascii || isdraw || isbinary || issocket) { 1016 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1017 } else { 1018 SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1019 } 1020 PetscFunctionReturn(0); 1021 } 1022 1023 1024 1025 #undef __FUNCT__ 1026 #define __FUNCT__ "MatRelax_MPIAIJ" 1027 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1028 { 1029 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1030 PetscErrorCode ierr; 1031 Vec bb1; 1032 PetscScalar mone=-1.0; 1033 1034 PetscFunctionBegin; 1035 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 1036 1037 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1038 1039 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1040 if (flag & SOR_ZERO_INITIAL_GUESS) { 1041 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1042 its--; 1043 } 1044 1045 while (its--) { 1046 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1047 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1048 1049 /* update rhs: bb1 = bb - B*x */ 1050 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1051 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1052 1053 /* local sweep */ 1054 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1055 CHKERRQ(ierr); 1056 } 1057 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1058 if (flag & SOR_ZERO_INITIAL_GUESS) { 1059 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1060 its--; 1061 } 1062 while (its--) { 1063 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1064 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1065 1066 /* update rhs: bb1 = bb - B*x */ 1067 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1068 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1069 1070 /* local sweep */ 1071 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1072 CHKERRQ(ierr); 1073 } 1074 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1075 if (flag & SOR_ZERO_INITIAL_GUESS) { 1076 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1077 its--; 1078 } 1079 while (its--) { 1080 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1081 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1082 1083 /* update rhs: bb1 = bb - B*x */ 1084 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1085 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1086 1087 /* local sweep */ 1088 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1089 CHKERRQ(ierr); 1090 } 1091 } else { 1092 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1093 } 1094 1095 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1096 PetscFunctionReturn(0); 1097 } 1098 1099 #undef __FUNCT__ 1100 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1101 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1102 { 1103 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1104 Mat A = mat->A,B = mat->B; 1105 PetscErrorCode ierr; 1106 PetscReal isend[5],irecv[5]; 1107 1108 PetscFunctionBegin; 1109 info->block_size = 1.0; 1110 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1111 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1112 isend[3] = info->memory; isend[4] = info->mallocs; 1113 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1114 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1115 isend[3] += info->memory; isend[4] += info->mallocs; 1116 if (flag == MAT_LOCAL) { 1117 info->nz_used = isend[0]; 1118 info->nz_allocated = isend[1]; 1119 info->nz_unneeded = isend[2]; 1120 info->memory = isend[3]; 1121 info->mallocs = isend[4]; 1122 } else if (flag == MAT_GLOBAL_MAX) { 1123 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1124 info->nz_used = irecv[0]; 1125 info->nz_allocated = irecv[1]; 1126 info->nz_unneeded = irecv[2]; 1127 info->memory = irecv[3]; 1128 info->mallocs = irecv[4]; 1129 } else if (flag == MAT_GLOBAL_SUM) { 1130 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1131 info->nz_used = irecv[0]; 1132 info->nz_allocated = irecv[1]; 1133 info->nz_unneeded = irecv[2]; 1134 info->memory = irecv[3]; 1135 info->mallocs = irecv[4]; 1136 } 1137 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1138 info->fill_ratio_needed = 0; 1139 info->factor_mallocs = 0; 1140 info->rows_global = (double)matin->M; 1141 info->columns_global = (double)matin->N; 1142 info->rows_local = (double)matin->m; 1143 info->columns_local = (double)matin->N; 1144 1145 PetscFunctionReturn(0); 1146 } 1147 1148 #undef __FUNCT__ 1149 #define __FUNCT__ "MatSetOption_MPIAIJ" 1150 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op) 1151 { 1152 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1153 PetscErrorCode ierr; 1154 1155 PetscFunctionBegin; 1156 switch (op) { 1157 case MAT_NO_NEW_NONZERO_LOCATIONS: 1158 case MAT_YES_NEW_NONZERO_LOCATIONS: 1159 case MAT_COLUMNS_UNSORTED: 1160 case MAT_COLUMNS_SORTED: 1161 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1162 case MAT_KEEP_ZEROED_ROWS: 1163 case MAT_NEW_NONZERO_LOCATION_ERR: 1164 case MAT_USE_INODES: 1165 case MAT_DO_NOT_USE_INODES: 1166 case MAT_IGNORE_ZERO_ENTRIES: 1167 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1168 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1169 break; 1170 case MAT_ROW_ORIENTED: 1171 a->roworiented = PETSC_TRUE; 1172 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1173 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1174 break; 1175 case MAT_ROWS_SORTED: 1176 case MAT_ROWS_UNSORTED: 1177 case MAT_YES_NEW_DIAGONALS: 1178 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1179 break; 1180 case MAT_COLUMN_ORIENTED: 1181 a->roworiented = PETSC_FALSE; 1182 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1183 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1184 break; 1185 case MAT_IGNORE_OFF_PROC_ENTRIES: 1186 a->donotstash = PETSC_TRUE; 1187 break; 1188 case MAT_NO_NEW_DIAGONALS: 1189 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1190 case MAT_SYMMETRIC: 1191 case MAT_STRUCTURALLY_SYMMETRIC: 1192 case MAT_HERMITIAN: 1193 case MAT_SYMMETRY_ETERNAL: 1194 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1195 break; 1196 case MAT_NOT_SYMMETRIC: 1197 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1198 case MAT_NOT_HERMITIAN: 1199 case MAT_NOT_SYMMETRY_ETERNAL: 1200 break; 1201 default: 1202 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1203 } 1204 PetscFunctionReturn(0); 1205 } 1206 1207 #undef __FUNCT__ 1208 #define __FUNCT__ "MatGetRow_MPIAIJ" 1209 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1210 { 1211 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1212 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1213 PetscErrorCode ierr; 1214 int i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1215 int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1216 int *cmap,*idx_p; 1217 1218 PetscFunctionBegin; 1219 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1220 mat->getrowactive = PETSC_TRUE; 1221 1222 if (!mat->rowvalues && (idx || v)) { 1223 /* 1224 allocate enough space to hold information from the longest row. 1225 */ 1226 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1227 int max = 1,tmp; 1228 for (i=0; i<matin->m; i++) { 1229 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1230 if (max < tmp) { max = tmp; } 1231 } 1232 ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1233 mat->rowindices = (int*)(mat->rowvalues + max); 1234 } 1235 1236 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1237 lrow = row - rstart; 1238 1239 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1240 if (!v) {pvA = 0; pvB = 0;} 1241 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1242 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1243 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1244 nztot = nzA + nzB; 1245 1246 cmap = mat->garray; 1247 if (v || idx) { 1248 if (nztot) { 1249 /* Sort by increasing column numbers, assuming A and B already sorted */ 1250 int imark = -1; 1251 if (v) { 1252 *v = v_p = mat->rowvalues; 1253 for (i=0; i<nzB; i++) { 1254 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1255 else break; 1256 } 1257 imark = i; 1258 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1259 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1260 } 1261 if (idx) { 1262 *idx = idx_p = mat->rowindices; 1263 if (imark > -1) { 1264 for (i=0; i<imark; i++) { 1265 idx_p[i] = cmap[cworkB[i]]; 1266 } 1267 } else { 1268 for (i=0; i<nzB; i++) { 1269 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1270 else break; 1271 } 1272 imark = i; 1273 } 1274 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1275 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1276 } 1277 } else { 1278 if (idx) *idx = 0; 1279 if (v) *v = 0; 1280 } 1281 } 1282 *nz = nztot; 1283 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1284 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1285 PetscFunctionReturn(0); 1286 } 1287 1288 #undef __FUNCT__ 1289 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1290 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1291 { 1292 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1293 1294 PetscFunctionBegin; 1295 if (aij->getrowactive == PETSC_FALSE) { 1296 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1297 } 1298 aij->getrowactive = PETSC_FALSE; 1299 PetscFunctionReturn(0); 1300 } 1301 1302 #undef __FUNCT__ 1303 #define __FUNCT__ "MatNorm_MPIAIJ" 1304 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1305 { 1306 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1307 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1308 PetscErrorCode ierr; 1309 int i,j,cstart = aij->cstart; 1310 PetscReal sum = 0.0; 1311 PetscScalar *v; 1312 1313 PetscFunctionBegin; 1314 if (aij->size == 1) { 1315 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1316 } else { 1317 if (type == NORM_FROBENIUS) { 1318 v = amat->a; 1319 for (i=0; i<amat->nz; i++) { 1320 #if defined(PETSC_USE_COMPLEX) 1321 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1322 #else 1323 sum += (*v)*(*v); v++; 1324 #endif 1325 } 1326 v = bmat->a; 1327 for (i=0; i<bmat->nz; i++) { 1328 #if defined(PETSC_USE_COMPLEX) 1329 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1330 #else 1331 sum += (*v)*(*v); v++; 1332 #endif 1333 } 1334 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1335 *norm = sqrt(*norm); 1336 } else if (type == NORM_1) { /* max column norm */ 1337 PetscReal *tmp,*tmp2; 1338 int *jj,*garray = aij->garray; 1339 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1340 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1341 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1342 *norm = 0.0; 1343 v = amat->a; jj = amat->j; 1344 for (j=0; j<amat->nz; j++) { 1345 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1346 } 1347 v = bmat->a; jj = bmat->j; 1348 for (j=0; j<bmat->nz; j++) { 1349 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1350 } 1351 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1352 for (j=0; j<mat->N; j++) { 1353 if (tmp2[j] > *norm) *norm = tmp2[j]; 1354 } 1355 ierr = PetscFree(tmp);CHKERRQ(ierr); 1356 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1357 } else if (type == NORM_INFINITY) { /* max row norm */ 1358 PetscReal ntemp = 0.0; 1359 for (j=0; j<aij->A->m; j++) { 1360 v = amat->a + amat->i[j]; 1361 sum = 0.0; 1362 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1363 sum += PetscAbsScalar(*v); v++; 1364 } 1365 v = bmat->a + bmat->i[j]; 1366 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1367 sum += PetscAbsScalar(*v); v++; 1368 } 1369 if (sum > ntemp) ntemp = sum; 1370 } 1371 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1372 } else { 1373 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1374 } 1375 } 1376 PetscFunctionReturn(0); 1377 } 1378 1379 #undef __FUNCT__ 1380 #define __FUNCT__ "MatTranspose_MPIAIJ" 1381 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout) 1382 { 1383 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1384 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1385 PetscErrorCode ierr; 1386 int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1387 Mat B; 1388 PetscScalar *array; 1389 1390 PetscFunctionBegin; 1391 if (!matout && M != N) { 1392 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1393 } 1394 1395 ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr); 1396 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1397 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1398 1399 /* copy over the A part */ 1400 Aloc = (Mat_SeqAIJ*)a->A->data; 1401 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1402 row = a->rstart; 1403 for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;} 1404 for (i=0; i<m; i++) { 1405 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1406 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1407 } 1408 aj = Aloc->j; 1409 for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;} 1410 1411 /* copy over the B part */ 1412 Aloc = (Mat_SeqAIJ*)a->B->data; 1413 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1414 row = a->rstart; 1415 ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr); 1416 ct = cols; 1417 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1418 for (i=0; i<m; i++) { 1419 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1420 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1421 } 1422 ierr = PetscFree(ct);CHKERRQ(ierr); 1423 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1424 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1425 if (matout) { 1426 *matout = B; 1427 } else { 1428 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1429 } 1430 PetscFunctionReturn(0); 1431 } 1432 1433 #undef __FUNCT__ 1434 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1435 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1436 { 1437 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1438 Mat a = aij->A,b = aij->B; 1439 PetscErrorCode ierr; 1440 int s1,s2,s3; 1441 1442 PetscFunctionBegin; 1443 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1444 if (rr) { 1445 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1446 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1447 /* Overlap communication with computation. */ 1448 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1449 } 1450 if (ll) { 1451 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1452 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1453 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1454 } 1455 /* scale the diagonal block */ 1456 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1457 1458 if (rr) { 1459 /* Do a scatter end and then right scale the off-diagonal block */ 1460 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1461 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1462 } 1463 1464 PetscFunctionReturn(0); 1465 } 1466 1467 1468 #undef __FUNCT__ 1469 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1470 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A) 1471 { 1472 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1473 PetscErrorCode ierr; 1474 1475 PetscFunctionBegin; 1476 if (!a->rank) { 1477 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1478 } 1479 PetscFunctionReturn(0); 1480 } 1481 1482 #undef __FUNCT__ 1483 #define __FUNCT__ "MatGetBlockSize_MPIAIJ" 1484 PetscErrorCode MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1485 { 1486 PetscFunctionBegin; 1487 *bs = 1; 1488 PetscFunctionReturn(0); 1489 } 1490 #undef __FUNCT__ 1491 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1492 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1493 { 1494 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1495 PetscErrorCode ierr; 1496 1497 PetscFunctionBegin; 1498 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1499 PetscFunctionReturn(0); 1500 } 1501 1502 #undef __FUNCT__ 1503 #define __FUNCT__ "MatEqual_MPIAIJ" 1504 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1505 { 1506 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1507 Mat a,b,c,d; 1508 PetscTruth flg; 1509 PetscErrorCode ierr; 1510 1511 PetscFunctionBegin; 1512 a = matA->A; b = matA->B; 1513 c = matB->A; d = matB->B; 1514 1515 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1516 if (flg == PETSC_TRUE) { 1517 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1518 } 1519 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1520 PetscFunctionReturn(0); 1521 } 1522 1523 #undef __FUNCT__ 1524 #define __FUNCT__ "MatCopy_MPIAIJ" 1525 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1526 { 1527 PetscErrorCode ierr; 1528 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1529 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1530 1531 PetscFunctionBegin; 1532 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1533 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1534 /* because of the column compression in the off-processor part of the matrix a->B, 1535 the number of columns in a->B and b->B may be different, hence we cannot call 1536 the MatCopy() directly on the two parts. If need be, we can provide a more 1537 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1538 then copying the submatrices */ 1539 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1540 } else { 1541 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1542 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1543 } 1544 PetscFunctionReturn(0); 1545 } 1546 1547 #undef __FUNCT__ 1548 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1549 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1550 { 1551 PetscErrorCode ierr; 1552 1553 PetscFunctionBegin; 1554 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1555 PetscFunctionReturn(0); 1556 } 1557 1558 #include "petscblaslapack.h" 1559 #undef __FUNCT__ 1560 #define __FUNCT__ "MatAXPY_MPIAIJ" 1561 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str) 1562 { 1563 PetscErrorCode ierr; 1564 int i; 1565 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1566 PetscBLASInt bnz,one=1; 1567 Mat_SeqAIJ *x,*y; 1568 1569 PetscFunctionBegin; 1570 if (str == SAME_NONZERO_PATTERN) { 1571 x = (Mat_SeqAIJ *)xx->A->data; 1572 y = (Mat_SeqAIJ *)yy->A->data; 1573 bnz = (PetscBLASInt)x->nz; 1574 BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1575 x = (Mat_SeqAIJ *)xx->B->data; 1576 y = (Mat_SeqAIJ *)yy->B->data; 1577 bnz = (PetscBLASInt)x->nz; 1578 BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1579 } else if (str == SUBSET_NONZERO_PATTERN) { 1580 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1581 1582 x = (Mat_SeqAIJ *)xx->B->data; 1583 y = (Mat_SeqAIJ *)yy->B->data; 1584 if (y->xtoy && y->XtoY != xx->B) { 1585 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1586 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1587 } 1588 if (!y->xtoy) { /* get xtoy */ 1589 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1590 y->XtoY = xx->B; 1591 } 1592 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1593 } else { 1594 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1595 } 1596 PetscFunctionReturn(0); 1597 } 1598 1599 /* -------------------------------------------------------------------*/ 1600 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1601 MatGetRow_MPIAIJ, 1602 MatRestoreRow_MPIAIJ, 1603 MatMult_MPIAIJ, 1604 /* 4*/ MatMultAdd_MPIAIJ, 1605 MatMultTranspose_MPIAIJ, 1606 MatMultTransposeAdd_MPIAIJ, 1607 0, 1608 0, 1609 0, 1610 /*10*/ 0, 1611 0, 1612 0, 1613 MatRelax_MPIAIJ, 1614 MatTranspose_MPIAIJ, 1615 /*15*/ MatGetInfo_MPIAIJ, 1616 MatEqual_MPIAIJ, 1617 MatGetDiagonal_MPIAIJ, 1618 MatDiagonalScale_MPIAIJ, 1619 MatNorm_MPIAIJ, 1620 /*20*/ MatAssemblyBegin_MPIAIJ, 1621 MatAssemblyEnd_MPIAIJ, 1622 0, 1623 MatSetOption_MPIAIJ, 1624 MatZeroEntries_MPIAIJ, 1625 /*25*/ MatZeroRows_MPIAIJ, 1626 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1627 MatLUFactorSymbolic_MPIAIJ_TFS, 1628 #else 1629 0, 1630 #endif 1631 0, 1632 0, 1633 0, 1634 /*30*/ MatSetUpPreallocation_MPIAIJ, 1635 0, 1636 0, 1637 0, 1638 0, 1639 /*35*/ MatDuplicate_MPIAIJ, 1640 0, 1641 0, 1642 0, 1643 0, 1644 /*40*/ MatAXPY_MPIAIJ, 1645 MatGetSubMatrices_MPIAIJ, 1646 MatIncreaseOverlap_MPIAIJ, 1647 MatGetValues_MPIAIJ, 1648 MatCopy_MPIAIJ, 1649 /*45*/ MatPrintHelp_MPIAIJ, 1650 MatScale_MPIAIJ, 1651 0, 1652 0, 1653 0, 1654 /*50*/ MatGetBlockSize_MPIAIJ, 1655 0, 1656 0, 1657 0, 1658 0, 1659 /*55*/ MatFDColoringCreate_MPIAIJ, 1660 0, 1661 MatSetUnfactored_MPIAIJ, 1662 0, 1663 0, 1664 /*60*/ MatGetSubMatrix_MPIAIJ, 1665 MatDestroy_MPIAIJ, 1666 MatView_MPIAIJ, 1667 MatGetPetscMaps_Petsc, 1668 0, 1669 /*65*/ 0, 1670 0, 1671 0, 1672 0, 1673 0, 1674 /*70*/ 0, 1675 0, 1676 MatSetColoring_MPIAIJ, 1677 MatSetValuesAdic_MPIAIJ, 1678 MatSetValuesAdifor_MPIAIJ, 1679 /*75*/ 0, 1680 0, 1681 0, 1682 0, 1683 0, 1684 /*80*/ 0, 1685 0, 1686 0, 1687 0, 1688 /*84*/ MatLoad_MPIAIJ, 1689 0, 1690 0, 1691 0, 1692 0, 1693 0, 1694 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 1695 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1696 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1697 MatPtAP_MPIAIJ_MPIAIJ, 1698 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 1699 /*95*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 1700 0, 1701 0, 1702 0}; 1703 1704 /* ----------------------------------------------------------------------------------------*/ 1705 1706 EXTERN_C_BEGIN 1707 #undef __FUNCT__ 1708 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1709 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 1710 { 1711 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1712 PetscErrorCode ierr; 1713 1714 PetscFunctionBegin; 1715 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1716 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1717 PetscFunctionReturn(0); 1718 } 1719 EXTERN_C_END 1720 1721 EXTERN_C_BEGIN 1722 #undef __FUNCT__ 1723 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1724 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 1725 { 1726 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1727 PetscErrorCode ierr; 1728 1729 PetscFunctionBegin; 1730 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1731 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1732 PetscFunctionReturn(0); 1733 } 1734 EXTERN_C_END 1735 1736 #include "petscpc.h" 1737 EXTERN_C_BEGIN 1738 #undef __FUNCT__ 1739 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1740 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 1741 { 1742 Mat_MPIAIJ *b; 1743 PetscErrorCode ierr; 1744 int i; 1745 1746 PetscFunctionBegin; 1747 B->preallocated = PETSC_TRUE; 1748 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1749 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1750 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 1751 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 1752 if (d_nnz) { 1753 for (i=0; i<B->m; i++) { 1754 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 1755 } 1756 } 1757 if (o_nnz) { 1758 for (i=0; i<B->m; i++) { 1759 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 1760 } 1761 } 1762 b = (Mat_MPIAIJ*)B->data; 1763 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1764 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1765 1766 PetscFunctionReturn(0); 1767 } 1768 EXTERN_C_END 1769 1770 /*MC 1771 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 1772 1773 Options Database Keys: 1774 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 1775 1776 Level: beginner 1777 1778 .seealso: MatCreateMPIAIJ 1779 M*/ 1780 1781 EXTERN_C_BEGIN 1782 #undef __FUNCT__ 1783 #define __FUNCT__ "MatCreate_MPIAIJ" 1784 PetscErrorCode MatCreate_MPIAIJ(Mat B) 1785 { 1786 Mat_MPIAIJ *b; 1787 PetscErrorCode ierr; 1788 int i,size; 1789 1790 PetscFunctionBegin; 1791 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1792 1793 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 1794 B->data = (void*)b; 1795 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1796 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1797 B->factor = 0; 1798 B->assembled = PETSC_FALSE; 1799 B->mapping = 0; 1800 1801 B->insertmode = NOT_SET_VALUES; 1802 b->size = size; 1803 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1804 1805 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 1806 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 1807 1808 /* the information in the maps duplicates the information computed below, eventually 1809 we should remove the duplicate information that is not contained in the maps */ 1810 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1811 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1812 1813 /* build local table of row and column ownerships */ 1814 ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1815 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1816 b->cowners = b->rowners + b->size + 2; 1817 ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1818 b->rowners[0] = 0; 1819 for (i=2; i<=b->size; i++) { 1820 b->rowners[i] += b->rowners[i-1]; 1821 } 1822 b->rstart = b->rowners[b->rank]; 1823 b->rend = b->rowners[b->rank+1]; 1824 ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1825 b->cowners[0] = 0; 1826 for (i=2; i<=b->size; i++) { 1827 b->cowners[i] += b->cowners[i-1]; 1828 } 1829 b->cstart = b->cowners[b->rank]; 1830 b->cend = b->cowners[b->rank+1]; 1831 1832 /* build cache for off array entries formed */ 1833 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1834 b->donotstash = PETSC_FALSE; 1835 b->colmap = 0; 1836 b->garray = 0; 1837 b->roworiented = PETSC_TRUE; 1838 1839 /* stuff used for matrix vector multiply */ 1840 b->lvec = PETSC_NULL; 1841 b->Mvctx = PETSC_NULL; 1842 1843 /* stuff for MatGetRow() */ 1844 b->rowindices = 0; 1845 b->rowvalues = 0; 1846 b->getrowactive = PETSC_FALSE; 1847 1848 /* Explicitly create 2 MATSEQAIJ matrices. */ 1849 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 1850 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 1851 PetscLogObjectParent(B,b->A); 1852 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 1853 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 1854 PetscLogObjectParent(B,b->B); 1855 1856 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1857 "MatStoreValues_MPIAIJ", 1858 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1859 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1860 "MatRetrieveValues_MPIAIJ", 1861 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1862 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1863 "MatGetDiagonalBlock_MPIAIJ", 1864 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1865 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 1866 "MatIsTranspose_MPIAIJ", 1867 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 1868 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 1869 "MatMPIAIJSetPreallocation_MPIAIJ", 1870 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 1871 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 1872 "MatDiagonalScaleLocal_MPIAIJ", 1873 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 1874 PetscFunctionReturn(0); 1875 } 1876 EXTERN_C_END 1877 1878 /*MC 1879 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 1880 1881 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 1882 and MATMPIAIJ otherwise. As a result, for single process communicators, 1883 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 1884 for communicators controlling multiple processes. It is recommended that you call both of 1885 the above preallocation routines for simplicity. 1886 1887 Options Database Keys: 1888 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 1889 1890 Level: beginner 1891 1892 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ 1893 M*/ 1894 1895 EXTERN_C_BEGIN 1896 #undef __FUNCT__ 1897 #define __FUNCT__ "MatCreate_AIJ" 1898 PetscErrorCode MatCreate_AIJ(Mat A) 1899 { 1900 PetscErrorCode ierr; 1901 int size; 1902 1903 PetscFunctionBegin; 1904 ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr); 1905 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1906 if (size == 1) { 1907 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 1908 } else { 1909 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1910 } 1911 PetscFunctionReturn(0); 1912 } 1913 EXTERN_C_END 1914 1915 #undef __FUNCT__ 1916 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1917 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1918 { 1919 Mat mat; 1920 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1921 PetscErrorCode ierr; 1922 1923 PetscFunctionBegin; 1924 *newmat = 0; 1925 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1926 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1927 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1928 a = (Mat_MPIAIJ*)mat->data; 1929 1930 mat->factor = matin->factor; 1931 mat->assembled = PETSC_TRUE; 1932 mat->insertmode = NOT_SET_VALUES; 1933 mat->preallocated = PETSC_TRUE; 1934 1935 a->rstart = oldmat->rstart; 1936 a->rend = oldmat->rend; 1937 a->cstart = oldmat->cstart; 1938 a->cend = oldmat->cend; 1939 a->size = oldmat->size; 1940 a->rank = oldmat->rank; 1941 a->donotstash = oldmat->donotstash; 1942 a->roworiented = oldmat->roworiented; 1943 a->rowindices = 0; 1944 a->rowvalues = 0; 1945 a->getrowactive = PETSC_FALSE; 1946 1947 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1948 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1949 if (oldmat->colmap) { 1950 #if defined (PETSC_USE_CTABLE) 1951 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1952 #else 1953 ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr); 1954 PetscLogObjectMemory(mat,(mat->N)*sizeof(int)); 1955 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr); 1956 #endif 1957 } else a->colmap = 0; 1958 if (oldmat->garray) { 1959 int len; 1960 len = oldmat->B->n; 1961 ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr); 1962 PetscLogObjectMemory(mat,len*sizeof(int)); 1963 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1964 } else a->garray = 0; 1965 1966 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1967 PetscLogObjectParent(mat,a->lvec); 1968 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1969 PetscLogObjectParent(mat,a->Mvctx); 1970 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1971 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1972 PetscLogObjectParent(mat,a->A); 1973 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1974 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1975 PetscLogObjectParent(mat,a->B); 1976 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1977 *newmat = mat; 1978 PetscFunctionReturn(0); 1979 } 1980 1981 #include "petscsys.h" 1982 1983 #undef __FUNCT__ 1984 #define __FUNCT__ "MatLoad_MPIAIJ" 1985 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 1986 { 1987 Mat A; 1988 PetscScalar *vals,*svals; 1989 MPI_Comm comm = ((PetscObject)viewer)->comm; 1990 MPI_Status status; 1991 PetscErrorCode ierr; 1992 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1993 int i,nz,j,rstart,rend,fd; 1994 int header[4],*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1995 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1996 int cend,cstart,n; 1997 1998 PetscFunctionBegin; 1999 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2000 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2001 if (!rank) { 2002 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2003 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2004 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2005 if (header[3] < 0) { 2006 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ"); 2007 } 2008 } 2009 2010 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2011 M = header[1]; N = header[2]; 2012 /* determine ownership of all rows */ 2013 m = M/size + ((M % size) > rank); 2014 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 2015 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2016 rowners[0] = 0; 2017 for (i=2; i<=size; i++) { 2018 rowners[i] += rowners[i-1]; 2019 } 2020 rstart = rowners[rank]; 2021 rend = rowners[rank+1]; 2022 2023 /* distribute row lengths to all processors */ 2024 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr); 2025 offlens = ourlens + (rend-rstart); 2026 if (!rank) { 2027 ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr); 2028 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2029 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 2030 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 2031 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 2032 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2033 } else { 2034 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 2035 } 2036 2037 if (!rank) { 2038 /* calculate the number of nonzeros on each processor */ 2039 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 2040 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2041 for (i=0; i<size; i++) { 2042 for (j=rowners[i]; j< rowners[i+1]; j++) { 2043 procsnz[i] += rowlengths[j]; 2044 } 2045 } 2046 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2047 2048 /* determine max buffer needed and allocate it */ 2049 maxnz = 0; 2050 for (i=0; i<size; i++) { 2051 maxnz = PetscMax(maxnz,procsnz[i]); 2052 } 2053 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 2054 2055 /* read in my part of the matrix column indices */ 2056 nz = procsnz[0]; 2057 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 2058 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2059 2060 /* read in every one elses and ship off */ 2061 for (i=1; i<size; i++) { 2062 nz = procsnz[i]; 2063 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2064 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2065 } 2066 ierr = PetscFree(cols);CHKERRQ(ierr); 2067 } else { 2068 /* determine buffer space needed for message */ 2069 nz = 0; 2070 for (i=0; i<m; i++) { 2071 nz += ourlens[i]; 2072 } 2073 ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr); 2074 2075 /* receive message of column indices*/ 2076 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2077 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2078 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2079 } 2080 2081 /* determine column ownership if matrix is not square */ 2082 if (N != M) { 2083 n = N/size + ((N % size) > rank); 2084 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2085 cstart = cend - n; 2086 } else { 2087 cstart = rstart; 2088 cend = rend; 2089 n = cend - cstart; 2090 } 2091 2092 /* loop over local rows, determining number of off diagonal entries */ 2093 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 2094 jj = 0; 2095 for (i=0; i<m; i++) { 2096 for (j=0; j<ourlens[i]; j++) { 2097 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2098 jj++; 2099 } 2100 } 2101 2102 /* create our matrix */ 2103 for (i=0; i<m; i++) { 2104 ourlens[i] -= offlens[i]; 2105 } 2106 ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr); 2107 ierr = MatSetType(A,type);CHKERRQ(ierr); 2108 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2109 2110 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2111 for (i=0; i<m; i++) { 2112 ourlens[i] += offlens[i]; 2113 } 2114 2115 if (!rank) { 2116 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2117 2118 /* read in my part of the matrix numerical values */ 2119 nz = procsnz[0]; 2120 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2121 2122 /* insert into matrix */ 2123 jj = rstart; 2124 smycols = mycols; 2125 svals = vals; 2126 for (i=0; i<m; i++) { 2127 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2128 smycols += ourlens[i]; 2129 svals += ourlens[i]; 2130 jj++; 2131 } 2132 2133 /* read in other processors and ship out */ 2134 for (i=1; i<size; i++) { 2135 nz = procsnz[i]; 2136 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2137 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2138 } 2139 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2140 } else { 2141 /* receive numeric values */ 2142 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2143 2144 /* receive message of values*/ 2145 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2146 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2147 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2148 2149 /* insert into matrix */ 2150 jj = rstart; 2151 smycols = mycols; 2152 svals = vals; 2153 for (i=0; i<m; i++) { 2154 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2155 smycols += ourlens[i]; 2156 svals += ourlens[i]; 2157 jj++; 2158 } 2159 } 2160 ierr = PetscFree(ourlens);CHKERRQ(ierr); 2161 ierr = PetscFree(vals);CHKERRQ(ierr); 2162 ierr = PetscFree(mycols);CHKERRQ(ierr); 2163 ierr = PetscFree(rowners);CHKERRQ(ierr); 2164 2165 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2166 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2167 *newmat = A; 2168 PetscFunctionReturn(0); 2169 } 2170 2171 #undef __FUNCT__ 2172 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2173 /* 2174 Not great since it makes two copies of the submatrix, first an SeqAIJ 2175 in local and then by concatenating the local matrices the end result. 2176 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2177 */ 2178 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 2179 { 2180 PetscErrorCode ierr; 2181 PetscMPIInt rank,size; 2182 int i,m,n,rstart,row,rend,nz,*cwork,j; 2183 int *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2184 Mat *local,M,Mreuse; 2185 PetscScalar *vwork,*aa; 2186 MPI_Comm comm = mat->comm; 2187 Mat_SeqAIJ *aij; 2188 2189 2190 PetscFunctionBegin; 2191 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2192 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2193 2194 if (call == MAT_REUSE_MATRIX) { 2195 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2196 if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse"); 2197 local = &Mreuse; 2198 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2199 } else { 2200 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2201 Mreuse = *local; 2202 ierr = PetscFree(local);CHKERRQ(ierr); 2203 } 2204 2205 /* 2206 m - number of local rows 2207 n - number of columns (same on all processors) 2208 rstart - first row in new global matrix generated 2209 */ 2210 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2211 if (call == MAT_INITIAL_MATRIX) { 2212 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2213 ii = aij->i; 2214 jj = aij->j; 2215 2216 /* 2217 Determine the number of non-zeros in the diagonal and off-diagonal 2218 portions of the matrix in order to do correct preallocation 2219 */ 2220 2221 /* first get start and end of "diagonal" columns */ 2222 if (csize == PETSC_DECIDE) { 2223 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2224 if (mglobal == n) { /* square matrix */ 2225 nlocal = m; 2226 } else { 2227 nlocal = n/size + ((n % size) > rank); 2228 } 2229 } else { 2230 nlocal = csize; 2231 } 2232 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2233 rstart = rend - nlocal; 2234 if (rank == size - 1 && rend != n) { 2235 SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n); 2236 } 2237 2238 /* next, compute all the lengths */ 2239 ierr = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2240 olens = dlens + m; 2241 for (i=0; i<m; i++) { 2242 jend = ii[i+1] - ii[i]; 2243 olen = 0; 2244 dlen = 0; 2245 for (j=0; j<jend; j++) { 2246 if (*jj < rstart || *jj >= rend) olen++; 2247 else dlen++; 2248 jj++; 2249 } 2250 olens[i] = olen; 2251 dlens[i] = dlen; 2252 } 2253 ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr); 2254 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2255 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2256 ierr = PetscFree(dlens);CHKERRQ(ierr); 2257 } else { 2258 int ml,nl; 2259 2260 M = *newmat; 2261 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2262 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2263 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2264 /* 2265 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2266 rather than the slower MatSetValues(). 2267 */ 2268 M->was_assembled = PETSC_TRUE; 2269 M->assembled = PETSC_FALSE; 2270 } 2271 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2272 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2273 ii = aij->i; 2274 jj = aij->j; 2275 aa = aij->a; 2276 for (i=0; i<m; i++) { 2277 row = rstart + i; 2278 nz = ii[i+1] - ii[i]; 2279 cwork = jj; jj += nz; 2280 vwork = aa; aa += nz; 2281 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2282 } 2283 2284 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2285 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2286 *newmat = M; 2287 2288 /* save submatrix used in processor for next request */ 2289 if (call == MAT_INITIAL_MATRIX) { 2290 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2291 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2292 } 2293 2294 PetscFunctionReturn(0); 2295 } 2296 2297 #undef __FUNCT__ 2298 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2299 /*@C 2300 MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format 2301 (the default parallel PETSc format). For good matrix assembly performance 2302 the user should preallocate the matrix storage by setting the parameters 2303 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2304 performance can be increased by more than a factor of 50. 2305 2306 Collective on MPI_Comm 2307 2308 Input Parameters: 2309 + A - the matrix 2310 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2311 (same value is used for all local rows) 2312 . d_nnz - array containing the number of nonzeros in the various rows of the 2313 DIAGONAL portion of the local submatrix (possibly different for each row) 2314 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2315 The size of this array is equal to the number of local rows, i.e 'm'. 2316 You must leave room for the diagonal entry even if it is zero. 2317 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2318 submatrix (same value is used for all local rows). 2319 - o_nnz - array containing the number of nonzeros in the various rows of the 2320 OFF-DIAGONAL portion of the local submatrix (possibly different for 2321 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2322 structure. The size of this array is equal to the number 2323 of local rows, i.e 'm'. 2324 2325 The AIJ format (also called the Yale sparse matrix format or 2326 compressed row storage), is fully compatible with standard Fortran 77 2327 storage. That is, the stored row and column indices can begin at 2328 either one (as in Fortran) or zero. See the users manual for details. 2329 2330 The user MUST specify either the local or global matrix dimensions 2331 (possibly both). 2332 2333 The parallel matrix is partitioned such that the first m0 rows belong to 2334 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2335 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2336 2337 The DIAGONAL portion of the local submatrix of a processor can be defined 2338 as the submatrix which is obtained by extraction the part corresponding 2339 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2340 first row that belongs to the processor, and r2 is the last row belonging 2341 to the this processor. This is a square mxm matrix. The remaining portion 2342 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2343 2344 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2345 2346 By default, this format uses inodes (identical nodes) when possible. 2347 We search for consecutive rows with the same nonzero structure, thereby 2348 reusing matrix information to achieve increased efficiency. 2349 2350 Options Database Keys: 2351 + -mat_aij_no_inode - Do not use inodes 2352 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2353 - -mat_aij_oneindex - Internally use indexing starting at 1 2354 rather than 0. Note that when calling MatSetValues(), 2355 the user still MUST index entries starting at 0! 2356 2357 Example usage: 2358 2359 Consider the following 8x8 matrix with 34 non-zero values, that is 2360 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2361 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2362 as follows: 2363 2364 .vb 2365 1 2 0 | 0 3 0 | 0 4 2366 Proc0 0 5 6 | 7 0 0 | 8 0 2367 9 0 10 | 11 0 0 | 12 0 2368 ------------------------------------- 2369 13 0 14 | 15 16 17 | 0 0 2370 Proc1 0 18 0 | 19 20 21 | 0 0 2371 0 0 0 | 22 23 0 | 24 0 2372 ------------------------------------- 2373 Proc2 25 26 27 | 0 0 28 | 29 0 2374 30 0 0 | 31 32 33 | 0 34 2375 .ve 2376 2377 This can be represented as a collection of submatrices as: 2378 2379 .vb 2380 A B C 2381 D E F 2382 G H I 2383 .ve 2384 2385 Where the submatrices A,B,C are owned by proc0, D,E,F are 2386 owned by proc1, G,H,I are owned by proc2. 2387 2388 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2389 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2390 The 'M','N' parameters are 8,8, and have the same values on all procs. 2391 2392 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2393 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2394 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2395 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2396 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2397 matrix, ans [DF] as another SeqAIJ matrix. 2398 2399 When d_nz, o_nz parameters are specified, d_nz storage elements are 2400 allocated for every row of the local diagonal submatrix, and o_nz 2401 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2402 One way to choose d_nz and o_nz is to use the max nonzerors per local 2403 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2404 In this case, the values of d_nz,o_nz are: 2405 .vb 2406 proc0 : dnz = 2, o_nz = 2 2407 proc1 : dnz = 3, o_nz = 2 2408 proc2 : dnz = 1, o_nz = 4 2409 .ve 2410 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2411 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2412 for proc3. i.e we are using 12+15+10=37 storage locations to store 2413 34 values. 2414 2415 When d_nnz, o_nnz parameters are specified, the storage is specified 2416 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2417 In the above case the values for d_nnz,o_nnz are: 2418 .vb 2419 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2420 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2421 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2422 .ve 2423 Here the space allocated is sum of all the above values i.e 34, and 2424 hence pre-allocation is perfect. 2425 2426 Level: intermediate 2427 2428 .keywords: matrix, aij, compressed row, sparse, parallel 2429 2430 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2431 @*/ 2432 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 2433 { 2434 PetscErrorCode ierr,(*f)(Mat,int,const int[],int,const int[]); 2435 2436 PetscFunctionBegin; 2437 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2438 if (f) { 2439 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2440 } 2441 PetscFunctionReturn(0); 2442 } 2443 2444 #undef __FUNCT__ 2445 #define __FUNCT__ "MatCreateMPIAIJ" 2446 /*@C 2447 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2448 (the default parallel PETSc format). For good matrix assembly performance 2449 the user should preallocate the matrix storage by setting the parameters 2450 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2451 performance can be increased by more than a factor of 50. 2452 2453 Collective on MPI_Comm 2454 2455 Input Parameters: 2456 + comm - MPI communicator 2457 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2458 This value should be the same as the local size used in creating the 2459 y vector for the matrix-vector product y = Ax. 2460 . n - This value should be the same as the local size used in creating the 2461 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2462 calculated if N is given) For square matrices n is almost always m. 2463 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2464 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2465 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2466 (same value is used for all local rows) 2467 . d_nnz - array containing the number of nonzeros in the various rows of the 2468 DIAGONAL portion of the local submatrix (possibly different for each row) 2469 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2470 The size of this array is equal to the number of local rows, i.e 'm'. 2471 You must leave room for the diagonal entry even if it is zero. 2472 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2473 submatrix (same value is used for all local rows). 2474 - o_nnz - array containing the number of nonzeros in the various rows of the 2475 OFF-DIAGONAL portion of the local submatrix (possibly different for 2476 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2477 structure. The size of this array is equal to the number 2478 of local rows, i.e 'm'. 2479 2480 Output Parameter: 2481 . A - the matrix 2482 2483 Notes: 2484 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2485 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2486 storage requirements for this matrix. 2487 2488 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2489 processor than it must be used on all processors that share the object for 2490 that argument. 2491 2492 The AIJ format (also called the Yale sparse matrix format or 2493 compressed row storage), is fully compatible with standard Fortran 77 2494 storage. That is, the stored row and column indices can begin at 2495 either one (as in Fortran) or zero. See the users manual for details. 2496 2497 The user MUST specify either the local or global matrix dimensions 2498 (possibly both). 2499 2500 The parallel matrix is partitioned such that the first m0 rows belong to 2501 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2502 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2503 2504 The DIAGONAL portion of the local submatrix of a processor can be defined 2505 as the submatrix which is obtained by extraction the part corresponding 2506 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2507 first row that belongs to the processor, and r2 is the last row belonging 2508 to the this processor. This is a square mxm matrix. The remaining portion 2509 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2510 2511 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2512 2513 When calling this routine with a single process communicator, a matrix of 2514 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 2515 type of communicator, use the construction mechanism: 2516 MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); 2517 2518 By default, this format uses inodes (identical nodes) when possible. 2519 We search for consecutive rows with the same nonzero structure, thereby 2520 reusing matrix information to achieve increased efficiency. 2521 2522 Options Database Keys: 2523 + -mat_aij_no_inode - Do not use inodes 2524 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2525 - -mat_aij_oneindex - Internally use indexing starting at 1 2526 rather than 0. Note that when calling MatSetValues(), 2527 the user still MUST index entries starting at 0! 2528 2529 2530 Example usage: 2531 2532 Consider the following 8x8 matrix with 34 non-zero values, that is 2533 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2534 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2535 as follows: 2536 2537 .vb 2538 1 2 0 | 0 3 0 | 0 4 2539 Proc0 0 5 6 | 7 0 0 | 8 0 2540 9 0 10 | 11 0 0 | 12 0 2541 ------------------------------------- 2542 13 0 14 | 15 16 17 | 0 0 2543 Proc1 0 18 0 | 19 20 21 | 0 0 2544 0 0 0 | 22 23 0 | 24 0 2545 ------------------------------------- 2546 Proc2 25 26 27 | 0 0 28 | 29 0 2547 30 0 0 | 31 32 33 | 0 34 2548 .ve 2549 2550 This can be represented as a collection of submatrices as: 2551 2552 .vb 2553 A B C 2554 D E F 2555 G H I 2556 .ve 2557 2558 Where the submatrices A,B,C are owned by proc0, D,E,F are 2559 owned by proc1, G,H,I are owned by proc2. 2560 2561 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2562 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2563 The 'M','N' parameters are 8,8, and have the same values on all procs. 2564 2565 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2566 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2567 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2568 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2569 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2570 matrix, ans [DF] as another SeqAIJ matrix. 2571 2572 When d_nz, o_nz parameters are specified, d_nz storage elements are 2573 allocated for every row of the local diagonal submatrix, and o_nz 2574 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2575 One way to choose d_nz and o_nz is to use the max nonzerors per local 2576 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2577 In this case, the values of d_nz,o_nz are: 2578 .vb 2579 proc0 : dnz = 2, o_nz = 2 2580 proc1 : dnz = 3, o_nz = 2 2581 proc2 : dnz = 1, o_nz = 4 2582 .ve 2583 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2584 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2585 for proc3. i.e we are using 12+15+10=37 storage locations to store 2586 34 values. 2587 2588 When d_nnz, o_nnz parameters are specified, the storage is specified 2589 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2590 In the above case the values for d_nnz,o_nnz are: 2591 .vb 2592 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2593 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2594 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2595 .ve 2596 Here the space allocated is sum of all the above values i.e 34, and 2597 hence pre-allocation is perfect. 2598 2599 Level: intermediate 2600 2601 .keywords: matrix, aij, compressed row, sparse, parallel 2602 2603 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2604 @*/ 2605 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A) 2606 { 2607 PetscErrorCode ierr; 2608 int size; 2609 2610 PetscFunctionBegin; 2611 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2612 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2613 if (size > 1) { 2614 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2615 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2616 } else { 2617 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2618 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2619 } 2620 PetscFunctionReturn(0); 2621 } 2622 2623 #undef __FUNCT__ 2624 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2625 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[]) 2626 { 2627 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2628 PetscFunctionBegin; 2629 *Ad = a->A; 2630 *Ao = a->B; 2631 *colmap = a->garray; 2632 PetscFunctionReturn(0); 2633 } 2634 2635 #undef __FUNCT__ 2636 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2637 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2638 { 2639 PetscErrorCode ierr; 2640 int i; 2641 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2642 2643 PetscFunctionBegin; 2644 if (coloring->ctype == IS_COLORING_LOCAL) { 2645 ISColoringValue *allcolors,*colors; 2646 ISColoring ocoloring; 2647 2648 /* set coloring for diagonal portion */ 2649 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2650 2651 /* set coloring for off-diagonal portion */ 2652 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2653 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2654 for (i=0; i<a->B->n; i++) { 2655 colors[i] = allcolors[a->garray[i]]; 2656 } 2657 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2658 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2659 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2660 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2661 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2662 ISColoringValue *colors; 2663 int *larray; 2664 ISColoring ocoloring; 2665 2666 /* set coloring for diagonal portion */ 2667 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2668 for (i=0; i<a->A->n; i++) { 2669 larray[i] = i + a->cstart; 2670 } 2671 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2672 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2673 for (i=0; i<a->A->n; i++) { 2674 colors[i] = coloring->colors[larray[i]]; 2675 } 2676 ierr = PetscFree(larray);CHKERRQ(ierr); 2677 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2678 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2679 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2680 2681 /* set coloring for off-diagonal portion */ 2682 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2683 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2684 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2685 for (i=0; i<a->B->n; i++) { 2686 colors[i] = coloring->colors[larray[i]]; 2687 } 2688 ierr = PetscFree(larray);CHKERRQ(ierr); 2689 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2690 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2691 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2692 } else { 2693 SETERRQ1(1,"No support ISColoringType %d",coloring->ctype); 2694 } 2695 2696 PetscFunctionReturn(0); 2697 } 2698 2699 #undef __FUNCT__ 2700 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2701 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2702 { 2703 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2704 PetscErrorCode ierr; 2705 2706 PetscFunctionBegin; 2707 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2708 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2709 PetscFunctionReturn(0); 2710 } 2711 2712 #undef __FUNCT__ 2713 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2714 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues) 2715 { 2716 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2717 PetscErrorCode ierr; 2718 2719 PetscFunctionBegin; 2720 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2721 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2722 PetscFunctionReturn(0); 2723 } 2724 2725 #undef __FUNCT__ 2726 #define __FUNCT__ "MatMerge" 2727 /*@C 2728 MatMerge - Creates a single large PETSc matrix by concatinating sequential 2729 matrices from each processor 2730 2731 Collective on MPI_Comm 2732 2733 Input Parameters: 2734 + comm - the communicators the parallel matrix will live on 2735 . inmat - the input sequential matrices 2736 . n - number of local columns (or PETSC_DECIDE) 2737 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2738 2739 Output Parameter: 2740 . outmat - the parallel matrix generated 2741 2742 Level: advanced 2743 2744 Notes: The number of columns of the matrix in EACH processor MUST be the same. 2745 2746 @*/ 2747 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2748 { 2749 PetscErrorCode ierr; 2750 int m,N,i,rstart,nnz,I,*dnz,*onz; 2751 const int *indx; 2752 const PetscScalar *values; 2753 PetscMap columnmap,rowmap; 2754 2755 PetscFunctionBegin; 2756 2757 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2758 if (scall == MAT_INITIAL_MATRIX){ 2759 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2760 if (n == PETSC_DECIDE){ 2761 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2762 ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr); 2763 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2764 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2765 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2766 } 2767 2768 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2769 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2770 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2771 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2772 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2773 2774 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2775 for (i=0;i<m;i++) { 2776 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2777 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2778 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2779 } 2780 /* This routine will ONLY return MPIAIJ type matrix */ 2781 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr); 2782 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2783 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2784 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2785 2786 } else if (scall == MAT_REUSE_MATRIX){ 2787 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 2788 } else { 2789 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall); 2790 } 2791 2792 for (i=0;i<m;i++) { 2793 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2794 I = i + rstart; 2795 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2796 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2797 } 2798 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2799 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2800 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2801 2802 PetscFunctionReturn(0); 2803 } 2804 2805 #undef __FUNCT__ 2806 #define __FUNCT__ "MatFileSplit" 2807 PetscErrorCode MatFileSplit(Mat A,char *outfile) 2808 { 2809 PetscErrorCode ierr; 2810 PetscMPIInt rank; 2811 int m,N,i,rstart,nnz; 2812 size_t len; 2813 const int *indx; 2814 PetscViewer out; 2815 char *name; 2816 Mat B; 2817 const PetscScalar *values; 2818 2819 PetscFunctionBegin; 2820 2821 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2822 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2823 /* Should this be the type of the diagonal block of A? */ 2824 ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr); 2825 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2826 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 2827 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2828 for (i=0;i<m;i++) { 2829 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2830 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2831 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2832 } 2833 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2834 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2835 2836 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2837 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2838 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2839 sprintf(name,"%s.%d",outfile,rank); 2840 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr); 2841 ierr = PetscFree(name); 2842 ierr = MatView(B,out);CHKERRQ(ierr); 2843 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2844 ierr = MatDestroy(B);CHKERRQ(ierr); 2845 PetscFunctionReturn(0); 2846 } 2847 2848 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 2849 #undef __FUNCT__ 2850 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 2851 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 2852 { 2853 PetscErrorCode ierr; 2854 Mat_Merge_SeqsToMPI *merge; 2855 PetscObjectContainer container; 2856 2857 PetscFunctionBegin; 2858 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2859 if (container) { 2860 ierr = PetscObjectContainerGetPointer(container,(void *)&merge);CHKERRQ(ierr); 2861 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 2862 ierr = PetscFree(merge->len_sra);CHKERRQ(ierr); 2863 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 2864 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 2865 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 2866 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 2867 ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr); 2868 ierr = MatDestroy(merge->C_seq);CHKERRQ(ierr); 2869 2870 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 2871 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 2872 } 2873 ierr = PetscFree(merge);CHKERRQ(ierr); 2874 2875 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 2876 PetscFunctionReturn(0); 2877 } 2878 2879 #include "src/mat/utils/freespace.h" 2880 #include "petscbt.h" 2881 #undef __FUNCT__ 2882 #define __FUNCT__ "MatMerge_SeqsToMPI" 2883 /*@C 2884 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 2885 matrices from each processor 2886 2887 Collective on MPI_Comm 2888 2889 Input Parameters: 2890 + comm - the communicators the parallel matrix will live on 2891 . seqmat - the input sequential matrices 2892 . m - number of local rows (or PETSC_DECIDE) 2893 . n - number of local columns (or PETSC_DECIDE) 2894 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2895 2896 Output Parameter: 2897 . mpimat - the parallel matrix generated 2898 2899 Level: advanced 2900 2901 Notes: 2902 The dimensions of the sequential matrix in each processor MUST be the same. 2903 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 2904 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 2905 @*/ 2906 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 2907 { 2908 PetscErrorCode ierr; 2909 Mat B_seq,B_mpi; 2910 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 2911 PetscMPIInt size,rank; 2912 int M=seqmat->m,N=seqmat->n,i,j,*owners,*ai=a->i,*aj=a->j; 2913 int tag,taga,len,len_a = 0,*len_s,*len_sa,proc,*len_r,*len_ra; 2914 int tagi,tagj,*len_si,*len_ri,*len_rj,*id_r,**buf_ri,**buf_rj; /* new! */ 2915 int **ijbuf_r,*ijbuf_s,*nnz_ptr,k,anzi,*bj_i,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0,nextaj; 2916 MPI_Request *s_waits,*r_waits,*si_waits,*sj_waits,*ri_waits,*rj_waits,*s_waitsa,*r_waitsa; 2917 MPI_Status *status; 2918 MatScalar *ba,*aa=a->a,**abuf_r,*abuf_i,*ba_i; 2919 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2920 PetscBT lnkbt; 2921 Mat_Merge_SeqsToMPI *merge; 2922 PetscObjectContainer container; 2923 2924 2925 PetscFunctionBegin; 2926 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2927 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2928 /* ierr = PetscPrintf(PETSC_COMM_SELF,"[%d] nnz(C): %d\n",rank,ai[M]); */ 2929 if (scall == MAT_INITIAL_MATRIX){ 2930 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 2931 } else if (scall == MAT_REUSE_MATRIX){ 2932 B_mpi = *mpimat; 2933 ierr = PetscObjectQuery((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2934 if (container) { 2935 ierr = PetscObjectContainerGetPointer(container,(void *)&merge);CHKERRQ(ierr); 2936 } 2937 bi = merge->bi; 2938 bj = merge->bj; 2939 buf_ri = merge->buf_ri; 2940 buf_rj = merge->buf_rj; 2941 } else { 2942 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall); 2943 } 2944 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 2945 2946 /* determine the number of messages to send, their lengths */ 2947 /*---------------------------------------------------------*/ 2948 if (scall == MAT_INITIAL_MATRIX){ 2949 ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr); 2950 if (m == PETSC_DECIDE) { 2951 ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr); 2952 } else { 2953 ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 2954 } 2955 ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr); 2956 ierr = PetscMalloc(2*size*sizeof(int),&len_s);CHKERRQ(ierr); 2957 len_si = len_s + size; 2958 ierr = PetscMalloc(2*size*sizeof(int),&merge->len_sra);CHKERRQ(ierr); 2959 } 2960 if (m == PETSC_DECIDE) ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); 2961 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 2962 2963 len_sa = merge->len_sra; 2964 len_ra = merge->len_sra + size; 2965 2966 if (scall == MAT_INITIAL_MATRIX){ 2967 merge->nsend = 0; 2968 len = len_a = 0; 2969 for (proc=0; proc<size; proc++){ 2970 len_si[proc] = 0; /* new */ 2971 len_s[proc] = len_sa[proc] = 0; 2972 if (proc == rank) continue; 2973 len_si[proc] = owners[proc+1] - owners[proc] + 1; 2974 len_sa[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* rows sent to [proc] */ 2975 2976 if (len_sa[proc]) { 2977 merge->nsend++; 2978 len_s[proc] = len_sa[proc] + owners[proc+1] - owners[proc]; 2979 if (len < len_s[proc]) len = len_s[proc]; 2980 if (len_a < len_sa[proc]) len_a = len_sa[proc]; 2981 /* 2982 ierr = PetscPrintf(PETSC_COMM_SELF," [%d] send len_si=%d to [%d]\n",rank,len_si[proc],proc); 2983 */ 2984 } 2985 } 2986 2987 /* determine the number and length of messages to receive */ 2988 /*--------------------------------------------------------*/ 2989 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 2990 ierr = PetscGatherMessageLengths(comm,merge->nsend,merge->nrecv,len_s,&merge->id_r,&len_r);CHKERRQ(ierr); /* rm! */ 2991 ierr = PetscFree(merge->id_r); /* rm! */ 2992 ierr = PetscGatherMessageLengths(comm,merge->nsend,merge->nrecv,len_sa,&merge->id_r,&len_rj);CHKERRQ(ierr); 2993 2994 ierr = PetscMalloc(size*sizeof(int),&len_ri);CHKERRQ(ierr); 2995 ierr = PetscMemzero(len_ri,size*sizeof(int));CHKERRQ(ierr); 2996 2997 for (i=0; i<merge->nrecv; i++){ 2998 proc = merge->id_r[i]; 2999 len_ri[i] = owners[rank+1] - owners[rank] + 1; 3000 /* 3001 ierr = PetscPrintf(PETSC_COMM_SELF," [%d] expects recv len_ri=%d len_rj=%d len_r=%d from [%d]\n",rank,len_ri[i],len_rj[i],len_r[i],proc); */ 3002 } 3003 3004 ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 3005 for (i=0; i<merge->nrecv; i++){ 3006 ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: expects recv len_r=%d from [%d]\n",len_r[i],merge->id_r[i]);CHKERRQ(ierr); 3007 } 3008 3009 /* post the Irecvs corresponding to these messages */ 3010 /*-------------------------------------------------*/ 3011 /* get new tag to keep the communication clean */ 3012 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tag);CHKERRQ(ierr); 3013 ierr = PetscPostIrecvInt(comm,tag,merge->nrecv,merge->id_r,len_r,&ijbuf_r,&r_waits);CHKERRQ(ierr); 3014 3015 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr); 3016 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,len_rj,&buf_rj,&rj_waits);CHKERRQ(ierr); 3017 3018 /* post the sends of ij-structure */ 3019 /*--------------------------------*/ 3020 ierr = PetscMalloc((3*merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 3021 si_waits = s_waits + merge->nsend; 3022 sj_waits = si_waits + merge->nsend; 3023 ierr = PetscMalloc((len+1)*sizeof(int),&ijbuf_s);CHKERRQ(ierr); 3024 k = 0; 3025 for (proc=0; proc<size; proc++){ 3026 if (!len_s[proc]) continue; 3027 /* form outgoing ij data to be sent to [proc] */ 3028 nnz_ptr = ijbuf_s + owners[proc+1] - owners[proc]; 3029 for (i=owners[proc]; i<owners[proc+1]; i++){ /* rows sent to [proc] */ 3030 anzi = ai[i+1] - ai[i]; 3031 if (i==owners[proc]){ 3032 ijbuf_s[i-owners[proc]] = anzi; 3033 } else { 3034 ijbuf_s[i-owners[proc]] = ijbuf_s[i-owners[proc]-1]+ anzi; 3035 } 3036 for (j=0; j <anzi; j++){ 3037 *nnz_ptr = *(aj+ai[i]+j); nnz_ptr++; 3038 } 3039 } 3040 ierr = MPI_Isend(ijbuf_s,len_s[proc],MPI_INT,proc,tag,comm,s_waits+k);CHKERRQ(ierr); 3041 i = owners[proc]; 3042 ierr = MPI_Isend(aj+ai[i],len_sa[proc],MPI_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3043 k++; 3044 } 3045 3046 /* receives and sends of ij-structure are complete, deallocate memories */ 3047 /*----------------------------------------------------------------------*/ 3048 ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr); 3049 ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr); 3050 3051 ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr); 3052 ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr); 3053 3054 /* send and recv i-structure */ 3055 /*---------------------------*/ 3056 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr); 3057 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3058 3059 k = 0; 3060 for (proc=0; proc<size; proc++){ 3061 if (!len_s[proc]) continue; 3062 i = owners[proc]; 3063 /* ierr = PetscPrintf(PETSC_COMM_SELF," [%d] send %d i-struct to [%d]\n",rank,len_si[proc],proc); */ 3064 ierr = MPI_Isend(ai+i,len_si[proc],MPI_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3065 k++; 3066 } 3067 ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr); 3068 /* ierr = PetscPrintf(PETSC_COMM_SELF," [%d] recv i-struct done\n",rank); */ 3069 ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr); 3070 3071 ierr = PetscFree(len_ri);CHKERRQ(ierr); 3072 ierr = PetscFree(len_rj);CHKERRQ(ierr); 3073 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 3074 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 3075 3076 ierr = PetscFree(ijbuf_s);CHKERRQ(ierr); 3077 ierr = PetscFree(s_waits);CHKERRQ(ierr); 3078 ierr = PetscFree(r_waits);CHKERRQ(ierr); 3079 3080 /* create seq matrix B_seq in each processor */ 3081 /*-------------------------------------------*/ 3082 ierr = PetscFree(len_s);CHKERRQ(ierr); 3083 /* allocate bi array and free space for accumulating nonzero column info */ 3084 ierr = PetscMalloc((m+1)*sizeof(int),&bi);CHKERRQ(ierr); 3085 bi[0] = 0; 3086 3087 /* create and initialize a linked list */ 3088 nlnk = N+1; 3089 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3090 3091 /* initial FreeSpace size is (nproc)*(num of local nnz(seqmat)) */ 3092 len = 0; 3093 for (i=owners[rank]; i<owners[rank+1]; i++) len += ai[i+1] - ai[i]; 3094 ierr = GetMoreSpace((int)(size*len+1),&free_space);CHKERRQ(ierr); 3095 current_space = free_space; 3096 3097 /* determine symbolic info for each row of B_seq */ 3098 for (i=0;i<m;i++) { 3099 bnzi = 0; 3100 /* add local non-zero cols of this proc's seqmat into lnk */ 3101 arow = owners[rank] + i; 3102 anzi = ai[arow+1] - ai[arow]; 3103 aj = a->j + ai[arow]; 3104 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3105 bnzi += nlnk; 3106 /* add received col data into lnk */ 3107 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3108 /* i-th row */ 3109 anzi = *(buf_ri[k]+i+1) - *(buf_ri[k]+i); 3110 aj = buf_rj[k] + (*(buf_ri[k]+i) - *(buf_ri[k])); 3111 #ifdef OLD 3112 if (i == 0){ 3113 /* anzi = *(ijbuf_r[k]+i); */ 3114 aj = ijbuf_r[k] + m; 3115 } else { 3116 /* anzi = *(ijbuf_r[k]+i) - *(ijbuf_r[k]+i-1); */ 3117 aj = ijbuf_r[k]+m + *(ijbuf_r[k]+i-1); 3118 } 3119 #endif 3120 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3121 bnzi += nlnk; 3122 } 3123 3124 /* if free space is not available, make more free space */ 3125 if (current_space->local_remaining<bnzi) { 3126 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 3127 nspacedouble++; 3128 } 3129 /* copy data into free space, then initialize lnk */ 3130 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3131 current_space->array += bnzi; 3132 current_space->local_used += bnzi; 3133 current_space->local_remaining -= bnzi; 3134 3135 bi[i+1] = bi[i] + bnzi; 3136 } 3137 ierr = PetscMalloc((bi[m]+1)*sizeof(int),&bj);CHKERRQ(ierr); 3138 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3139 3140 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3141 3142 /* allocate space for ba */ 3143 ierr = PetscMalloc((bi[m]+1)*sizeof(MatScalar),&ba);CHKERRQ(ierr); 3144 ierr = PetscMemzero(ba,(bi[m]+1)*sizeof(MatScalar));CHKERRQ(ierr); 3145 3146 /* put together B_seq */ 3147 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,bi,bj,ba,&B_seq);CHKERRQ(ierr); 3148 ierr = PetscFree(ba);CHKERRQ(ierr); /* bi and bj are saved for reuse */ 3149 3150 /* create symbolic parallel matrix B_mpi by concatinating B_seq */ 3151 /*--------------------------------------------------------------*/ 3152 ierr = MatMerge(comm,B_seq,n,scall,&B_mpi);CHKERRQ(ierr); 3153 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3154 merge->bi = bi; 3155 merge->bj = bj; 3156 merge->buf_ri = buf_ri; 3157 merge->buf_rj = buf_rj; 3158 merge->C_seq = seqmat; 3159 3160 /* attach the supporting struct to B_mpi for reuse */ 3161 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 3162 ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr); 3163 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 3164 *mpimat = B_mpi; 3165 } /* if (scall == MAT_INITIAL_MATRIX) */ 3166 3167 /* send and recv matrix values */ 3168 /*-----------------------------*/ 3169 if (scall == MAT_INITIAL_MATRIX){ 3170 for (i=0; i<merge->nrecv; i++) len_ra[i] = len_r[i]-m; /* length of seqmat->a to be received from a proc */ 3171 ierr = PetscFree(len_r);CHKERRQ(ierr); 3172 } 3173 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr); 3174 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,len_ra,&abuf_r,&r_waitsa);CHKERRQ(ierr); 3175 3176 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waitsa);CHKERRQ(ierr); 3177 k = 0; 3178 for (proc=0; proc<size; proc++){ 3179 if (!len_sa[proc]) continue; 3180 i = owners[proc]; 3181 ierr = MPI_Isend(aa+ai[i],len_sa[proc],MPIU_MATSCALAR,proc,taga,comm,s_waitsa+k);CHKERRQ(ierr); 3182 k++; 3183 } 3184 3185 ierr = MPI_Waitall(merge->nrecv,r_waitsa,status);CHKERRQ(ierr); 3186 ierr = MPI_Waitall(merge->nsend,s_waitsa,status);CHKERRQ(ierr); 3187 ierr = PetscFree(status);CHKERRQ(ierr); 3188 3189 ierr = PetscFree(s_waitsa);CHKERRQ(ierr); 3190 ierr = PetscFree(r_waitsa);CHKERRQ(ierr); 3191 3192 /* insert mat values of B_mpi */ 3193 /*----------------------------*/ 3194 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 3195 3196 /* set values of ba */ 3197 for (i=0; i<m; i++) { 3198 arow = owners[rank] + i; 3199 bj_i = bj+bi[i]; /* col indices of the i-th row of B_seq */ 3200 bnzi = bi[i+1] - bi[i]; 3201 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 3202 3203 /* add local non-zero vals of this proc's seqmat into ba */ 3204 anzi = ai[arow+1] - ai[arow]; 3205 aj = a->j + ai[arow]; 3206 aa = a->a + ai[arow]; 3207 nextaj = 0; 3208 for (j=0; nextaj<anzi; j++){ 3209 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3210 ba_i[j] += aa[nextaj++]; 3211 } 3212 } 3213 3214 /* add received vals into ba */ 3215 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3216 /* i-th row */ 3217 anzi = *(buf_ri[k]+i+1) - *(buf_ri[k]+i); 3218 aj = buf_rj[k] + (*(buf_ri[k]+i) - *(buf_ri[k])); 3219 aa = abuf_r[k] + (*(buf_ri[k]+i) - *(buf_ri[k])); 3220 #ifdef OLD 3221 if (i == 0){ 3222 /* anzi = *(ijbuf_r[k]+i); */ 3223 /* aj = ijbuf_r[k] + m; */ 3224 aa = abuf_r[k]; 3225 } else { 3226 /* anzi = *(ijbuf_r[k]+i) - *(ijbuf_r[k]+i-1); */ 3227 /* aj = ijbuf_r[k]+m + *(ijbuf_r[k]+i-1); */ 3228 aa = abuf_r[k] + *(ijbuf_r[k]+i-1); 3229 } 3230 #endif 3231 nextaj = 0; 3232 for (j=0; nextaj<anzi; j++){ 3233 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3234 ba_i[j] += aa[nextaj++]; 3235 } 3236 } 3237 } 3238 3239 ierr = MatSetValues(B_mpi,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3240 } 3241 ierr = MatAssemblyBegin(B_mpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3242 ierr = MatAssemblyEnd(B_mpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3243 3244 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3245 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3246 if (scall == MAT_INITIAL_MATRIX){ 3247 ierr = PetscFree(ijbuf_r); /* rm! */ 3248 } 3249 PetscFunctionReturn(0); 3250 } 3251 3252 #undef __FUNCT__ 3253 #define __FUNCT__ "MatGetLocalMat" 3254 /*@C 3255 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 3256 3257 Collective on Mat 3258 3259 Input Parameters: 3260 + A - the matrix 3261 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3262 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 3263 3264 Output Parameter: 3265 . A_loc - the local sequential matrix generated 3266 3267 Level: developer 3268 3269 @*/ 3270 PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 3271 { 3272 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3273 PetscErrorCode ierr; 3274 int *idx,i,start,end,ncols,nzA,nzB,*cmap,imark; 3275 IS isrowa,iscola; 3276 Mat *aloc; 3277 3278 PetscFunctionBegin; 3279 if (!row){ 3280 start = a->rstart; end = a->rend; 3281 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 3282 } else { 3283 isrowa = *row; 3284 } 3285 if (!col){ 3286 start = a->cstart; 3287 cmap = a->garray; 3288 nzA = a->A->n; 3289 nzB = a->B->n; 3290 ierr = PetscMalloc((nzA+nzB)*sizeof(int), &idx);CHKERRQ(ierr); 3291 ncols = 0; 3292 for (i=0; i<nzB; i++) { 3293 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3294 else break; 3295 } 3296 imark = i; 3297 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 3298 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 3299 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 3300 ierr = PetscFree(idx);CHKERRQ(ierr); 3301 } else { 3302 iscola = *col; 3303 } 3304 if (scall != MAT_INITIAL_MATRIX){ 3305 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 3306 aloc[0] = *A_loc; 3307 } 3308 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 3309 *A_loc = aloc[0]; 3310 ierr = PetscFree(aloc);CHKERRQ(ierr); 3311 if (!row){ 3312 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 3313 } 3314 if (!col){ 3315 ierr = ISDestroy(iscola);CHKERRQ(ierr); 3316 } 3317 PetscFunctionReturn(0); 3318 } 3319 3320 #undef __FUNCT__ 3321 #define __FUNCT__ "MatGetBrowsOfAcols" 3322 /*@C 3323 MatGetLocalMat - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero col of A 3324 3325 Collective on Mat 3326 3327 Input Parameters: 3328 + A,B - the matrices 3329 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3330 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 3331 3332 Output Parameter: 3333 + rowb, colb - index sets of rows and columns of B to extract 3334 . brstart - row index of B_seq from which next B->m rows are taken from B's local rows 3335 - B_seq - the sequential matrix generated 3336 3337 Level: developer 3338 3339 @*/ 3340 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 3341 { 3342 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3343 PetscErrorCode ierr; 3344 int *idx,i,start,ncols,nzA,nzB,*cmap,imark; 3345 IS isrowb,iscolb; 3346 Mat *bseq; 3347 3348 PetscFunctionBegin; 3349 if (a->cstart != b->rstart || a->cend != b->rend){ 3350 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend); 3351 } 3352 3353 if (scall == MAT_INITIAL_MATRIX){ 3354 start = a->cstart; 3355 cmap = a->garray; 3356 nzA = a->A->n; 3357 nzB = a->B->n; 3358 ierr = PetscMalloc((nzA+nzB)*sizeof(int), &idx);CHKERRQ(ierr); 3359 ncols = 0; 3360 for (i=0; i<nzB; i++) { 3361 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3362 else break; 3363 } 3364 imark = i; 3365 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 3366 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 3367 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 3368 ierr = PetscFree(idx);CHKERRQ(ierr); 3369 *brstart = imark; 3370 ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr); 3371 } else { 3372 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 3373 isrowb = *rowb; iscolb = *colb; 3374 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 3375 bseq[0] = *B_seq; 3376 } 3377 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 3378 *B_seq = bseq[0]; 3379 ierr = PetscFree(bseq);CHKERRQ(ierr); 3380 if (!rowb){ 3381 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 3382 } else { 3383 *rowb = isrowb; 3384 } 3385 if (!colb){ 3386 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 3387 } else { 3388 *colb = iscolb; 3389 } 3390 3391 PetscFunctionReturn(0); 3392 } 3393