1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/mpi/mpiaij.h" 4 #include "src/inline/spops.h" 5 6 /* 7 Local utility routine that creates a mapping from the global column 8 number to the local number in the off-diagonal part of the local 9 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 10 a slightly higher hash table cost; without it it is not scalable (each processor 11 has an order N integer array but is fast to acess. 12 */ 13 #undef __FUNCT__ 14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 15 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 16 { 17 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 18 PetscErrorCode ierr; 19 PetscInt n = aij->B->n,i; 20 21 PetscFunctionBegin; 22 #if defined (PETSC_USE_CTABLE) 23 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 24 for (i=0; i<n; i++){ 25 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 26 } 27 #else 28 ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 29 ierr = PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));CHKERRQ(ierr); 30 ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr); 31 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 32 #endif 33 PetscFunctionReturn(0); 34 } 35 36 37 #define CHUNKSIZE 15 38 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 39 { \ 40 if (lastcol1 > col) low1 = 0; else high1 = nrow1; \ 41 lastcol1 = col;\ 42 while (high1-low1 > 5) { \ 43 t = (low1+high1)/2; \ 44 if (rp1[t] > col) high1 = t; \ 45 else low1 = t; \ 46 } \ 47 for (_i=low1; _i<high1; _i++) { \ 48 if (rp1[_i] > col) break; \ 49 if (rp1[_i] == col) { \ 50 if (addv == ADD_VALUES) ap1[_i] += value; \ 51 else ap1[_i] = value; \ 52 goto a_noinsert; \ 53 } \ 54 } \ 55 if (value == 0.0 && ignorezeroentries) goto a_noinsert; \ 56 if (nonew == 1) goto a_noinsert; \ 57 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 58 MatSeqXAIJReallocateAIJ(a,1,nrow1,row,rmax1,aa,ai,aj,am,rp1,ap1,aimax);\ 59 N = nrow1++ - 1; a->nz++; \ 60 /* shift up all the later entries in this row */ \ 61 for (ii=N; ii>=_i; ii--) { \ 62 rp1[ii+1] = rp1[ii]; \ 63 ap1[ii+1] = ap1[ii]; \ 64 } \ 65 rp1[_i] = col; \ 66 ap1[_i] = value; \ 67 a_noinsert: ; \ 68 ailen[row] = nrow1; \ 69 } 70 71 72 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 73 { \ 74 if (lastcol2 > col) low2 = 0; else high2 = nrow2; \ 75 lastcol2 = col;\ 76 while (high2-low2 > 5) { \ 77 t = (low2+high2)/2; \ 78 if (rp2[t] > col) high2 = t; \ 79 else low2 = t; \ 80 } \ 81 for (_i=low2; _i<high2; _i++) { \ 82 if (rp2[_i] > col) break; \ 83 if (rp2[_i] == col) { \ 84 if (addv == ADD_VALUES) ap2[_i] += value; \ 85 else ap2[_i] = value; \ 86 goto b_noinsert; \ 87 } \ 88 } \ 89 if (value == 0.0 && ignorezeroentries) goto b_noinsert; \ 90 if (nonew == 1) goto b_noinsert; \ 91 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 92 MatSeqXAIJReallocateAIJ(b,1,nrow2,row,rmax2,ba,bi,bj,bm,rp2,ap2,bimax);\ 93 N = nrow2++ - 1; b->nz++; \ 94 /* shift up all the later entries in this row */ \ 95 for (ii=N; ii>=_i; ii--) { \ 96 rp2[ii+1] = rp2[ii]; \ 97 ap2[ii+1] = ap2[ii]; \ 98 } \ 99 rp2[_i] = col; \ 100 ap2[_i] = value; \ 101 b_noinsert: ; \ 102 bilen[row] = nrow2; \ 103 } 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "MatSetValues_MPIAIJ" 107 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 108 { 109 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 110 PetscScalar value; 111 PetscErrorCode ierr; 112 PetscInt i,j,rstart = aij->rstart,rend = aij->rend; 113 PetscInt cstart = aij->cstart,cend = aij->cend,row,col; 114 PetscTruth roworiented = aij->roworiented; 115 116 /* Some Variables required in the macro */ 117 Mat A = aij->A; 118 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 119 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 120 PetscScalar *aa = a->a; 121 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 122 Mat B = aij->B; 123 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 124 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 125 PetscScalar *ba = b->a; 126 127 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 128 PetscInt nonew = a->nonew; 129 PetscScalar *ap1,*ap2; 130 131 PetscFunctionBegin; 132 for (i=0; i<m; i++) { 133 if (im[i] < 0) continue; 134 #if defined(PETSC_USE_DEBUG) 135 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 136 #endif 137 if (im[i] >= rstart && im[i] < rend) { 138 row = im[i] - rstart; 139 lastcol1 = -1; 140 rp1 = aj + ai[row]; 141 ap1 = aa + ai[row]; 142 rmax1 = aimax[row]; 143 nrow1 = ailen[row]; 144 low1 = 0; 145 high1 = nrow1; 146 lastcol2 = -1; 147 rp2 = bj + bi[row]; 148 ap2 = ba + bi[row]; 149 rmax2 = bimax[row]; 150 nrow2 = bilen[row]; 151 low2 = 0; 152 high2 = nrow2; 153 154 for (j=0; j<n; j++) { 155 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 156 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 157 if (in[j] >= cstart && in[j] < cend){ 158 col = in[j] - cstart; 159 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 160 } else if (in[j] < 0) continue; 161 #if defined(PETSC_USE_DEBUG) 162 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);} 163 #endif 164 else { 165 if (mat->was_assembled) { 166 if (!aij->colmap) { 167 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 168 } 169 #if defined (PETSC_USE_CTABLE) 170 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 171 col--; 172 #else 173 col = aij->colmap[in[j]] - 1; 174 #endif 175 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 176 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 177 col = in[j]; 178 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 179 B = aij->B; 180 b = (Mat_SeqAIJ*)B->data; 181 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 182 rp2 = bj + bi[row]; 183 ap2 = ba + bi[row]; 184 rmax2 = bimax[row]; 185 nrow2 = bilen[row]; 186 low2 = 0; 187 high2 = nrow2; 188 bm = aij->B->m; 189 ba = b->a; 190 } 191 } else col = in[j]; 192 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 193 } 194 } 195 } else { 196 if (!aij->donotstash) { 197 if (roworiented) { 198 if (ignorezeroentries && v[i*n] == 0.0) continue; 199 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 200 } else { 201 if (ignorezeroentries && v[i] == 0.0) continue; 202 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 203 } 204 } 205 } 206 } 207 PetscFunctionReturn(0); 208 } 209 210 211 #undef __FUNCT__ 212 #define __FUNCT__ "MatGetValues_MPIAIJ" 213 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 214 { 215 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 216 PetscErrorCode ierr; 217 PetscInt i,j,rstart = aij->rstart,rend = aij->rend; 218 PetscInt cstart = aij->cstart,cend = aij->cend,row,col; 219 220 PetscFunctionBegin; 221 for (i=0; i<m; i++) { 222 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 223 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1); 224 if (idxm[i] >= rstart && idxm[i] < rend) { 225 row = idxm[i] - rstart; 226 for (j=0; j<n; j++) { 227 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 228 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1); 229 if (idxn[j] >= cstart && idxn[j] < cend){ 230 col = idxn[j] - cstart; 231 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 232 } else { 233 if (!aij->colmap) { 234 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 235 } 236 #if defined (PETSC_USE_CTABLE) 237 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 238 col --; 239 #else 240 col = aij->colmap[idxn[j]] - 1; 241 #endif 242 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 243 else { 244 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 245 } 246 } 247 } 248 } else { 249 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 250 } 251 } 252 PetscFunctionReturn(0); 253 } 254 255 #undef __FUNCT__ 256 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 257 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 258 { 259 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 260 PetscErrorCode ierr; 261 PetscInt nstash,reallocs; 262 InsertMode addv; 263 264 PetscFunctionBegin; 265 if (aij->donotstash) { 266 PetscFunctionReturn(0); 267 } 268 269 /* make sure all processors are either in INSERTMODE or ADDMODE */ 270 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 271 if (addv == (ADD_VALUES|INSERT_VALUES)) { 272 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 273 } 274 mat->insertmode = addv; /* in case this processor had no cache */ 275 276 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 277 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 278 ierr = PetscLogInfo((aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr); 279 PetscFunctionReturn(0); 280 } 281 282 #undef __FUNCT__ 283 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 284 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 285 { 286 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 287 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 288 PetscErrorCode ierr; 289 PetscMPIInt n; 290 PetscInt i,j,rstart,ncols,flg; 291 PetscInt *row,*col,other_disassembled; 292 PetscScalar *val; 293 InsertMode addv = mat->insertmode; 294 295 PetscFunctionBegin; 296 if (!aij->donotstash) { 297 while (1) { 298 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 299 if (!flg) break; 300 301 for (i=0; i<n;) { 302 /* Now identify the consecutive vals belonging to the same row */ 303 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 304 if (j < n) ncols = j-i; 305 else ncols = n-i; 306 /* Now assemble all these values with a single function call */ 307 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 308 i = j; 309 } 310 } 311 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 312 } 313 a->compressedrow.use = PETSC_FALSE; 314 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 315 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 316 317 /* determine if any processor has disassembled, if so we must 318 also disassemble ourselfs, in order that we may reassemble. */ 319 /* 320 if nonzero structure of submatrix B cannot change then we know that 321 no processor disassembled thus we can skip this stuff 322 */ 323 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 324 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 325 if (mat->was_assembled && !other_disassembled) { 326 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 327 } 328 } 329 /* reaccess the b because aij->B was changed in MatSetValues() or DisAssemble() */ 330 b = (Mat_SeqAIJ *)aij->B->data; 331 332 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 333 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 334 } 335 ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr); 336 b->compressedrow.use = PETSC_TRUE; 337 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 338 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 339 340 if (aij->rowvalues) { 341 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 342 aij->rowvalues = 0; 343 } 344 345 /* used by MatAXPY() */ 346 a->xtoy = 0; b->xtoy = 0; 347 a->XtoY = 0; b->XtoY = 0; 348 349 PetscFunctionReturn(0); 350 } 351 352 #undef __FUNCT__ 353 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 354 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 355 { 356 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 357 PetscErrorCode ierr; 358 359 PetscFunctionBegin; 360 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 361 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 362 PetscFunctionReturn(0); 363 } 364 365 #undef __FUNCT__ 366 #define __FUNCT__ "MatZeroRows_MPIAIJ" 367 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag) 368 { 369 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 370 PetscErrorCode ierr; 371 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1; 372 PetscInt i,N,*rows,*owners = l->rowners; 373 PetscInt *nprocs,j,idx,nsends,row; 374 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 375 PetscInt *rvalues,count,base,slen,*source; 376 PetscInt *lens,*lrows,*values,rstart=l->rstart; 377 MPI_Comm comm = A->comm; 378 MPI_Request *send_waits,*recv_waits; 379 MPI_Status recv_status,*send_status; 380 IS istmp; 381 #if defined(PETSC_DEBUG) 382 PetscTruth found = PETSC_FALSE; 383 #endif 384 385 PetscFunctionBegin; 386 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 387 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 388 389 /* first count number of contributors to each processor */ 390 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 391 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 392 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 393 j = 0; 394 for (i=0; i<N; i++) { 395 if (lastidx > (idx = rows[i])) j = 0; 396 lastidx = idx; 397 for (; j<size; j++) { 398 if (idx >= owners[j] && idx < owners[j+1]) { 399 nprocs[2*j]++; 400 nprocs[2*j+1] = 1; 401 owner[i] = j; 402 #if defined(PETSC_DEBUG) 403 found = PETSC_TRUE; 404 #endif 405 break; 406 } 407 } 408 #if defined(PETSC_DEBUG) 409 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 410 found = PETSC_FALSE; 411 #endif 412 } 413 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 414 415 /* inform other processors of number of messages and max length*/ 416 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 417 418 /* post receives: */ 419 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 420 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 421 for (i=0; i<nrecvs; i++) { 422 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 423 } 424 425 /* do sends: 426 1) starts[i] gives the starting index in svalues for stuff going to 427 the ith processor 428 */ 429 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 430 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 431 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 432 starts[0] = 0; 433 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 434 for (i=0; i<N; i++) { 435 svalues[starts[owner[i]]++] = rows[i]; 436 } 437 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 438 439 starts[0] = 0; 440 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 441 count = 0; 442 for (i=0; i<size; i++) { 443 if (nprocs[2*i+1]) { 444 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 445 } 446 } 447 ierr = PetscFree(starts);CHKERRQ(ierr); 448 449 base = owners[rank]; 450 451 /* wait on receives */ 452 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 453 source = lens + nrecvs; 454 count = nrecvs; slen = 0; 455 while (count) { 456 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 457 /* unpack receives into our local space */ 458 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 459 source[imdex] = recv_status.MPI_SOURCE; 460 lens[imdex] = n; 461 slen += n; 462 count--; 463 } 464 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 465 466 /* move the data into the send scatter */ 467 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 468 count = 0; 469 for (i=0; i<nrecvs; i++) { 470 values = rvalues + i*nmax; 471 for (j=0; j<lens[i]; j++) { 472 lrows[count++] = values[j] - base; 473 } 474 } 475 ierr = PetscFree(rvalues);CHKERRQ(ierr); 476 ierr = PetscFree(lens);CHKERRQ(ierr); 477 ierr = PetscFree(owner);CHKERRQ(ierr); 478 ierr = PetscFree(nprocs);CHKERRQ(ierr); 479 480 /* actually zap the local rows */ 481 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 482 ierr = PetscLogObjectParent(A,istmp);CHKERRQ(ierr); 483 484 /* 485 Zero the required rows. If the "diagonal block" of the matrix 486 is square and the user wishes to set the diagonal we use seperate 487 code so that MatSetValues() is not called for each diagonal allocating 488 new memory, thus calling lots of mallocs and slowing things down. 489 490 Contributed by: Mathew Knepley 491 */ 492 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 493 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 494 if (diag && (l->A->M == l->A->N)) { 495 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 496 } else if (diag) { 497 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 498 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 499 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 500 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 501 } 502 for (i = 0; i < slen; i++) { 503 row = lrows[i] + rstart; 504 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 505 } 506 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 507 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 508 } else { 509 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 510 } 511 ierr = ISDestroy(istmp);CHKERRQ(ierr); 512 ierr = PetscFree(lrows);CHKERRQ(ierr); 513 514 /* wait on sends */ 515 if (nsends) { 516 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 517 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 518 ierr = PetscFree(send_status);CHKERRQ(ierr); 519 } 520 ierr = PetscFree(send_waits);CHKERRQ(ierr); 521 ierr = PetscFree(svalues);CHKERRQ(ierr); 522 523 PetscFunctionReturn(0); 524 } 525 526 #undef __FUNCT__ 527 #define __FUNCT__ "MatMult_MPIAIJ" 528 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 529 { 530 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 531 PetscErrorCode ierr; 532 PetscInt nt; 533 534 PetscFunctionBegin; 535 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 536 if (nt != A->n) { 537 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt); 538 } 539 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 540 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 541 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 542 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 543 PetscFunctionReturn(0); 544 } 545 546 #undef __FUNCT__ 547 #define __FUNCT__ "MatMultAdd_MPIAIJ" 548 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 549 { 550 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 551 PetscErrorCode ierr; 552 553 PetscFunctionBegin; 554 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 555 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 556 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 557 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 558 PetscFunctionReturn(0); 559 } 560 561 #undef __FUNCT__ 562 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 563 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 564 { 565 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 566 PetscErrorCode ierr; 567 PetscTruth merged; 568 569 PetscFunctionBegin; 570 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 571 /* do nondiagonal part */ 572 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 573 if (!merged) { 574 /* send it on its way */ 575 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 576 /* do local part */ 577 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 578 /* receive remote parts: note this assumes the values are not actually */ 579 /* added in yy until the next line, */ 580 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 581 } else { 582 /* do local part */ 583 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 584 /* send it on its way */ 585 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 586 /* values actually were received in the Begin() but we need to call this nop */ 587 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 588 } 589 PetscFunctionReturn(0); 590 } 591 592 EXTERN_C_BEGIN 593 #undef __FUNCT__ 594 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 595 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth tol,PetscTruth *f) 596 { 597 MPI_Comm comm; 598 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 599 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 600 IS Me,Notme; 601 PetscErrorCode ierr; 602 PetscInt M,N,first,last,*notme,i; 603 PetscMPIInt size; 604 605 PetscFunctionBegin; 606 607 /* Easy test: symmetric diagonal block */ 608 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 609 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 610 if (!*f) PetscFunctionReturn(0); 611 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 612 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 613 if (size == 1) PetscFunctionReturn(0); 614 615 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 616 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 617 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 618 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 619 for (i=0; i<first; i++) notme[i] = i; 620 for (i=last; i<M; i++) notme[i-last+first] = i; 621 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 622 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 623 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 624 Aoff = Aoffs[0]; 625 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 626 Boff = Boffs[0]; 627 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 628 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 629 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 630 ierr = ISDestroy(Me);CHKERRQ(ierr); 631 ierr = ISDestroy(Notme);CHKERRQ(ierr); 632 633 PetscFunctionReturn(0); 634 } 635 EXTERN_C_END 636 637 #undef __FUNCT__ 638 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 639 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 640 { 641 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 642 PetscErrorCode ierr; 643 644 PetscFunctionBegin; 645 /* do nondiagonal part */ 646 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 647 /* send it on its way */ 648 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 649 /* do local part */ 650 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 651 /* receive remote parts */ 652 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 653 PetscFunctionReturn(0); 654 } 655 656 /* 657 This only works correctly for square matrices where the subblock A->A is the 658 diagonal block 659 */ 660 #undef __FUNCT__ 661 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 662 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 663 { 664 PetscErrorCode ierr; 665 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 666 667 PetscFunctionBegin; 668 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 669 if (a->rstart != a->cstart || a->rend != a->cend) { 670 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 671 } 672 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 673 PetscFunctionReturn(0); 674 } 675 676 #undef __FUNCT__ 677 #define __FUNCT__ "MatScale_MPIAIJ" 678 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A) 679 { 680 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 681 PetscErrorCode ierr; 682 683 PetscFunctionBegin; 684 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 685 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 686 PetscFunctionReturn(0); 687 } 688 689 #undef __FUNCT__ 690 #define __FUNCT__ "MatDestroy_MPIAIJ" 691 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 692 { 693 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 694 PetscErrorCode ierr; 695 696 PetscFunctionBegin; 697 #if defined(PETSC_USE_LOG) 698 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N); 699 #endif 700 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 701 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 702 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 703 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 704 #if defined (PETSC_USE_CTABLE) 705 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 706 #else 707 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 708 #endif 709 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 710 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 711 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 712 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 713 ierr = PetscFree(aij);CHKERRQ(ierr); 714 715 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 716 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 717 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 718 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 719 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 720 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 721 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 722 PetscFunctionReturn(0); 723 } 724 725 #undef __FUNCT__ 726 #define __FUNCT__ "MatView_MPIAIJ_Binary" 727 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 728 { 729 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 730 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 731 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 732 PetscErrorCode ierr; 733 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 734 int fd; 735 PetscInt nz,header[4],*row_lengths,*range,rlen,i; 736 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz; 737 PetscScalar *column_values; 738 739 PetscFunctionBegin; 740 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 741 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 742 nz = A->nz + B->nz; 743 if (!rank) { 744 header[0] = MAT_FILE_COOKIE; 745 header[1] = mat->M; 746 header[2] = mat->N; 747 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 748 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 749 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 750 /* get largest number of rows any processor has */ 751 rlen = mat->m; 752 ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr); 753 for (i=1; i<size; i++) { 754 rlen = PetscMax(rlen,range[i+1] - range[i]); 755 } 756 } else { 757 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 758 rlen = mat->m; 759 } 760 761 /* load up the local row counts */ 762 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 763 for (i=0; i<mat->m; i++) { 764 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 765 } 766 767 /* store the row lengths to the file */ 768 if (!rank) { 769 MPI_Status status; 770 ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 771 for (i=1; i<size; i++) { 772 rlen = range[i+1] - range[i]; 773 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 774 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 775 } 776 } else { 777 ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 778 } 779 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 780 781 /* load up the local column indices */ 782 nzmax = nz; /* )th processor needs space a largest processor needs */ 783 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr); 784 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 785 cnt = 0; 786 for (i=0; i<mat->m; i++) { 787 for (j=B->i[i]; j<B->i[i+1]; j++) { 788 if ( (col = garray[B->j[j]]) > cstart) break; 789 column_indices[cnt++] = col; 790 } 791 for (k=A->i[i]; k<A->i[i+1]; k++) { 792 column_indices[cnt++] = A->j[k] + cstart; 793 } 794 for (; j<B->i[i+1]; j++) { 795 column_indices[cnt++] = garray[B->j[j]]; 796 } 797 } 798 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 799 800 /* store the column indices to the file */ 801 if (!rank) { 802 MPI_Status status; 803 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 804 for (i=1; i<size; i++) { 805 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 806 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 807 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 808 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 809 } 810 } else { 811 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 812 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 813 } 814 ierr = PetscFree(column_indices);CHKERRQ(ierr); 815 816 /* load up the local column values */ 817 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 818 cnt = 0; 819 for (i=0; i<mat->m; i++) { 820 for (j=B->i[i]; j<B->i[i+1]; j++) { 821 if ( garray[B->j[j]] > cstart) break; 822 column_values[cnt++] = B->a[j]; 823 } 824 for (k=A->i[i]; k<A->i[i+1]; k++) { 825 column_values[cnt++] = A->a[k]; 826 } 827 for (; j<B->i[i+1]; j++) { 828 column_values[cnt++] = B->a[j]; 829 } 830 } 831 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 832 833 /* store the column values to the file */ 834 if (!rank) { 835 MPI_Status status; 836 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 837 for (i=1; i<size; i++) { 838 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 839 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 840 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 841 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 842 } 843 } else { 844 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 845 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 846 } 847 ierr = PetscFree(column_values);CHKERRQ(ierr); 848 PetscFunctionReturn(0); 849 } 850 851 #undef __FUNCT__ 852 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 853 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 854 { 855 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 856 PetscErrorCode ierr; 857 PetscMPIInt rank = aij->rank,size = aij->size; 858 PetscTruth isdraw,iascii,isbinary; 859 PetscViewer sviewer; 860 PetscViewerFormat format; 861 862 PetscFunctionBegin; 863 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 864 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 865 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 866 if (iascii) { 867 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 868 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 869 MatInfo info; 870 PetscTruth inodes; 871 872 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 873 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 874 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 875 if (!inodes) { 876 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 877 rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 878 } else { 879 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 880 rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 881 } 882 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 883 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 884 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 885 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 886 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 887 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 888 PetscFunctionReturn(0); 889 } else if (format == PETSC_VIEWER_ASCII_INFO) { 890 PetscInt inodecount,inodelimit,*inodes; 891 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 892 if (inodes) { 893 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 894 } else { 895 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 896 } 897 PetscFunctionReturn(0); 898 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 899 PetscFunctionReturn(0); 900 } 901 } else if (isbinary) { 902 if (size == 1) { 903 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 904 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 905 } else { 906 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 907 } 908 PetscFunctionReturn(0); 909 } else if (isdraw) { 910 PetscDraw draw; 911 PetscTruth isnull; 912 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 913 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 914 } 915 916 if (size == 1) { 917 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 918 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 919 } else { 920 /* assemble the entire matrix onto first processor. */ 921 Mat A; 922 Mat_SeqAIJ *Aloc; 923 PetscInt M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 924 PetscScalar *a; 925 926 if (!rank) { 927 ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr); 928 } else { 929 ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr); 930 } 931 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 932 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 933 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 934 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 935 936 /* copy over the A part */ 937 Aloc = (Mat_SeqAIJ*)aij->A->data; 938 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 939 row = aij->rstart; 940 for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;} 941 for (i=0; i<m; i++) { 942 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 943 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 944 } 945 aj = Aloc->j; 946 for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;} 947 948 /* copy over the B part */ 949 Aloc = (Mat_SeqAIJ*)aij->B->data; 950 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 951 row = aij->rstart; 952 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 953 ct = cols; 954 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 955 for (i=0; i<m; i++) { 956 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 957 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 958 } 959 ierr = PetscFree(ct);CHKERRQ(ierr); 960 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 961 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 962 /* 963 Everyone has to call to draw the matrix since the graphics waits are 964 synchronized across all processors that share the PetscDraw object 965 */ 966 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 967 if (!rank) { 968 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 969 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 970 } 971 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 972 ierr = MatDestroy(A);CHKERRQ(ierr); 973 } 974 PetscFunctionReturn(0); 975 } 976 977 #undef __FUNCT__ 978 #define __FUNCT__ "MatView_MPIAIJ" 979 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 980 { 981 PetscErrorCode ierr; 982 PetscTruth iascii,isdraw,issocket,isbinary; 983 984 PetscFunctionBegin; 985 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 986 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 987 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 988 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 989 if (iascii || isdraw || isbinary || issocket) { 990 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 991 } else { 992 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 993 } 994 PetscFunctionReturn(0); 995 } 996 997 998 999 #undef __FUNCT__ 1000 #define __FUNCT__ "MatRelax_MPIAIJ" 1001 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1002 { 1003 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1004 PetscErrorCode ierr; 1005 Vec bb1; 1006 PetscScalar mone=-1.0; 1007 1008 PetscFunctionBegin; 1009 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1010 1011 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1012 1013 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1014 if (flag & SOR_ZERO_INITIAL_GUESS) { 1015 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1016 its--; 1017 } 1018 1019 while (its--) { 1020 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1021 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1022 1023 /* update rhs: bb1 = bb - B*x */ 1024 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1025 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1026 1027 /* local sweep */ 1028 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1029 CHKERRQ(ierr); 1030 } 1031 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1032 if (flag & SOR_ZERO_INITIAL_GUESS) { 1033 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1034 its--; 1035 } 1036 while (its--) { 1037 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1038 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1039 1040 /* update rhs: bb1 = bb - B*x */ 1041 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1042 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1043 1044 /* local sweep */ 1045 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1046 CHKERRQ(ierr); 1047 } 1048 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1049 if (flag & SOR_ZERO_INITIAL_GUESS) { 1050 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1051 its--; 1052 } 1053 while (its--) { 1054 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1055 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1056 1057 /* update rhs: bb1 = bb - B*x */ 1058 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1059 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1060 1061 /* local sweep */ 1062 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1063 CHKERRQ(ierr); 1064 } 1065 } else { 1066 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1067 } 1068 1069 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1070 PetscFunctionReturn(0); 1071 } 1072 1073 #undef __FUNCT__ 1074 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1075 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1076 { 1077 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1078 Mat A = mat->A,B = mat->B; 1079 PetscErrorCode ierr; 1080 PetscReal isend[5],irecv[5]; 1081 1082 PetscFunctionBegin; 1083 info->block_size = 1.0; 1084 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1085 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1086 isend[3] = info->memory; isend[4] = info->mallocs; 1087 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1088 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1089 isend[3] += info->memory; isend[4] += info->mallocs; 1090 if (flag == MAT_LOCAL) { 1091 info->nz_used = isend[0]; 1092 info->nz_allocated = isend[1]; 1093 info->nz_unneeded = isend[2]; 1094 info->memory = isend[3]; 1095 info->mallocs = isend[4]; 1096 } else if (flag == MAT_GLOBAL_MAX) { 1097 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1098 info->nz_used = irecv[0]; 1099 info->nz_allocated = irecv[1]; 1100 info->nz_unneeded = irecv[2]; 1101 info->memory = irecv[3]; 1102 info->mallocs = irecv[4]; 1103 } else if (flag == MAT_GLOBAL_SUM) { 1104 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1105 info->nz_used = irecv[0]; 1106 info->nz_allocated = irecv[1]; 1107 info->nz_unneeded = irecv[2]; 1108 info->memory = irecv[3]; 1109 info->mallocs = irecv[4]; 1110 } 1111 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1112 info->fill_ratio_needed = 0; 1113 info->factor_mallocs = 0; 1114 info->rows_global = (double)matin->M; 1115 info->columns_global = (double)matin->N; 1116 info->rows_local = (double)matin->m; 1117 info->columns_local = (double)matin->N; 1118 1119 PetscFunctionReturn(0); 1120 } 1121 1122 #undef __FUNCT__ 1123 #define __FUNCT__ "MatSetOption_MPIAIJ" 1124 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op) 1125 { 1126 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1127 PetscErrorCode ierr; 1128 1129 PetscFunctionBegin; 1130 switch (op) { 1131 case MAT_NO_NEW_NONZERO_LOCATIONS: 1132 case MAT_YES_NEW_NONZERO_LOCATIONS: 1133 case MAT_COLUMNS_UNSORTED: 1134 case MAT_COLUMNS_SORTED: 1135 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1136 case MAT_KEEP_ZEROED_ROWS: 1137 case MAT_NEW_NONZERO_LOCATION_ERR: 1138 case MAT_USE_INODES: 1139 case MAT_DO_NOT_USE_INODES: 1140 case MAT_IGNORE_ZERO_ENTRIES: 1141 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1142 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1143 break; 1144 case MAT_ROW_ORIENTED: 1145 a->roworiented = PETSC_TRUE; 1146 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1147 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1148 break; 1149 case MAT_ROWS_SORTED: 1150 case MAT_ROWS_UNSORTED: 1151 case MAT_YES_NEW_DIAGONALS: 1152 ierr = PetscLogInfo((A,"MatSetOption_MPIAIJ:Option ignored\n"));CHKERRQ(ierr); 1153 break; 1154 case MAT_COLUMN_ORIENTED: 1155 a->roworiented = PETSC_FALSE; 1156 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1157 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1158 break; 1159 case MAT_IGNORE_OFF_PROC_ENTRIES: 1160 a->donotstash = PETSC_TRUE; 1161 break; 1162 case MAT_NO_NEW_DIAGONALS: 1163 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1164 case MAT_SYMMETRIC: 1165 case MAT_STRUCTURALLY_SYMMETRIC: 1166 case MAT_HERMITIAN: 1167 case MAT_SYMMETRY_ETERNAL: 1168 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1169 break; 1170 case MAT_NOT_SYMMETRIC: 1171 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1172 case MAT_NOT_HERMITIAN: 1173 case MAT_NOT_SYMMETRY_ETERNAL: 1174 break; 1175 default: 1176 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1177 } 1178 PetscFunctionReturn(0); 1179 } 1180 1181 #undef __FUNCT__ 1182 #define __FUNCT__ "MatGetRow_MPIAIJ" 1183 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1184 { 1185 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1186 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1187 PetscErrorCode ierr; 1188 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1189 PetscInt nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1190 PetscInt *cmap,*idx_p; 1191 1192 PetscFunctionBegin; 1193 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1194 mat->getrowactive = PETSC_TRUE; 1195 1196 if (!mat->rowvalues && (idx || v)) { 1197 /* 1198 allocate enough space to hold information from the longest row. 1199 */ 1200 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1201 PetscInt max = 1,tmp; 1202 for (i=0; i<matin->m; i++) { 1203 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1204 if (max < tmp) { max = tmp; } 1205 } 1206 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1207 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1208 } 1209 1210 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1211 lrow = row - rstart; 1212 1213 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1214 if (!v) {pvA = 0; pvB = 0;} 1215 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1216 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1217 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1218 nztot = nzA + nzB; 1219 1220 cmap = mat->garray; 1221 if (v || idx) { 1222 if (nztot) { 1223 /* Sort by increasing column numbers, assuming A and B already sorted */ 1224 PetscInt imark = -1; 1225 if (v) { 1226 *v = v_p = mat->rowvalues; 1227 for (i=0; i<nzB; i++) { 1228 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1229 else break; 1230 } 1231 imark = i; 1232 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1233 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1234 } 1235 if (idx) { 1236 *idx = idx_p = mat->rowindices; 1237 if (imark > -1) { 1238 for (i=0; i<imark; i++) { 1239 idx_p[i] = cmap[cworkB[i]]; 1240 } 1241 } else { 1242 for (i=0; i<nzB; i++) { 1243 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1244 else break; 1245 } 1246 imark = i; 1247 } 1248 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1249 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1250 } 1251 } else { 1252 if (idx) *idx = 0; 1253 if (v) *v = 0; 1254 } 1255 } 1256 *nz = nztot; 1257 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1258 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1259 PetscFunctionReturn(0); 1260 } 1261 1262 #undef __FUNCT__ 1263 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1264 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1265 { 1266 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1267 1268 PetscFunctionBegin; 1269 if (!aij->getrowactive) { 1270 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1271 } 1272 aij->getrowactive = PETSC_FALSE; 1273 PetscFunctionReturn(0); 1274 } 1275 1276 #undef __FUNCT__ 1277 #define __FUNCT__ "MatNorm_MPIAIJ" 1278 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1279 { 1280 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1281 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1282 PetscErrorCode ierr; 1283 PetscInt i,j,cstart = aij->cstart; 1284 PetscReal sum = 0.0; 1285 PetscScalar *v; 1286 1287 PetscFunctionBegin; 1288 if (aij->size == 1) { 1289 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1290 } else { 1291 if (type == NORM_FROBENIUS) { 1292 v = amat->a; 1293 for (i=0; i<amat->nz; i++) { 1294 #if defined(PETSC_USE_COMPLEX) 1295 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1296 #else 1297 sum += (*v)*(*v); v++; 1298 #endif 1299 } 1300 v = bmat->a; 1301 for (i=0; i<bmat->nz; i++) { 1302 #if defined(PETSC_USE_COMPLEX) 1303 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1304 #else 1305 sum += (*v)*(*v); v++; 1306 #endif 1307 } 1308 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1309 *norm = sqrt(*norm); 1310 } else if (type == NORM_1) { /* max column norm */ 1311 PetscReal *tmp,*tmp2; 1312 PetscInt *jj,*garray = aij->garray; 1313 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1314 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1315 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1316 *norm = 0.0; 1317 v = amat->a; jj = amat->j; 1318 for (j=0; j<amat->nz; j++) { 1319 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1320 } 1321 v = bmat->a; jj = bmat->j; 1322 for (j=0; j<bmat->nz; j++) { 1323 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1324 } 1325 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1326 for (j=0; j<mat->N; j++) { 1327 if (tmp2[j] > *norm) *norm = tmp2[j]; 1328 } 1329 ierr = PetscFree(tmp);CHKERRQ(ierr); 1330 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1331 } else if (type == NORM_INFINITY) { /* max row norm */ 1332 PetscReal ntemp = 0.0; 1333 for (j=0; j<aij->A->m; j++) { 1334 v = amat->a + amat->i[j]; 1335 sum = 0.0; 1336 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1337 sum += PetscAbsScalar(*v); v++; 1338 } 1339 v = bmat->a + bmat->i[j]; 1340 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1341 sum += PetscAbsScalar(*v); v++; 1342 } 1343 if (sum > ntemp) ntemp = sum; 1344 } 1345 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1346 } else { 1347 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1348 } 1349 } 1350 PetscFunctionReturn(0); 1351 } 1352 1353 #undef __FUNCT__ 1354 #define __FUNCT__ "MatTranspose_MPIAIJ" 1355 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout) 1356 { 1357 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1358 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1359 PetscErrorCode ierr; 1360 PetscInt M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1361 Mat B; 1362 PetscScalar *array; 1363 1364 PetscFunctionBegin; 1365 if (!matout && M != N) { 1366 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1367 } 1368 1369 ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr); 1370 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1371 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1372 1373 /* copy over the A part */ 1374 Aloc = (Mat_SeqAIJ*)a->A->data; 1375 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1376 row = a->rstart; 1377 for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;} 1378 for (i=0; i<m; i++) { 1379 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1380 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1381 } 1382 aj = Aloc->j; 1383 for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;} 1384 1385 /* copy over the B part */ 1386 Aloc = (Mat_SeqAIJ*)a->B->data; 1387 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1388 row = a->rstart; 1389 ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1390 ct = cols; 1391 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1392 for (i=0; i<m; i++) { 1393 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1394 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1395 } 1396 ierr = PetscFree(ct);CHKERRQ(ierr); 1397 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1398 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1399 if (matout) { 1400 *matout = B; 1401 } else { 1402 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1403 } 1404 PetscFunctionReturn(0); 1405 } 1406 1407 #undef __FUNCT__ 1408 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1409 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1410 { 1411 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1412 Mat a = aij->A,b = aij->B; 1413 PetscErrorCode ierr; 1414 PetscInt s1,s2,s3; 1415 1416 PetscFunctionBegin; 1417 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1418 if (rr) { 1419 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1420 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1421 /* Overlap communication with computation. */ 1422 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1423 } 1424 if (ll) { 1425 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1426 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1427 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1428 } 1429 /* scale the diagonal block */ 1430 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1431 1432 if (rr) { 1433 /* Do a scatter end and then right scale the off-diagonal block */ 1434 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1435 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1436 } 1437 1438 PetscFunctionReturn(0); 1439 } 1440 1441 1442 #undef __FUNCT__ 1443 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1444 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A) 1445 { 1446 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1447 PetscErrorCode ierr; 1448 1449 PetscFunctionBegin; 1450 if (!a->rank) { 1451 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1452 } 1453 PetscFunctionReturn(0); 1454 } 1455 1456 #undef __FUNCT__ 1457 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1458 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1459 { 1460 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1461 PetscErrorCode ierr; 1462 1463 PetscFunctionBegin; 1464 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1465 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1466 PetscFunctionReturn(0); 1467 } 1468 #undef __FUNCT__ 1469 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1470 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1471 { 1472 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1473 PetscErrorCode ierr; 1474 1475 PetscFunctionBegin; 1476 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1477 PetscFunctionReturn(0); 1478 } 1479 1480 #undef __FUNCT__ 1481 #define __FUNCT__ "MatEqual_MPIAIJ" 1482 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1483 { 1484 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1485 Mat a,b,c,d; 1486 PetscTruth flg; 1487 PetscErrorCode ierr; 1488 1489 PetscFunctionBegin; 1490 a = matA->A; b = matA->B; 1491 c = matB->A; d = matB->B; 1492 1493 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1494 if (flg) { 1495 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1496 } 1497 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1498 PetscFunctionReturn(0); 1499 } 1500 1501 #undef __FUNCT__ 1502 #define __FUNCT__ "MatCopy_MPIAIJ" 1503 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1504 { 1505 PetscErrorCode ierr; 1506 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1507 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1508 1509 PetscFunctionBegin; 1510 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1511 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1512 /* because of the column compression in the off-processor part of the matrix a->B, 1513 the number of columns in a->B and b->B may be different, hence we cannot call 1514 the MatCopy() directly on the two parts. If need be, we can provide a more 1515 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1516 then copying the submatrices */ 1517 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1518 } else { 1519 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1520 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1521 } 1522 PetscFunctionReturn(0); 1523 } 1524 1525 #undef __FUNCT__ 1526 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1527 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1528 { 1529 PetscErrorCode ierr; 1530 1531 PetscFunctionBegin; 1532 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1533 PetscFunctionReturn(0); 1534 } 1535 1536 #include "petscblaslapack.h" 1537 #undef __FUNCT__ 1538 #define __FUNCT__ "MatAXPY_MPIAIJ" 1539 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str) 1540 { 1541 PetscErrorCode ierr; 1542 PetscInt i; 1543 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1544 PetscBLASInt bnz,one=1; 1545 Mat_SeqAIJ *x,*y; 1546 1547 PetscFunctionBegin; 1548 if (str == SAME_NONZERO_PATTERN) { 1549 x = (Mat_SeqAIJ *)xx->A->data; 1550 y = (Mat_SeqAIJ *)yy->A->data; 1551 bnz = (PetscBLASInt)x->nz; 1552 BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1553 x = (Mat_SeqAIJ *)xx->B->data; 1554 y = (Mat_SeqAIJ *)yy->B->data; 1555 bnz = (PetscBLASInt)x->nz; 1556 BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1557 } else if (str == SUBSET_NONZERO_PATTERN) { 1558 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1559 1560 x = (Mat_SeqAIJ *)xx->B->data; 1561 y = (Mat_SeqAIJ *)yy->B->data; 1562 if (y->xtoy && y->XtoY != xx->B) { 1563 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1564 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1565 } 1566 if (!y->xtoy) { /* get xtoy */ 1567 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1568 y->XtoY = xx->B; 1569 } 1570 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1571 } else { 1572 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1573 } 1574 PetscFunctionReturn(0); 1575 } 1576 1577 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1578 1579 #undef __FUNCT__ 1580 #define __FUNCT__ "MatConjugate_MPIAIJ" 1581 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1582 { 1583 #if defined(PETSC_USE_COMPLEX) 1584 PetscErrorCode ierr; 1585 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1586 1587 PetscFunctionBegin; 1588 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1589 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1590 #else 1591 PetscFunctionBegin; 1592 #endif 1593 PetscFunctionReturn(0); 1594 } 1595 1596 /* -------------------------------------------------------------------*/ 1597 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1598 MatGetRow_MPIAIJ, 1599 MatRestoreRow_MPIAIJ, 1600 MatMult_MPIAIJ, 1601 /* 4*/ MatMultAdd_MPIAIJ, 1602 MatMultTranspose_MPIAIJ, 1603 MatMultTransposeAdd_MPIAIJ, 1604 0, 1605 0, 1606 0, 1607 /*10*/ 0, 1608 0, 1609 0, 1610 MatRelax_MPIAIJ, 1611 MatTranspose_MPIAIJ, 1612 /*15*/ MatGetInfo_MPIAIJ, 1613 MatEqual_MPIAIJ, 1614 MatGetDiagonal_MPIAIJ, 1615 MatDiagonalScale_MPIAIJ, 1616 MatNorm_MPIAIJ, 1617 /*20*/ MatAssemblyBegin_MPIAIJ, 1618 MatAssemblyEnd_MPIAIJ, 1619 0, 1620 MatSetOption_MPIAIJ, 1621 MatZeroEntries_MPIAIJ, 1622 /*25*/ MatZeroRows_MPIAIJ, 1623 0, 1624 0, 1625 0, 1626 0, 1627 /*30*/ MatSetUpPreallocation_MPIAIJ, 1628 0, 1629 0, 1630 0, 1631 0, 1632 /*35*/ MatDuplicate_MPIAIJ, 1633 0, 1634 0, 1635 0, 1636 0, 1637 /*40*/ MatAXPY_MPIAIJ, 1638 MatGetSubMatrices_MPIAIJ, 1639 MatIncreaseOverlap_MPIAIJ, 1640 MatGetValues_MPIAIJ, 1641 MatCopy_MPIAIJ, 1642 /*45*/ MatPrintHelp_MPIAIJ, 1643 MatScale_MPIAIJ, 1644 0, 1645 0, 1646 0, 1647 /*50*/ MatSetBlockSize_MPIAIJ, 1648 0, 1649 0, 1650 0, 1651 0, 1652 /*55*/ MatFDColoringCreate_MPIAIJ, 1653 0, 1654 MatSetUnfactored_MPIAIJ, 1655 0, 1656 0, 1657 /*60*/ MatGetSubMatrix_MPIAIJ, 1658 MatDestroy_MPIAIJ, 1659 MatView_MPIAIJ, 1660 MatGetPetscMaps_Petsc, 1661 0, 1662 /*65*/ 0, 1663 0, 1664 0, 1665 0, 1666 0, 1667 /*70*/ 0, 1668 0, 1669 MatSetColoring_MPIAIJ, 1670 #if defined(PETSC_HAVE_ADIC) 1671 MatSetValuesAdic_MPIAIJ, 1672 #else 1673 0, 1674 #endif 1675 MatSetValuesAdifor_MPIAIJ, 1676 /*75*/ 0, 1677 0, 1678 0, 1679 0, 1680 0, 1681 /*80*/ 0, 1682 0, 1683 0, 1684 0, 1685 /*84*/ MatLoad_MPIAIJ, 1686 0, 1687 0, 1688 0, 1689 0, 1690 0, 1691 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 1692 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1693 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1694 MatPtAP_Basic, 1695 MatPtAPSymbolic_MPIAIJ, 1696 /*95*/ MatPtAPNumeric_MPIAIJ, 1697 0, 1698 0, 1699 0, 1700 0, 1701 /*100*/0, 1702 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 1703 MatPtAPNumeric_MPIAIJ_MPIAIJ, 1704 MatConjugate_MPIAIJ 1705 }; 1706 1707 /* ----------------------------------------------------------------------------------------*/ 1708 1709 EXTERN_C_BEGIN 1710 #undef __FUNCT__ 1711 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1712 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 1713 { 1714 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1715 PetscErrorCode ierr; 1716 1717 PetscFunctionBegin; 1718 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1719 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1720 PetscFunctionReturn(0); 1721 } 1722 EXTERN_C_END 1723 1724 EXTERN_C_BEGIN 1725 #undef __FUNCT__ 1726 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1727 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 1728 { 1729 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1730 PetscErrorCode ierr; 1731 1732 PetscFunctionBegin; 1733 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1734 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1735 PetscFunctionReturn(0); 1736 } 1737 EXTERN_C_END 1738 1739 #include "petscpc.h" 1740 EXTERN_C_BEGIN 1741 #undef __FUNCT__ 1742 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1743 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1744 { 1745 Mat_MPIAIJ *b; 1746 PetscErrorCode ierr; 1747 PetscInt i; 1748 1749 PetscFunctionBegin; 1750 B->preallocated = PETSC_TRUE; 1751 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1752 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1753 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1754 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1755 if (d_nnz) { 1756 for (i=0; i<B->m; i++) { 1757 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 1758 } 1759 } 1760 if (o_nnz) { 1761 for (i=0; i<B->m; i++) { 1762 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 1763 } 1764 } 1765 b = (Mat_MPIAIJ*)B->data; 1766 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1767 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1768 1769 PetscFunctionReturn(0); 1770 } 1771 EXTERN_C_END 1772 1773 #undef __FUNCT__ 1774 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1775 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1776 { 1777 Mat mat; 1778 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1779 PetscErrorCode ierr; 1780 1781 PetscFunctionBegin; 1782 *newmat = 0; 1783 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1784 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1785 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1786 a = (Mat_MPIAIJ*)mat->data; 1787 1788 mat->factor = matin->factor; 1789 mat->bs = matin->bs; 1790 mat->assembled = PETSC_TRUE; 1791 mat->insertmode = NOT_SET_VALUES; 1792 mat->preallocated = PETSC_TRUE; 1793 1794 a->rstart = oldmat->rstart; 1795 a->rend = oldmat->rend; 1796 a->cstart = oldmat->cstart; 1797 a->cend = oldmat->cend; 1798 a->size = oldmat->size; 1799 a->rank = oldmat->rank; 1800 a->donotstash = oldmat->donotstash; 1801 a->roworiented = oldmat->roworiented; 1802 a->rowindices = 0; 1803 a->rowvalues = 0; 1804 a->getrowactive = PETSC_FALSE; 1805 1806 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 1807 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1808 if (oldmat->colmap) { 1809 #if defined (PETSC_USE_CTABLE) 1810 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1811 #else 1812 ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 1813 ierr = PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr); 1814 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr); 1815 #endif 1816 } else a->colmap = 0; 1817 if (oldmat->garray) { 1818 PetscInt len; 1819 len = oldmat->B->n; 1820 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 1821 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 1822 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 1823 } else a->garray = 0; 1824 1825 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1826 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 1827 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1828 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 1829 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1830 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1831 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1832 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1833 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1834 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 1835 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1836 *newmat = mat; 1837 PetscFunctionReturn(0); 1838 } 1839 1840 #include "petscsys.h" 1841 1842 #undef __FUNCT__ 1843 #define __FUNCT__ "MatLoad_MPIAIJ" 1844 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 1845 { 1846 Mat A; 1847 PetscScalar *vals,*svals; 1848 MPI_Comm comm = ((PetscObject)viewer)->comm; 1849 MPI_Status status; 1850 PetscErrorCode ierr; 1851 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 1852 PetscInt i,nz,j,rstart,rend,mmax; 1853 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 1854 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1855 PetscInt cend,cstart,n,*rowners; 1856 int fd; 1857 1858 PetscFunctionBegin; 1859 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1860 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1861 if (!rank) { 1862 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1863 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1864 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1865 } 1866 1867 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1868 M = header[1]; N = header[2]; 1869 /* determine ownership of all rows */ 1870 m = M/size + ((M % size) > rank); 1871 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1872 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1873 1874 /* First process needs enough room for process with most rows */ 1875 if (!rank) { 1876 mmax = rowners[1]; 1877 for (i=2; i<size; i++) { 1878 mmax = PetscMax(mmax,rowners[i]); 1879 } 1880 } else mmax = m; 1881 1882 rowners[0] = 0; 1883 for (i=2; i<=size; i++) { 1884 mmax = PetscMax(mmax,rowners[i]); 1885 rowners[i] += rowners[i-1]; 1886 } 1887 rstart = rowners[rank]; 1888 rend = rowners[rank+1]; 1889 1890 /* distribute row lengths to all processors */ 1891 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 1892 if (!rank) { 1893 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 1894 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1895 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 1896 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 1897 for (j=0; j<m; j++) { 1898 procsnz[0] += ourlens[j]; 1899 } 1900 for (i=1; i<size; i++) { 1901 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 1902 /* calculate the number of nonzeros on each processor */ 1903 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 1904 procsnz[i] += rowlengths[j]; 1905 } 1906 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1907 } 1908 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1909 } else { 1910 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1911 } 1912 1913 if (!rank) { 1914 /* determine max buffer needed and allocate it */ 1915 maxnz = 0; 1916 for (i=0; i<size; i++) { 1917 maxnz = PetscMax(maxnz,procsnz[i]); 1918 } 1919 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1920 1921 /* read in my part of the matrix column indices */ 1922 nz = procsnz[0]; 1923 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1924 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1925 1926 /* read in every one elses and ship off */ 1927 for (i=1; i<size; i++) { 1928 nz = procsnz[i]; 1929 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1930 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1931 } 1932 ierr = PetscFree(cols);CHKERRQ(ierr); 1933 } else { 1934 /* determine buffer space needed for message */ 1935 nz = 0; 1936 for (i=0; i<m; i++) { 1937 nz += ourlens[i]; 1938 } 1939 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1940 1941 /* receive message of column indices*/ 1942 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1943 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 1944 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1945 } 1946 1947 /* determine column ownership if matrix is not square */ 1948 if (N != M) { 1949 n = N/size + ((N % size) > rank); 1950 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1951 cstart = cend - n; 1952 } else { 1953 cstart = rstart; 1954 cend = rend; 1955 n = cend - cstart; 1956 } 1957 1958 /* loop over local rows, determining number of off diagonal entries */ 1959 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 1960 jj = 0; 1961 for (i=0; i<m; i++) { 1962 for (j=0; j<ourlens[i]; j++) { 1963 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 1964 jj++; 1965 } 1966 } 1967 1968 /* create our matrix */ 1969 for (i=0; i<m; i++) { 1970 ourlens[i] -= offlens[i]; 1971 } 1972 ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr); 1973 ierr = MatSetType(A,type);CHKERRQ(ierr); 1974 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 1975 1976 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 1977 for (i=0; i<m; i++) { 1978 ourlens[i] += offlens[i]; 1979 } 1980 1981 if (!rank) { 1982 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1983 1984 /* read in my part of the matrix numerical values */ 1985 nz = procsnz[0]; 1986 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1987 1988 /* insert into matrix */ 1989 jj = rstart; 1990 smycols = mycols; 1991 svals = vals; 1992 for (i=0; i<m; i++) { 1993 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1994 smycols += ourlens[i]; 1995 svals += ourlens[i]; 1996 jj++; 1997 } 1998 1999 /* read in other processors and ship out */ 2000 for (i=1; i<size; i++) { 2001 nz = procsnz[i]; 2002 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2003 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2004 } 2005 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2006 } else { 2007 /* receive numeric values */ 2008 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2009 2010 /* receive message of values*/ 2011 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2012 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2013 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2014 2015 /* insert into matrix */ 2016 jj = rstart; 2017 smycols = mycols; 2018 svals = vals; 2019 for (i=0; i<m; i++) { 2020 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2021 smycols += ourlens[i]; 2022 svals += ourlens[i]; 2023 jj++; 2024 } 2025 } 2026 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2027 ierr = PetscFree(vals);CHKERRQ(ierr); 2028 ierr = PetscFree(mycols);CHKERRQ(ierr); 2029 ierr = PetscFree(rowners);CHKERRQ(ierr); 2030 2031 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2032 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2033 *newmat = A; 2034 PetscFunctionReturn(0); 2035 } 2036 2037 #undef __FUNCT__ 2038 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2039 /* 2040 Not great since it makes two copies of the submatrix, first an SeqAIJ 2041 in local and then by concatenating the local matrices the end result. 2042 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2043 */ 2044 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2045 { 2046 PetscErrorCode ierr; 2047 PetscMPIInt rank,size; 2048 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2049 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2050 Mat *local,M,Mreuse; 2051 PetscScalar *vwork,*aa; 2052 MPI_Comm comm = mat->comm; 2053 Mat_SeqAIJ *aij; 2054 2055 2056 PetscFunctionBegin; 2057 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2058 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2059 2060 if (call == MAT_REUSE_MATRIX) { 2061 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2062 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2063 local = &Mreuse; 2064 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2065 } else { 2066 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2067 Mreuse = *local; 2068 ierr = PetscFree(local);CHKERRQ(ierr); 2069 } 2070 2071 /* 2072 m - number of local rows 2073 n - number of columns (same on all processors) 2074 rstart - first row in new global matrix generated 2075 */ 2076 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2077 if (call == MAT_INITIAL_MATRIX) { 2078 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2079 ii = aij->i; 2080 jj = aij->j; 2081 2082 /* 2083 Determine the number of non-zeros in the diagonal and off-diagonal 2084 portions of the matrix in order to do correct preallocation 2085 */ 2086 2087 /* first get start and end of "diagonal" columns */ 2088 if (csize == PETSC_DECIDE) { 2089 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2090 if (mglobal == n) { /* square matrix */ 2091 nlocal = m; 2092 } else { 2093 nlocal = n/size + ((n % size) > rank); 2094 } 2095 } else { 2096 nlocal = csize; 2097 } 2098 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2099 rstart = rend - nlocal; 2100 if (rank == size - 1 && rend != n) { 2101 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2102 } 2103 2104 /* next, compute all the lengths */ 2105 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2106 olens = dlens + m; 2107 for (i=0; i<m; i++) { 2108 jend = ii[i+1] - ii[i]; 2109 olen = 0; 2110 dlen = 0; 2111 for (j=0; j<jend; j++) { 2112 if (*jj < rstart || *jj >= rend) olen++; 2113 else dlen++; 2114 jj++; 2115 } 2116 olens[i] = olen; 2117 dlens[i] = dlen; 2118 } 2119 ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr); 2120 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2121 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2122 ierr = PetscFree(dlens);CHKERRQ(ierr); 2123 } else { 2124 PetscInt ml,nl; 2125 2126 M = *newmat; 2127 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2128 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2129 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2130 /* 2131 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2132 rather than the slower MatSetValues(). 2133 */ 2134 M->was_assembled = PETSC_TRUE; 2135 M->assembled = PETSC_FALSE; 2136 } 2137 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2138 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2139 ii = aij->i; 2140 jj = aij->j; 2141 aa = aij->a; 2142 for (i=0; i<m; i++) { 2143 row = rstart + i; 2144 nz = ii[i+1] - ii[i]; 2145 cwork = jj; jj += nz; 2146 vwork = aa; aa += nz; 2147 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2148 } 2149 2150 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2151 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2152 *newmat = M; 2153 2154 /* save submatrix used in processor for next request */ 2155 if (call == MAT_INITIAL_MATRIX) { 2156 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2157 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2158 } 2159 2160 PetscFunctionReturn(0); 2161 } 2162 2163 EXTERN_C_BEGIN 2164 #undef __FUNCT__ 2165 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2166 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2167 { 2168 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2169 PetscInt m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d; 2170 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii; 2171 const PetscInt *JJ; 2172 PetscScalar *values; 2173 PetscErrorCode ierr; 2174 2175 PetscFunctionBegin; 2176 #if defined(PETSC_OPT_g) 2177 if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]); 2178 #endif 2179 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2180 o_nnz = d_nnz + m; 2181 2182 for (i=0; i<m; i++) { 2183 nnz = I[i+1]- I[i]; 2184 JJ = J + I[i]; 2185 nnz_max = PetscMax(nnz_max,nnz); 2186 #if defined(PETSC_OPT_g) 2187 if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz); 2188 #endif 2189 for (j=0; j<nnz; j++) { 2190 if (*JJ >= cstart) break; 2191 JJ++; 2192 } 2193 d = 0; 2194 for (; j<nnz; j++) { 2195 if (*JJ++ >= cend) break; 2196 d++; 2197 } 2198 d_nnz[i] = d; 2199 o_nnz[i] = nnz - d; 2200 } 2201 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2202 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2203 2204 if (v) values = (PetscScalar*)v; 2205 else { 2206 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2207 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2208 } 2209 2210 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2211 for (i=0; i<m; i++) { 2212 ii = i + rstart; 2213 nnz = I[i+1]- I[i]; 2214 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values+(v ? I[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 2215 } 2216 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2217 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2218 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2219 2220 if (!v) { 2221 ierr = PetscFree(values);CHKERRQ(ierr); 2222 } 2223 PetscFunctionReturn(0); 2224 } 2225 EXTERN_C_END 2226 2227 #undef __FUNCT__ 2228 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2229 /*@C 2230 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2231 (the default parallel PETSc format). 2232 2233 Collective on MPI_Comm 2234 2235 Input Parameters: 2236 + A - the matrix 2237 . i - the indices into j for the start of each local row (starts with zero) 2238 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2239 - v - optional values in the matrix 2240 2241 Level: developer 2242 2243 .keywords: matrix, aij, compressed row, sparse, parallel 2244 2245 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2246 @*/ 2247 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2248 { 2249 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2250 2251 PetscFunctionBegin; 2252 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2253 if (f) { 2254 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2255 } 2256 PetscFunctionReturn(0); 2257 } 2258 2259 #undef __FUNCT__ 2260 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2261 /*@C 2262 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2263 (the default parallel PETSc format). For good matrix assembly performance 2264 the user should preallocate the matrix storage by setting the parameters 2265 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2266 performance can be increased by more than a factor of 50. 2267 2268 Collective on MPI_Comm 2269 2270 Input Parameters: 2271 + A - the matrix 2272 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2273 (same value is used for all local rows) 2274 . d_nnz - array containing the number of nonzeros in the various rows of the 2275 DIAGONAL portion of the local submatrix (possibly different for each row) 2276 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2277 The size of this array is equal to the number of local rows, i.e 'm'. 2278 You must leave room for the diagonal entry even if it is zero. 2279 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2280 submatrix (same value is used for all local rows). 2281 - o_nnz - array containing the number of nonzeros in the various rows of the 2282 OFF-DIAGONAL portion of the local submatrix (possibly different for 2283 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2284 structure. The size of this array is equal to the number 2285 of local rows, i.e 'm'. 2286 2287 If the *_nnz parameter is given then the *_nz parameter is ignored 2288 2289 The AIJ format (also called the Yale sparse matrix format or 2290 compressed row storage (CSR)), is fully compatible with standard Fortran 77 2291 storage. The stored row and column indices begin with zero. See the users manual for details. 2292 2293 The parallel matrix is partitioned such that the first m0 rows belong to 2294 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2295 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2296 2297 The DIAGONAL portion of the local submatrix of a processor can be defined 2298 as the submatrix which is obtained by extraction the part corresponding 2299 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2300 first row that belongs to the processor, and r2 is the last row belonging 2301 to the this processor. This is a square mxm matrix. The remaining portion 2302 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2303 2304 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2305 2306 Example usage: 2307 2308 Consider the following 8x8 matrix with 34 non-zero values, that is 2309 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2310 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2311 as follows: 2312 2313 .vb 2314 1 2 0 | 0 3 0 | 0 4 2315 Proc0 0 5 6 | 7 0 0 | 8 0 2316 9 0 10 | 11 0 0 | 12 0 2317 ------------------------------------- 2318 13 0 14 | 15 16 17 | 0 0 2319 Proc1 0 18 0 | 19 20 21 | 0 0 2320 0 0 0 | 22 23 0 | 24 0 2321 ------------------------------------- 2322 Proc2 25 26 27 | 0 0 28 | 29 0 2323 30 0 0 | 31 32 33 | 0 34 2324 .ve 2325 2326 This can be represented as a collection of submatrices as: 2327 2328 .vb 2329 A B C 2330 D E F 2331 G H I 2332 .ve 2333 2334 Where the submatrices A,B,C are owned by proc0, D,E,F are 2335 owned by proc1, G,H,I are owned by proc2. 2336 2337 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2338 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2339 The 'M','N' parameters are 8,8, and have the same values on all procs. 2340 2341 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2342 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2343 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2344 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2345 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2346 matrix, ans [DF] as another SeqAIJ matrix. 2347 2348 When d_nz, o_nz parameters are specified, d_nz storage elements are 2349 allocated for every row of the local diagonal submatrix, and o_nz 2350 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2351 One way to choose d_nz and o_nz is to use the max nonzerors per local 2352 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2353 In this case, the values of d_nz,o_nz are: 2354 .vb 2355 proc0 : dnz = 2, o_nz = 2 2356 proc1 : dnz = 3, o_nz = 2 2357 proc2 : dnz = 1, o_nz = 4 2358 .ve 2359 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2360 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2361 for proc3. i.e we are using 12+15+10=37 storage locations to store 2362 34 values. 2363 2364 When d_nnz, o_nnz parameters are specified, the storage is specified 2365 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2366 In the above case the values for d_nnz,o_nnz are: 2367 .vb 2368 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2369 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2370 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2371 .ve 2372 Here the space allocated is sum of all the above values i.e 34, and 2373 hence pre-allocation is perfect. 2374 2375 Level: intermediate 2376 2377 .keywords: matrix, aij, compressed row, sparse, parallel 2378 2379 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 2380 MPIAIJ 2381 @*/ 2382 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2383 { 2384 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2385 2386 PetscFunctionBegin; 2387 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2388 if (f) { 2389 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2390 } 2391 PetscFunctionReturn(0); 2392 } 2393 2394 #undef __FUNCT__ 2395 #define __FUNCT__ "MatCreateMPIAIJ" 2396 /*@C 2397 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2398 (the default parallel PETSc format). For good matrix assembly performance 2399 the user should preallocate the matrix storage by setting the parameters 2400 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2401 performance can be increased by more than a factor of 50. 2402 2403 Collective on MPI_Comm 2404 2405 Input Parameters: 2406 + comm - MPI communicator 2407 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2408 This value should be the same as the local size used in creating the 2409 y vector for the matrix-vector product y = Ax. 2410 . n - This value should be the same as the local size used in creating the 2411 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2412 calculated if N is given) For square matrices n is almost always m. 2413 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2414 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2415 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2416 (same value is used for all local rows) 2417 . d_nnz - array containing the number of nonzeros in the various rows of the 2418 DIAGONAL portion of the local submatrix (possibly different for each row) 2419 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2420 The size of this array is equal to the number of local rows, i.e 'm'. 2421 You must leave room for the diagonal entry even if it is zero. 2422 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2423 submatrix (same value is used for all local rows). 2424 - o_nnz - array containing the number of nonzeros in the various rows of the 2425 OFF-DIAGONAL portion of the local submatrix (possibly different for 2426 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2427 structure. The size of this array is equal to the number 2428 of local rows, i.e 'm'. 2429 2430 Output Parameter: 2431 . A - the matrix 2432 2433 Notes: 2434 If the *_nnz parameter is given then the *_nz parameter is ignored 2435 2436 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2437 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2438 storage requirements for this matrix. 2439 2440 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2441 processor than it must be used on all processors that share the object for 2442 that argument. 2443 2444 The user MUST specify either the local or global matrix dimensions 2445 (possibly both). 2446 2447 The parallel matrix is partitioned such that the first m0 rows belong to 2448 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2449 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2450 2451 The DIAGONAL portion of the local submatrix of a processor can be defined 2452 as the submatrix which is obtained by extraction the part corresponding 2453 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2454 first row that belongs to the processor, and r2 is the last row belonging 2455 to the this processor. This is a square mxm matrix. The remaining portion 2456 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2457 2458 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2459 2460 When calling this routine with a single process communicator, a matrix of 2461 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 2462 type of communicator, use the construction mechanism: 2463 MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); 2464 2465 By default, this format uses inodes (identical nodes) when possible. 2466 We search for consecutive rows with the same nonzero structure, thereby 2467 reusing matrix information to achieve increased efficiency. 2468 2469 Options Database Keys: 2470 + -mat_no_inode - Do not use inodes 2471 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 2472 - -mat_aij_oneindex - Internally use indexing starting at 1 2473 rather than 0. Note that when calling MatSetValues(), 2474 the user still MUST index entries starting at 0! 2475 2476 2477 Example usage: 2478 2479 Consider the following 8x8 matrix with 34 non-zero values, that is 2480 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2481 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2482 as follows: 2483 2484 .vb 2485 1 2 0 | 0 3 0 | 0 4 2486 Proc0 0 5 6 | 7 0 0 | 8 0 2487 9 0 10 | 11 0 0 | 12 0 2488 ------------------------------------- 2489 13 0 14 | 15 16 17 | 0 0 2490 Proc1 0 18 0 | 19 20 21 | 0 0 2491 0 0 0 | 22 23 0 | 24 0 2492 ------------------------------------- 2493 Proc2 25 26 27 | 0 0 28 | 29 0 2494 30 0 0 | 31 32 33 | 0 34 2495 .ve 2496 2497 This can be represented as a collection of submatrices as: 2498 2499 .vb 2500 A B C 2501 D E F 2502 G H I 2503 .ve 2504 2505 Where the submatrices A,B,C are owned by proc0, D,E,F are 2506 owned by proc1, G,H,I are owned by proc2. 2507 2508 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2509 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2510 The 'M','N' parameters are 8,8, and have the same values on all procs. 2511 2512 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2513 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2514 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2515 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2516 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2517 matrix, ans [DF] as another SeqAIJ matrix. 2518 2519 When d_nz, o_nz parameters are specified, d_nz storage elements are 2520 allocated for every row of the local diagonal submatrix, and o_nz 2521 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2522 One way to choose d_nz and o_nz is to use the max nonzerors per local 2523 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2524 In this case, the values of d_nz,o_nz are: 2525 .vb 2526 proc0 : dnz = 2, o_nz = 2 2527 proc1 : dnz = 3, o_nz = 2 2528 proc2 : dnz = 1, o_nz = 4 2529 .ve 2530 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2531 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2532 for proc3. i.e we are using 12+15+10=37 storage locations to store 2533 34 values. 2534 2535 When d_nnz, o_nnz parameters are specified, the storage is specified 2536 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2537 In the above case the values for d_nnz,o_nnz are: 2538 .vb 2539 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2540 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2541 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2542 .ve 2543 Here the space allocated is sum of all the above values i.e 34, and 2544 hence pre-allocation is perfect. 2545 2546 Level: intermediate 2547 2548 .keywords: matrix, aij, compressed row, sparse, parallel 2549 2550 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2551 MPIAIJ 2552 @*/ 2553 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2554 { 2555 PetscErrorCode ierr; 2556 PetscMPIInt size; 2557 2558 PetscFunctionBegin; 2559 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2560 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2561 if (size > 1) { 2562 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2563 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2564 } else { 2565 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2566 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2567 } 2568 PetscFunctionReturn(0); 2569 } 2570 2571 #undef __FUNCT__ 2572 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2573 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2574 { 2575 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2576 2577 PetscFunctionBegin; 2578 *Ad = a->A; 2579 *Ao = a->B; 2580 *colmap = a->garray; 2581 PetscFunctionReturn(0); 2582 } 2583 2584 #undef __FUNCT__ 2585 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2586 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2587 { 2588 PetscErrorCode ierr; 2589 PetscInt i; 2590 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2591 2592 PetscFunctionBegin; 2593 if (coloring->ctype == IS_COLORING_LOCAL) { 2594 ISColoringValue *allcolors,*colors; 2595 ISColoring ocoloring; 2596 2597 /* set coloring for diagonal portion */ 2598 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2599 2600 /* set coloring for off-diagonal portion */ 2601 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2602 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2603 for (i=0; i<a->B->n; i++) { 2604 colors[i] = allcolors[a->garray[i]]; 2605 } 2606 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2607 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2608 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2609 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2610 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2611 ISColoringValue *colors; 2612 PetscInt *larray; 2613 ISColoring ocoloring; 2614 2615 /* set coloring for diagonal portion */ 2616 ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2617 for (i=0; i<a->A->n; i++) { 2618 larray[i] = i + a->cstart; 2619 } 2620 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2621 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2622 for (i=0; i<a->A->n; i++) { 2623 colors[i] = coloring->colors[larray[i]]; 2624 } 2625 ierr = PetscFree(larray);CHKERRQ(ierr); 2626 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2627 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2628 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2629 2630 /* set coloring for off-diagonal portion */ 2631 ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2632 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2633 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2634 for (i=0; i<a->B->n; i++) { 2635 colors[i] = coloring->colors[larray[i]]; 2636 } 2637 ierr = PetscFree(larray);CHKERRQ(ierr); 2638 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2639 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2640 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2641 } else { 2642 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 2643 } 2644 2645 PetscFunctionReturn(0); 2646 } 2647 2648 #if defined(PETSC_HAVE_ADIC) 2649 #undef __FUNCT__ 2650 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2651 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2652 { 2653 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2654 PetscErrorCode ierr; 2655 2656 PetscFunctionBegin; 2657 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2658 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2659 PetscFunctionReturn(0); 2660 } 2661 #endif 2662 2663 #undef __FUNCT__ 2664 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2665 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 2666 { 2667 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2668 PetscErrorCode ierr; 2669 2670 PetscFunctionBegin; 2671 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2672 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2673 PetscFunctionReturn(0); 2674 } 2675 2676 #undef __FUNCT__ 2677 #define __FUNCT__ "MatMerge" 2678 /*@C 2679 MatMerge - Creates a single large PETSc matrix by concatinating sequential 2680 matrices from each processor 2681 2682 Collective on MPI_Comm 2683 2684 Input Parameters: 2685 + comm - the communicators the parallel matrix will live on 2686 . inmat - the input sequential matrices 2687 . n - number of local columns (or PETSC_DECIDE) 2688 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2689 2690 Output Parameter: 2691 . outmat - the parallel matrix generated 2692 2693 Level: advanced 2694 2695 Notes: The number of columns of the matrix in EACH processor MUST be the same. 2696 2697 @*/ 2698 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2699 { 2700 PetscErrorCode ierr; 2701 PetscInt m,N,i,rstart,nnz,I,*dnz,*onz; 2702 PetscInt *indx; 2703 PetscScalar *values; 2704 PetscMap columnmap,rowmap; 2705 2706 PetscFunctionBegin; 2707 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2708 /* 2709 PetscMPIInt rank; 2710 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2711 ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N); 2712 */ 2713 if (scall == MAT_INITIAL_MATRIX){ 2714 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2715 if (n == PETSC_DECIDE){ 2716 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2717 ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr); 2718 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2719 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2720 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2721 } 2722 2723 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2724 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2725 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2726 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2727 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2728 2729 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2730 for (i=0;i<m;i++) { 2731 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2732 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2733 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2734 } 2735 /* This routine will ONLY return MPIAIJ type matrix */ 2736 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr); 2737 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2738 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2739 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2740 2741 } else if (scall == MAT_REUSE_MATRIX){ 2742 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 2743 } else { 2744 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 2745 } 2746 2747 for (i=0;i<m;i++) { 2748 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2749 I = i + rstart; 2750 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2751 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2752 } 2753 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2754 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2755 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2756 2757 PetscFunctionReturn(0); 2758 } 2759 2760 #undef __FUNCT__ 2761 #define __FUNCT__ "MatFileSplit" 2762 PetscErrorCode MatFileSplit(Mat A,char *outfile) 2763 { 2764 PetscErrorCode ierr; 2765 PetscMPIInt rank; 2766 PetscInt m,N,i,rstart,nnz; 2767 size_t len; 2768 const PetscInt *indx; 2769 PetscViewer out; 2770 char *name; 2771 Mat B; 2772 const PetscScalar *values; 2773 2774 PetscFunctionBegin; 2775 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2776 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2777 /* Should this be the type of the diagonal block of A? */ 2778 ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr); 2779 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2780 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 2781 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2782 for (i=0;i<m;i++) { 2783 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2784 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2785 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2786 } 2787 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2788 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2789 2790 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2791 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2792 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2793 sprintf(name,"%s.%d",outfile,rank); 2794 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr); 2795 ierr = PetscFree(name); 2796 ierr = MatView(B,out);CHKERRQ(ierr); 2797 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2798 ierr = MatDestroy(B);CHKERRQ(ierr); 2799 PetscFunctionReturn(0); 2800 } 2801 2802 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 2803 #undef __FUNCT__ 2804 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 2805 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 2806 { 2807 PetscErrorCode ierr; 2808 Mat_Merge_SeqsToMPI *merge; 2809 PetscObjectContainer container; 2810 2811 PetscFunctionBegin; 2812 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2813 if (container) { 2814 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2815 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 2816 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 2817 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 2818 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 2819 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 2820 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 2821 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 2822 ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr); 2823 if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);} 2824 if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);} 2825 if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);} 2826 2827 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 2828 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 2829 } 2830 ierr = PetscFree(merge);CHKERRQ(ierr); 2831 2832 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 2833 PetscFunctionReturn(0); 2834 } 2835 2836 #include "src/mat/utils/freespace.h" 2837 #include "petscbt.h" 2838 static PetscEvent logkey_seqstompinum = 0; 2839 #undef __FUNCT__ 2840 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 2841 /*@C 2842 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 2843 matrices from each processor 2844 2845 Collective on MPI_Comm 2846 2847 Input Parameters: 2848 + comm - the communicators the parallel matrix will live on 2849 . seqmat - the input sequential matrices 2850 . m - number of local rows (or PETSC_DECIDE) 2851 . n - number of local columns (or PETSC_DECIDE) 2852 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2853 2854 Output Parameter: 2855 . mpimat - the parallel matrix generated 2856 2857 Level: advanced 2858 2859 Notes: 2860 The dimensions of the sequential matrix in each processor MUST be the same. 2861 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 2862 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 2863 @*/ 2864 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 2865 { 2866 PetscErrorCode ierr; 2867 MPI_Comm comm=mpimat->comm; 2868 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 2869 PetscMPIInt size,rank,taga,*len_s; 2870 PetscInt N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j; 2871 PetscInt proc,m; 2872 PetscInt **buf_ri,**buf_rj; 2873 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 2874 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 2875 MPI_Request *s_waits,*r_waits; 2876 MPI_Status *status; 2877 MatScalar *aa=a->a,**abuf_r,*ba_i; 2878 Mat_Merge_SeqsToMPI *merge; 2879 PetscObjectContainer container; 2880 2881 PetscFunctionBegin; 2882 if (!logkey_seqstompinum) { 2883 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE); 2884 } 2885 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 2886 2887 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2888 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2889 2890 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2891 if (container) { 2892 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2893 } 2894 bi = merge->bi; 2895 bj = merge->bj; 2896 buf_ri = merge->buf_ri; 2897 buf_rj = merge->buf_rj; 2898 2899 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 2900 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 2901 len_s = merge->len_s; 2902 2903 /* send and recv matrix values */ 2904 /*-----------------------------*/ 2905 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr); 2906 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 2907 2908 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 2909 for (proc=0,k=0; proc<size; proc++){ 2910 if (!len_s[proc]) continue; 2911 i = owners[proc]; 2912 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 2913 k++; 2914 } 2915 2916 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 2917 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 2918 ierr = PetscFree(status);CHKERRQ(ierr); 2919 2920 ierr = PetscFree(s_waits);CHKERRQ(ierr); 2921 ierr = PetscFree(r_waits);CHKERRQ(ierr); 2922 2923 /* insert mat values of mpimat */ 2924 /*----------------------------*/ 2925 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 2926 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 2927 nextrow = buf_ri_k + merge->nrecv; 2928 nextai = nextrow + merge->nrecv; 2929 2930 for (k=0; k<merge->nrecv; k++){ 2931 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 2932 nrows = *(buf_ri_k[k]); 2933 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 2934 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 2935 } 2936 2937 /* set values of ba */ 2938 ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); 2939 for (i=0; i<m; i++) { 2940 arow = owners[rank] + i; 2941 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 2942 bnzi = bi[i+1] - bi[i]; 2943 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 2944 2945 /* add local non-zero vals of this proc's seqmat into ba */ 2946 anzi = ai[arow+1] - ai[arow]; 2947 aj = a->j + ai[arow]; 2948 aa = a->a + ai[arow]; 2949 nextaj = 0; 2950 for (j=0; nextaj<anzi; j++){ 2951 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 2952 ba_i[j] += aa[nextaj++]; 2953 } 2954 } 2955 2956 /* add received vals into ba */ 2957 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 2958 /* i-th row */ 2959 if (i == *nextrow[k]) { 2960 anzi = *(nextai[k]+1) - *nextai[k]; 2961 aj = buf_rj[k] + *(nextai[k]); 2962 aa = abuf_r[k] + *(nextai[k]); 2963 nextaj = 0; 2964 for (j=0; nextaj<anzi; j++){ 2965 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 2966 ba_i[j] += aa[nextaj++]; 2967 } 2968 } 2969 nextrow[k]++; nextai[k]++; 2970 } 2971 } 2972 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 2973 } 2974 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2975 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2976 2977 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 2978 ierr = PetscFree(ba_i);CHKERRQ(ierr); 2979 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 2980 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 2981 PetscFunctionReturn(0); 2982 } 2983 2984 static PetscEvent logkey_seqstompisym = 0; 2985 #undef __FUNCT__ 2986 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 2987 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 2988 { 2989 PetscErrorCode ierr; 2990 Mat B_mpi; 2991 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 2992 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 2993 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 2994 PetscInt M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j; 2995 PetscInt len,proc,*dnz,*onz; 2996 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 2997 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 2998 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 2999 MPI_Status *status; 3000 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3001 PetscBT lnkbt; 3002 Mat_Merge_SeqsToMPI *merge; 3003 PetscObjectContainer container; 3004 3005 PetscFunctionBegin; 3006 if (!logkey_seqstompisym) { 3007 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3008 } 3009 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3010 3011 /* make sure it is a PETSc comm */ 3012 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 3013 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3014 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3015 3016 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3017 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3018 3019 /* determine row ownership */ 3020 /*---------------------------------------------------------*/ 3021 ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr); 3022 if (m == PETSC_DECIDE) { 3023 ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr); 3024 } else { 3025 ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 3026 } 3027 ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr); 3028 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3029 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3030 3031 if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); } 3032 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 3033 3034 /* determine the number of messages to send, their lengths */ 3035 /*---------------------------------------------------------*/ 3036 len_s = merge->len_s; 3037 3038 len = 0; /* length of buf_si[] */ 3039 merge->nsend = 0; 3040 for (proc=0; proc<size; proc++){ 3041 len_si[proc] = 0; 3042 if (proc == rank){ 3043 len_s[proc] = 0; 3044 } else { 3045 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3046 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3047 } 3048 if (len_s[proc]) { 3049 merge->nsend++; 3050 nrows = 0; 3051 for (i=owners[proc]; i<owners[proc+1]; i++){ 3052 if (ai[i+1] > ai[i]) nrows++; 3053 } 3054 len_si[proc] = 2*(nrows+1); 3055 len += len_si[proc]; 3056 } 3057 } 3058 3059 /* determine the number and length of messages to receive for ij-structure */ 3060 /*-------------------------------------------------------------------------*/ 3061 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3062 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3063 3064 /* post the Irecv of j-structure */ 3065 /*-------------------------------*/ 3066 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr); 3067 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3068 3069 /* post the Isend of j-structure */ 3070 /*--------------------------------*/ 3071 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3072 sj_waits = si_waits + merge->nsend; 3073 3074 for (proc=0, k=0; proc<size; proc++){ 3075 if (!len_s[proc]) continue; 3076 i = owners[proc]; 3077 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3078 k++; 3079 } 3080 3081 /* receives and sends of j-structure are complete */ 3082 /*------------------------------------------------*/ 3083 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 3084 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 3085 3086 /* send and recv i-structure */ 3087 /*---------------------------*/ 3088 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr); 3089 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3090 3091 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3092 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3093 for (proc=0,k=0; proc<size; proc++){ 3094 if (!len_s[proc]) continue; 3095 /* form outgoing message for i-structure: 3096 buf_si[0]: nrows to be sent 3097 [1:nrows]: row index (global) 3098 [nrows+1:2*nrows+1]: i-structure index 3099 */ 3100 /*-------------------------------------------*/ 3101 nrows = len_si[proc]/2 - 1; 3102 buf_si_i = buf_si + nrows+1; 3103 buf_si[0] = nrows; 3104 buf_si_i[0] = 0; 3105 nrows = 0; 3106 for (i=owners[proc]; i<owners[proc+1]; i++){ 3107 anzi = ai[i+1] - ai[i]; 3108 if (anzi) { 3109 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3110 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3111 nrows++; 3112 } 3113 } 3114 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3115 k++; 3116 buf_si += len_si[proc]; 3117 } 3118 3119 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 3120 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 3121 3122 ierr = PetscLogInfo(((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv));CHKERRQ(ierr); 3123 for (i=0; i<merge->nrecv; i++){ 3124 ierr = PetscLogInfo(((PetscObject)(seqmat),"MatMerge_SeqsToMPI: recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]));CHKERRQ(ierr); 3125 } 3126 3127 ierr = PetscFree(len_si);CHKERRQ(ierr); 3128 ierr = PetscFree(len_ri);CHKERRQ(ierr); 3129 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 3130 ierr = PetscFree(si_waits);CHKERRQ(ierr); 3131 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 3132 ierr = PetscFree(buf_s);CHKERRQ(ierr); 3133 ierr = PetscFree(status);CHKERRQ(ierr); 3134 3135 /* compute a local seq matrix in each processor */ 3136 /*----------------------------------------------*/ 3137 /* allocate bi array and free space for accumulating nonzero column info */ 3138 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3139 bi[0] = 0; 3140 3141 /* create and initialize a linked list */ 3142 nlnk = N+1; 3143 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3144 3145 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 3146 len = 0; 3147 len = ai[owners[rank+1]] - ai[owners[rank]]; 3148 ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 3149 current_space = free_space; 3150 3151 /* determine symbolic info for each local row */ 3152 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3153 nextrow = buf_ri_k + merge->nrecv; 3154 nextai = nextrow + merge->nrecv; 3155 for (k=0; k<merge->nrecv; k++){ 3156 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3157 nrows = *buf_ri_k[k]; 3158 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 3159 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3160 } 3161 3162 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3163 len = 0; 3164 for (i=0;i<m;i++) { 3165 bnzi = 0; 3166 /* add local non-zero cols of this proc's seqmat into lnk */ 3167 arow = owners[rank] + i; 3168 anzi = ai[arow+1] - ai[arow]; 3169 aj = a->j + ai[arow]; 3170 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3171 bnzi += nlnk; 3172 /* add received col data into lnk */ 3173 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3174 if (i == *nextrow[k]) { /* i-th row */ 3175 anzi = *(nextai[k]+1) - *nextai[k]; 3176 aj = buf_rj[k] + *nextai[k]; 3177 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3178 bnzi += nlnk; 3179 nextrow[k]++; nextai[k]++; 3180 } 3181 } 3182 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 3183 3184 /* if free space is not available, make more free space */ 3185 if (current_space->local_remaining<bnzi) { 3186 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 3187 nspacedouble++; 3188 } 3189 /* copy data into free space, then initialize lnk */ 3190 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3191 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 3192 3193 current_space->array += bnzi; 3194 current_space->local_used += bnzi; 3195 current_space->local_remaining -= bnzi; 3196 3197 bi[i+1] = bi[i] + bnzi; 3198 } 3199 3200 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3201 3202 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3203 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3204 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3205 3206 /* create symbolic parallel matrix B_mpi */ 3207 /*---------------------------------------*/ 3208 if (n==PETSC_DECIDE) { 3209 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,N,&B_mpi);CHKERRQ(ierr); 3210 } else { 3211 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr); 3212 } 3213 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 3214 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 3215 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3216 3217 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 3218 B_mpi->assembled = PETSC_FALSE; 3219 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3220 merge->bi = bi; 3221 merge->bj = bj; 3222 merge->buf_ri = buf_ri; 3223 merge->buf_rj = buf_rj; 3224 merge->coi = PETSC_NULL; 3225 merge->coj = PETSC_NULL; 3226 merge->owners_co = PETSC_NULL; 3227 3228 /* attach the supporting struct to B_mpi for reuse */ 3229 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 3230 ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr); 3231 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 3232 *mpimat = B_mpi; 3233 3234 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 3235 ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3236 PetscFunctionReturn(0); 3237 } 3238 3239 static PetscEvent logkey_seqstompi = 0; 3240 #undef __FUNCT__ 3241 #define __FUNCT__ "MatMerge_SeqsToMPI" 3242 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 3243 { 3244 PetscErrorCode ierr; 3245 3246 PetscFunctionBegin; 3247 if (!logkey_seqstompi) { 3248 ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE); 3249 } 3250 ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3251 if (scall == MAT_INITIAL_MATRIX){ 3252 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 3253 } 3254 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 3255 ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3256 PetscFunctionReturn(0); 3257 } 3258 static PetscEvent logkey_getlocalmat = 0; 3259 #undef __FUNCT__ 3260 #define __FUNCT__ "MatGetLocalMat" 3261 /*@C 3262 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 3263 3264 Not Collective 3265 3266 Input Parameters: 3267 + A - the matrix 3268 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3269 3270 Output Parameter: 3271 . A_loc - the local sequential matrix generated 3272 3273 Level: developer 3274 3275 @*/ 3276 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 3277 { 3278 PetscErrorCode ierr; 3279 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 3280 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 3281 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 3282 PetscScalar *aa=a->a,*ba=b->a,*ca; 3283 PetscInt am=A->m,i,j,k,cstart=mpimat->cstart; 3284 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 3285 3286 PetscFunctionBegin; 3287 if (!logkey_getlocalmat) { 3288 ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE); 3289 } 3290 ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3291 if (scall == MAT_INITIAL_MATRIX){ 3292 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 3293 ci[0] = 0; 3294 for (i=0; i<am; i++){ 3295 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 3296 } 3297 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3298 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 3299 k = 0; 3300 for (i=0; i<am; i++) { 3301 ncols_o = bi[i+1] - bi[i]; 3302 ncols_d = ai[i+1] - ai[i]; 3303 /* off-diagonal portion of A */ 3304 for (jo=0; jo<ncols_o; jo++) { 3305 col = cmap[*bj]; 3306 if (col >= cstart) break; 3307 cj[k] = col; bj++; 3308 ca[k++] = *ba++; 3309 } 3310 /* diagonal portion of A */ 3311 for (j=0; j<ncols_d; j++) { 3312 cj[k] = cstart + *aj++; 3313 ca[k++] = *aa++; 3314 } 3315 /* off-diagonal portion of A */ 3316 for (j=jo; j<ncols_o; j++) { 3317 cj[k] = cmap[*bj++]; 3318 ca[k++] = *ba++; 3319 } 3320 } 3321 /* put together the new matrix */ 3322 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 3323 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3324 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3325 mat = (Mat_SeqAIJ*)(*A_loc)->data; 3326 mat->freedata = PETSC_TRUE; 3327 mat->nonew = 0; 3328 } else if (scall == MAT_REUSE_MATRIX){ 3329 mat=(Mat_SeqAIJ*)(*A_loc)->data; 3330 ci = mat->i; cj = mat->j; ca = mat->a; 3331 for (i=0; i<am; i++) { 3332 /* off-diagonal portion of A */ 3333 ncols_o = bi[i+1] - bi[i]; 3334 for (jo=0; jo<ncols_o; jo++) { 3335 col = cmap[*bj]; 3336 if (col >= cstart) break; 3337 *ca++ = *ba++; bj++; 3338 } 3339 /* diagonal portion of A */ 3340 ncols_d = ai[i+1] - ai[i]; 3341 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 3342 /* off-diagonal portion of A */ 3343 for (j=jo; j<ncols_o; j++) { 3344 *ca++ = *ba++; bj++; 3345 } 3346 } 3347 } else { 3348 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3349 } 3350 3351 ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3352 PetscFunctionReturn(0); 3353 } 3354 3355 static PetscEvent logkey_getlocalmatcondensed = 0; 3356 #undef __FUNCT__ 3357 #define __FUNCT__ "MatGetLocalMatCondensed" 3358 /*@C 3359 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 3360 3361 Not Collective 3362 3363 Input Parameters: 3364 + A - the matrix 3365 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3366 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 3367 3368 Output Parameter: 3369 . A_loc - the local sequential matrix generated 3370 3371 Level: developer 3372 3373 @*/ 3374 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 3375 { 3376 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3377 PetscErrorCode ierr; 3378 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 3379 IS isrowa,iscola; 3380 Mat *aloc; 3381 3382 PetscFunctionBegin; 3383 if (!logkey_getlocalmatcondensed) { 3384 ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE); 3385 } 3386 ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3387 if (!row){ 3388 start = a->rstart; end = a->rend; 3389 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 3390 } else { 3391 isrowa = *row; 3392 } 3393 if (!col){ 3394 start = a->cstart; 3395 cmap = a->garray; 3396 nzA = a->A->n; 3397 nzB = a->B->n; 3398 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3399 ncols = 0; 3400 for (i=0; i<nzB; i++) { 3401 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3402 else break; 3403 } 3404 imark = i; 3405 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 3406 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 3407 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 3408 ierr = PetscFree(idx);CHKERRQ(ierr); 3409 } else { 3410 iscola = *col; 3411 } 3412 if (scall != MAT_INITIAL_MATRIX){ 3413 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 3414 aloc[0] = *A_loc; 3415 } 3416 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 3417 *A_loc = aloc[0]; 3418 ierr = PetscFree(aloc);CHKERRQ(ierr); 3419 if (!row){ 3420 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 3421 } 3422 if (!col){ 3423 ierr = ISDestroy(iscola);CHKERRQ(ierr); 3424 } 3425 ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3426 PetscFunctionReturn(0); 3427 } 3428 3429 static PetscEvent logkey_GetBrowsOfAcols = 0; 3430 #undef __FUNCT__ 3431 #define __FUNCT__ "MatGetBrowsOfAcols" 3432 /*@C 3433 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 3434 3435 Collective on Mat 3436 3437 Input Parameters: 3438 + A,B - the matrices in mpiaij format 3439 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3440 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 3441 3442 Output Parameter: 3443 + rowb, colb - index sets of rows and columns of B to extract 3444 . brstart - row index of B_seq from which next B->m rows are taken from B's local rows 3445 - B_seq - the sequential matrix generated 3446 3447 Level: developer 3448 3449 @*/ 3450 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 3451 { 3452 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3453 PetscErrorCode ierr; 3454 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 3455 IS isrowb,iscolb; 3456 Mat *bseq; 3457 3458 PetscFunctionBegin; 3459 if (a->cstart != b->rstart || a->cend != b->rend){ 3460 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend); 3461 } 3462 if (!logkey_GetBrowsOfAcols) { 3463 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE); 3464 } 3465 ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3466 3467 if (scall == MAT_INITIAL_MATRIX){ 3468 start = a->cstart; 3469 cmap = a->garray; 3470 nzA = a->A->n; 3471 nzB = a->B->n; 3472 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3473 ncols = 0; 3474 for (i=0; i<nzB; i++) { /* row < local row index */ 3475 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3476 else break; 3477 } 3478 imark = i; 3479 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 3480 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 3481 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 3482 ierr = PetscFree(idx);CHKERRQ(ierr); 3483 *brstart = imark; 3484 ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr); 3485 } else { 3486 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 3487 isrowb = *rowb; iscolb = *colb; 3488 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 3489 bseq[0] = *B_seq; 3490 } 3491 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 3492 *B_seq = bseq[0]; 3493 ierr = PetscFree(bseq);CHKERRQ(ierr); 3494 if (!rowb){ 3495 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 3496 } else { 3497 *rowb = isrowb; 3498 } 3499 if (!colb){ 3500 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 3501 } else { 3502 *colb = iscolb; 3503 } 3504 ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3505 PetscFunctionReturn(0); 3506 } 3507 3508 static PetscEvent logkey_GetBrowsOfAocols = 0; 3509 #undef __FUNCT__ 3510 #define __FUNCT__ "MatGetBrowsOfAoCols" 3511 /*@C 3512 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 3513 of the OFF-DIAGONAL portion of local A 3514 3515 Collective on Mat 3516 3517 Input Parameters: 3518 + A,B - the matrices in mpiaij format 3519 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3520 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 3521 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 3522 3523 Output Parameter: 3524 + B_oth - the sequential matrix generated 3525 3526 Level: developer 3527 3528 @*/ 3529 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth) 3530 { 3531 VecScatter_MPI_General *gen_to,*gen_from; 3532 PetscErrorCode ierr; 3533 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3534 Mat_SeqAIJ *b_oth; 3535 VecScatter ctx=a->Mvctx; 3536 MPI_Comm comm=ctx->comm; 3537 PetscMPIInt *rprocs,*sprocs,tag=ctx->tag,rank; 3538 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj; 3539 PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA; 3540 PetscInt i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 3541 MPI_Request *rwaits,*swaits; 3542 MPI_Status *sstatus,rstatus; 3543 PetscInt *cols; 3544 PetscScalar *vals; 3545 PetscMPIInt j; 3546 3547 PetscFunctionBegin; 3548 if (a->cstart != b->rstart || a->cend != b->rend){ 3549 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend); 3550 } 3551 if (!logkey_GetBrowsOfAocols) { 3552 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE); 3553 } 3554 ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3555 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3556 3557 gen_to = (VecScatter_MPI_General*)ctx->todata; 3558 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 3559 rvalues = gen_from->values; /* holds the length of sending row */ 3560 svalues = gen_to->values; /* holds the length of receiving row */ 3561 nrecvs = gen_from->n; 3562 nsends = gen_to->n; 3563 rwaits = gen_from->requests; 3564 swaits = gen_to->requests; 3565 srow = gen_to->indices; /* local row index to be sent */ 3566 rstarts = gen_from->starts; 3567 sstarts = gen_to->starts; 3568 rprocs = gen_from->procs; 3569 sprocs = gen_to->procs; 3570 sstatus = gen_to->sstatus; 3571 3572 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 3573 if (scall == MAT_INITIAL_MATRIX){ 3574 /* i-array */ 3575 /*---------*/ 3576 /* post receives */ 3577 for (i=0; i<nrecvs; i++){ 3578 rowlen = (PetscInt*)rvalues + rstarts[i]; 3579 nrows = rstarts[i+1]-rstarts[i]; 3580 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3581 } 3582 3583 /* pack the outgoing message */ 3584 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 3585 rstartsj = sstartsj + nsends +1; 3586 sstartsj[0] = 0; rstartsj[0] = 0; 3587 len = 0; /* total length of j or a array to be sent */ 3588 k = 0; 3589 for (i=0; i<nsends; i++){ 3590 rowlen = (PetscInt*)svalues + sstarts[i]; 3591 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3592 for (j=0; j<nrows; j++) { 3593 row = srow[k] + b->rowners[rank]; /* global row idx */ 3594 ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 3595 len += rowlen[j]; 3596 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 3597 k++; 3598 } 3599 ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3600 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 3601 } 3602 /* recvs and sends of i-array are completed */ 3603 i = nrecvs; 3604 while (i--) { 3605 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3606 } 3607 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 3608 /* allocate buffers for sending j and a arrays */ 3609 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 3610 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 3611 3612 /* create i-array of B_oth */ 3613 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 3614 b_othi[0] = 0; 3615 len = 0; /* total length of j or a array to be received */ 3616 k = 0; 3617 for (i=0; i<nrecvs; i++){ 3618 rowlen = (PetscInt*)rvalues + rstarts[i]; 3619 nrows = rstarts[i+1]-rstarts[i]; 3620 for (j=0; j<nrows; j++) { 3621 b_othi[k+1] = b_othi[k] + rowlen[j]; 3622 len += rowlen[j]; k++; 3623 } 3624 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 3625 } 3626 3627 /* allocate space for j and a arrrays of B_oth */ 3628 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 3629 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr); 3630 3631 /* j-array */ 3632 /*---------*/ 3633 /* post receives of j-array */ 3634 for (i=0; i<nrecvs; i++){ 3635 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3636 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3637 } 3638 k = 0; 3639 for (i=0; i<nsends; i++){ 3640 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3641 bufJ = bufj+sstartsj[i]; 3642 for (j=0; j<nrows; j++) { 3643 row = srow[k++] + b->rowners[rank]; /* global row idx */ 3644 ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3645 for (l=0; l<ncols; l++){ 3646 *bufJ++ = cols[l]; 3647 } 3648 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3649 } 3650 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3651 } 3652 3653 /* recvs and sends of j-array are completed */ 3654 i = nrecvs; 3655 while (i--) { 3656 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3657 } 3658 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 3659 } else if (scall == MAT_REUSE_MATRIX){ 3660 sstartsj = *startsj; 3661 rstartsj = sstartsj + nsends +1; 3662 bufa = *bufa_ptr; 3663 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 3664 b_otha = b_oth->a; 3665 } else { 3666 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 3667 } 3668 3669 /* a-array */ 3670 /*---------*/ 3671 /* post receives of a-array */ 3672 for (i=0; i<nrecvs; i++){ 3673 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3674 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3675 } 3676 k = 0; 3677 for (i=0; i<nsends; i++){ 3678 nrows = sstarts[i+1]-sstarts[i]; 3679 bufA = bufa+sstartsj[i]; 3680 for (j=0; j<nrows; j++) { 3681 row = srow[k++] + b->rowners[rank]; /* global row idx */ 3682 ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3683 for (l=0; l<ncols; l++){ 3684 *bufA++ = vals[l]; 3685 } 3686 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3687 3688 } 3689 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3690 } 3691 /* recvs and sends of a-array are completed */ 3692 i = nrecvs; 3693 while (i--) { 3694 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3695 } 3696 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 3697 3698 if (scall == MAT_INITIAL_MATRIX){ 3699 /* put together the new matrix */ 3700 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 3701 3702 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3703 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3704 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 3705 b_oth->freedata = PETSC_TRUE; 3706 b_oth->nonew = 0; 3707 3708 ierr = PetscFree(bufj);CHKERRQ(ierr); 3709 if (!startsj || !bufa_ptr){ 3710 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 3711 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 3712 } else { 3713 *startsj = sstartsj; 3714 *bufa_ptr = bufa; 3715 } 3716 } 3717 ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3718 3719 PetscFunctionReturn(0); 3720 } 3721 3722 /*MC 3723 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 3724 3725 Options Database Keys: 3726 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 3727 3728 Level: beginner 3729 3730 .seealso: MatCreateMPIAIJ 3731 M*/ 3732 3733 EXTERN_C_BEGIN 3734 #undef __FUNCT__ 3735 #define __FUNCT__ "MatCreate_MPIAIJ" 3736 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 3737 { 3738 Mat_MPIAIJ *b; 3739 PetscErrorCode ierr; 3740 PetscInt i; 3741 PetscMPIInt size; 3742 3743 PetscFunctionBegin; 3744 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 3745 3746 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 3747 B->data = (void*)b; 3748 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3749 B->factor = 0; 3750 B->bs = 1; 3751 B->assembled = PETSC_FALSE; 3752 B->mapping = 0; 3753 3754 B->insertmode = NOT_SET_VALUES; 3755 b->size = size; 3756 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 3757 3758 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 3759 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 3760 3761 /* the information in the maps duplicates the information computed below, eventually 3762 we should remove the duplicate information that is not contained in the maps */ 3763 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 3764 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 3765 3766 /* build local table of row and column ownerships */ 3767 ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr); 3768 ierr = PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 3769 b->cowners = b->rowners + b->size + 2; 3770 ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3771 b->rowners[0] = 0; 3772 for (i=2; i<=b->size; i++) { 3773 b->rowners[i] += b->rowners[i-1]; 3774 } 3775 b->rstart = b->rowners[b->rank]; 3776 b->rend = b->rowners[b->rank+1]; 3777 ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3778 b->cowners[0] = 0; 3779 for (i=2; i<=b->size; i++) { 3780 b->cowners[i] += b->cowners[i-1]; 3781 } 3782 b->cstart = b->cowners[b->rank]; 3783 b->cend = b->cowners[b->rank+1]; 3784 3785 /* build cache for off array entries formed */ 3786 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 3787 b->donotstash = PETSC_FALSE; 3788 b->colmap = 0; 3789 b->garray = 0; 3790 b->roworiented = PETSC_TRUE; 3791 3792 /* stuff used for matrix vector multiply */ 3793 b->lvec = PETSC_NULL; 3794 b->Mvctx = PETSC_NULL; 3795 3796 /* stuff for MatGetRow() */ 3797 b->rowindices = 0; 3798 b->rowvalues = 0; 3799 b->getrowactive = PETSC_FALSE; 3800 3801 /* Explicitly create 2 MATSEQAIJ matrices. */ 3802 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 3803 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3804 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3805 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 3806 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3807 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3808 3809 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3810 "MatStoreValues_MPIAIJ", 3811 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 3812 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3813 "MatRetrieveValues_MPIAIJ", 3814 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 3815 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3816 "MatGetDiagonalBlock_MPIAIJ", 3817 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 3818 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 3819 "MatIsTranspose_MPIAIJ", 3820 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 3821 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 3822 "MatMPIAIJSetPreallocation_MPIAIJ", 3823 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 3824 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 3825 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 3826 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 3827 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3828 "MatDiagonalScaleLocal_MPIAIJ", 3829 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 3830 PetscFunctionReturn(0); 3831 } 3832 EXTERN_C_END 3833