1
2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4
5 /*MC
6 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7
8 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9 and MATMPIAIJ otherwise. As a result, for single process communicators,
10 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
11 for communicators controlling multiple processes. It is recommended that you call both of
12 the above preallocation routines for simplicity.
13
14 Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16
17 Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, and MATAIJCRL. The type also automatically switches over to use inodes when
18 enough of them exist.
19
20 Level: beginner
21
22 .seealso: MatCreateMPIAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
23 M*/
24
25 /*MC
26 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27
28 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29 and MATMPIAIJCRL otherwise. As a result, for single process communicators,
30 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31 for communicators controlling multiple processes. It is recommended that you call both of
32 the above preallocation routines for simplicity.
33
34 Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36
37 Level: beginner
38
39 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
41
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46 PetscErrorCode ierr;
47 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data;
48 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data;
49 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data;
50 const PetscInt *ia,*ib;
51 const MatScalar *aa,*bb;
52 PetscInt na,nb,i,j,*rows,cnt=0,n0rows;
53 PetscInt m = M->rmap->n,rstart = M->rmap->rstart;
54
55 PetscFunctionBegin;
56 *keptrows = 0;
57 ia = a->i;
58 ib = b->i;
59 for (i=0; i<m; i++) {
60 na = ia[i+1] - ia[i];
61 nb = ib[i+1] - ib[i];
62 if (!na && !nb) {
63 cnt++;
64 goto ok1;
65 }
66 aa = a->a + ia[i];
67 for (j=0; j<na; j++) {
68 if (aa[j] != 0.0) goto ok1;
69 }
70 bb = b->a + ib[i];
71 for (j=0; j <nb; j++) {
72 if (bb[j] != 0.0) goto ok1;
73 }
74 cnt++;
75 ok1:;
76 }
77 ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78 if (!n0rows) PetscFunctionReturn(0);
79 ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80 cnt = 0;
81 for (i=0; i<m; i++) {
82 na = ia[i+1] - ia[i];
83 nb = ib[i+1] - ib[i];
84 if (!na && !nb) continue;
85 aa = a->a + ia[i];
86 for(j=0; j<na;j++) {
87 if (aa[j] != 0.0) {
88 rows[cnt++] = rstart + i;
89 goto ok2;
90 }
91 }
92 bb = b->a + ib[i];
93 for (j=0; j<nb; j++) {
94 if (bb[j] != 0.0) {
95 rows[cnt++] = rstart + i;
96 goto ok2;
97 }
98 }
99 ok2:;
100 }
101 ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102 PetscFunctionReturn(0);
103 }
104
105 #undef __FUNCT__
106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
108 {
109 PetscErrorCode ierr;
110 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data;
111 PetscInt i,n,*garray =
aij->garray; 112 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 113 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 114 PetscReal *work; 115 116 PetscFunctionBegin; 117 ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr); 118 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 119 ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr); 120 if (type == NORM_2) { 121 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 122 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 123 } 124 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 125 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 126 } 127 } else if (type == NORM_1) { 128 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 129 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 130 } 131 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 132 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 133 } 134 } else if (type == NORM_INFINITY) { 135 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 136 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 137 } 138 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 139 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 140 } 141 142 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType"); 143 if (type == NORM_INFINITY) { 144 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 145 } else { 146 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 147 } 148 ierr = PetscFree(work);CHKERRQ(ierr); 149 if (type == NORM_2) { 150 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 151 } 152 PetscFunctionReturn(0); 153 } 154 155 #undef __FUNCT__ 156 #define __FUNCT__ "MatDistribute_MPIAIJ" 157 /* 158 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 159 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 
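   A minimal calling sketch (names are illustrative only; gmat is the sequential matrix, which
   only process 0 needs to supply meaningfully, and m is this process's share of the rows):

      Mat dist;
      ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
      ... change the numerical values of gmat on process 0 ...
      ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);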
160 161 Only for square matrices 162 */ 163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 164 { 165 PetscMPIInt rank,size; 166 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 167 PetscErrorCode ierr; 168 Mat mat; 169 Mat_SeqAIJ *gmata; 170 PetscMPIInt tag; 171 MPI_Status status; 172 PetscBool aij; 173 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 174 175 PetscFunctionBegin; 176 CHKMEMQ; 177 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 178 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 179 if (!rank) { 180 ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 181 if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 182 } 183 if (reuse == MAT_INITIAL_MATRIX) { 184 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 185 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 186 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 187 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 188 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 189 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 190 rowners[0] = 0; 191 for (i=2; i<=size; i++) { 192 rowners[i] += rowners[i-1]; 193 } 194 rstart = rowners[rank]; 195 rend = rowners[rank+1]; 196 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 197 if (!rank) { 198 gmata = (Mat_SeqAIJ*) gmat->data; 199 /* send row lengths to all processors */ 200 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 201 for (i=1; i<size; i++) { 202 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 203 } 204 /* determine number diagonal and off-diagonal counts */ 205 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 206 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 207 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 208 jj = 0; 209 for (i=0; i<m; i++) { 210 for (j=0; j<dlens[i]; j++) { 211 if (gmata->j[jj] < rstart) ld[i]++; 212 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 213 jj++; 214 } 215 } 216 /* send column indices to other processes */ 217 for (i=1; i<size; i++) { 218 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 219 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 220 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 221 } 222 223 /* send numerical values to other processes */ 224 for (i=1; i<size; i++) { 225 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 226 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 227 } 228 gmataa = gmata->a; 229 gmataj = gmata->j; 230 231 } else { 232 /* receive row lengths */ 233 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 234 /* receive column indices */ 235 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 236 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 237 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 238 /* determine number diagonal and off-diagonal counts */ 239 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 240 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 241 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 242 jj = 0; 243 for (i=0; i<m; i++) { 244 for (j=0; j<dlens[i]; j++) { 245 if (gmataj[jj] < rstart) ld[i]++; 246 if (gmataj[jj] < 
rstart || gmataj[jj] >= rend) olens[i]++;
247 jj++;
248 }
249 }
250 /* receive numerical values */
251 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
252 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
253 }
254 /* set preallocation */
255 for (i=0; i<m; i++) {
256 dlens[i] -= olens[i];
257 }
258 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
259 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
260
261 for (i=0; i<m; i++) {
262 dlens[i] += olens[i];
263 }
264 cnt = 0;
265 for (i=0; i<m; i++) {
266 row = rstart + i;
267 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
268 cnt += dlens[i];
269 }
270 if (rank) {
271 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
272 }
273 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
274 ierr = PetscFree(rowners);CHKERRQ(ierr);
275 ((Mat_MPIAIJ*)(mat->data))->ld = ld;
276 *inmat = mat;
277 } else { /* column indices are already set; only need to move over numerical values from process 0 */
278 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
279 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
280 mat = *inmat;
281 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
282 if (!rank) {
283 /* send numerical values to other processes */
284 gmata = (Mat_SeqAIJ*) gmat->data;
285 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
286 gmataa = gmata->a;
287 for (i=1; i<size; i++) {
288 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
289 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
290 }
291 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
292 } else {
293 /* receive numerical values from process 0*/
294 nz = Ad->nz + Ao->nz;
295 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
296 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
297 }
298 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
299 ld = ((Mat_MPIAIJ*)(mat->data))->ld;
300 ad = Ad->a;
301 ao = Ao->a;
302 if (mat->rmap->n) {
303 i = 0;
304 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
305 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
306 }
307 for (i=1; i<mat->rmap->n; i++) {
308 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
309 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
310 }
311 i--;
312 if (mat->rmap->n) {
313 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
314 }
315 if (rank) {
316 ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
317 }
318 }
319 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
320 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
321 CHKMEMQ;
322 PetscFunctionReturn(0);
323 }
324
325 /*
326 Local utility routine that creates a mapping from the global column
327 number to the local number in the off-diagonal part of the local
328 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
329 a slightly higher hash table cost; without it, it is not scalable (each processor
330 has an order N integer array, but access is fast).
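   For example, MatSetValues_MPIAIJ() below translates a global column number gcol into the
   local off-diagonal column index lcol roughly as follows (a sketch; gcol and lcol are
   illustrative names):

   #if defined(PETSC_USE_CTABLE)
      ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr); lcol--;
   #else
      lcol = aij->colmap[gcol] - 1;
   #endif

   A result of lcol < 0 means that global column gcol is not (yet) present in the off-diagonal
   part on this process.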
331 */ 332 #undef __FUNCT__ 333 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 334 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 335 { 336 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 337 PetscErrorCode ierr; 338 PetscInt n = aij->B->cmap->n,i; 339 340 PetscFunctionBegin; 341 #if defined (PETSC_USE_CTABLE) 342 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 343 for (i=0; i<n; i++){ 344 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 345 } 346 #else 347 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 348 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 349 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 350 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 351 #endif 352 PetscFunctionReturn(0); 353 } 354 355 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 356 { \ 357 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 358 lastcol1 = col;\ 359 while (high1-low1 > 5) { \ 360 t = (low1+high1)/2; \ 361 if (rp1[t] > col) high1 = t; \ 362 else low1 = t; \ 363 } \ 364 for (_i=low1; _i<high1; _i++) { \ 365 if (rp1[_i] > col) break; \ 366 if (rp1[_i] == col) { \ 367 if (addv == ADD_VALUES) ap1[_i] += value; \ 368 else ap1[_i] = value; \ 369 goto a_noinsert; \ 370 } \ 371 } \ 372 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 373 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 374 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 375 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 376 N = nrow1++ - 1; a->nz++; high1++; \ 377 /* shift up all the later entries in this row */ \ 378 for (ii=N; ii>=_i; ii--) { \ 379 rp1[ii+1] = rp1[ii]; \ 380 ap1[ii+1] = ap1[ii]; \ 381 } \ 382 rp1[_i] = col; \ 383 ap1[_i] = value; \ 384 a_noinsert: ; \ 385 ailen[row] = nrow1; \ 386 } 387 388 389 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 390 { \ 391 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 392 lastcol2 = col;\ 393 while (high2-low2 > 5) { \ 394 t = (low2+high2)/2; \ 395 if (rp2[t] > col) high2 = t; \ 396 else low2 = t; \ 397 } \ 398 for (_i=low2; _i<high2; _i++) { \ 399 if (rp2[_i] > col) break; \ 400 if (rp2[_i] == col) { \ 401 if (addv == ADD_VALUES) ap2[_i] += value; \ 402 else ap2[_i] = value; \ 403 goto b_noinsert; \ 404 } \ 405 } \ 406 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 407 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 408 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 409 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 410 N = nrow2++ - 1; b->nz++; high2++; \ 411 /* shift up all the later entries in this row */ \ 412 for (ii=N; ii>=_i; ii--) { \ 413 rp2[ii+1] = rp2[ii]; \ 414 ap2[ii+1] = ap2[ii]; \ 415 } \ 416 rp2[_i] = col; \ 417 ap2[_i] = value; \ 418 b_noinsert: ; \ 419 bilen[row] = nrow2; \ 420 } 421 422 #undef __FUNCT__ 423 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 424 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 425 { 426 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 427 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 428 PetscErrorCode ierr; 429 PetscInt l,*garray = mat->garray,diag; 430 431 
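/* Note: v must supply the entire row, i.e. one value for every existing nonzero of the row, laid out
   in global column order: the off-diagonal (B) entries to the left of the diagonal block, then the
   diagonal-block (A) entries, then the off-diagonal (B) entries to the right. The nonzero pattern is
   not changed; the values are copied directly into the existing A and B storage. */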
PetscFunctionBegin; 432 /* code only works for square matrices A */ 433 434 /* find size of row to the left of the diagonal part */ 435 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 436 row = row - diag; 437 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 438 if (garray[b->j[b->i[row]+l]] > diag) break; 439 } 440 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 441 442 /* diagonal part */ 443 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 444 445 /* right of diagonal part */ 446 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 447 PetscFunctionReturn(0); 448 } 449 450 #undef __FUNCT__ 451 #define __FUNCT__ "MatSetValues_MPIAIJ" 452 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 453 { 454 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 455 PetscScalar value; 456 PetscErrorCode ierr; 457 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 458 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 459 PetscBool roworiented = aij->roworiented; 460 461 /* Some Variables required in the macro */ 462 Mat A = aij->A; 463 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 464 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 465 MatScalar *aa = a->a; 466 PetscBool ignorezeroentries = a->ignorezeroentries; 467 Mat B = aij->B; 468 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 469 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 470 MatScalar *ba = b->a; 471 472 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 473 PetscInt nonew = a->nonew; 474 MatScalar *ap1,*ap2; 475 476 PetscFunctionBegin; 477 if (v) PetscValidScalarPointer(v,6); 478 for (i=0; i<m; i++) { 479 if (im[i] < 0) continue; 480 #if defined(PETSC_USE_DEBUG) 481 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 482 #endif 483 if (im[i] >= rstart && im[i] < rend) { 484 row = im[i] - rstart; 485 lastcol1 = -1; 486 rp1 = aj + ai[row]; 487 ap1 = aa + ai[row]; 488 rmax1 = aimax[row]; 489 nrow1 = ailen[row]; 490 low1 = 0; 491 high1 = nrow1; 492 lastcol2 = -1; 493 rp2 = bj + bi[row]; 494 ap2 = ba + bi[row]; 495 rmax2 = bimax[row]; 496 nrow2 = bilen[row]; 497 low2 = 0; 498 high2 = nrow2; 499 500 for (j=0; j<n; j++) { 501 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 502 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 503 if (in[j] >= cstart && in[j] < cend){ 504 col = in[j] - cstart; 505 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 506 } else if (in[j] < 0) continue; 507 #if defined(PETSC_USE_DEBUG) 508 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 509 #endif 510 else { 511 if (mat->was_assembled) { 512 if (!aij->colmap) { 513 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 514 } 515 #if defined (PETSC_USE_CTABLE) 516 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 517 col--; 518 #else 519 col = aij->colmap[in[j]] - 1; 520 #endif 521 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 522 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 523 col = in[j]; 524 /* Reinitialize the variables required by 
MatSetValues_SeqAIJ_B_Private() */ 525 B = aij->B; 526 b = (Mat_SeqAIJ*)B->data; 527 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 528 rp2 = bj + bi[row]; 529 ap2 = ba + bi[row]; 530 rmax2 = bimax[row]; 531 nrow2 = bilen[row]; 532 low2 = 0; 533 high2 = nrow2; 534 bm = aij->B->rmap->n; 535 ba = b->a; 536 } 537 } else col = in[j]; 538 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 539 } 540 } 541 } else { 542 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 543 if (!aij->donotstash) { 544 if (roworiented) { 545 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 546 } else { 547 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 548 } 549 } 550 } 551 } 552 PetscFunctionReturn(0); 553 } 554 555 #undef __FUNCT__ 556 #define __FUNCT__ "MatGetValues_MPIAIJ" 557 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 558 { 559 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 560 PetscErrorCode ierr; 561 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 562 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 563 564 PetscFunctionBegin; 565 for (i=0; i<m; i++) { 566 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 567 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 568 if (idxm[i] >= rstart && idxm[i] < rend) { 569 row = idxm[i] - rstart; 570 for (j=0; j<n; j++) { 571 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 572 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 573 if (idxn[j] >= cstart && idxn[j] < cend){ 574 col = idxn[j] - cstart; 575 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 576 } else { 577 if (!aij->colmap) { 578 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 579 } 580 #if defined (PETSC_USE_CTABLE) 581 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 582 col --; 583 #else 584 col = aij->colmap[idxn[j]] - 1; 585 #endif 586 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 587 else { 588 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 589 } 590 } 591 } 592 } else { 593 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 594 } 595 } 596 PetscFunctionReturn(0); 597 } 598 599 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 600 601 #undef __FUNCT__ 602 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 603 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 604 { 605 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 606 PetscErrorCode ierr; 607 PetscInt nstash,reallocs; 608 InsertMode addv; 609 610 PetscFunctionBegin; 611 if (aij->donotstash || mat->nooffprocentries) { 612 PetscFunctionReturn(0); 613 } 614 615 /* make sure all processors are either in INSERTMODE or ADDMODE */ 616 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 617 if (addv == (ADD_VALUES|INSERT_VALUES)) 
SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
618 mat->insertmode = addv; /* in case this processor had no cache */
619
620 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
621 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
622 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
623 PetscFunctionReturn(0);
624 }
625
626 #undef __FUNCT__
627 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
628 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
629 {
630 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
631 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data;
632 PetscErrorCode ierr;
633 PetscMPIInt n;
634 PetscInt i,j,rstart,ncols,flg;
635 PetscInt *row,*col;
636 PetscBool other_disassembled;
637 PetscScalar *val;
638 InsertMode addv = mat->insertmode;
639
640 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
641 PetscFunctionBegin;
642 if (!aij->donotstash && !mat->nooffprocentries) {
643 while (1) {
644 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
645 if (!flg) break;
646
647 for (i=0; i<n;) {
648 /* Now identify the consecutive vals belonging to the same row */
649 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
650 if (j < n) ncols = j-i;
651 else ncols = n-i;
652 /* Now assemble all these values with a single function call */
653 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
654 i = j;
655 }
656 }
657 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
658 }
659 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
660 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
661
662 /* determine if any processor has disassembled, if so we must
663 also disassemble ourselves, in order that we may reassemble.
*/ 664 /* 665 if nonzero structure of submatrix B cannot change then we know that 666 no processor disassembled thus we can skip this stuff 667 */ 668 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 669 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 670 if (mat->was_assembled && !other_disassembled) { 671 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 672 } 673 } 674 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 675 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 676 } 677 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 678 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 679 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 680 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 681 682 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 683 aij->rowvalues = 0; 684 685 /* used by MatAXPY() */ 686 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 687 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 688 689 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 690 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 691 PetscFunctionReturn(0); 692 } 693 694 #undef __FUNCT__ 695 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 696 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 697 { 698 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 699 PetscErrorCode ierr; 700 701 PetscFunctionBegin; 702 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 703 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 704 PetscFunctionReturn(0); 705 } 706 707 #undef __FUNCT__ 708 #define __FUNCT__ "MatZeroRows_MPIAIJ" 709 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 710 { 711 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 712 PetscErrorCode ierr; 713 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 714 PetscInt i,*owners = A->rmap->range; 715 PetscInt *nprocs,j,idx,nsends,row; 716 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 717 PetscInt *rvalues,count,base,slen,*source; 718 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 719 MPI_Comm comm = ((PetscObject)A)->comm; 720 MPI_Request *send_waits,*recv_waits; 721 MPI_Status recv_status,*send_status; 722 const PetscScalar *xx; 723 PetscScalar *bb; 724 #if defined(PETSC_DEBUG) 725 PetscBool found = PETSC_FALSE; 726 #endif 727 728 PetscFunctionBegin; 729 /* first count number of contributors to each processor */ 730 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 731 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 732 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 733 j = 0; 734 for (i=0; i<N; i++) { 735 if (lastidx > (idx = rows[i])) j = 0; 736 lastidx = idx; 737 for (; j<size; j++) { 738 if (idx >= owners[j] && idx < owners[j+1]) { 739 nprocs[2*j]++; 740 nprocs[2*j+1] = 1; 741 owner[i] = j; 742 #if defined(PETSC_DEBUG) 743 found = PETSC_TRUE; 744 #endif 745 break; 746 } 747 } 748 #if defined(PETSC_DEBUG) 749 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 750 found = PETSC_FALSE; 751 #endif 752 } 753 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 754 755 if (A->nooffproczerorows) { 756 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 757 nrecvs = nsends; 758 nmax = N; 759 } else 
{ 760 /* inform other processors of number of messages and max length*/ 761 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 762 } 763 764 /* post receives: */ 765 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 766 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 767 for (i=0; i<nrecvs; i++) { 768 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 769 } 770 771 /* do sends: 772 1) starts[i] gives the starting index in svalues for stuff going to 773 the ith processor 774 */ 775 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 776 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 777 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 778 starts[0] = 0; 779 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 780 for (i=0; i<N; i++) { 781 svalues[starts[owner[i]]++] = rows[i]; 782 } 783 784 starts[0] = 0; 785 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 786 count = 0; 787 for (i=0; i<size; i++) { 788 if (nprocs[2*i+1]) { 789 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 790 } 791 } 792 ierr = PetscFree(starts);CHKERRQ(ierr); 793 794 base = owners[rank]; 795 796 /* wait on receives */ 797 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 798 count = nrecvs; slen = 0; 799 while (count) { 800 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 801 /* unpack receives into our local space */ 802 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 803 source[imdex] = recv_status.MPI_SOURCE; 804 lens[imdex] = n; 805 slen += n; 806 count--; 807 } 808 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 809 810 /* move the data into the send scatter */ 811 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 812 count = 0; 813 for (i=0; i<nrecvs; i++) { 814 values = rvalues + i*nmax; 815 for (j=0; j<lens[i]; j++) { 816 lrows[count++] = values[j] - base; 817 } 818 } 819 ierr = PetscFree(rvalues);CHKERRQ(ierr); 820 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 821 ierr = PetscFree(owner);CHKERRQ(ierr); 822 ierr = PetscFree(nprocs);CHKERRQ(ierr); 823 824 /* fix right hand side if needed */ 825 if (x && b) { 826 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 827 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 828 for (i=0; i<slen; i++) { 829 bb[lrows[i]] = diag*xx[lrows[i]]; 830 } 831 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 832 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 833 } 834 /* 835 Zero the required rows. If the "diagonal block" of the matrix 836 is square and the user wishes to set the diagonal we use separate 837 code so that MatSetValues() is not called for each diagonal allocating 838 new memory, thus calling lots of mallocs and slowing things down. 
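      For reference, the user-level call that exercises this path and also fixes the right-hand
      side for a known solution looks like (a sketch, with illustrative arguments):

         ierr = MatZeroRows(A,nrows,rows,1.0,x,b);CHKERRQ(ierr);

      where rows[] holds global row numbers, 1.0 is put on the diagonal of each zeroed row, and
      b is adjusted so that those rows enforce the corresponding entries of x.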
839 840 */ 841 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 842 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 843 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 844 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 845 } else if (diag != 0.0) { 846 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 847 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 848 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 849 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 850 } 851 for (i = 0; i < slen; i++) { 852 row = lrows[i] + rstart; 853 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 854 } 855 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 856 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 857 } else { 858 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 859 } 860 ierr = PetscFree(lrows);CHKERRQ(ierr); 861 862 /* wait on sends */ 863 if (nsends) { 864 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 865 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 866 ierr = PetscFree(send_status);CHKERRQ(ierr); 867 } 868 ierr = PetscFree(send_waits);CHKERRQ(ierr); 869 ierr = PetscFree(svalues);CHKERRQ(ierr); 870 PetscFunctionReturn(0); 871 } 872 873 #undef __FUNCT__ 874 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 875 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 876 { 877 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 878 PetscErrorCode ierr; 879 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 880 PetscInt i,*owners = A->rmap->range; 881 PetscInt *nprocs,j,idx,nsends; 882 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 883 PetscInt *rvalues,count,base,slen,*source; 884 PetscInt *lens,*lrows,*values,m; 885 MPI_Comm comm = ((PetscObject)A)->comm; 886 MPI_Request *send_waits,*recv_waits; 887 MPI_Status recv_status,*send_status; 888 const PetscScalar *xx; 889 PetscScalar *bb,*mask; 890 Vec xmask,lmask; 891 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 892 const PetscInt *aj, *ii,*ridx; 893 PetscScalar *aa; 894 #if defined(PETSC_DEBUG) 895 PetscBool found = PETSC_FALSE; 896 #endif 897 898 PetscFunctionBegin; 899 /* first count number of contributors to each processor */ 900 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 901 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 902 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 903 j = 0; 904 for (i=0; i<N; i++) { 905 if (lastidx > (idx = rows[i])) j = 0; 906 lastidx = idx; 907 for (; j<size; j++) { 908 if (idx >= owners[j] && idx < owners[j+1]) { 909 nprocs[2*j]++; 910 nprocs[2*j+1] = 1; 911 owner[i] = j; 912 #if defined(PETSC_DEBUG) 913 found = PETSC_TRUE; 914 #endif 915 break; 916 } 917 } 918 #if defined(PETSC_DEBUG) 919 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 920 found = PETSC_FALSE; 921 #endif 922 } 923 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 924 925 /* inform other processors of number of messages and max length*/ 926 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 927 928 /* post receives: */ 929 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 930 ierr = 
PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 931 for (i=0; i<nrecvs; i++) { 932 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 933 } 934 935 /* do sends: 936 1) starts[i] gives the starting index in svalues for stuff going to 937 the ith processor 938 */ 939 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 940 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 941 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 942 starts[0] = 0; 943 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 944 for (i=0; i<N; i++) { 945 svalues[starts[owner[i]]++] = rows[i]; 946 } 947 948 starts[0] = 0; 949 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 950 count = 0; 951 for (i=0; i<size; i++) { 952 if (nprocs[2*i+1]) { 953 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 954 } 955 } 956 ierr = PetscFree(starts);CHKERRQ(ierr); 957 958 base = owners[rank]; 959 960 /* wait on receives */ 961 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 962 count = nrecvs; slen = 0; 963 while (count) { 964 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 965 /* unpack receives into our local space */ 966 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 967 source[imdex] = recv_status.MPI_SOURCE; 968 lens[imdex] = n; 969 slen += n; 970 count--; 971 } 972 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 973 974 /* move the data into the send scatter */ 975 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 976 count = 0; 977 for (i=0; i<nrecvs; i++) { 978 values = rvalues + i*nmax; 979 for (j=0; j<lens[i]; j++) { 980 lrows[count++] = values[j] - base; 981 } 982 } 983 ierr = PetscFree(rvalues);CHKERRQ(ierr); 984 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 985 ierr = PetscFree(owner);CHKERRQ(ierr); 986 ierr = PetscFree(nprocs);CHKERRQ(ierr); 987 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 988 989 /* zero diagonal part of matrix */ 990 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 991 992 /* handle off diagonal part of matrix */ 993 ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr); 994 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 995 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 996 for (i=0; i<slen; i++) { 997 bb[lrows[i]] = 1; 998 } 999 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1000 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1001 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1002 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1003 if (x) { 1004 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1005 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1006 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1007 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1008 } 1009 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1010 1011 /* remove zeroed rows of off diagonal matrix */ 1012 ii = aij->i; 1013 for (i=0; i<slen; i++) { 1014 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1015 } 1016 1017 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1018 if (aij->compressedrow.use){ 1019 m = aij->compressedrow.nrows; 1020 ii = 
aij->compressedrow.i; 1021 ridx = aij->compressedrow.rindex; 1022 for (i=0; i<m; i++){ 1023 n = ii[i+1] - ii[i]; 1024 aj = aij->j + ii[i]; 1025 aa = aij->a + ii[i]; 1026 1027 for (j=0; j<n; j++) { 1028 if (PetscAbsScalar(mask[*aj])) { 1029 if (b) bb[*ridx] -= *aa*xx[*aj]; 1030 *aa = 0.0; 1031 } 1032 aa++; 1033 aj++; 1034 } 1035 ridx++; 1036 } 1037 } else { /* do not use compressed row format */ 1038 m = l->B->rmap->n; 1039 for (i=0; i<m; i++) { 1040 n = ii[i+1] - ii[i]; 1041 aj = aij->j + ii[i]; 1042 aa = aij->a + ii[i]; 1043 for (j=0; j<n; j++) { 1044 if (PetscAbsScalar(mask[*aj])) { 1045 if (b) bb[i] -= *aa*xx[*aj]; 1046 *aa = 0.0; 1047 } 1048 aa++; 1049 aj++; 1050 } 1051 } 1052 } 1053 if (x) { 1054 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1055 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1056 } 1057 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1058 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1059 ierr = PetscFree(lrows);CHKERRQ(ierr); 1060 1061 /* wait on sends */ 1062 if (nsends) { 1063 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1064 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1065 ierr = PetscFree(send_status);CHKERRQ(ierr); 1066 } 1067 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1068 ierr = PetscFree(svalues);CHKERRQ(ierr); 1069 1070 PetscFunctionReturn(0); 1071 } 1072 1073 #undef __FUNCT__ 1074 #define __FUNCT__ "MatMult_MPIAIJ" 1075 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1076 { 1077 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1078 PetscErrorCode ierr; 1079 PetscInt nt; 1080 1081 PetscFunctionBegin; 1082 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1083 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1084 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1085 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1086 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1087 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1088 PetscFunctionReturn(0); 1089 } 1090 1091 #undef __FUNCT__ 1092 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1093 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1094 { 1095 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1096 PetscErrorCode ierr; 1097 1098 PetscFunctionBegin; 1099 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1100 PetscFunctionReturn(0); 1101 } 1102 1103 #undef __FUNCT__ 1104 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1105 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1106 { 1107 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1108 PetscErrorCode ierr; 1109 1110 PetscFunctionBegin; 1111 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1112 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1113 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1114 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1115 PetscFunctionReturn(0); 1116 } 1117 1118 #undef __FUNCT__ 1119 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1120 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1121 { 1122 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1123 PetscErrorCode ierr; 1124 PetscBool merged; 1125 1126 PetscFunctionBegin; 1127 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1128 /* do nondiagonal part */ 1129 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1130 if (!merged) { 1131 /* 
send it on its way */ 1132 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1133 /* do local part */ 1134 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1135 /* receive remote parts: note this assumes the values are not actually */ 1136 /* added in yy until the next line, */ 1137 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1138 } else { 1139 /* do local part */ 1140 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1141 /* send it on its way */ 1142 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1143 /* values actually were received in the Begin() but we need to call this nop */ 1144 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1145 } 1146 PetscFunctionReturn(0); 1147 } 1148 1149 EXTERN_C_BEGIN 1150 #undef __FUNCT__ 1151 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1152 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1153 { 1154 MPI_Comm comm; 1155 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 1156 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1157 IS Me,Notme; 1158 PetscErrorCode ierr; 1159 PetscInt M,N,first,last,*notme,i; 1160 PetscMPIInt size; 1161 1162 PetscFunctionBegin; 1163 1164 /* Easy test: symmetric diagonal block */ 1165 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 1166 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1167 if (!*f) PetscFunctionReturn(0); 1168 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1169 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1170 if (size == 1) PetscFunctionReturn(0); 1171 1172 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1173 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1174 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1175 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1176 for (i=0; i<first; i++) notme[i] = i; 1177 for (i=last; i<M; i++) notme[i-last+first] = i; 1178 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1179 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1180 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1181 Aoff = Aoffs[0]; 1182 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1183 Boff = Boffs[0]; 1184 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1185 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1186 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1187 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1188 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1189 ierr = PetscFree(notme);CHKERRQ(ierr); 1190 PetscFunctionReturn(0); 1191 } 1192 EXTERN_C_END 1193 1194 #undef __FUNCT__ 1195 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1196 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1197 { 1198 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1199 PetscErrorCode ierr; 1200 1201 PetscFunctionBegin; 1202 /* do nondiagonal part */ 1203 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1204 /* send it on its way */ 1205 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1206 /* do local part */ 1207 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1208 /* receive remote parts */ 1209 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1210 PetscFunctionReturn(0); 1211 } 
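/*
   The mult routines above all share one overlap pattern: start the scatter that gathers the
   needed off-process entries of the input vector, apply the local (diagonal-block) operator
   while the messages are in flight, finish the scatter, then apply the off-diagonal block.
   Schematically (a sketch of MatMult_MPIAIJ above, not an additional routine):

      VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
      (*a->A->ops->mult)(a->A,xx,yy);                   y  = A_diag * x       (purely local)
      VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
      (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);        y += A_off * x_ghost

   The transpose routines reverse the roles: the off-diagonal product is formed first and its
   result is scattered back (SCATTER_REVERSE) and added into the destination vector.
*/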
1212 1213 /* 1214 This only works correctly for square matrices where the subblock A->A is the 1215 diagonal block 1216 */ 1217 #undef __FUNCT__ 1218 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1219 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1220 { 1221 PetscErrorCode ierr; 1222 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1223 1224 PetscFunctionBegin; 1225 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1226 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1227 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1228 PetscFunctionReturn(0); 1229 } 1230 1231 #undef __FUNCT__ 1232 #define __FUNCT__ "MatScale_MPIAIJ" 1233 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1234 { 1235 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1236 PetscErrorCode ierr; 1237 1238 PetscFunctionBegin; 1239 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1240 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1241 PetscFunctionReturn(0); 1242 } 1243 1244 #undef __FUNCT__ 1245 #define __FUNCT__ "MatDestroy_MPIAIJ" 1246 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1247 { 1248 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1249 PetscErrorCode ierr; 1250 1251 PetscFunctionBegin; 1252 #if defined(PETSC_USE_LOG) 1253 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1254 #endif 1255 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1256 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1257 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1258 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1259 #if defined (PETSC_USE_CTABLE) 1260 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1261 #else 1262 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1263 #endif 1264 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1265 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1266 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1267 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1268 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1269 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1270 1271 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1272 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1273 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1274 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1275 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1276 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1277 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1278 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1279 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1280 PetscFunctionReturn(0); 1281 } 1282 1283 #undef __FUNCT__ 1284 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1285 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1286 { 1287 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1288 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 1289 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 1290 PetscErrorCode ierr; 1291 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1292 int 
fd;
1293 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i;
1294 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1295 PetscScalar *column_values;
1296 PetscInt message_count,flowcontrolcount;
1297
1298 PetscFunctionBegin;
1299 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1300 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1301 nz = A->nz + B->nz;
1302 if (!rank) {
1303 header[0] = MAT_FILE_CLASSID;
1304 header[1] = mat->rmap->N;
1305 header[2] = mat->cmap->N;
1306 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1307 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1308 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1309 /* get largest number of rows any processor has */
1310 rlen = mat->rmap->n;
1311 range = mat->rmap->range;
1312 for (i=1; i<size; i++) {
1313 rlen = PetscMax(rlen,range[i+1] - range[i]);
1314 }
1315 } else {
1316 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1317 rlen = mat->rmap->n;
1318 }
1319
1320 /* load up the local row counts */
1321 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1322 for (i=0; i<mat->rmap->n; i++) {
1323 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1324 }
1325
1326 /* store the row lengths to the file */
1327 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1328 if (!rank) {
1329 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1330 for (i=1; i<size; i++) {
1331 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1332 rlen = range[i+1] - range[i];
1333 ierr = MPILong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1334 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1335 }
1336 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1337 } else {
1338 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1339 ierr = MPILong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1340 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1341 }
1342 ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1343
1344 /* load up the local column indices */
1345 nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
1346 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1347 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1348 cnt = 0;
1349 for (i=0; i<mat->rmap->n; i++) {
1350 for (j=B->i[i]; j<B->i[i+1]; j++) {
1351 if ( (col = garray[B->j[j]]) > cstart) break;
1352 column_indices[cnt++] = col;
1353 }
1354 for (k=A->i[i]; k<A->i[i+1]; k++) {
1355 column_indices[cnt++] = A->j[k] + cstart;
1356 }
1357 for (; j<B->i[i+1]; j++) {
1358 column_indices[cnt++] = garray[B->j[j]];
1359 }
1360 }
1361 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1362
1363 /* store the column indices to the file */
1364 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1365 if (!rank) {
1366 MPI_Status status;
1367 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1368 for (i=1; i<size; i++) {
1369 ierr =
PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1370 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1371 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1372 ierr = MPILong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1373 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1374 } 1375 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1376 } else { 1377 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1378 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1379 ierr = MPILong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1380 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1381 } 1382 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1383 1384 /* load up the local column values */ 1385 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1386 cnt = 0; 1387 for (i=0; i<mat->rmap->n; i++) { 1388 for (j=B->i[i]; j<B->i[i+1]; j++) { 1389 if ( garray[B->j[j]] > cstart) break; 1390 column_values[cnt++] = B->a[j]; 1391 } 1392 for (k=A->i[i]; k<A->i[i+1]; k++) { 1393 column_values[cnt++] = A->a[k]; 1394 } 1395 for (; j<B->i[i+1]; j++) { 1396 column_values[cnt++] = B->a[j]; 1397 } 1398 } 1399 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1400 1401 /* store the column values to the file */ 1402 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1403 if (!rank) { 1404 MPI_Status status; 1405 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1406 for (i=1; i<size; i++) { 1407 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1408 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1409 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1410 ierr = MPILong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1411 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1412 } 1413 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1414 } else { 1415 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1416 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1417 ierr = MPILong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1418 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1419 } 1420 ierr = PetscFree(column_values);CHKERRQ(ierr); 1421 PetscFunctionReturn(0); 1422 } 1423 1424 #undef __FUNCT__ 1425 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1426 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1427 { 1428 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1429 PetscErrorCode ierr; 1430 PetscMPIInt rank = aij->rank,size = aij->size; 1431 PetscBool isdraw,iascii,isbinary; 1432 PetscViewer sviewer; 1433 PetscViewerFormat format; 1434 1435 PetscFunctionBegin; 1436 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1437 ierr = 
PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1438 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1439 if (iascii) { 1440 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1441 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1442 MatInfo info; 1443 PetscBool inodes; 1444 1445 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1446 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1447 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1448 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1449 if (!inodes) { 1450 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1451 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1452 } else { 1453 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1454 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1455 } 1456 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1457 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1458 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1459 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1460 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1461 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1462 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1463 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1464 PetscFunctionReturn(0); 1465 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1466 PetscInt inodecount,inodelimit,*inodes; 1467 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1468 if (inodes) { 1469 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1470 } else { 1471 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1472 } 1473 PetscFunctionReturn(0); 1474 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1475 PetscFunctionReturn(0); 1476 } 1477 } else if (isbinary) { 1478 if (size == 1) { 1479 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1480 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1481 } else { 1482 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1483 } 1484 PetscFunctionReturn(0); 1485 } else if (isdraw) { 1486 PetscDraw draw; 1487 PetscBool isnull; 1488 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1489 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1490 } 1491 1492 if (size == 1) { 1493 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1494 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1495 } else { 1496 /* assemble the entire matrix onto first processor. 
*/ 1497 Mat A; 1498 Mat_SeqAIJ *Aloc; 1499 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1500 MatScalar *a; 1501 1502 if (mat->rmap->N > 1024) { 1503 PetscBool flg = PETSC_FALSE; 1504 1505 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1506 if (!flg) { 1507 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1508 } 1509 } 1510 1511 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1512 if (!rank) { 1513 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1514 } else { 1515 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1516 } 1517 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1518 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1519 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1520 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1521 1522 /* copy over the A part */ 1523 Aloc = (Mat_SeqAIJ*)aij->A->data; 1524 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1525 row = mat->rmap->rstart; 1526 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1527 for (i=0; i<m; i++) { 1528 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1529 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1530 } 1531 aj = Aloc->j; 1532 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1533 1534 /* copy over the B part */ 1535 Aloc = (Mat_SeqAIJ*)aij->B->data; 1536 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1537 row = mat->rmap->rstart; 1538 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1539 ct = cols; 1540 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1541 for (i=0; i<m; i++) { 1542 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1543 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1544 } 1545 ierr = PetscFree(ct);CHKERRQ(ierr); 1546 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1547 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1548 /* 1549 Everyone has to call to draw the matrix since the graphics waits are 1550 synchronized across all processors that share the PetscDraw object 1551 */ 1552 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1553 if (!rank) { 1554 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1555 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1556 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1557 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1558 } 1559 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1560 ierr = MatDestroy(&A);CHKERRQ(ierr); 1561 } 1562 PetscFunctionReturn(0); 1563 } 1564 1565 #undef __FUNCT__ 1566 #define __FUNCT__ "MatView_MPIAIJ" 1567 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1568 { 1569 PetscErrorCode ierr; 1570 PetscBool iascii,isdraw,issocket,isbinary; 1571 1572 PetscFunctionBegin; 1573 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1574 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1575 ierr = 
PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1576 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1577 if (iascii || isdraw || isbinary || issocket) { 1578 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1579 } else { 1580 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1581 } 1582 PetscFunctionReturn(0); 1583 } 1584 1585 #undef __FUNCT__ 1586 #define __FUNCT__ "MatSOR_MPIAIJ" 1587 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1588 { 1589 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1590 PetscErrorCode ierr; 1591 Vec bb1 = 0; 1592 PetscBool hasop; 1593 1594 PetscFunctionBegin; 1595 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1596 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1597 } 1598 1599 if (flag == SOR_APPLY_UPPER) { 1600 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1601 PetscFunctionReturn(0); 1602 } 1603 1604 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1605 if (flag & SOR_ZERO_INITIAL_GUESS) { 1606 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1607 its--; 1608 } 1609 1610 while (its--) { 1611 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1612 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1613 1614 /* update rhs: bb1 = bb - B*x */ 1615 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1616 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1617 1618 /* local sweep */ 1619 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1620 } 1621 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1622 if (flag & SOR_ZERO_INITIAL_GUESS) { 1623 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1624 its--; 1625 } 1626 while (its--) { 1627 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1628 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1629 1630 /* update rhs: bb1 = bb - B*x */ 1631 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1632 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1633 1634 /* local sweep */ 1635 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1636 } 1637 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1638 if (flag & SOR_ZERO_INITIAL_GUESS) { 1639 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1640 its--; 1641 } 1642 while (its--) { 1643 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1644 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1645 1646 /* update rhs: bb1 = bb - B*x */ 1647 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1648 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1649 1650 /* local sweep */ 1651 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1652 } 1653 } else if (flag & SOR_EISENSTAT) { 1654 Vec xx1; 1655 1656 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1657 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | 
SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1658 1659 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1660 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1661 if (!mat->diag) { 1662 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1663 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1664 } 1665 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1666 if (hasop) { 1667 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1668 } else { 1669 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1670 } 1671 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1672 1673 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1674 1675 /* local sweep */ 1676 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1677 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1678 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1679 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1680 1681 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1682 PetscFunctionReturn(0); 1683 } 1684 1685 #undef __FUNCT__ 1686 #define __FUNCT__ "MatPermute_MPIAIJ" 1687 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1688 { 1689 MPI_Comm comm; 1690 PetscInt first,local_rowsize,local_colsize; 1691 const PetscInt *rows; 1692 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1693 PetscErrorCode ierr; 1694 1695 PetscFunctionBegin; 1696 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1697 /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */ 1698 ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr); 1699 /* collect the global row permutation and invert it */ 1700 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1701 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1702 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 1703 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1704 ierr = ISDestroy(&growp);CHKERRQ(ierr); 1705 /* get the local target indices */ 1706 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1707 ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr); 1708 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1709 ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 1710 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1711 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 1712 /* the column permutation is so much easier; 1713 make a local version of 'colp' and invert it */ 1714 ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr); 1715 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1716 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1717 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 1718 /* now we just get the submatrix */ 1719 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1720 /* clean up */ 1721 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 1722 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 1723 PetscFunctionReturn(0); 1724 } 1725 1726 #undef __FUNCT__ 1727 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1728 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1729 { 1730 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1731 Mat A = mat->A,B = mat->B; 1732 PetscErrorCode ierr; 1733 PetscReal isend[5],irecv[5]; 1734 
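/*
     Gather the MatInfo of the local diagonal block (A) and the local off-diagonal block (B),
     accumulate the five fields of interest in isend[], and then either report the per-process
     totals (MAT_LOCAL) or reduce them over the matrix communicator with MPIU_MAX (MAT_GLOBAL_MAX)
     or MPIU_SUM (MAT_GLOBAL_SUM).
  */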
1735 PetscFunctionBegin; 1736 info->block_size = 1.0; 1737 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1738 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1739 isend[3] = info->memory; isend[4] = info->mallocs; 1740 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1741 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1742 isend[3] += info->memory; isend[4] += info->mallocs; 1743 if (flag == MAT_LOCAL) { 1744 info->nz_used = isend[0]; 1745 info->nz_allocated = isend[1]; 1746 info->nz_unneeded = isend[2]; 1747 info->memory = isend[3]; 1748 info->mallocs = isend[4]; 1749 } else if (flag == MAT_GLOBAL_MAX) { 1750 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1751 info->nz_used = irecv[0]; 1752 info->nz_allocated = irecv[1]; 1753 info->nz_unneeded = irecv[2]; 1754 info->memory = irecv[3]; 1755 info->mallocs = irecv[4]; 1756 } else if (flag == MAT_GLOBAL_SUM) { 1757 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1758 info->nz_used = irecv[0]; 1759 info->nz_allocated = irecv[1]; 1760 info->nz_unneeded = irecv[2]; 1761 info->memory = irecv[3]; 1762 info->mallocs = irecv[4]; 1763 } 1764 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1765 info->fill_ratio_needed = 0; 1766 info->factor_mallocs = 0; 1767 1768 PetscFunctionReturn(0); 1769 } 1770 1771 #undef __FUNCT__ 1772 #define __FUNCT__ "MatSetOption_MPIAIJ" 1773 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1774 { 1775 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1776 PetscErrorCode ierr; 1777 1778 PetscFunctionBegin; 1779 switch (op) { 1780 case MAT_NEW_NONZERO_LOCATIONS: 1781 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1782 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1783 case MAT_KEEP_NONZERO_PATTERN: 1784 case MAT_NEW_NONZERO_LOCATION_ERR: 1785 case MAT_USE_INODES: 1786 case MAT_IGNORE_ZERO_ENTRIES: 1787 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1788 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1789 break; 1790 case MAT_ROW_ORIENTED: 1791 a->roworiented = flg; 1792 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1793 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1794 break; 1795 case MAT_NEW_DIAGONALS: 1796 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1797 break; 1798 case MAT_IGNORE_OFF_PROC_ENTRIES: 1799 a->donotstash = flg; 1800 break; 1801 case MAT_SPD: 1802 A->spd_set = PETSC_TRUE; 1803 A->spd = flg; 1804 if (flg) { 1805 A->symmetric = PETSC_TRUE; 1806 A->structurally_symmetric = PETSC_TRUE; 1807 A->symmetric_set = PETSC_TRUE; 1808 A->structurally_symmetric_set = PETSC_TRUE; 1809 } 1810 break; 1811 case MAT_SYMMETRIC: 1812 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1813 break; 1814 case MAT_STRUCTURALLY_SYMMETRIC: 1815 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1816 break; 1817 case MAT_HERMITIAN: 1818 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1819 break; 1820 case MAT_SYMMETRY_ETERNAL: 1821 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1822 break; 1823 default: 1824 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1825 } 1826 PetscFunctionReturn(0); 1827 } 1828 1829 #undef __FUNCT__ 1830 #define __FUNCT__ "MatGetRow_MPIAIJ" 1831 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1832 { 1833 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1834 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1835 PetscErrorCode ierr; 1836 
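/*
     A global row is assembled below by fetching the corresponding row of the local diagonal
     block (A) and of the off-diagonal block (B), then merging the two in ascending global
     column order: B entries whose global column (through garray) lies to the left of the
     diagonal block, then the A entries shifted by cstart, then the remaining B entries.
  */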
PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1837 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1838 PetscInt *cmap,*idx_p; 1839 1840 PetscFunctionBegin; 1841 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1842 mat->getrowactive = PETSC_TRUE; 1843 1844 if (!mat->rowvalues && (idx || v)) { 1845 /* 1846 allocate enough space to hold information from the longest row. 1847 */ 1848 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1849 PetscInt max = 1,tmp; 1850 for (i=0; i<matin->rmap->n; i++) { 1851 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1852 if (max < tmp) { max = tmp; } 1853 } 1854 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1855 } 1856 1857 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1858 lrow = row - rstart; 1859 1860 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1861 if (!v) {pvA = 0; pvB = 0;} 1862 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1863 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1864 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1865 nztot = nzA + nzB; 1866 1867 cmap = mat->garray; 1868 if (v || idx) { 1869 if (nztot) { 1870 /* Sort by increasing column numbers, assuming A and B already sorted */ 1871 PetscInt imark = -1; 1872 if (v) { 1873 *v = v_p = mat->rowvalues; 1874 for (i=0; i<nzB; i++) { 1875 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1876 else break; 1877 } 1878 imark = i; 1879 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1880 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1881 } 1882 if (idx) { 1883 *idx = idx_p = mat->rowindices; 1884 if (imark > -1) { 1885 for (i=0; i<imark; i++) { 1886 idx_p[i] = cmap[cworkB[i]]; 1887 } 1888 } else { 1889 for (i=0; i<nzB; i++) { 1890 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1891 else break; 1892 } 1893 imark = i; 1894 } 1895 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1896 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1897 } 1898 } else { 1899 if (idx) *idx = 0; 1900 if (v) *v = 0; 1901 } 1902 } 1903 *nz = nztot; 1904 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1905 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1906 PetscFunctionReturn(0); 1907 } 1908 1909 #undef __FUNCT__ 1910 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1911 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1912 { 1913 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1914 1915 PetscFunctionBegin; 1916 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1917 aij->getrowactive = PETSC_FALSE; 1918 PetscFunctionReturn(0); 1919 } 1920 1921 #undef __FUNCT__ 1922 #define __FUNCT__ "MatNorm_MPIAIJ" 1923 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1924 { 1925 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1926 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1927 PetscErrorCode ierr; 1928 PetscInt i,j,cstart = mat->cmap->rstart; 1929 PetscReal sum = 0.0; 1930 MatScalar *v; 1931 1932 PetscFunctionBegin; 1933 if (aij->size == 1) { 1934 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1935 } else { 1936 if (type == NORM_FROBENIUS) { 1937 v = amat->a; 1938 for (i=0; i<amat->nz; i++) { 1939 #if 
defined(PETSC_USE_COMPLEX) 1940 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1941 #else 1942 sum += (*v)*(*v); v++; 1943 #endif 1944 } 1945 v = bmat->a; 1946 for (i=0; i<bmat->nz; i++) { 1947 #if defined(PETSC_USE_COMPLEX) 1948 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1949 #else 1950 sum += (*v)*(*v); v++; 1951 #endif 1952 } 1953 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1954 *norm = PetscSqrtReal(*norm); 1955 } else if (type == NORM_1) { /* max column norm */ 1956 PetscReal *tmp,*tmp2; 1957 PetscInt *jj,*garray = aij->garray; 1958 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1959 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1960 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1961 *norm = 0.0; 1962 v = amat->a; jj = amat->j; 1963 for (j=0; j<amat->nz; j++) { 1964 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1965 } 1966 v = bmat->a; jj = bmat->j; 1967 for (j=0; j<bmat->nz; j++) { 1968 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1969 } 1970 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1971 for (j=0; j<mat->cmap->N; j++) { 1972 if (tmp2[j] > *norm) *norm = tmp2[j]; 1973 } 1974 ierr = PetscFree(tmp);CHKERRQ(ierr); 1975 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1976 } else if (type == NORM_INFINITY) { /* max row norm */ 1977 PetscReal ntemp = 0.0; 1978 for (j=0; j<aij->A->rmap->n; j++) { 1979 v = amat->a + amat->i[j]; 1980 sum = 0.0; 1981 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1982 sum += PetscAbsScalar(*v); v++; 1983 } 1984 v = bmat->a + bmat->i[j]; 1985 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1986 sum += PetscAbsScalar(*v); v++; 1987 } 1988 if (sum > ntemp) ntemp = sum; 1989 } 1990 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1991 } else { 1992 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 1993 } 1994 } 1995 PetscFunctionReturn(0); 1996 } 1997 1998 #undef __FUNCT__ 1999 #define __FUNCT__ "MatTranspose_MPIAIJ" 2000 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2001 { 2002 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2003 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2004 PetscErrorCode ierr; 2005 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 2006 PetscInt cstart=A->cmap->rstart,ncol; 2007 Mat B; 2008 MatScalar *array; 2009 2010 PetscFunctionBegin; 2011 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2012 2013 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 2014 ai = Aloc->i; aj = Aloc->j; 2015 bi = Bloc->i; bj = Bloc->j; 2016 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2017 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 2018 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2019 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 2020 for (i=0; i<ai[ma]; i++){ 2021 d_nnz[aj[i]] ++; 2022 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2023 } 2024 2025 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2026 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2027 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2028 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 2029 
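/*
     Note: aj[] currently holds global column indices (it was shifted by cstart in the loop
     above so MatSetValues() can be used directly); it is shifted back to local indices right
     after the insertion loop for the A part below.
  */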
ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2030 } else { 2031 B = *matout; 2032 } 2033 2034 /* copy over the A part */ 2035 array = Aloc->a; 2036 row = A->rmap->rstart; 2037 for (i=0; i<ma; i++) { 2038 ncol = ai[i+1]-ai[i]; 2039 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2040 row++; array += ncol; aj += ncol; 2041 } 2042 aj = Aloc->j; 2043 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2044 2045 /* copy over the B part */ 2046 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2047 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2048 array = Bloc->a; 2049 row = A->rmap->rstart; 2050 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 2051 cols_tmp = cols; 2052 for (i=0; i<mb; i++) { 2053 ncol = bi[i+1]-bi[i]; 2054 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2055 row++; array += ncol; cols_tmp += ncol; 2056 } 2057 ierr = PetscFree(cols);CHKERRQ(ierr); 2058 2059 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2060 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2061 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2062 *matout = B; 2063 } else { 2064 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2065 } 2066 PetscFunctionReturn(0); 2067 } 2068 2069 #undef __FUNCT__ 2070 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2071 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2072 { 2073 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2074 Mat a = aij->A,b = aij->B; 2075 PetscErrorCode ierr; 2076 PetscInt s1,s2,s3; 2077 2078 PetscFunctionBegin; 2079 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2080 if (rr) { 2081 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2082 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2083 /* Overlap communication with computation. 
*/ 2084 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2085 } 2086 if (ll) { 2087 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2088 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2089 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2090 } 2091 /* scale the diagonal block */ 2092 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2093 2094 if (rr) { 2095 /* Do a scatter end and then right scale the off-diagonal block */ 2096 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2097 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2098 } 2099 2100 PetscFunctionReturn(0); 2101 } 2102 2103 #undef __FUNCT__ 2104 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 2105 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 2106 { 2107 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2108 PetscErrorCode ierr; 2109 2110 PetscFunctionBegin; 2111 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 2112 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 2113 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 2114 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 2115 PetscFunctionReturn(0); 2116 } 2117 #undef __FUNCT__ 2118 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2119 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2120 { 2121 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2122 PetscErrorCode ierr; 2123 2124 PetscFunctionBegin; 2125 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2126 PetscFunctionReturn(0); 2127 } 2128 2129 #undef __FUNCT__ 2130 #define __FUNCT__ "MatEqual_MPIAIJ" 2131 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2132 { 2133 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2134 Mat a,b,c,d; 2135 PetscBool flg; 2136 PetscErrorCode ierr; 2137 2138 PetscFunctionBegin; 2139 a = matA->A; b = matA->B; 2140 c = matB->A; d = matB->B; 2141 2142 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2143 if (flg) { 2144 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2145 } 2146 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2147 PetscFunctionReturn(0); 2148 } 2149 2150 #undef __FUNCT__ 2151 #define __FUNCT__ "MatCopy_MPIAIJ" 2152 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2153 { 2154 PetscErrorCode ierr; 2155 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2156 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2157 2158 PetscFunctionBegin; 2159 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2160 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2161 /* because of the column compression in the off-processor part of the matrix a->B, 2162 the number of columns in a->B and b->B may be different, hence we cannot call 2163 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2164 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2165 then copying the submatrices */ 2166 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2167 } else { 2168 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2169 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2170 } 2171 PetscFunctionReturn(0); 2172 } 2173 2174 #undef __FUNCT__ 2175 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 2176 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 2177 { 2178 PetscErrorCode ierr; 2179 2180 PetscFunctionBegin; 2181 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2182 PetscFunctionReturn(0); 2183 } 2184 2185 #undef __FUNCT__ 2186 #define __FUNCT__ "MatAXPY_MPIAIJ" 2187 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2188 { 2189 PetscErrorCode ierr; 2190 PetscInt i; 2191 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2192 PetscBLASInt bnz,one=1; 2193 Mat_SeqAIJ *x,*y; 2194 2195 PetscFunctionBegin; 2196 if (str == SAME_NONZERO_PATTERN) { 2197 PetscScalar alpha = a; 2198 x = (Mat_SeqAIJ *)xx->A->data; 2199 y = (Mat_SeqAIJ *)yy->A->data; 2200 bnz = PetscBLASIntCast(x->nz); 2201 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2202 x = (Mat_SeqAIJ *)xx->B->data; 2203 y = (Mat_SeqAIJ *)yy->B->data; 2204 bnz = PetscBLASIntCast(x->nz); 2205 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2206 } else if (str == SUBSET_NONZERO_PATTERN) { 2207 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2208 2209 x = (Mat_SeqAIJ *)xx->B->data; 2210 y = (Mat_SeqAIJ *)yy->B->data; 2211 if (y->xtoy && y->XtoY != xx->B) { 2212 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2213 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2214 } 2215 if (!y->xtoy) { /* get xtoy */ 2216 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2217 y->XtoY = xx->B; 2218 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2219 } 2220 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2221 } else { 2222 Mat B; 2223 PetscInt *nnz_d,*nnz_o; 2224 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2225 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2226 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2227 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2228 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2229 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2230 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2231 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr); 2232 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2233 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2234 ierr = MatHeaderReplace(Y,B); 2235 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2236 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2237 } 2238 PetscFunctionReturn(0); 2239 } 2240 2241 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2242 2243 #undef __FUNCT__ 2244 #define __FUNCT__ "MatConjugate_MPIAIJ" 2245 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2246 { 2247 #if defined(PETSC_USE_COMPLEX) 2248 PetscErrorCode ierr; 2249 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2250 2251 PetscFunctionBegin; 2252 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2253 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2254 #else 2255 PetscFunctionBegin; 2256 #endif 2257 PetscFunctionReturn(0); 2258 } 2259 2260 
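/*
   The conjugate/real/imaginary routines here (MatConjugate_MPIAIJ above, MatRealPart_MPIAIJ and
   MatImaginaryPart_MPIAIJ below) all follow the same pattern: forward the operation to the
   sequential diagonal block aij->A and off-diagonal block aij->B. A minimal usage sketch through
   the type-independent interface, assuming an assembled MATMPIAIJ matrix A created elsewhere:

       ierr = MatConjugate(A);CHKERRQ(ierr);       conjugate every stored entry (complex builds)
       ierr = MatRealPart(A);CHKERRQ(ierr);        keep only the real part, in place
       ierr = MatImaginaryPart(A);CHKERRQ(ierr);   keep only the imaginary part, in place

   Each call dispatches through the MatOps_Values table later in this file to the corresponding
   MPIAIJ routine.
*/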
#undef __FUNCT__ 2261 #define __FUNCT__ "MatRealPart_MPIAIJ" 2262 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2263 { 2264 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2265 PetscErrorCode ierr; 2266 2267 PetscFunctionBegin; 2268 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2269 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2270 PetscFunctionReturn(0); 2271 } 2272 2273 #undef __FUNCT__ 2274 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2275 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2276 { 2277 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2278 PetscErrorCode ierr; 2279 2280 PetscFunctionBegin; 2281 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2282 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2283 PetscFunctionReturn(0); 2284 } 2285 2286 #ifdef PETSC_HAVE_PBGL 2287 2288 #include <boost/parallel/mpi/bsp_process_group.hpp> 2289 #include <boost/graph/distributed/ilu_default_graph.hpp> 2290 #include <boost/graph/distributed/ilu_0_block.hpp> 2291 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2292 #include <boost/graph/distributed/petsc/interface.hpp> 2293 #include <boost/multi_array.hpp> 2294 #include <boost/parallel/distributed_property_map.hpp> 2295 2296 #undef __FUNCT__ 2297 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2298 /* 2299 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2300 */ 2301 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2302 { 2303 namespace petsc = boost::distributed::petsc; 2304 2305 namespace graph_dist = boost::graph::distributed; 2306 using boost::graph::distributed::ilu_default::process_group_type; 2307 using boost::graph::ilu_permuted; 2308 2309 PetscBool row_identity, col_identity; 2310 PetscContainer c; 2311 PetscInt m, n, M, N; 2312 PetscErrorCode ierr; 2313 2314 PetscFunctionBegin; 2315 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2316 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2317 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2318 if (!row_identity || !col_identity) { 2319 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2320 } 2321 2322 process_group_type pg; 2323 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2324 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2325 lgraph_type& level_graph = *lgraph_p; 2326 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2327 2328 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2329 ilu_permuted(level_graph); 2330 2331 /* put together the new matrix */ 2332 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2333 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2334 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2335 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2336 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2337 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2338 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2339 2340 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr); 2341 ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr); 2342 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr); 2343 ierr = PetscContainerDestroy(&c);CHKERRQ(ierr); 2344 PetscFunctionReturn(0); 2345 } 2346 2347 #undef __FUNCT__ 2348 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2349 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const
MatFactorInfo *info) 2350 { 2351 PetscFunctionBegin; 2352 PetscFunctionReturn(0); 2353 } 2354 2355 #undef __FUNCT__ 2356 #define __FUNCT__ "MatSolve_MPIAIJ" 2357 /* 2358 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2359 */ 2360 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2361 { 2362 namespace graph_dist = boost::graph::distributed; 2363 2364 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2365 lgraph_type* lgraph_p; 2366 PetscContainer c; 2367 PetscErrorCode ierr; 2368 2369 PetscFunctionBegin; 2370 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2371 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2372 ierr = VecCopy(b, x);CHKERRQ(ierr); 2373 2374 PetscScalar* array_x; 2375 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2376 PetscInt sx; 2377 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2378 2379 PetscScalar* array_b; 2380 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2381 PetscInt sb; 2382 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2383 2384 lgraph_type& level_graph = *lgraph_p; 2385 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2386 2387 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2388 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2389 ref_x(array_x, boost::extents[num_vertices(graph)]); 2390 2391 typedef boost::iterator_property_map<array_ref_type::iterator, 2392 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2393 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2394 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2395 2396 ilu_set_solve(*lgraph_p, vector_b, vector_x); ierr = VecRestoreArray(x, &array_x);CHKERRQ(ierr); ierr = VecRestoreArray(b, &array_b);CHKERRQ(ierr); 2397 2398 PetscFunctionReturn(0); 2399 } 2400 #endif 2401 2402 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2403 PetscInt nzlocal,nsends,nrecvs; 2404 PetscMPIInt *send_rank,*recv_rank; 2405 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2406 PetscScalar *sbuf_a,**rbuf_a; 2407 PetscErrorCode (*Destroy)(Mat); 2408 } Mat_Redundant; 2409 2410 #undef __FUNCT__ 2411 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2412 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2413 { 2414 PetscErrorCode ierr; 2415 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2416 PetscInt i; 2417 2418 PetscFunctionBegin; 2419 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2420 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2421 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2422 for (i=0; i<redund->nrecvs; i++){ 2423 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2424 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2425 } 2426 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2427 ierr = PetscFree(redund);CHKERRQ(ierr); 2428 PetscFunctionReturn(0); 2429 } 2430 2431 #undef __FUNCT__ 2432 #define __FUNCT__ "MatDestroy_MatRedundant" 2433 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2434 { 2435 PetscErrorCode ierr; 2436 PetscContainer container; 2437 Mat_Redundant *redund=PETSC_NULL; 2438 2439 PetscFunctionBegin; 2440 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2441 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist"); 2442 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2443 A->ops->destroy = redund->Destroy; 2444 ierr =
PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2445 if (A->ops->destroy) { 2446 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2447 } 2448 PetscFunctionReturn(0); 2449 } 2450 2451 #undef __FUNCT__ 2452 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2453 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2454 { 2455 PetscMPIInt rank,size; 2456 MPI_Comm comm=((PetscObject)mat)->comm; 2457 PetscErrorCode ierr; 2458 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2459 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2460 PetscInt *rowrange=mat->rmap->range; 2461 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2462 Mat A=aij->A,B=aij->B,C=*matredundant; 2463 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2464 PetscScalar *sbuf_a; 2465 PetscInt nzlocal=a->nz+b->nz; 2466 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2467 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2468 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2469 MatScalar *aworkA,*aworkB; 2470 PetscScalar *vals; 2471 PetscMPIInt tag1,tag2,tag3,imdex; 2472 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2473 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2474 MPI_Status recv_status,*send_status; 2475 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2476 PetscInt **rbuf_j=PETSC_NULL; 2477 PetscScalar **rbuf_a=PETSC_NULL; 2478 Mat_Redundant *redund=PETSC_NULL; 2479 PetscContainer container; 2480 2481 PetscFunctionBegin; 2482 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2483 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2484 2485 if (reuse == MAT_REUSE_MATRIX) { 2486 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2487 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2488 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2489 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2490 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2491 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist"); 2492 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2493 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix.
Wrong nzlocal"); 2494 2495 nsends = redund->nsends; 2496 nrecvs = redund->nrecvs; 2497 send_rank = redund->send_rank; 2498 recv_rank = redund->recv_rank; 2499 sbuf_nz = redund->sbuf_nz; 2500 rbuf_nz = redund->rbuf_nz; 2501 sbuf_j = redund->sbuf_j; 2502 sbuf_a = redund->sbuf_a; 2503 rbuf_j = redund->rbuf_j; 2504 rbuf_a = redund->rbuf_a; 2505 } 2506 2507 if (reuse == MAT_INITIAL_MATRIX){ 2508 PetscMPIInt subrank,subsize; 2509 PetscInt nleftover,np_subcomm; 2510 /* get the destination processors' id send_rank, nsends and nrecvs */ 2511 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2512 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2513 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2514 np_subcomm = size/nsubcomm; 2515 nleftover = size - nsubcomm*np_subcomm; 2516 nsends = 0; nrecvs = 0; 2517 for (i=0; i<size; i++){ /* i=rank*/ 2518 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2519 send_rank[nsends] = i; nsends++; 2520 recv_rank[nrecvs++] = i; 2521 } 2522 } 2523 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2524 i = size-nleftover-1; 2525 j = 0; 2526 while (j < nsubcomm - nleftover){ 2527 send_rank[nsends++] = i; 2528 i--; j++; 2529 } 2530 } 2531 2532 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2533 for (i=0; i<nleftover; i++){ 2534 recv_rank[nrecvs++] = size-nleftover+i; 2535 } 2536 } 2537 2538 /* allocate sbuf_j, sbuf_a */ 2539 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2540 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2541 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2542 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2543 2544 /* copy mat's local entries into the buffers */ 2545 if (reuse == MAT_INITIAL_MATRIX){ 2546 rownz_max = 0; 2547 rptr = sbuf_j; 2548 cols = sbuf_j + rend-rstart + 1; 2549 vals = sbuf_a; 2550 rptr[0] = 0; 2551 for (i=0; i<rend-rstart; i++){ 2552 row = i + rstart; 2553 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2554 ncols = nzA + nzB; 2555 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2556 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2557 /* load the column indices for this row into cols */ 2558 lwrite = 0; 2559 for (l=0; l<nzB; l++) { 2560 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2561 vals[lwrite] = aworkB[l]; 2562 cols[lwrite++] = ctmp; 2563 } 2564 } 2565 for (l=0; l<nzA; l++){ 2566 vals[lwrite] = aworkA[l]; 2567 cols[lwrite++] = cstart + cworkA[l]; 2568 } 2569 for (l=0; l<nzB; l++) { 2570 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2571 vals[lwrite] = aworkB[l]; 2572 cols[lwrite++] = ctmp; 2573 } 2574 } 2575 vals += ncols; 2576 cols += ncols; 2577 rptr[i+1] = rptr[i] + ncols; 2578 if (rownz_max < ncols) rownz_max = ncols; 2579 } 2580 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2581 } else { /* only copy matrix values into sbuf_a */ 2582 rptr = sbuf_j; 2583 vals = sbuf_a; 2584 rptr[0] = 0; 2585 for (i=0; i<rend-rstart; i++){ 2586 row = i + rstart; 2587 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2588 ncols = nzA + nzB; 2589 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2590 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2591 lwrite = 0; 2592 for (l=0; l<nzB; l++) { 2593 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2594 } 2595 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2596 for 
(l=0; l<nzB; l++) { 2597 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2598 } 2599 vals += ncols; 2600 rptr[i+1] = rptr[i] + ncols; 2601 } 2602 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2603 2604 /* send nzlocal to others, and recv other's nzlocal */ 2605 /*--------------------------------------------------*/ 2606 if (reuse == MAT_INITIAL_MATRIX){ 2607 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2608 s_waits2 = s_waits3 + nsends; 2609 s_waits1 = s_waits2 + nsends; 2610 r_waits1 = s_waits1 + nsends; 2611 r_waits2 = r_waits1 + nrecvs; 2612 r_waits3 = r_waits2 + nrecvs; 2613 } else { 2614 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2615 r_waits3 = s_waits3 + nsends; 2616 } 2617 2618 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2619 if (reuse == MAT_INITIAL_MATRIX){ 2620 /* get new tags to keep the communication clean */ 2621 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2622 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2623 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2624 2625 /* post receives of other's nzlocal */ 2626 for (i=0; i<nrecvs; i++){ 2627 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2628 } 2629 /* send nzlocal to others */ 2630 for (i=0; i<nsends; i++){ 2631 sbuf_nz[i] = nzlocal; 2632 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2633 } 2634 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2635 count = nrecvs; 2636 while (count) { 2637 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2638 recv_rank[imdex] = recv_status.MPI_SOURCE; 2639 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2640 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2641 2642 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2643 rbuf_nz[imdex] += i + 2; 2644 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2645 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2646 count--; 2647 } 2648 /* wait on sends of nzlocal */ 2649 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2650 /* send mat->i,j to others, and recv from other's */ 2651 /*------------------------------------------------*/ 2652 for (i=0; i<nsends; i++){ 2653 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2654 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2655 } 2656 /* wait on receives of mat->i,j */ 2657 /*------------------------------*/ 2658 count = nrecvs; 2659 while (count) { 2660 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2661 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2662 count--; 2663 } 2664 /* wait on sends of mat->i,j */ 2665 /*---------------------------*/ 2666 if (nsends) { 2667 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2668 } 2669 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2670 2671 /* post receives, send and receive mat->a */ 2672 
/*----------------------------------------*/ 2673 for (imdex=0; imdex<nrecvs; imdex++) { 2674 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2675 } 2676 for (i=0; i<nsends; i++){ 2677 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2678 } 2679 count = nrecvs; 2680 while (count) { 2681 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2682 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2683 count--; 2684 } 2685 if (nsends) { 2686 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2687 } 2688 2689 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2690 2691 /* create redundant matrix */ 2692 /*-------------------------*/ 2693 if (reuse == MAT_INITIAL_MATRIX){ 2694 /* compute rownz_max for preallocation */ 2695 for (imdex=0; imdex<nrecvs; imdex++){ 2696 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2697 rptr = rbuf_j[imdex]; 2698 for (i=0; i<j; i++){ 2699 ncols = rptr[i+1] - rptr[i]; 2700 if (rownz_max < ncols) rownz_max = ncols; 2701 } 2702 } 2703 2704 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2705 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2706 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2707 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2708 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2709 } else { 2710 C = *matredundant; 2711 } 2712 2713 /* insert local matrix entries */ 2714 rptr = sbuf_j; 2715 cols = sbuf_j + rend-rstart + 1; 2716 vals = sbuf_a; 2717 for (i=0; i<rend-rstart; i++){ 2718 row = i + rstart; 2719 ncols = rptr[i+1] - rptr[i]; 2720 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2721 vals += ncols; 2722 cols += ncols; 2723 } 2724 /* insert received matrix entries */ 2725 for (imdex=0; imdex<nrecvs; imdex++){ 2726 rstart = rowrange[recv_rank[imdex]]; 2727 rend = rowrange[recv_rank[imdex]+1]; 2728 rptr = rbuf_j[imdex]; 2729 cols = rbuf_j[imdex] + rend-rstart + 1; 2730 vals = rbuf_a[imdex]; 2731 for (i=0; i<rend-rstart; i++){ 2732 row = i + rstart; 2733 ncols = rptr[i+1] - rptr[i]; 2734 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2735 vals += ncols; 2736 cols += ncols; 2737 } 2738 } 2739 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2740 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2741 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2742 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2743 if (reuse == MAT_INITIAL_MATRIX) { 2744 PetscContainer container; 2745 *matredundant = C; 2746 /* create a supporting struct and attach it to C for reuse */ 2747 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2748 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2749 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2750 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2751 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2752 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2753 2754 redund->nzlocal = nzlocal; 2755 redund->nsends = nsends; 2756 redund->nrecvs = nrecvs; 2757 
redund->send_rank = send_rank; 2758 redund->recv_rank = recv_rank; 2759 redund->sbuf_nz = sbuf_nz; 2760 redund->rbuf_nz = rbuf_nz; 2761 redund->sbuf_j = sbuf_j; 2762 redund->sbuf_a = sbuf_a; 2763 redund->rbuf_j = rbuf_j; 2764 redund->rbuf_a = rbuf_a; 2765 2766 redund->Destroy = C->ops->destroy; 2767 C->ops->destroy = MatDestroy_MatRedundant; 2768 } 2769 PetscFunctionReturn(0); 2770 } 2771 2772 #undef __FUNCT__ 2773 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2774 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2775 { 2776 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2777 PetscErrorCode ierr; 2778 PetscInt i,*idxb = 0; 2779 PetscScalar *va,*vb; 2780 Vec vtmp; 2781 2782 PetscFunctionBegin; 2783 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2784 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2785 if (idx) { 2786 for (i=0; i<A->rmap->n; i++) { 2787 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2788 } 2789 } 2790 2791 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2792 if (idx) { 2793 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2794 } 2795 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2796 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2797 2798 for (i=0; i<A->rmap->n; i++){ 2799 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2800 va[i] = vb[i]; 2801 if (idx) idx[i] = a->garray[idxb[i]]; 2802 } 2803 } 2804 2805 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2806 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2807 ierr = PetscFree(idxb);CHKERRQ(ierr); 2808 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2809 PetscFunctionReturn(0); 2810 } 2811 2812 #undef __FUNCT__ 2813 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2814 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2815 { 2816 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2817 PetscErrorCode ierr; 2818 PetscInt i,*idxb = 0; 2819 PetscScalar *va,*vb; 2820 Vec vtmp; 2821 2822 PetscFunctionBegin; 2823 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2824 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2825 if (idx) { 2826 for (i=0; i<A->rmap->n; i++) { 2827 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2828 } 2829 } 2830 2831 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2832 if (idx) { 2833 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2834 } 2835 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2836 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2837 2838 for (i=0; i<A->rmap->n; i++){ 2839 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2840 va[i] = vb[i]; 2841 if (idx) idx[i] = a->garray[idxb[i]]; 2842 } 2843 } 2844 2845 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2846 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2847 ierr = PetscFree(idxb);CHKERRQ(ierr); 2848 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2849 PetscFunctionReturn(0); 2850 } 2851 2852 #undef __FUNCT__ 2853 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2854 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2855 { 2856 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2857 PetscInt n = A->rmap->n; 2858 PetscInt cstart = A->cmap->rstart; 2859 PetscInt *cmap = mat->garray; 2860 PetscInt *diagIdx, *offdiagIdx; 2861 Vec diagV, offdiagV; 2862 PetscScalar *a, *diagA, *offdiagA; 2863 PetscInt r; 2864 PetscErrorCode ierr; 2865 2866 PetscFunctionBegin; 2867 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2868 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2869 ierr = VecCreateSeq(((PetscObject)A)->comm, n,
&offdiagV);CHKERRQ(ierr); 2870 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2871 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2872 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2873 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2874 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2875 for(r = 0; r < n; ++r) { 2876 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2877 a[r] = diagA[r]; 2878 idx[r] = cstart + diagIdx[r]; 2879 } else { 2880 a[r] = offdiagA[r]; 2881 idx[r] = cmap[offdiagIdx[r]]; 2882 } 2883 } 2884 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2885 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2886 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2887 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2888 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2889 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2890 PetscFunctionReturn(0); 2891 } 2892 2893 #undef __FUNCT__ 2894 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2895 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2896 { 2897 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2898 PetscInt n = A->rmap->n; 2899 PetscInt cstart = A->cmap->rstart; 2900 PetscInt *cmap = mat->garray; 2901 PetscInt *diagIdx, *offdiagIdx; 2902 Vec diagV, offdiagV; 2903 PetscScalar *a, *diagA, *offdiagA; 2904 PetscInt r; 2905 PetscErrorCode ierr; 2906 2907 PetscFunctionBegin; 2908 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2909 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2910 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2911 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2912 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2913 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2914 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2915 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2916 for(r = 0; r < n; ++r) { 2917 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2918 a[r] = diagA[r]; 2919 idx[r] = cstart + diagIdx[r]; 2920 } else { 2921 a[r] = offdiagA[r]; 2922 idx[r] = cmap[offdiagIdx[r]]; 2923 } 2924 } 2925 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2926 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2927 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2928 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2929 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2930 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2931 PetscFunctionReturn(0); 2932 } 2933 2934 #undef __FUNCT__ 2935 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2936 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2937 { 2938 PetscErrorCode ierr; 2939 Mat *dummy; 2940 2941 PetscFunctionBegin; 2942 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2943 *newmat = *dummy; 2944 ierr = PetscFree(dummy);CHKERRQ(ierr); 2945 PetscFunctionReturn(0); 2946 } 2947 2948 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2949 2950 #undef __FUNCT__ 2951 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 2952 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,PetscScalar **values) 2953 { 2954 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2955 PetscErrorCode ierr; 2956 2957 PetscFunctionBegin; 2958 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2959 PetscFunctionReturn(0); 2960 } 2961 2962 2963 /* -------------------------------------------------------------------*/ 2964 static 
struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2965 MatGetRow_MPIAIJ, 2966 MatRestoreRow_MPIAIJ, 2967 MatMult_MPIAIJ, 2968 /* 4*/ MatMultAdd_MPIAIJ, 2969 MatMultTranspose_MPIAIJ, 2970 MatMultTransposeAdd_MPIAIJ, 2971 #ifdef PETSC_HAVE_PBGL 2972 MatSolve_MPIAIJ, 2973 #else 2974 0, 2975 #endif 2976 0, 2977 0, 2978 /*10*/ 0, 2979 0, 2980 0, 2981 MatSOR_MPIAIJ, 2982 MatTranspose_MPIAIJ, 2983 /*15*/ MatGetInfo_MPIAIJ, 2984 MatEqual_MPIAIJ, 2985 MatGetDiagonal_MPIAIJ, 2986 MatDiagonalScale_MPIAIJ, 2987 MatNorm_MPIAIJ, 2988 /*20*/ MatAssemblyBegin_MPIAIJ, 2989 MatAssemblyEnd_MPIAIJ, 2990 MatSetOption_MPIAIJ, 2991 MatZeroEntries_MPIAIJ, 2992 /*24*/ MatZeroRows_MPIAIJ, 2993 0, 2994 #ifdef PETSC_HAVE_PBGL 2995 0, 2996 #else 2997 0, 2998 #endif 2999 0, 3000 0, 3001 /*29*/ MatSetUpPreallocation_MPIAIJ, 3002 #ifdef PETSC_HAVE_PBGL 3003 0, 3004 #else 3005 0, 3006 #endif 3007 0, 3008 0, 3009 0, 3010 /*34*/ MatDuplicate_MPIAIJ, 3011 0, 3012 0, 3013 0, 3014 0, 3015 /*39*/ MatAXPY_MPIAIJ, 3016 MatGetSubMatrices_MPIAIJ, 3017 MatIncreaseOverlap_MPIAIJ, 3018 MatGetValues_MPIAIJ, 3019 MatCopy_MPIAIJ, 3020 /*44*/ MatGetRowMax_MPIAIJ, 3021 MatScale_MPIAIJ, 3022 0, 3023 0, 3024 MatZeroRowsColumns_MPIAIJ, 3025 /*49*/ MatSetBlockSize_MPIAIJ, 3026 0, 3027 0, 3028 0, 3029 0, 3030 /*54*/ MatFDColoringCreate_MPIAIJ, 3031 0, 3032 MatSetUnfactored_MPIAIJ, 3033 MatPermute_MPIAIJ, 3034 0, 3035 /*59*/ MatGetSubMatrix_MPIAIJ, 3036 MatDestroy_MPIAIJ, 3037 MatView_MPIAIJ, 3038 0, 3039 0, 3040 /*64*/ 0, 3041 0, 3042 0, 3043 0, 3044 0, 3045 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3046 MatGetRowMinAbs_MPIAIJ, 3047 0, 3048 MatSetColoring_MPIAIJ, 3049 #if defined(PETSC_HAVE_ADIC) 3050 MatSetValuesAdic_MPIAIJ, 3051 #else 3052 0, 3053 #endif 3054 MatSetValuesAdifor_MPIAIJ, 3055 /*75*/ MatFDColoringApply_AIJ, 3056 0, 3057 0, 3058 0, 3059 0, 3060 /*80*/ 0, 3061 0, 3062 0, 3063 /*83*/ MatLoad_MPIAIJ, 3064 0, 3065 0, 3066 0, 3067 0, 3068 0, 3069 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3070 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3071 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3072 MatPtAP_Basic, 3073 MatPtAPSymbolic_MPIAIJ, 3074 /*94*/ MatPtAPNumeric_MPIAIJ, 3075 0, 3076 0, 3077 0, 3078 0, 3079 /*99*/ 0, 3080 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3081 MatPtAPNumeric_MPIAIJ_MPIAIJ, 3082 MatConjugate_MPIAIJ, 3083 0, 3084 /*104*/MatSetValuesRow_MPIAIJ, 3085 MatRealPart_MPIAIJ, 3086 MatImaginaryPart_MPIAIJ, 3087 0, 3088 0, 3089 /*109*/0, 3090 MatGetRedundantMatrix_MPIAIJ, 3091 MatGetRowMin_MPIAIJ, 3092 0, 3093 0, 3094 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3095 0, 3096 0, 3097 0, 3098 0, 3099 /*119*/0, 3100 0, 3101 0, 3102 0, 3103 MatGetMultiProcBlock_MPIAIJ, 3104 /*124*/MatFindNonZeroRows_MPIAIJ, 3105 MatGetColumnNorms_MPIAIJ, 3106 MatInvertBlockDiagonal_MPIAIJ, 3107 0, 3108 MatGetSubMatricesParallel_MPIAIJ, 3109 /*129*/0, 3110 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3111 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3112 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3113 0, 3114 /*134*/0, 3115 0, 3116 0, 3117 0, 3118 0 3119 }; 3120 3121 /* ----------------------------------------------------------------------------------------*/ 3122 3123 EXTERN_C_BEGIN 3124 #undef __FUNCT__ 3125 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3126 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3127 { 3128 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3129 PetscErrorCode ierr; 3130 3131 PetscFunctionBegin; 3132 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3133 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3134 PetscFunctionReturn(0); 3135 } 3136 EXTERN_C_END 3137 3138 EXTERN_C_BEGIN 3139 #undef __FUNCT__ 3140 
#define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3141 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3142 { 3143 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3144 PetscErrorCode ierr; 3145 3146 PetscFunctionBegin; 3147 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3148 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3149 PetscFunctionReturn(0); 3150 } 3151 EXTERN_C_END 3152 3153 EXTERN_C_BEGIN 3154 #undef __FUNCT__ 3155 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3156 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3157 { 3158 Mat_MPIAIJ *b; 3159 PetscErrorCode ierr; 3160 PetscInt i; 3161 3162 PetscFunctionBegin; 3163 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3164 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3165 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3166 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3167 3168 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3169 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3170 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3171 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3172 if (d_nnz) { 3173 for (i=0; i<B->rmap->n; i++) { 3174 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3175 } 3176 } 3177 if (o_nnz) { 3178 for (i=0; i<B->rmap->n; i++) { 3179 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3180 } 3181 } 3182 b = (Mat_MPIAIJ*)B->data; 3183 3184 if (!B->preallocated) { 3185 /* Explicitly create 2 MATSEQAIJ matrices. 
*/ 3186 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3187 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3188 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3189 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3190 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3191 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3192 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3193 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3194 } 3195 3196 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3197 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3198 B->preallocated = PETSC_TRUE; 3199 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3200 PetscFunctionReturn(0); 3201 } 3202 EXTERN_C_END 3203 3204 #undef __FUNCT__ 3205 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3206 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3207 { 3208 Mat mat; 3209 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3210 PetscErrorCode ierr; 3211 3212 PetscFunctionBegin; 3213 *newmat = 0; 3214 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3215 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3216 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3217 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3218 a = (Mat_MPIAIJ*)mat->data; 3219 3220 mat->factortype = matin->factortype; 3221 mat->rmap->bs = matin->rmap->bs; 3222 mat->assembled = PETSC_TRUE; 3223 mat->insertmode = NOT_SET_VALUES; 3224 mat->preallocated = PETSC_TRUE; 3225 3226 a->size = oldmat->size; 3227 a->rank = oldmat->rank; 3228 a->donotstash = oldmat->donotstash; 3229 a->roworiented = oldmat->roworiented; 3230 a->rowindices = 0; 3231 a->rowvalues = 0; 3232 a->getrowactive = PETSC_FALSE; 3233 3234 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3235 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3236 3237 if (oldmat->colmap) { 3238 #if defined (PETSC_USE_CTABLE) 3239 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3240 #else 3241 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3242 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3243 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3244 #endif 3245 } else a->colmap = 0; 3246 if (oldmat->garray) { 3247 PetscInt len; 3248 len = oldmat->B->cmap->n; 3249 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3250 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3251 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3252 } else a->garray = 0; 3253 3254 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3255 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3256 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3257 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3258 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3259 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3260 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3261 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3262 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3263 *newmat = mat; 3264 
PetscFunctionReturn(0); 3265 } 3266 3267 3268 3269 #undef __FUNCT__ 3270 #define __FUNCT__ "MatLoad_MPIAIJ" 3271 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3272 { 3273 PetscScalar *vals,*svals; 3274 MPI_Comm comm = ((PetscObject)viewer)->comm; 3275 PetscErrorCode ierr; 3276 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3277 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3278 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3279 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 3280 PetscInt cend,cstart,n,*rowners,sizesset=1; 3281 int fd; 3282 3283 PetscFunctionBegin; 3284 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3285 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3286 if (!rank) { 3287 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3288 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3289 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3290 } 3291 3292 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3293 3294 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3295 M = header[1]; N = header[2]; 3296 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3297 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3298 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3299 3300 /* If global sizes are set, check if they are consistent with that given in the file */ 3301 if (sizesset) { 3302 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3303 } 3304 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3305 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3306 3307 /* determine ownership of all rows */ 3308 if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */ 3309 else m = newMat->rmap->n; /* Set by user */ 3310 3311 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3312 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3313 3314 /* First process needs enough room for process with most rows */ 3315 if (!rank) { 3316 mmax = rowners[1]; 3317 for (i=2; i<size; i++) { 3318 mmax = PetscMax(mmax,rowners[i]); 3319 } 3320 } else mmax = m; 3321 3322 rowners[0] = 0; 3323 for (i=2; i<=size; i++) { 3324 rowners[i] += rowners[i-1]; 3325 } 3326 rstart = rowners[rank]; 3327 rend = rowners[rank+1]; 3328 3329 /* distribute row lengths to all processors */ 3330 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3331 if (!rank) { 3332 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3333 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3334 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3335 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3336 for (j=0; j<m; j++) { 3337 procsnz[0] += ourlens[j]; 3338 } 3339 for (i=1; i<size; i++) { 3340 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3341 /* calculate the number of nonzeros on each processor */ 3342 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3343 procsnz[i] += rowlengths[j]; 3344 } 3345 ierr = 
MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3346 } 3347 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3348 } else { 3349 ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3350 } 3351 3352 if (!rank) { 3353 /* determine max buffer needed and allocate it */ 3354 maxnz = 0; 3355 for (i=0; i<size; i++) { 3356 maxnz = PetscMax(maxnz,procsnz[i]); 3357 } 3358 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3359 3360 /* read in my part of the matrix column indices */ 3361 nz = procsnz[0]; 3362 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3363 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3364 3365 /* read in every one elses and ship off */ 3366 for (i=1; i<size; i++) { 3367 nz = procsnz[i]; 3368 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3369 ierr = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3370 } 3371 ierr = PetscFree(cols);CHKERRQ(ierr); 3372 } else { 3373 /* determine buffer space needed for message */ 3374 nz = 0; 3375 for (i=0; i<m; i++) { 3376 nz += ourlens[i]; 3377 } 3378 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3379 3380 /* receive message of column indices*/ 3381 ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3382 } 3383 3384 /* determine column ownership if matrix is not square */ 3385 if (N != M) { 3386 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3387 else n = newMat->cmap->n; 3388 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3389 cstart = cend - n; 3390 } else { 3391 cstart = rstart; 3392 cend = rend; 3393 n = cend - cstart; 3394 } 3395 3396 /* loop over local rows, determining number of off diagonal entries */ 3397 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3398 jj = 0; 3399 for (i=0; i<m; i++) { 3400 for (j=0; j<ourlens[i]; j++) { 3401 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3402 jj++; 3403 } 3404 } 3405 3406 for (i=0; i<m; i++) { 3407 ourlens[i] -= offlens[i]; 3408 } 3409 if (!sizesset) { 3410 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3411 } 3412 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3413 3414 for (i=0; i<m; i++) { 3415 ourlens[i] += offlens[i]; 3416 } 3417 3418 if (!rank) { 3419 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3420 3421 /* read in my part of the matrix numerical values */ 3422 nz = procsnz[0]; 3423 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3424 3425 /* insert into matrix */ 3426 jj = rstart; 3427 smycols = mycols; 3428 svals = vals; 3429 for (i=0; i<m; i++) { 3430 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3431 smycols += ourlens[i]; 3432 svals += ourlens[i]; 3433 jj++; 3434 } 3435 3436 /* read in other processors and ship out */ 3437 for (i=1; i<size; i++) { 3438 nz = procsnz[i]; 3439 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3440 ierr = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3441 } 3442 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3443 } else { 3444 /* receive numeric values */ 3445 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3446 3447 /* receive message of values*/ 3448 ierr = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3449 3450 /* insert into matrix */ 3451 jj = rstart; 3452 smycols = mycols; 3453 svals = vals; 3454 for (i=0; i<m; i++) { 3455 ierr = 
MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3456 smycols += ourlens[i]; 3457 svals += ourlens[i]; 3458 jj++; 3459 } 3460 } 3461 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3462 ierr = PetscFree(vals);CHKERRQ(ierr); 3463 ierr = PetscFree(mycols);CHKERRQ(ierr); 3464 ierr = PetscFree(rowners);CHKERRQ(ierr); 3465 3466 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3467 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3468 PetscFunctionReturn(0); 3469 } 3470 3471 #undef __FUNCT__ 3472 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3473 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3474 { 3475 PetscErrorCode ierr; 3476 IS iscol_local; 3477 PetscInt csize; 3478 3479 PetscFunctionBegin; 3480 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3481 if (call == MAT_REUSE_MATRIX) { 3482 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3483 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3484 } else { 3485 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3486 } 3487 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3488 if (call == MAT_INITIAL_MATRIX) { 3489 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3490 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3491 } 3492 PetscFunctionReturn(0); 3493 } 3494 3495 #undef __FUNCT__ 3496 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3497 /* 3498 Not great since it makes two copies of the submatrix, first an SeqAIJ 3499 in local and then by concatenating the local matrices the end result. 3500 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3501 3502 Note: This requires a sequential iscol with all indices. 
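    A minimal caller-side sketch of the reuse protocol handled by MatGetSubMatrix_MPIAIJ()
    above (isrow, iscol, and the surrounding error handling are placeholders, not part of
    this file):

       Mat sub;
       ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
       ...  change values in A, keeping the same nonzero pattern ...
       ierr = MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
       ierr = MatDestroy(&sub);CHKERRQ(ierr);

    The "ISAllGather" and "SubMatrix" objects composed on the submatrix are what make the
    second, MAT_REUSE_MATRIX call possible.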
3503 */ 3504 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3505 { 3506 PetscErrorCode ierr; 3507 PetscMPIInt rank,size; 3508 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3509 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3510 Mat *local,M,Mreuse; 3511 MatScalar *vwork,*aa; 3512 MPI_Comm comm = ((PetscObject)mat)->comm; 3513 Mat_SeqAIJ *aij; 3514 3515 3516 PetscFunctionBegin; 3517 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3518 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3519 3520 if (call == MAT_REUSE_MATRIX) { 3521 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3522 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3523 local = &Mreuse; 3524 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3525 } else { 3526 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3527 Mreuse = *local; 3528 ierr = PetscFree(local);CHKERRQ(ierr); 3529 } 3530 3531 /* 3532 m - number of local rows 3533 n - number of columns (same on all processors) 3534 rstart - first row in new global matrix generated 3535 */ 3536 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3537 if (call == MAT_INITIAL_MATRIX) { 3538 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3539 ii = aij->i; 3540 jj = aij->j; 3541 3542 /* 3543 Determine the number of non-zeros in the diagonal and off-diagonal 3544 portions of the matrix in order to do correct preallocation 3545 */ 3546 3547 /* first get start and end of "diagonal" columns */ 3548 if (csize == PETSC_DECIDE) { 3549 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3550 if (mglobal == n) { /* square matrix */ 3551 nlocal = m; 3552 } else { 3553 nlocal = n/size + ((n % size) > rank); 3554 } 3555 } else { 3556 nlocal = csize; 3557 } 3558 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3559 rstart = rend - nlocal; 3560 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3561 3562 /* next, compute all the lengths */ 3563 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3564 olens = dlens + m; 3565 for (i=0; i<m; i++) { 3566 jend = ii[i+1] - ii[i]; 3567 olen = 0; 3568 dlen = 0; 3569 for (j=0; j<jend; j++) { 3570 if (*jj < rstart || *jj >= rend) olen++; 3571 else dlen++; 3572 jj++; 3573 } 3574 olens[i] = olen; 3575 dlens[i] = dlen; 3576 } 3577 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3578 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3579 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3580 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3581 ierr = PetscFree(dlens);CHKERRQ(ierr); 3582 } else { 3583 PetscInt ml,nl; 3584 3585 M = *newmat; 3586 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3587 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3588 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3589 /* 3590 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3591 rather than the slower MatSetValues(). 
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij = (Mat_SeqAIJ*)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart,cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscScalar    *values;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);

  ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);

#if defined(PETSC_USE_DEBUG)
  for (i=0; i<m; i++) {
    nnz = Ii[i+1] - Ii[i];
    JJ  = J + Ii[i];
    if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
    if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
    if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
  }
#endif

  for (i=0; i<m; i++) {
    nnz     = Ii[i+1] - Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    d       = 0;
    for (j=0; j<nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  if (v) values = (PetscScalar*)v;
  else {
    ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    ii   = i + rstart;
    nnz  = Ii[i+1] - Ii[i];
    ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3681 } 3682 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3683 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3684 3685 if (!v) { 3686 ierr = PetscFree(values);CHKERRQ(ierr); 3687 } 3688 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3689 PetscFunctionReturn(0); 3690 } 3691 EXTERN_C_END 3692 3693 #undef __FUNCT__ 3694 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3695 /*@ 3696 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3697 (the default parallel PETSc format). 3698 3699 Collective on MPI_Comm 3700 3701 Input Parameters: 3702 + B - the matrix 3703 . i - the indices into j for the start of each local row (starts with zero) 3704 . j - the column indices for each local row (starts with zero) 3705 - v - optional values in the matrix 3706 3707 Level: developer 3708 3709 Notes: 3710 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3711 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3712 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3713 3714 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3715 3716 The format which is used for the sparse matrix input, is equivalent to a 3717 row-major ordering.. i.e for the following matrix, the input data expected is 3718 as shown: 3719 3720 1 0 0 3721 2 0 3 P0 3722 ------- 3723 4 5 6 P1 3724 3725 Process0 [P0]: rows_owned=[0,1] 3726 i = {0,1,3} [size = nrow+1 = 2+1] 3727 j = {0,0,2} [size = nz = 6] 3728 v = {1,2,3} [size = nz = 6] 3729 3730 Process1 [P1]: rows_owned=[2] 3731 i = {0,3} [size = nrow+1 = 1+1] 3732 j = {0,1,2} [size = nz = 6] 3733 v = {4,5,6} [size = nz = 6] 3734 3735 .keywords: matrix, aij, compressed row, sparse, parallel 3736 3737 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3738 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3739 @*/ 3740 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3741 { 3742 PetscErrorCode ierr; 3743 3744 PetscFunctionBegin; 3745 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3746 PetscFunctionReturn(0); 3747 } 3748 3749 #undef __FUNCT__ 3750 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3751 /*@C 3752 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3753 (the default parallel PETSc format). For good matrix assembly performance 3754 the user should preallocate the matrix storage by setting the parameters 3755 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3756 performance can be increased by more than a factor of 50. 3757 3758 Collective on MPI_Comm 3759 3760 Input Parameters: 3761 + A - the matrix 3762 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3763 (same value is used for all local rows) 3764 . d_nnz - array containing the number of nonzeros in the various rows of the 3765 DIAGONAL portion of the local submatrix (possibly different for each row) 3766 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3767 The size of this array is equal to the number of local rows, i.e 'm'. 3768 You must leave room for the diagonal entry even if it is zero. 
3769 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3770 submatrix (same value is used for all local rows). 3771 - o_nnz - array containing the number of nonzeros in the various rows of the 3772 OFF-DIAGONAL portion of the local submatrix (possibly different for 3773 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3774 structure. The size of this array is equal to the number 3775 of local rows, i.e 'm'. 3776 3777 If the *_nnz parameter is given then the *_nz parameter is ignored 3778 3779 The AIJ format (also called the Yale sparse matrix format or 3780 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3781 storage. The stored row and column indices begin with zero. 3782 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3783 3784 The parallel matrix is partitioned such that the first m0 rows belong to 3785 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3786 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3787 3788 The DIAGONAL portion of the local submatrix of a processor can be defined 3789 as the submatrix which is obtained by extraction the part corresponding to 3790 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3791 first row that belongs to the processor, r2 is the last row belonging to 3792 the this processor, and c1-c2 is range of indices of the local part of a 3793 vector suitable for applying the matrix to. This is an mxn matrix. In the 3794 common case of a square matrix, the row and column ranges are the same and 3795 the DIAGONAL part is also square. The remaining portion of the local 3796 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3797 3798 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3799 3800 You can call MatGetInfo() to get information on how effective the preallocation was; 3801 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3802 You can also run with the option -info and look for messages with the string 3803 malloc in them to see if additional memory allocation was needed. 3804 3805 Example usage: 3806 3807 Consider the following 8x8 matrix with 34 non-zero values, that is 3808 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3809 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3810 as follows: 3811 3812 .vb 3813 1 2 0 | 0 3 0 | 0 4 3814 Proc0 0 5 6 | 7 0 0 | 8 0 3815 9 0 10 | 11 0 0 | 12 0 3816 ------------------------------------- 3817 13 0 14 | 15 16 17 | 0 0 3818 Proc1 0 18 0 | 19 20 21 | 0 0 3819 0 0 0 | 22 23 0 | 24 0 3820 ------------------------------------- 3821 Proc2 25 26 27 | 0 0 28 | 29 0 3822 30 0 0 | 31 32 33 | 0 34 3823 .ve 3824 3825 This can be represented as a collection of submatrices as: 3826 3827 .vb 3828 A B C 3829 D E F 3830 G H I 3831 .ve 3832 3833 Where the submatrices A,B,C are owned by proc0, D,E,F are 3834 owned by proc1, G,H,I are owned by proc2. 3835 3836 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3837 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3838 The 'M','N' parameters are 8,8, and have the same values on all procs. 3839 3840 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3841 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3842 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
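   As a worked check of this split, consider row 3 of the example below (the first row owned
   by proc1): its nonzeros lie in columns 3,4,5 (inside proc1's diagonal column range) and in
   columns 0,2 (outside it), so on proc1 the first entries are d_nnz[0] = 3 and o_nnz[0] = 2,
   matching the per-row values listed further down.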
3843 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3844 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3845 matrix, ans [DF] as another SeqAIJ matrix. 3846 3847 When d_nz, o_nz parameters are specified, d_nz storage elements are 3848 allocated for every row of the local diagonal submatrix, and o_nz 3849 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3850 One way to choose d_nz and o_nz is to use the max nonzerors per local 3851 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3852 In this case, the values of d_nz,o_nz are: 3853 .vb 3854 proc0 : dnz = 2, o_nz = 2 3855 proc1 : dnz = 3, o_nz = 2 3856 proc2 : dnz = 1, o_nz = 4 3857 .ve 3858 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3859 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3860 for proc3. i.e we are using 12+15+10=37 storage locations to store 3861 34 values. 3862 3863 When d_nnz, o_nnz parameters are specified, the storage is specified 3864 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3865 In the above case the values for d_nnz,o_nnz are: 3866 .vb 3867 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3868 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3869 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3870 .ve 3871 Here the space allocated is sum of all the above values i.e 34, and 3872 hence pre-allocation is perfect. 3873 3874 Level: intermediate 3875 3876 .keywords: matrix, aij, compressed row, sparse, parallel 3877 3878 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3879 MPIAIJ, MatGetInfo() 3880 @*/ 3881 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3882 { 3883 PetscErrorCode ierr; 3884 3885 PetscFunctionBegin; 3886 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3887 PetscValidType(B,1); 3888 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3889 PetscFunctionReturn(0); 3890 } 3891 3892 #undef __FUNCT__ 3893 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3894 /*@ 3895 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3896 CSR format the local rows. 3897 3898 Collective on MPI_Comm 3899 3900 Input Parameters: 3901 + comm - MPI communicator 3902 . m - number of local rows (Cannot be PETSC_DECIDE) 3903 . n - This value should be the same as the local size used in creating the 3904 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3905 calculated if N is given) For square matrices n is almost always m. 3906 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3907 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3908 . i - row indices 3909 . j - column indices 3910 - a - matrix values 3911 3912 Output Parameter: 3913 . mat - the matrix 3914 3915 Level: intermediate 3916 3917 Notes: 3918 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3919 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3920 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3921 3922 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 
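   For the two-process example shown below, process 0 would make a call along these lines
   (a sketch; the array and communicator names are illustrative):

.vb
     PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
     PetscScalar a[] = {1,2,3};
     ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,a,&A);CHKERRQ(ierr);
.ve
   Process 1 makes the matching collective call with its own i, j, and a arrays.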
3923 3924 The format which is used for the sparse matrix input, is equivalent to a 3925 row-major ordering.. i.e for the following matrix, the input data expected is 3926 as shown: 3927 3928 1 0 0 3929 2 0 3 P0 3930 ------- 3931 4 5 6 P1 3932 3933 Process0 [P0]: rows_owned=[0,1] 3934 i = {0,1,3} [size = nrow+1 = 2+1] 3935 j = {0,0,2} [size = nz = 6] 3936 v = {1,2,3} [size = nz = 6] 3937 3938 Process1 [P1]: rows_owned=[2] 3939 i = {0,3} [size = nrow+1 = 1+1] 3940 j = {0,1,2} [size = nz = 6] 3941 v = {4,5,6} [size = nz = 6] 3942 3943 .keywords: matrix, aij, compressed row, sparse, parallel 3944 3945 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3946 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3947 @*/ 3948 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3949 { 3950 PetscErrorCode ierr; 3951 3952 PetscFunctionBegin; 3953 if (i[0]) { 3954 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3955 } 3956 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3957 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3958 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3959 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3960 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3961 PetscFunctionReturn(0); 3962 } 3963 3964 #undef __FUNCT__ 3965 #define __FUNCT__ "MatCreateMPIAIJ" 3966 /*@C 3967 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3968 (the default parallel PETSc format). For good matrix assembly performance 3969 the user should preallocate the matrix storage by setting the parameters 3970 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3971 performance can be increased by more than a factor of 50. 3972 3973 Collective on MPI_Comm 3974 3975 Input Parameters: 3976 + comm - MPI communicator 3977 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3978 This value should be the same as the local size used in creating the 3979 y vector for the matrix-vector product y = Ax. 3980 . n - This value should be the same as the local size used in creating the 3981 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3982 calculated if N is given) For square matrices n is almost always m. 3983 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3984 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3985 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3986 (same value is used for all local rows) 3987 . d_nnz - array containing the number of nonzeros in the various rows of the 3988 DIAGONAL portion of the local submatrix (possibly different for each row) 3989 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3990 The size of this array is equal to the number of local rows, i.e 'm'. 3991 You must leave room for the diagonal entry even if it is zero. 3992 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3993 submatrix (same value is used for all local rows). 
3994 - o_nnz - array containing the number of nonzeros in the various rows of the 3995 OFF-DIAGONAL portion of the local submatrix (possibly different for 3996 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3997 structure. The size of this array is equal to the number 3998 of local rows, i.e 'm'. 3999 4000 Output Parameter: 4001 . A - the matrix 4002 4003 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4004 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4005 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4006 4007 Notes: 4008 If the *_nnz parameter is given then the *_nz parameter is ignored 4009 4010 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4011 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4012 storage requirements for this matrix. 4013 4014 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4015 processor than it must be used on all processors that share the object for 4016 that argument. 4017 4018 The user MUST specify either the local or global matrix dimensions 4019 (possibly both). 4020 4021 The parallel matrix is partitioned across processors such that the 4022 first m0 rows belong to process 0, the next m1 rows belong to 4023 process 1, the next m2 rows belong to process 2 etc.. where 4024 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4025 values corresponding to [m x N] submatrix. 4026 4027 The columns are logically partitioned with the n0 columns belonging 4028 to 0th partition, the next n1 columns belonging to the next 4029 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4030 4031 The DIAGONAL portion of the local submatrix on any given processor 4032 is the submatrix corresponding to the rows and columns m,n 4033 corresponding to the given processor. i.e diagonal matrix on 4034 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4035 etc. The remaining portion of the local submatrix [m x (N-n)] 4036 constitute the OFF-DIAGONAL portion. The example below better 4037 illustrates this concept. 4038 4039 For a square global matrix we define each processor's diagonal portion 4040 to be its local rows and the corresponding columns (a square submatrix); 4041 each processor's off-diagonal portion encompasses the remainder of the 4042 local matrix (a rectangular submatrix). 4043 4044 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4045 4046 When calling this routine with a single process communicator, a matrix of 4047 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4048 type of communicator, use the construction mechanism: 4049 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4050 4051 By default, this format uses inodes (identical nodes) when possible. 4052 We search for consecutive rows with the same nonzero structure, thereby 4053 reusing matrix information to achieve increased efficiency. 4054 4055 Options Database Keys: 4056 + -mat_no_inode - Do not use inodes 4057 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4058 - -mat_aij_oneindex - Internally use indexing starting at 1 4059 rather than 0. Note that when calling MatSetValues(), 4060 the user still MUST index entries starting at 0! 4061 4062 4063 Example usage: 4064 4065 Consider the following 8x8 matrix with 34 non-zero values, that is 4066 assembled across 3 processors. 
   Let's assume that proc0 owns 3 rows, proc1 owns 3 rows, and proc2 owns 2 rows.
   This division can be shown as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Here the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations on every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the preallocation is perfect.
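   For proc1 in this example, the recommended MatCreate()/MatSetType()/MatMPIAIJSetPreallocation()
   sequence mentioned above would look roughly like (a sketch; variable names are illustrative):

.vb
     PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve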
4131 4132 Level: intermediate 4133 4134 .keywords: matrix, aij, compressed row, sparse, parallel 4135 4136 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4137 MPIAIJ, MatCreateMPIAIJWithArrays() 4138 @*/ 4139 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4140 { 4141 PetscErrorCode ierr; 4142 PetscMPIInt size; 4143 4144 PetscFunctionBegin; 4145 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4146 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4147 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4148 if (size > 1) { 4149 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4150 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4151 } else { 4152 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4153 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4154 } 4155 PetscFunctionReturn(0); 4156 } 4157 4158 #undef __FUNCT__ 4159 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4160 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 4161 { 4162 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4163 4164 PetscFunctionBegin; 4165 *Ad = a->A; 4166 *Ao = a->B; 4167 *colmap = a->garray; 4168 PetscFunctionReturn(0); 4169 } 4170 4171 #undef __FUNCT__ 4172 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4173 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4174 { 4175 PetscErrorCode ierr; 4176 PetscInt i; 4177 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4178 4179 PetscFunctionBegin; 4180 if (coloring->ctype == IS_COLORING_GLOBAL) { 4181 ISColoringValue *allcolors,*colors; 4182 ISColoring ocoloring; 4183 4184 /* set coloring for diagonal portion */ 4185 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4186 4187 /* set coloring for off-diagonal portion */ 4188 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4189 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4190 for (i=0; i<a->B->cmap->n; i++) { 4191 colors[i] = allcolors[a->garray[i]]; 4192 } 4193 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4194 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4195 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4196 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4197 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4198 ISColoringValue *colors; 4199 PetscInt *larray; 4200 ISColoring ocoloring; 4201 4202 /* set coloring for diagonal portion */ 4203 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4204 for (i=0; i<a->A->cmap->n; i++) { 4205 larray[i] = i + A->cmap->rstart; 4206 } 4207 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4208 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4209 for (i=0; i<a->A->cmap->n; i++) { 4210 colors[i] = coloring->colors[larray[i]]; 4211 } 4212 ierr = PetscFree(larray);CHKERRQ(ierr); 4213 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4214 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4215 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4216 4217 /* set coloring for off-diagonal portion */ 4218 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4219 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4220 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4221 for (i=0; i<a->B->cmap->n; i++) { 4222 colors[i] = coloring->colors[larray[i]]; 4223 } 4224 ierr = PetscFree(larray);CHKERRQ(ierr); 4225 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4226 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4227 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4228 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4229 4230 PetscFunctionReturn(0); 4231 } 4232 4233 #if defined(PETSC_HAVE_ADIC) 4234 #undef __FUNCT__ 4235 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 4236 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 4237 { 4238 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4239 PetscErrorCode ierr; 4240 4241 PetscFunctionBegin; 4242 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 4243 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 4244 PetscFunctionReturn(0); 4245 } 4246 #endif 4247 4248 #undef __FUNCT__ 4249 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4250 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4251 { 4252 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4253 PetscErrorCode ierr; 4254 4255 PetscFunctionBegin; 4256 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4257 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4258 PetscFunctionReturn(0); 4259 } 4260 4261 #undef __FUNCT__ 4262 #define __FUNCT__ "MatMergeSymbolic" 4263 PetscErrorCode MatMergeSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4264 { 4265 PetscErrorCode ierr; 4266 PetscInt m,N,i,rstart,nnz,*dnz,*onz; 4267 PetscInt *indx; 4268 4269 PetscFunctionBegin; 4270 /* This routine will ONLY return MPIAIJ type matrix */ 4271 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4272 if (n == PETSC_DECIDE){ 4273 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4274 } 4275 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4276 rstart -= m; 4277 4278 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4279 for (i=0;i<m;i++) { 4280 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4281 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4282 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4283 } 4284 4285 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4286 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4287 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4288 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4289 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4290 PetscFunctionReturn(0); 4291 } 4292 4293 #undef __FUNCT__ 4294 #define __FUNCT__ "MatMergeNumeric" 4295 PetscErrorCode MatMergeNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4296 { 4297 PetscErrorCode ierr; 4298 PetscInt m,N,i,rstart,nnz,Ii; 4299 PetscInt *indx; 4300 PetscScalar *values; 4301 4302 PetscFunctionBegin; 4303 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4304 ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4305 for (i=0;i<m;i++) { 4306 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4307 Ii = i + rstart; 4308 ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4309 ierr = 
MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatDestroy(&inmat);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge"
/*@
      MatMerge - Creates a single large PETSc matrix by concatenating the sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    inmat - the input sequential matrix (one per process)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    outmat - the parallel matrix generated

    Level: advanced

   Notes: The number of columns of the matrix on EACH processor MUST be the same.

@*/
PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatMergeSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
  }
  ierr = MatMergeNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFileSplit"
PetscErrorCode MatFileSplit(Mat A,char *outfile)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  PetscInt          m,N,i,rstart,nnz;
  size_t            len;
  const PetscInt    *indx;
  PetscViewer       out;
  char              *name;
  Mat               B;
  const PetscScalar *values;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
  ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
  /* Should this be the type of the diagonal block of A?
*/ 4371 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4372 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4373 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4374 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4375 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4376 for (i=0;i<m;i++) { 4377 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4378 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4379 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4380 } 4381 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4382 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4383 4384 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4385 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4386 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4387 sprintf(name,"%s.%d",outfile,rank); 4388 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4389 ierr = PetscFree(name); 4390 ierr = MatView(B,out);CHKERRQ(ierr); 4391 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4392 ierr = MatDestroy(&B);CHKERRQ(ierr); 4393 PetscFunctionReturn(0); 4394 } 4395 4396 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4397 #undef __FUNCT__ 4398 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4399 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4400 { 4401 PetscErrorCode ierr; 4402 Mat_Merge_SeqsToMPI *merge; 4403 PetscContainer container; 4404 4405 PetscFunctionBegin; 4406 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4407 if (container) { 4408 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4409 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4410 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4411 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4412 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4413 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4414 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4415 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4416 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4417 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4418 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4419 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4420 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4421 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4422 ierr = PetscFree(merge);CHKERRQ(ierr); 4423 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4424 } 4425 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4426 PetscFunctionReturn(0); 4427 } 4428 4429 #include <../src/mat/utils/freespace.h> 4430 #include <petscbt.h> 4431 4432 #undef __FUNCT__ 4433 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4434 /*@C 4435 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 4436 matrices from each processor 4437 4438 Collective on MPI_Comm 4439 4440 Input Parameters: 4441 + comm - the communicators the parallel matrix will live on 4442 . seqmat - the input sequential matrices 4443 . m - number of local rows (or PETSC_DECIDE) 4444 . n - number of local columns (or PETSC_DECIDE) 4445 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4446 4447 Output Parameter: 4448 . mpimat - the parallel matrix generated 4449 4450 Level: advanced 4451 4452 Notes: 4453 The dimensions of the sequential matrix in each processor MUST be the same. 4454 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4455 destroyed when mpimat is destroyed. 
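   A sketch of the container query pattern used below in this file (the Mat_Merge_SeqsToMPI
   fields one then reads from the container are not shown here):

.vb
     PetscContainer      container;
     Mat_Merge_SeqsToMPI *merge;
     ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
.ve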
Call PetscObjectQuery() to access seqmat. 4456 @*/ 4457 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4458 { 4459 PetscErrorCode ierr; 4460 MPI_Comm comm=((PetscObject)mpimat)->comm; 4461 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4462 PetscMPIInt size,rank,taga,*len_s; 4463 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4464 PetscInt proc,m; 4465 PetscInt **buf_ri,**buf_rj; 4466 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4467 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4468 MPI_Request *s_waits,*r_waits; 4469 MPI_Status *status; 4470 MatScalar *aa=a->a; 4471 MatScalar **abuf_r,*ba_i; 4472 Mat_Merge_SeqsToMPI *merge; 4473 PetscContainer container; 4474 4475 PetscFunctionBegin; 4476 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4477 4478 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4479 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4480 4481 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4482 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4483 4484 bi = merge->bi; 4485 bj = merge->bj; 4486 buf_ri = merge->buf_ri; 4487 buf_rj = merge->buf_rj; 4488 4489 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4490 owners = merge->rowmap->range; 4491 len_s = merge->len_s; 4492 4493 /* send and recv matrix values */ 4494 /*-----------------------------*/ 4495 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4496 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4497 4498 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4499 for (proc=0,k=0; proc<size; proc++){ 4500 if (!len_s[proc]) continue; 4501 i = owners[proc]; 4502 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4503 k++; 4504 } 4505 4506 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4507 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4508 ierr = PetscFree(status);CHKERRQ(ierr); 4509 4510 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4511 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4512 4513 /* insert mat values of mpimat */ 4514 /*----------------------------*/ 4515 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4516 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4517 4518 for (k=0; k<merge->nrecv; k++){ 4519 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4520 nrows = *(buf_ri_k[k]); 4521 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4522 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4523 } 4524 4525 /* set values of ba */ 4526 m = merge->rowmap->n; 4527 for (i=0; i<m; i++) { 4528 arow = owners[rank] + i; 4529 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4530 bnzi = bi[i+1] - bi[i]; 4531 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4532 4533 /* add local non-zero vals of this proc's seqmat into ba */ 4534 anzi = ai[arow+1] - ai[arow]; 4535 aj = a->j + ai[arow]; 4536 aa = a->a + ai[arow]; 4537 nextaj = 0; 4538 for (j=0; nextaj<anzi; j++){ 4539 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4540 ba_i[j] += aa[nextaj++]; 4541 } 4542 } 4543 4544 /* add received vals into ba */ 4545 for (k=0; k<merge->nrecv; k++){ /* k-th 
received message */ 4546 /* i-th row */ 4547 if (i == *nextrow[k]) { 4548 anzi = *(nextai[k]+1) - *nextai[k]; 4549 aj = buf_rj[k] + *(nextai[k]); 4550 aa = abuf_r[k] + *(nextai[k]); 4551 nextaj = 0; 4552 for (j=0; nextaj<anzi; j++){ 4553 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4554 ba_i[j] += aa[nextaj++]; 4555 } 4556 } 4557 nextrow[k]++; nextai[k]++; 4558 } 4559 } 4560 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4561 } 4562 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4563 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4564 4565 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4566 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4567 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4568 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4569 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4570 PetscFunctionReturn(0); 4571 } 4572 4573 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4574 4575 #undef __FUNCT__ 4576 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4577 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4578 { 4579 PetscErrorCode ierr; 4580 Mat B_mpi; 4581 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4582 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4583 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4584 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4585 PetscInt len,proc,*dnz,*onz; 4586 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4587 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4588 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4589 MPI_Status *status; 4590 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4591 PetscBT lnkbt; 4592 Mat_Merge_SeqsToMPI *merge; 4593 PetscContainer container; 4594 4595 PetscFunctionBegin; 4596 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4597 4598 /* make sure it is a PETSc comm */ 4599 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4600 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4601 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4602 4603 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4604 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4605 4606 /* determine row ownership */ 4607 /*---------------------------------------------------------*/ 4608 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4609 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4610 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4611 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4612 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4613 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4614 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4615 4616 m = merge->rowmap->n; 4617 M = merge->rowmap->N; 4618 owners = merge->rowmap->range; 4619 4620 /* determine the number of messages to send, their lengths */ 4621 /*---------------------------------------------------------*/ 4622 len_s = merge->len_s; 4623 4624 len = 0; /* length of buf_si[] */ 4625 merge->nsend = 0; 4626 for (proc=0; proc<size; proc++){ 4627 len_si[proc] = 0; 4628 if (proc == rank){ 4629 len_s[proc] = 0; 4630 } else { 4631 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4632 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4633 } 4634 if (len_s[proc]) { 4635 
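/* this process will send matrix entries for the rows owned by [proc]: count one more
   outgoing message, count how many of those rows are nonempty, and size the
   i-structure message as 2*(nrows+1) integers (nrows itself, the local row indices,
   and the running row-length offsets packed by the loop further below) */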
merge->nsend++; 4636 nrows = 0; 4637 for (i=owners[proc]; i<owners[proc+1]; i++){ 4638 if (ai[i+1] > ai[i]) nrows++; 4639 } 4640 len_si[proc] = 2*(nrows+1); 4641 len += len_si[proc]; 4642 } 4643 } 4644 4645 /* determine the number and length of messages to receive for ij-structure */ 4646 /*-------------------------------------------------------------------------*/ 4647 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4648 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4649 4650 /* post the Irecv of j-structure */ 4651 /*-------------------------------*/ 4652 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4653 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4654 4655 /* post the Isend of j-structure */ 4656 /*--------------------------------*/ 4657 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4658 4659 for (proc=0, k=0; proc<size; proc++){ 4660 if (!len_s[proc]) continue; 4661 i = owners[proc]; 4662 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4663 k++; 4664 } 4665 4666 /* receives and sends of j-structure are complete */ 4667 /*------------------------------------------------*/ 4668 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4669 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4670 4671 /* send and recv i-structure */ 4672 /*---------------------------*/ 4673 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4674 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4675 4676 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4677 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4678 for (proc=0,k=0; proc<size; proc++){ 4679 if (!len_s[proc]) continue; 4680 /* form outgoing message for i-structure: 4681 buf_si[0]: nrows to be sent 4682 [1:nrows]: row index (global) 4683 [nrows+1:2*nrows+1]: i-structure index 4684 */ 4685 /*-------------------------------------------*/ 4686 nrows = len_si[proc]/2 - 1; 4687 buf_si_i = buf_si + nrows+1; 4688 buf_si[0] = nrows; 4689 buf_si_i[0] = 0; 4690 nrows = 0; 4691 for (i=owners[proc]; i<owners[proc+1]; i++){ 4692 anzi = ai[i+1] - ai[i]; 4693 if (anzi) { 4694 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4695 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4696 nrows++; 4697 } 4698 } 4699 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4700 k++; 4701 buf_si += len_si[proc]; 4702 } 4703 4704 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4705 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4706 4707 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4708 for (i=0; i<merge->nrecv; i++){ 4709 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4710 } 4711 4712 ierr = PetscFree(len_si);CHKERRQ(ierr); 4713 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4714 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4715 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4716 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4717 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4718 ierr = 
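/* the sparsity-pattern (ij-structure) exchange is complete; only the received
   buffers buf_ri and buf_rj are kept, and they are stashed in the
   Mat_Merge_SeqsToMPI struct below so MatMerge_SeqsToMPINumeric() can revisit
   the same pattern when the numerical values are merged */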
PetscFree(status);CHKERRQ(ierr);
4719
4720 /* compute a local seq matrix in each processor */
4721 /*----------------------------------------------*/
4722 /* allocate bi array and free space for accumulating nonzero column info */
4723 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4724 bi[0] = 0;
4725
4726 /* create and initialize a linked list */
4727 nlnk = N+1;
4728 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4729
4730 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4731 len = 0;
4732 len = ai[owners[rank+1]] - ai[owners[rank]];
4733 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4734 current_space = free_space;
4735
4736 /* determine symbolic info for each local row */
4737 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4738
4739 for (k=0; k<merge->nrecv; k++){
4740 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4741 nrows = *buf_ri_k[k];
4742 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */
4743 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure */
4744 }
4745
4746 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4747 len = 0;
4748 for (i=0;i<m;i++) {
4749 bnzi = 0;
4750 /* add local non-zero cols of this proc's seqmat into lnk */
4751 arow = owners[rank] + i;
4752 anzi = ai[arow+1] - ai[arow];
4753 aj = a->j + ai[arow];
4754 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4755 bnzi += nlnk;
4756 /* add received col data into lnk */
4757 for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4758 if (i == *nextrow[k]) { /* i-th row */
4759 anzi = *(nextai[k]+1) - *nextai[k];
4760 aj = buf_rj[k] + *nextai[k];
4761 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4762 bnzi += nlnk;
4763 nextrow[k]++; nextai[k]++;
4764 }
4765 }
4766 if (len < bnzi) len = bnzi; /* =max(bnzi) */
4767
4768 /* if free space is not available, make more free space */
4769 if (current_space->local_remaining<bnzi) {
4770 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4771 nspacedouble++;
4772 }
4773 /* copy data into free space, then initialize lnk */
4774 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4775 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4776
4777 current_space->array += bnzi;
4778 current_space->local_used += bnzi;
4779 current_space->local_remaining -= bnzi;
4780
4781 bi[i+1] = bi[i] + bnzi;
4782 }
4783
4784 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4785
4786 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4787 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4788 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4789
4790 /* create symbolic parallel matrix B_mpi */
4791 /*---------------------------------------*/
4792 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4793 if (n==PETSC_DECIDE) {
4794 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4795 } else {
4796 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4797 }
4798 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4799 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4800 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4801
4802 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
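/*
   Usage sketch (illustrative only, not taken from the PETSc examples; the names and
   sizes are hypothetical): each process assembles a SeqAIJ matrix of global size M x N
   on PETSC_COMM_SELF holding its contributions in global row/column numbering, then
   merges them into one MPIAIJ matrix, reusing the merged matrix when only the
   numerical values of seqA change (the nonzero pattern must stay fixed).

       Mat      seqA,mpiA;
       PetscInt mlocal = ...;        number of merged rows this process is to own
       ... assemble seqA (M x N, PETSC_COMM_SELF) with this process's contributions ...
       ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqA,mlocal,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpiA);CHKERRQ(ierr);
       ... change the values (but not the pattern) of seqA ...
       ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqA,mlocal,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpiA);CHKERRQ(ierr);
*/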
4803 B_mpi->assembled = PETSC_FALSE;
4804 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4805 merge->bi = bi;
4806 merge->bj = bj;
4807 merge->buf_ri = buf_ri;
4808 merge->buf_rj = buf_rj;
4809 merge->coi = PETSC_NULL;
4810 merge->coj = PETSC_NULL;
4811 merge->owners_co = PETSC_NULL;
4812
4813 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4814
4815 /* attach the supporting struct to B_mpi for reuse */
4816 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4817 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4818 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4819 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4820 *mpimat = B_mpi;
4821
4822 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4823 PetscFunctionReturn(0);
4824 }
4825
4826 #undef __FUNCT__
4827 #define __FUNCT__ "MatMerge_SeqsToMPI"
4828 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4829 {
4830 PetscErrorCode ierr;
4831
4832 PetscFunctionBegin;
4833 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4834 if (scall == MAT_INITIAL_MATRIX){
4835 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4836 }
4837 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4838 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4839 PetscFunctionReturn(0);
4840 }
4841
4842 #undef __FUNCT__
4843 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4844 /*@
4845 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4846 mlocal rows and n columns, where mlocal is the local row count obtained with MatGetLocalSize() and n is the global column count obtained
4847 with MatGetSize()
4848
4849 Not Collective
4850
4851 Input Parameters:
4852 + A - the matrix
4853 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4854
4855 Output Parameter:
4856 .
A_loc - the local sequential matrix generated 4857 4858 Level: developer 4859 4860 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4861 4862 @*/ 4863 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4864 { 4865 PetscErrorCode ierr; 4866 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4867 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4868 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4869 MatScalar *aa=a->a,*ba=b->a,*cam; 4870 PetscScalar *ca; 4871 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4872 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4873 PetscBool match; 4874 4875 PetscFunctionBegin; 4876 ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4877 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4878 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4879 if (scall == MAT_INITIAL_MATRIX){ 4880 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4881 ci[0] = 0; 4882 for (i=0; i<am; i++){ 4883 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4884 } 4885 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4886 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4887 k = 0; 4888 for (i=0; i<am; i++) { 4889 ncols_o = bi[i+1] - bi[i]; 4890 ncols_d = ai[i+1] - ai[i]; 4891 /* off-diagonal portion of A */ 4892 for (jo=0; jo<ncols_o; jo++) { 4893 col = cmap[*bj]; 4894 if (col >= cstart) break; 4895 cj[k] = col; bj++; 4896 ca[k++] = *ba++; 4897 } 4898 /* diagonal portion of A */ 4899 for (j=0; j<ncols_d; j++) { 4900 cj[k] = cstart + *aj++; 4901 ca[k++] = *aa++; 4902 } 4903 /* off-diagonal portion of A */ 4904 for (j=jo; j<ncols_o; j++) { 4905 cj[k] = cmap[*bj++]; 4906 ca[k++] = *ba++; 4907 } 4908 } 4909 /* put together the new matrix */ 4910 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4911 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4912 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4913 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4914 mat->free_a = PETSC_TRUE; 4915 mat->free_ij = PETSC_TRUE; 4916 mat->nonew = 0; 4917 } else if (scall == MAT_REUSE_MATRIX){ 4918 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4919 ci = mat->i; cj = mat->j; cam = mat->a; 4920 for (i=0; i<am; i++) { 4921 /* off-diagonal portion of A */ 4922 ncols_o = bi[i+1] - bi[i]; 4923 for (jo=0; jo<ncols_o; jo++) { 4924 col = cmap[*bj]; 4925 if (col >= cstart) break; 4926 *cam++ = *ba++; bj++; 4927 } 4928 /* diagonal portion of A */ 4929 ncols_d = ai[i+1] - ai[i]; 4930 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4931 /* off-diagonal portion of A */ 4932 for (j=jo; j<ncols_o; j++) { 4933 *cam++ = *ba++; bj++; 4934 } 4935 } 4936 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4937 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4938 PetscFunctionReturn(0); 4939 } 4940 4941 #undef __FUNCT__ 4942 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 4943 /*@C 4944 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 4945 4946 Not Collective 4947 4948 Input Parameters: 4949 + A - the matrix 4950 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4951 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4952 4953 Output Parameter: 4954 . 
A_loc - the local sequential matrix generated 4955 4956 Level: developer 4957 4958 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 4959 4960 @*/ 4961 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4962 { 4963 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4964 PetscErrorCode ierr; 4965 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4966 IS isrowa,iscola; 4967 Mat *aloc; 4968 PetscBool match; 4969 4970 PetscFunctionBegin; 4971 ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4972 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4973 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4974 if (!row){ 4975 start = A->rmap->rstart; end = A->rmap->rend; 4976 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4977 } else { 4978 isrowa = *row; 4979 } 4980 if (!col){ 4981 start = A->cmap->rstart; 4982 cmap = a->garray; 4983 nzA = a->A->cmap->n; 4984 nzB = a->B->cmap->n; 4985 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4986 ncols = 0; 4987 for (i=0; i<nzB; i++) { 4988 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4989 else break; 4990 } 4991 imark = i; 4992 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4993 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4994 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 4995 } else { 4996 iscola = *col; 4997 } 4998 if (scall != MAT_INITIAL_MATRIX){ 4999 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5000 aloc[0] = *A_loc; 5001 } 5002 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5003 *A_loc = aloc[0]; 5004 ierr = PetscFree(aloc);CHKERRQ(ierr); 5005 if (!row){ 5006 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5007 } 5008 if (!col){ 5009 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5010 } 5011 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5012 PetscFunctionReturn(0); 5013 } 5014 5015 #undef __FUNCT__ 5016 #define __FUNCT__ "MatGetBrowsOfAcols" 5017 /*@C 5018 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5019 5020 Collective on Mat 5021 5022 Input Parameters: 5023 + A,B - the matrices in mpiaij format 5024 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5025 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 5026 5027 Output Parameter: 5028 + rowb, colb - index sets of rows and columns of B to extract 5029 - B_seq - the sequential matrix generated 5030 5031 Level: developer 5032 5033 @*/ 5034 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5035 { 5036 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5037 PetscErrorCode ierr; 5038 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5039 IS isrowb,iscolb; 5040 Mat *bseq=PETSC_NULL; 5041 5042 PetscFunctionBegin; 5043 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5044 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5045 } 5046 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5047 5048 if (scall == MAT_INITIAL_MATRIX){ 5049 start = A->cmap->rstart; 5050 cmap = a->garray; 5051 nzA = a->A->cmap->n; 5052 nzB = a->B->cmap->n; 5053 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5054 ncols = 0; 5055 for (i=0; i<nzB; i++) { /* row < local row index */ 5056 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5057 else break; 5058 } 5059 imark = i; 5060 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5061 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5062 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5063 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5064 } else { 5065 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5066 isrowb = *rowb; iscolb = *colb; 5067 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5068 bseq[0] = *B_seq; 5069 } 5070 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5071 *B_seq = bseq[0]; 5072 ierr = PetscFree(bseq);CHKERRQ(ierr); 5073 if (!rowb){ 5074 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5075 } else { 5076 *rowb = isrowb; 5077 } 5078 if (!colb){ 5079 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5080 } else { 5081 *colb = iscolb; 5082 } 5083 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5084 PetscFunctionReturn(0); 5085 } 5086 5087 #undef __FUNCT__ 5088 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5089 /* 5090 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5091 of the OFF-DIAGONAL portion of local A 5092 5093 Collective on Mat 5094 5095 Input Parameters: 5096 + A,B - the matrices in mpiaij format 5097 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5098 5099 Output Parameter: 5100 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5101 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5102 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 5103 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5104 5105 Level: developer 5106 5107 */ 5108 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5109 { 5110 VecScatter_MPI_General *gen_to,*gen_from; 5111 PetscErrorCode ierr; 5112 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5113 Mat_SeqAIJ *b_oth; 5114 VecScatter ctx=a->Mvctx; 5115 MPI_Comm comm=((PetscObject)ctx)->comm; 5116 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5117 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5118 PetscScalar *rvalues,*svalues; 5119 MatScalar *b_otha,*bufa,*bufA; 5120 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5121 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 5122 MPI_Status *sstatus,rstatus; 5123 PetscMPIInt jj; 5124 PetscInt *cols,sbs,rbs; 5125 PetscScalar *vals; 5126 5127 PetscFunctionBegin; 5128 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5129 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5130 } 5131 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5132 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5133 5134 gen_to = (VecScatter_MPI_General*)ctx->todata; 5135 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5136 rvalues = gen_from->values; /* holds the length of receiving row */ 5137 svalues = gen_to->values; /* holds the length of sending row */ 5138 nrecvs = gen_from->n; 5139 nsends = gen_to->n; 5140 5141 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5142 srow = gen_to->indices; /* local row index to be sent */ 5143 sstarts = gen_to->starts; 5144 sprocs = gen_to->procs; 5145 sstatus = gen_to->sstatus; 5146 sbs = gen_to->bs; 5147 rstarts = gen_from->starts; 5148 rprocs = gen_from->procs; 5149 rbs = gen_from->bs; 5150 5151 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5152 if (scall == MAT_INITIAL_MATRIX){ 5153 /* i-array */ 5154 /*---------*/ 5155 /* post receives */ 5156 for (i=0; i<nrecvs; i++){ 5157 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5158 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5159 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5160 } 5161 5162 /* pack the outgoing message */ 5163 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5164 sstartsj[0] = 0; rstartsj[0] = 0; 5165 len = 0; /* total length of j or a array to be sent */ 5166 k = 0; 5167 for (i=0; i<nsends; i++){ 5168 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5169 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5170 for (j=0; j<nrows; j++) { 5171 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5172 for (l=0; l<sbs; l++){ 5173 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5174 rowlen[j*sbs+l] = ncols; 5175 len += ncols; 5176 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5177 } 5178 k++; 5179 } 5180 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5181 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5182 } 5183 /* 
recvs and sends of i-array are completed */ 5184 i = nrecvs; 5185 while (i--) { 5186 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5187 } 5188 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5189 5190 /* allocate buffers for sending j and a arrays */ 5191 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5192 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5193 5194 /* create i-array of B_oth */ 5195 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5196 b_othi[0] = 0; 5197 len = 0; /* total length of j or a array to be received */ 5198 k = 0; 5199 for (i=0; i<nrecvs; i++){ 5200 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5201 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5202 for (j=0; j<nrows; j++) { 5203 b_othi[k+1] = b_othi[k] + rowlen[j]; 5204 len += rowlen[j]; k++; 5205 } 5206 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5207 } 5208 5209 /* allocate space for j and a arrrays of B_oth */ 5210 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5211 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5212 5213 /* j-array */ 5214 /*---------*/ 5215 /* post receives of j-array */ 5216 for (i=0; i<nrecvs; i++){ 5217 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5218 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5219 } 5220 5221 /* pack the outgoing message j-array */ 5222 k = 0; 5223 for (i=0; i<nsends; i++){ 5224 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5225 bufJ = bufj+sstartsj[i]; 5226 for (j=0; j<nrows; j++) { 5227 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5228 for (ll=0; ll<sbs; ll++){ 5229 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5230 for (l=0; l<ncols; l++){ 5231 *bufJ++ = cols[l]; 5232 } 5233 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5234 } 5235 } 5236 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5237 } 5238 5239 /* recvs and sends of j-array are completed */ 5240 i = nrecvs; 5241 while (i--) { 5242 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5243 } 5244 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5245 } else if (scall == MAT_REUSE_MATRIX){ 5246 sstartsj = *startsj_s; 5247 rstartsj = *startsj_r; 5248 bufa = *bufa_ptr; 5249 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5250 b_otha = b_oth->a; 5251 } else { 5252 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5253 } 5254 5255 /* a-array */ 5256 /*---------*/ 5257 /* post receives of a-array */ 5258 for (i=0; i<nrecvs; i++){ 5259 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5260 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5261 } 5262 5263 /* pack the outgoing message a-array */ 5264 k = 0; 5265 for (i=0; i<nsends; i++){ 5266 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5267 bufA = bufa+sstartsj[i]; 5268 for (j=0; j<nrows; j++) { 5269 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5270 for (ll=0; ll<sbs; ll++){ 5271 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5272 for (l=0; l<ncols; l++){ 5273 *bufA++ = vals[l]; 5274 } 5275 ierr = 
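/* this row's values have been copied into bufA; give the row back to B */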
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5276 } 5277 } 5278 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5279 } 5280 /* recvs and sends of a-array are completed */ 5281 i = nrecvs; 5282 while (i--) { 5283 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5284 } 5285 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5286 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5287 5288 if (scall == MAT_INITIAL_MATRIX){ 5289 /* put together the new matrix */ 5290 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5291 5292 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5293 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5294 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5295 b_oth->free_a = PETSC_TRUE; 5296 b_oth->free_ij = PETSC_TRUE; 5297 b_oth->nonew = 0; 5298 5299 ierr = PetscFree(bufj);CHKERRQ(ierr); 5300 if (!startsj_s || !bufa_ptr){ 5301 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5302 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5303 } else { 5304 *startsj_s = sstartsj; 5305 *startsj_r = rstartsj; 5306 *bufa_ptr = bufa; 5307 } 5308 } 5309 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5310 PetscFunctionReturn(0); 5311 } 5312 5313 #undef __FUNCT__ 5314 #define __FUNCT__ "MatGetCommunicationStructs" 5315 /*@C 5316 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5317 5318 Not Collective 5319 5320 Input Parameters: 5321 . A - The matrix in mpiaij format 5322 5323 Output Parameter: 5324 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5325 . 
colmap - A map from global column index to local index into lvec 5326 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5327 5328 Level: developer 5329 5330 @*/ 5331 #if defined (PETSC_USE_CTABLE) 5332 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5333 #else 5334 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5335 #endif 5336 { 5337 Mat_MPIAIJ *a; 5338 5339 PetscFunctionBegin; 5340 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5341 PetscValidPointer(lvec, 2); 5342 PetscValidPointer(colmap, 3); 5343 PetscValidPointer(multScatter, 4); 5344 a = (Mat_MPIAIJ *) A->data; 5345 if (lvec) *lvec = a->lvec; 5346 if (colmap) *colmap = a->colmap; 5347 if (multScatter) *multScatter = a->Mvctx; 5348 PetscFunctionReturn(0); 5349 } 5350 5351 EXTERN_C_BEGIN 5352 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*); 5353 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*); 5354 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 5355 EXTERN_C_END 5356 5357 #undef __FUNCT__ 5358 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5359 /* 5360 Computes (B'*A')' since computing B*A directly is untenable 5361 5362 n p p 5363 ( ) ( ) ( ) 5364 m ( A ) * n ( B ) = m ( C ) 5365 ( ) ( ) ( ) 5366 5367 */ 5368 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5369 { 5370 PetscErrorCode ierr; 5371 Mat At,Bt,Ct; 5372 5373 PetscFunctionBegin; 5374 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5375 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5376 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5377 ierr = MatDestroy(&At);CHKERRQ(ierr); 5378 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5379 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5380 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5381 PetscFunctionReturn(0); 5382 } 5383 5384 #undef __FUNCT__ 5385 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5386 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5387 { 5388 PetscErrorCode ierr; 5389 PetscInt m=A->rmap->n,n=B->cmap->n; 5390 Mat Cmat; 5391 5392 PetscFunctionBegin; 5393 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5394 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5395 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5396 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5397 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5398 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5399 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5400 *C = Cmat; 5401 (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ; 5402 PetscFunctionReturn(0); 5403 } 5404 5405 /* ----------------------------------------------------------------*/ 5406 #undef __FUNCT__ 5407 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5408 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5409 { 5410 PetscErrorCode ierr; 5411 5412 PetscFunctionBegin; 5413 if (scall == MAT_INITIAL_MATRIX){ 5414 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5415 } 5416 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5417 PetscFunctionReturn(0); 5418 } 5419 5420 EXTERN_C_BEGIN 5421 #if 
defined(PETSC_HAVE_MUMPS) 5422 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5423 #endif 5424 #if defined(PETSC_HAVE_PASTIX) 5425 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5426 #endif 5427 #if defined(PETSC_HAVE_SUPERLU_DIST) 5428 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5429 #endif 5430 #if defined(PETSC_HAVE_SPOOLES) 5431 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5432 #endif 5433 EXTERN_C_END 5434 5435 /*MC 5436 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5437 5438 Options Database Keys: 5439 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5440 5441 Level: beginner 5442 5443 .seealso: MatCreateMPIAIJ() 5444 M*/ 5445 5446 EXTERN_C_BEGIN 5447 #undef __FUNCT__ 5448 #define __FUNCT__ "MatCreate_MPIAIJ" 5449 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5450 { 5451 Mat_MPIAIJ *b; 5452 PetscErrorCode ierr; 5453 PetscMPIInt size; 5454 5455 PetscFunctionBegin; 5456 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5457 5458 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5459 B->data = (void*)b; 5460 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5461 B->rmap->bs = 1; 5462 B->assembled = PETSC_FALSE; 5463 5464 B->insertmode = NOT_SET_VALUES; 5465 b->size = size; 5466 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5467 5468 /* build cache for off array entries formed */ 5469 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5470 b->donotstash = PETSC_FALSE; 5471 b->colmap = 0; 5472 b->garray = 0; 5473 b->roworiented = PETSC_TRUE; 5474 5475 /* stuff used for matrix vector multiply */ 5476 b->lvec = PETSC_NULL; 5477 b->Mvctx = PETSC_NULL; 5478 5479 /* stuff for MatGetRow() */ 5480 b->rowindices = 0; 5481 b->rowvalues = 0; 5482 b->getrowactive = PETSC_FALSE; 5483 5484 #if defined(PETSC_HAVE_SPOOLES) 5485 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5486 "MatGetFactor_mpiaij_spooles", 5487 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5488 #endif 5489 #if defined(PETSC_HAVE_MUMPS) 5490 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5491 "MatGetFactor_aij_mumps", 5492 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5493 #endif 5494 #if defined(PETSC_HAVE_PASTIX) 5495 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5496 "MatGetFactor_mpiaij_pastix", 5497 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5498 #endif 5499 #if defined(PETSC_HAVE_SUPERLU_DIST) 5500 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5501 "MatGetFactor_mpiaij_superlu_dist", 5502 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5503 #endif 5504 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5505 "MatStoreValues_MPIAIJ", 5506 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5507 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5508 "MatRetrieveValues_MPIAIJ", 5509 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5510 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5511 "MatGetDiagonalBlock_MPIAIJ", 5512 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5513 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5514 "MatIsTranspose_MPIAIJ", 5515 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5516 ierr = 
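/* the remaining composed functions publish the MPIAIJ-specific preallocation,
   CSR preallocation, local diagonal scaling, format conversion, and
   MPIDense-MPIAIJ MatMatMult implementations under string names, so the
   type-independent interface routines can look them up with PetscObjectQueryFunction() */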
PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5517 "MatMPIAIJSetPreallocation_MPIAIJ", 5518 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5519 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5520 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5521 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5522 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5523 "MatDiagonalScaleLocal_MPIAIJ", 5524 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5525 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5526 "MatConvert_MPIAIJ_MPIAIJPERM", 5527 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5528 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5529 "MatConvert_MPIAIJ_MPIAIJCRL", 5530 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5531 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5532 "MatConvert_MPIAIJ_MPISBAIJ", 5533 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5534 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5535 "MatMatMult_MPIDense_MPIAIJ", 5536 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5537 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5538 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5539 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5540 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5541 "MatMatMultNumeric_MPIDense_MPIAIJ", 5542 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5543 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5544 PetscFunctionReturn(0); 5545 } 5546 EXTERN_C_END 5547 5548 #undef __FUNCT__ 5549 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5550 /*@ 5551 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5552 and "off-diagonal" part of the matrix in CSR format. 5553 5554 Collective on MPI_Comm 5555 5556 Input Parameters: 5557 + comm - MPI communicator 5558 . m - number of local rows (Cannot be PETSC_DECIDE) 5559 . n - This value should be the same as the local size used in creating the 5560 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5561 calculated if N is given) For square matrices n is almost always m. 5562 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5563 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5564 . i - row indices for "diagonal" portion of matrix 5565 . j - column indices 5566 . a - matrix values 5567 . oi - row indices for "off-diagonal" portion of matrix 5568 . oj - column indices 5569 - oa - matrix values 5570 5571 Output Parameter: 5572 . mat - the matrix 5573 5574 Level: advanced 5575 5576 Notes: 5577 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5578 must free the arrays once the matrix has been destroyed and not before. 5579 5580 The i and j indices are 0 based 5581 5582 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5583 5584 This sets local rows and cannot be used to set off-processor values. 5585 5586 You cannot later use MatSetValues() to change values in this matrix. 
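   Example (an illustrative sketch, not taken from the PETSc examples): for the 4x4 tridiagonal matrix with
   stencil (-1,2,-1) split across two processes, each owning 2 rows and 2 columns, process 0 would pass

      Mat         mat;
      PetscInt    i[]  = {0,2,4},  j[]  = {0,1,0,1};
      PetscScalar a[]  = {2,-1,-1,2};
      PetscInt    oi[] = {0,0,1},  oj[] = {2};
      PetscScalar oa[] = {-1};
      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);

   and process 1 the analogous arrays for its rows. Note that, as this routine is written, the "diagonal" arrays
   use column indices local to the diagonal block (0 to n-1), while the "off-diagonal" arrays use global column
   indices, since that block is created below with the full global column width.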
5587 5588 .keywords: matrix, aij, compressed row, sparse, parallel 5589 5590 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5591 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5592 @*/ 5593 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5594 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5595 { 5596 PetscErrorCode ierr; 5597 Mat_MPIAIJ *maij; 5598 5599 PetscFunctionBegin; 5600 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5601 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5602 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5603 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5604 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5605 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5606 maij = (Mat_MPIAIJ*) (*mat)->data; 5607 maij->donotstash = PETSC_TRUE; 5608 (*mat)->preallocated = PETSC_TRUE; 5609 5610 ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5611 ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5612 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5613 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5614 5615 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5616 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5617 5618 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5619 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5620 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5621 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5622 5623 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5624 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5625 PetscFunctionReturn(0); 5626 } 5627 5628 /* 5629 Special version for direct calls from Fortran 5630 */ 5631 #include <private/fortranimpl.h> 5632 5633 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5634 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5635 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5636 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5637 #endif 5638 5639 /* Change these macros so can be used in void function */ 5640 #undef CHKERRQ 5641 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5642 #undef SETERRQ2 5643 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5644 #undef SETERRQ 5645 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5646 5647 EXTERN_C_BEGIN 5648 #undef __FUNCT__ 5649 #define __FUNCT__ "matsetvaluesmpiaij_" 5650 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5651 { 5652 Mat mat = *mmat; 5653 PetscInt m = *mm, n = *mn; 5654 InsertMode addv = *maddv; 5655 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5656 PetscScalar value; 5657 PetscErrorCode ierr; 5658 5659 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5660 if (mat->insertmode == NOT_SET_VALUES) { 5661 mat->insertmode = addv; 5662 } 5663 #if defined(PETSC_USE_DEBUG) 5664 else if (mat->insertmode != addv) { 5665 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5666 } 5667 #endif 5668 { 5669 PetscInt i,j,rstart = 
mat->rmap->rstart,rend = mat->rmap->rend; 5670 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5671 PetscBool roworiented = aij->roworiented; 5672 5673 /* Some Variables required in the macro */ 5674 Mat A = aij->A; 5675 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5676 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5677 MatScalar *aa = a->a; 5678 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5679 Mat B = aij->B; 5680 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5681 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5682 MatScalar *ba = b->a; 5683 5684 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5685 PetscInt nonew = a->nonew; 5686 MatScalar *ap1,*ap2; 5687 5688 PetscFunctionBegin; 5689 for (i=0; i<m; i++) { 5690 if (im[i] < 0) continue; 5691 #if defined(PETSC_USE_DEBUG) 5692 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5693 #endif 5694 if (im[i] >= rstart && im[i] < rend) { 5695 row = im[i] - rstart; 5696 lastcol1 = -1; 5697 rp1 = aj + ai[row]; 5698 ap1 = aa + ai[row]; 5699 rmax1 = aimax[row]; 5700 nrow1 = ailen[row]; 5701 low1 = 0; 5702 high1 = nrow1; 5703 lastcol2 = -1; 5704 rp2 = bj + bi[row]; 5705 ap2 = ba + bi[row]; 5706 rmax2 = bimax[row]; 5707 nrow2 = bilen[row]; 5708 low2 = 0; 5709 high2 = nrow2; 5710 5711 for (j=0; j<n; j++) { 5712 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5713 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5714 if (in[j] >= cstart && in[j] < cend){ 5715 col = in[j] - cstart; 5716 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5717 } else if (in[j] < 0) continue; 5718 #if defined(PETSC_USE_DEBUG) 5719 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5720 #endif 5721 else { 5722 if (mat->was_assembled) { 5723 if (!aij->colmap) { 5724 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5725 } 5726 #if defined (PETSC_USE_CTABLE) 5727 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5728 col--; 5729 #else 5730 col = aij->colmap[in[j]] - 1; 5731 #endif 5732 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5733 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5734 col = in[j]; 5735 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5736 B = aij->B; 5737 b = (Mat_SeqAIJ*)B->data; 5738 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5739 rp2 = bj + bi[row]; 5740 ap2 = ba + bi[row]; 5741 rmax2 = bimax[row]; 5742 nrow2 = bilen[row]; 5743 low2 = 0; 5744 high2 = nrow2; 5745 bm = aij->B->rmap->n; 5746 ba = b->a; 5747 } 5748 } else col = in[j]; 5749 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5750 } 5751 } 5752 } else { 5753 if (!aij->donotstash) { 5754 if (roworiented) { 5755 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5756 } else { 5757 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5758 } 5759 } 5760 } 5761 }} 5762 PetscFunctionReturnVoid(); 5763 } 5764 EXTERN_C_END 5765 5766
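/*
   Illustrative sketch (not part of the library source): matsetvaluesmpiaij_() above inlines
   the same insertion logic as MatSetValues_MPIAIJ(), splitting each entry between the
   "diagonal" block A and the "off-diagonal" block B and stashing rows owned by other
   processes. The usual C-side assembly it accelerates looks like the following; the sizes
   and preallocation figures are hypothetical.

       Mat      A;
       PetscInt mlocal = 10,row,rstart,rend;
       ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
       ierr = MatSetSizes(A,mlocal,mlocal,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
       ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
       ierr = MatMPIAIJSetPreallocation(A,3,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
       ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
       for (row=rstart; row<rend; row++) {
         PetscScalar v = 2.0;
         ierr = MatSetValues(A,1,&row,1,&row,&v,INSERT_VALUES);CHKERRQ(ierr);
       }
       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/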