#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petscblaslapack.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, and MATAIJCRL; the type also automatically
   switches over to using inodes when enough of them exist.

   Level: beginner

.seealso: MatCreateMPIAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

#undef __FUNCT__
#define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
    ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    ok2:;
  }
  ierr = ISCreateGeneral(PETSC_COMM_WORLD,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]),work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
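    A typical use (a minimal sketch; this is an internal routine, and the variable names here
    are illustrative only) has rank 0 holding the entire matrix in gmat while every rank passes
    its desired local row count m:

        Mat newmat;
        MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&newmat);

    A later call with MAT_REUSE_MATRIX refreshes only the numerical values, keeping the
    already-set column indices.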
160 161 Only for square matrices 162 */ 163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 164 { 165 PetscMPIInt rank,size; 166 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 167 PetscErrorCode ierr; 168 Mat mat; 169 Mat_SeqAIJ *gmata; 170 PetscMPIInt tag; 171 MPI_Status status; 172 PetscBool aij; 173 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 174 175 PetscFunctionBegin; 176 CHKMEMQ; 177 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 178 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 179 if (!rank) { 180 ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 181 if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 182 } 183 if (reuse == MAT_INITIAL_MATRIX) { 184 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 185 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 186 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 187 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 188 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 189 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 190 rowners[0] = 0; 191 for (i=2; i<=size; i++) { 192 rowners[i] += rowners[i-1]; 193 } 194 rstart = rowners[rank]; 195 rend = rowners[rank+1]; 196 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 197 if (!rank) { 198 gmata = (Mat_SeqAIJ*) gmat->data; 199 /* send row lengths to all processors */ 200 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 201 for (i=1; i<size; i++) { 202 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 203 } 204 /* determine number diagonal and off-diagonal counts */ 205 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 206 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 207 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 208 jj = 0; 209 for (i=0; i<m; i++) { 210 for (j=0; j<dlens[i]; j++) { 211 if (gmata->j[jj] < rstart) ld[i]++; 212 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 213 jj++; 214 } 215 } 216 /* send column indices to other processes */ 217 for (i=1; i<size; i++) { 218 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 219 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 220 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 221 } 222 223 /* send numerical values to other processes */ 224 for (i=1; i<size; i++) { 225 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 226 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 227 } 228 gmataa = gmata->a; 229 gmataj = gmata->j; 230 231 } else { 232 /* receive row lengths */ 233 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 234 /* receive column indices */ 235 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 236 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 237 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 238 /* determine number diagonal and off-diagonal counts */ 239 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 240 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 241 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 242 jj = 0; 243 for (i=0; i<m; i++) { 244 for (j=0; j<dlens[i]; j++) { 245 if (gmataj[jj] < rstart) ld[i]++; 246 if (gmataj[jj] < 
rstart || gmataj[jj] >= rend) olens[i]++; 247 jj++; 248 } 249 } 250 /* receive numerical values */ 251 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 252 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 253 } 254 /* set preallocation */ 255 for (i=0; i<m; i++) { 256 dlens[i] -= olens[i]; 257 } 258 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 259 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 260 261 for (i=0; i<m; i++) { 262 dlens[i] += olens[i]; 263 } 264 cnt = 0; 265 for (i=0; i<m; i++) { 266 row = rstart + i; 267 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 268 cnt += dlens[i]; 269 } 270 if (rank) { 271 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 272 } 273 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 274 ierr = PetscFree(rowners);CHKERRQ(ierr); 275 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 276 *inmat = mat; 277 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 278 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 279 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 280 mat = *inmat; 281 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 282 if (!rank) { 283 /* send numerical values to other processes */ 284 gmata = (Mat_SeqAIJ*) gmat->data; 285 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 286 gmataa = gmata->a; 287 for (i=1; i<size; i++) { 288 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 289 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 290 } 291 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 292 } else { 293 /* receive numerical values from process 0*/ 294 nz = Ad->nz + Ao->nz; 295 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 296 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 297 } 298 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 299 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 300 ad = Ad->a; 301 ao = Ao->a; 302 if (mat->rmap->n) { 303 i = 0; 304 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 305 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 306 } 307 for (i=1; i<mat->rmap->n; i++) { 308 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 309 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 310 } 311 i--; 312 if (mat->rmap->n) { 313 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 314 } 315 if (rank) { 316 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 317 } 318 } 319 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 320 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 321 CHKMEMQ; 322 PetscFunctionReturn(0); 323 } 324 325 /* 326 Local utility routine that creates a mapping from the global column 327 number to the local number in the off-diagonal part of the local 328 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 329 a slightly higher hash table cost; without it it is not scalable (each processor 330 has an order N integer array but is fast to acess. 
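    As an illustration of the lookup pattern used later in MatSetValues_MPIAIJ() and
    MatGetValues_MPIAIJ() (a sketch; gcol and lcol are hypothetical names for a global and a
    local off-diagonal column index):

      #if defined(PETSC_USE_CTABLE)
        PetscTableFind(aij->colmap,gcol+1,&lcol); lcol--;    hash-table lookup, scalable memory
      #else
        lcol = aij->colmap[gcol] - 1;                        direct lookup, O(N) storage per process
      #endif

    The +1/-1 shift keeps 0 free to mean "not present"; a result of lcol < 0 therefore means the
    global column does not occur in this process's off-diagonal part.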
331 */ 332 #undef __FUNCT__ 333 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 334 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 335 { 336 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 337 PetscErrorCode ierr; 338 PetscInt n = aij->B->cmap->n,i; 339 340 PetscFunctionBegin; 341 #if defined (PETSC_USE_CTABLE) 342 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 343 for (i=0; i<n; i++){ 344 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 345 } 346 #else 347 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 348 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 349 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 350 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 351 #endif 352 PetscFunctionReturn(0); 353 } 354 355 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 356 { \ 357 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 358 lastcol1 = col;\ 359 while (high1-low1 > 5) { \ 360 t = (low1+high1)/2; \ 361 if (rp1[t] > col) high1 = t; \ 362 else low1 = t; \ 363 } \ 364 for (_i=low1; _i<high1; _i++) { \ 365 if (rp1[_i] > col) break; \ 366 if (rp1[_i] == col) { \ 367 if (addv == ADD_VALUES) ap1[_i] += value; \ 368 else ap1[_i] = value; \ 369 goto a_noinsert; \ 370 } \ 371 } \ 372 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 373 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 374 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 375 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 376 N = nrow1++ - 1; a->nz++; high1++; \ 377 /* shift up all the later entries in this row */ \ 378 for (ii=N; ii>=_i; ii--) { \ 379 rp1[ii+1] = rp1[ii]; \ 380 ap1[ii+1] = ap1[ii]; \ 381 } \ 382 rp1[_i] = col; \ 383 ap1[_i] = value; \ 384 a_noinsert: ; \ 385 ailen[row] = nrow1; \ 386 } 387 388 389 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 390 { \ 391 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 392 lastcol2 = col;\ 393 while (high2-low2 > 5) { \ 394 t = (low2+high2)/2; \ 395 if (rp2[t] > col) high2 = t; \ 396 else low2 = t; \ 397 } \ 398 for (_i=low2; _i<high2; _i++) { \ 399 if (rp2[_i] > col) break; \ 400 if (rp2[_i] == col) { \ 401 if (addv == ADD_VALUES) ap2[_i] += value; \ 402 else ap2[_i] = value; \ 403 goto b_noinsert; \ 404 } \ 405 } \ 406 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 407 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 408 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 409 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 410 N = nrow2++ - 1; b->nz++; high2++; \ 411 /* shift up all the later entries in this row */ \ 412 for (ii=N; ii>=_i; ii--) { \ 413 rp2[ii+1] = rp2[ii]; \ 414 ap2[ii+1] = ap2[ii]; \ 415 } \ 416 rp2[_i] = col; \ 417 ap2[_i] = value; \ 418 b_noinsert: ; \ 419 bilen[row] = nrow2; \ 420 } 421 422 #undef __FUNCT__ 423 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 424 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 425 { 426 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 427 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 428 PetscErrorCode ierr; 429 PetscInt l,*garray = mat->garray,diag; 430 431 PetscFunctionBegin; 432 /* code 
only works for square matrices A */ 433 434 /* find size of row to the left of the diagonal part */ 435 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 436 row = row - diag; 437 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 438 if (garray[b->j[b->i[row]+l]] > diag) break; 439 } 440 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 441 442 /* diagonal part */ 443 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 444 445 /* right of diagonal part */ 446 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 447 PetscFunctionReturn(0); 448 } 449 450 #undef __FUNCT__ 451 #define __FUNCT__ "MatSetValues_MPIAIJ" 452 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 453 { 454 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 455 PetscScalar value; 456 PetscErrorCode ierr; 457 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 458 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 459 PetscBool roworiented = aij->roworiented; 460 461 /* Some Variables required in the macro */ 462 Mat A = aij->A; 463 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 464 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 465 MatScalar *aa = a->a; 466 PetscBool ignorezeroentries = a->ignorezeroentries; 467 Mat B = aij->B; 468 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 469 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 470 MatScalar *ba = b->a; 471 472 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 473 PetscInt nonew = a->nonew; 474 MatScalar *ap1,*ap2; 475 476 PetscFunctionBegin; 477 if (v) PetscValidScalarPointer(v,6); 478 for (i=0; i<m; i++) { 479 if (im[i] < 0) continue; 480 #if defined(PETSC_USE_DEBUG) 481 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 482 #endif 483 if (im[i] >= rstart && im[i] < rend) { 484 row = im[i] - rstart; 485 lastcol1 = -1; 486 rp1 = aj + ai[row]; 487 ap1 = aa + ai[row]; 488 rmax1 = aimax[row]; 489 nrow1 = ailen[row]; 490 low1 = 0; 491 high1 = nrow1; 492 lastcol2 = -1; 493 rp2 = bj + bi[row]; 494 ap2 = ba + bi[row]; 495 rmax2 = bimax[row]; 496 nrow2 = bilen[row]; 497 low2 = 0; 498 high2 = nrow2; 499 500 for (j=0; j<n; j++) { 501 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 502 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 503 if (in[j] >= cstart && in[j] < cend){ 504 col = in[j] - cstart; 505 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 506 } else if (in[j] < 0) continue; 507 #if defined(PETSC_USE_DEBUG) 508 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 509 #endif 510 else { 511 if (mat->was_assembled) { 512 if (!aij->colmap) { 513 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 514 } 515 #if defined (PETSC_USE_CTABLE) 516 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 517 col--; 518 #else 519 col = aij->colmap[in[j]] - 1; 520 #endif 521 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 522 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 523 col = in[j]; 524 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 525 B = aij->B; 526 b = 
(Mat_SeqAIJ*)B->data; 527 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 528 rp2 = bj + bi[row]; 529 ap2 = ba + bi[row]; 530 rmax2 = bimax[row]; 531 nrow2 = bilen[row]; 532 low2 = 0; 533 high2 = nrow2; 534 bm = aij->B->rmap->n; 535 ba = b->a; 536 } 537 } else col = in[j]; 538 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 539 } 540 } 541 } else { 542 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 543 if (!aij->donotstash) { 544 if (roworiented) { 545 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 546 } else { 547 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 548 } 549 } 550 } 551 } 552 PetscFunctionReturn(0); 553 } 554 555 #undef __FUNCT__ 556 #define __FUNCT__ "MatGetValues_MPIAIJ" 557 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 558 { 559 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 560 PetscErrorCode ierr; 561 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 562 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 563 564 PetscFunctionBegin; 565 for (i=0; i<m; i++) { 566 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 567 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 568 if (idxm[i] >= rstart && idxm[i] < rend) { 569 row = idxm[i] - rstart; 570 for (j=0; j<n; j++) { 571 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 572 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 573 if (idxn[j] >= cstart && idxn[j] < cend){ 574 col = idxn[j] - cstart; 575 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 576 } else { 577 if (!aij->colmap) { 578 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 579 } 580 #if defined (PETSC_USE_CTABLE) 581 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 582 col --; 583 #else 584 col = aij->colmap[idxn[j]] - 1; 585 #endif 586 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 587 else { 588 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 589 } 590 } 591 } 592 } else { 593 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 594 } 595 } 596 PetscFunctionReturn(0); 597 } 598 599 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 600 601 #undef __FUNCT__ 602 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 603 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 604 { 605 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 606 PetscErrorCode ierr; 607 PetscInt nstash,reallocs; 608 InsertMode addv; 609 610 PetscFunctionBegin; 611 if (aij->donotstash || mat->nooffprocentries) { 612 PetscFunctionReturn(0); 613 } 614 615 /* make sure all processors are either in INSERTMODE or ADDMODE */ 616 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 617 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 
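  /*
     The MPI_BOR reduction above detects mixed insert modes because INSERT_VALUES and ADD_VALUES
     are distinct nonzero bit patterns; e.g. (a sketch, assuming the usual PETSc enum values
     INSERT_VALUES = 1 and ADD_VALUES = 2) one rank inserting while another adds gives
     1|2 == 3 == (ADD_VALUES|INSERT_VALUES), which triggers the error just above.
  */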
618 mat->insertmode = addv; /* in case this processor had no cache */ 619 620 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 621 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 622 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 623 PetscFunctionReturn(0); 624 } 625 626 #undef __FUNCT__ 627 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 628 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 629 { 630 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 631 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 632 PetscErrorCode ierr; 633 PetscMPIInt n; 634 PetscInt i,j,rstart,ncols,flg; 635 PetscInt *row,*col; 636 PetscBool other_disassembled; 637 PetscScalar *val; 638 InsertMode addv = mat->insertmode; 639 640 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 641 PetscFunctionBegin; 642 if (!aij->donotstash && !mat->nooffprocentries) { 643 while (1) { 644 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 645 if (!flg) break; 646 647 for (i=0; i<n;) { 648 /* Now identify the consecutive vals belonging to the same row */ 649 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 650 if (j < n) ncols = j-i; 651 else ncols = n-i; 652 /* Now assemble all these values with a single function call */ 653 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 654 i = j; 655 } 656 } 657 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 658 } 659 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 660 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 661 662 /* determine if any processor has disassembled, if so we must 663 also disassemble ourselfs, in order that we may reassemble. 
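       (As implemented just below: every rank contributes its was_assembled flag to an MPI_PROD
       reduction, so other_disassembled comes back 0 as soon as any rank contributes 0, and each
       rank that is still assembled then calls DisAssemble_MPIAIJ() on itself.)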
*/ 664 /* 665 if nonzero structure of submatrix B cannot change then we know that 666 no processor disassembled thus we can skip this stuff 667 */ 668 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 669 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 670 if (mat->was_assembled && !other_disassembled) { 671 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 672 } 673 } 674 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 675 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 676 } 677 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 678 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 679 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 680 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 681 682 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 683 aij->rowvalues = 0; 684 685 /* used by MatAXPY() */ 686 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 687 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 688 689 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 690 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 691 PetscFunctionReturn(0); 692 } 693 694 #undef __FUNCT__ 695 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 696 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 697 { 698 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 699 PetscErrorCode ierr; 700 701 PetscFunctionBegin; 702 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 703 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 704 PetscFunctionReturn(0); 705 } 706 707 #undef __FUNCT__ 708 #define __FUNCT__ "MatZeroRows_MPIAIJ" 709 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 710 { 711 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 712 PetscErrorCode ierr; 713 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 714 PetscInt i,*owners = A->rmap->range; 715 PetscInt *nprocs,j,idx,nsends,row; 716 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 717 PetscInt *rvalues,count,base,slen,*source; 718 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 719 MPI_Comm comm = ((PetscObject)A)->comm; 720 MPI_Request *send_waits,*recv_waits; 721 MPI_Status recv_status,*send_status; 722 const PetscScalar *xx; 723 PetscScalar *bb; 724 #if defined(PETSC_DEBUG) 725 PetscBool found = PETSC_FALSE; 726 #endif 727 728 PetscFunctionBegin; 729 /* first count number of contributors to each processor */ 730 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 731 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 732 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 733 j = 0; 734 for (i=0; i<N; i++) { 735 if (lastidx > (idx = rows[i])) j = 0; 736 lastidx = idx; 737 for (; j<size; j++) { 738 if (idx >= owners[j] && idx < owners[j+1]) { 739 nprocs[2*j]++; 740 nprocs[2*j+1] = 1; 741 owner[i] = j; 742 #if defined(PETSC_DEBUG) 743 found = PETSC_TRUE; 744 #endif 745 break; 746 } 747 } 748 #if defined(PETSC_DEBUG) 749 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 750 found = PETSC_FALSE; 751 #endif 752 } 753 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 754 755 if (A->nooffproczerorows) { 756 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 757 nrecvs = nsends; 758 nmax = N; 759 } else 
{ 760 /* inform other processors of number of messages and max length*/ 761 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 762 } 763 764 /* post receives: */ 765 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 766 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 767 for (i=0; i<nrecvs; i++) { 768 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 769 } 770 771 /* do sends: 772 1) starts[i] gives the starting index in svalues for stuff going to 773 the ith processor 774 */ 775 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 776 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 777 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 778 starts[0] = 0; 779 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 780 for (i=0; i<N; i++) { 781 svalues[starts[owner[i]]++] = rows[i]; 782 } 783 784 starts[0] = 0; 785 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 786 count = 0; 787 for (i=0; i<size; i++) { 788 if (nprocs[2*i+1]) { 789 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 790 } 791 } 792 ierr = PetscFree(starts);CHKERRQ(ierr); 793 794 base = owners[rank]; 795 796 /* wait on receives */ 797 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 798 count = nrecvs; slen = 0; 799 while (count) { 800 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 801 /* unpack receives into our local space */ 802 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 803 source[imdex] = recv_status.MPI_SOURCE; 804 lens[imdex] = n; 805 slen += n; 806 count--; 807 } 808 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 809 810 /* move the data into the send scatter */ 811 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 812 count = 0; 813 for (i=0; i<nrecvs; i++) { 814 values = rvalues + i*nmax; 815 for (j=0; j<lens[i]; j++) { 816 lrows[count++] = values[j] - base; 817 } 818 } 819 ierr = PetscFree(rvalues);CHKERRQ(ierr); 820 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 821 ierr = PetscFree(owner);CHKERRQ(ierr); 822 ierr = PetscFree(nprocs);CHKERRQ(ierr); 823 824 /* fix right hand side if needed */ 825 if (x && b) { 826 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 827 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 828 for (i=0; i<slen; i++) { 829 bb[lrows[i]] = diag*xx[lrows[i]]; 830 } 831 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 832 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 833 } 834 /* 835 Zero the required rows. If the "diagonal block" of the matrix 836 is square and the user wishes to set the diagonal we use separate 837 code so that MatSetValues() is not called for each diagonal allocating 838 new memory, thus calling lots of mallocs and slowing things down. 
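     For reference, a typical caller-level use of this path (a sketch, not taken from this file;
     nbc and bcrows are hypothetical names) is enforcing Dirichlet boundary conditions:

        MatZeroRows(A,nbc,bcrows,1.0,x,b);

     which zeros the listed rows, puts 1.0 on their diagonal entries, and, because x and b are
     supplied, overwrites b at those rows with the corresponding entries of x (see the
     "fix right hand side" loop below, bb[lrows[i]] = diag*xx[lrows[i]]).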
839 840 */ 841 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 842 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 843 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 844 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 845 } else if (diag != 0.0) { 846 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 847 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 848 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 849 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 850 } 851 for (i = 0; i < slen; i++) { 852 row = lrows[i] + rstart; 853 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 854 } 855 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 856 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 857 } else { 858 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 859 } 860 ierr = PetscFree(lrows);CHKERRQ(ierr); 861 862 /* wait on sends */ 863 if (nsends) { 864 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 865 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 866 ierr = PetscFree(send_status);CHKERRQ(ierr); 867 } 868 ierr = PetscFree(send_waits);CHKERRQ(ierr); 869 ierr = PetscFree(svalues);CHKERRQ(ierr); 870 PetscFunctionReturn(0); 871 } 872 873 #undef __FUNCT__ 874 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 875 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 876 { 877 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 878 PetscErrorCode ierr; 879 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 880 PetscInt i,*owners = A->rmap->range; 881 PetscInt *nprocs,j,idx,nsends; 882 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 883 PetscInt *rvalues,count,base,slen,*source; 884 PetscInt *lens,*lrows,*values,m; 885 MPI_Comm comm = ((PetscObject)A)->comm; 886 MPI_Request *send_waits,*recv_waits; 887 MPI_Status recv_status,*send_status; 888 const PetscScalar *xx; 889 PetscScalar *bb,*mask; 890 Vec xmask,lmask; 891 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 892 const PetscInt *aj, *ii,*ridx; 893 PetscScalar *aa; 894 #if defined(PETSC_DEBUG) 895 PetscBool found = PETSC_FALSE; 896 #endif 897 898 PetscFunctionBegin; 899 /* first count number of contributors to each processor */ 900 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 901 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 902 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 903 j = 0; 904 for (i=0; i<N; i++) { 905 if (lastidx > (idx = rows[i])) j = 0; 906 lastidx = idx; 907 for (; j<size; j++) { 908 if (idx >= owners[j] && idx < owners[j+1]) { 909 nprocs[2*j]++; 910 nprocs[2*j+1] = 1; 911 owner[i] = j; 912 #if defined(PETSC_DEBUG) 913 found = PETSC_TRUE; 914 #endif 915 break; 916 } 917 } 918 #if defined(PETSC_DEBUG) 919 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 920 found = PETSC_FALSE; 921 #endif 922 } 923 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 924 925 /* inform other processors of number of messages and max length*/ 926 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 927 928 /* post receives: */ 929 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 930 ierr = 
PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 931 for (i=0; i<nrecvs; i++) { 932 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 933 } 934 935 /* do sends: 936 1) starts[i] gives the starting index in svalues for stuff going to 937 the ith processor 938 */ 939 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 940 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 941 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 942 starts[0] = 0; 943 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 944 for (i=0; i<N; i++) { 945 svalues[starts[owner[i]]++] = rows[i]; 946 } 947 948 starts[0] = 0; 949 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 950 count = 0; 951 for (i=0; i<size; i++) { 952 if (nprocs[2*i+1]) { 953 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 954 } 955 } 956 ierr = PetscFree(starts);CHKERRQ(ierr); 957 958 base = owners[rank]; 959 960 /* wait on receives */ 961 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 962 count = nrecvs; slen = 0; 963 while (count) { 964 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 965 /* unpack receives into our local space */ 966 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 967 source[imdex] = recv_status.MPI_SOURCE; 968 lens[imdex] = n; 969 slen += n; 970 count--; 971 } 972 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 973 974 /* move the data into the send scatter */ 975 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 976 count = 0; 977 for (i=0; i<nrecvs; i++) { 978 values = rvalues + i*nmax; 979 for (j=0; j<lens[i]; j++) { 980 lrows[count++] = values[j] - base; 981 } 982 } 983 ierr = PetscFree(rvalues);CHKERRQ(ierr); 984 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 985 ierr = PetscFree(owner);CHKERRQ(ierr); 986 ierr = PetscFree(nprocs);CHKERRQ(ierr); 987 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 988 989 /* zero diagonal part of matrix */ 990 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 991 992 /* handle off diagonal part of matrix */ 993 ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr); 994 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 995 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 996 for (i=0; i<slen; i++) { 997 bb[lrows[i]] = 1; 998 } 999 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1000 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1001 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1002 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1003 if (x) { 1004 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1005 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1006 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1007 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1008 } 1009 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1010 1011 /* remove zeroed rows of off diagonal matrix */ 1012 ii = aij->i; 1013 for (i=0; i<slen; i++) { 1014 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1015 } 1016 1017 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1018 if (aij->compressedrow.use){ 1019 m = aij->compressedrow.nrows; 1020 ii = 
aij->compressedrow.i; 1021 ridx = aij->compressedrow.rindex; 1022 for (i=0; i<m; i++){ 1023 n = ii[i+1] - ii[i]; 1024 aj = aij->j + ii[i]; 1025 aa = aij->a + ii[i]; 1026 1027 for (j=0; j<n; j++) { 1028 if (PetscAbsScalar(mask[*aj])) { 1029 if (b) bb[*ridx] -= *aa*xx[*aj]; 1030 *aa = 0.0; 1031 } 1032 aa++; 1033 aj++; 1034 } 1035 ridx++; 1036 } 1037 } else { /* do not use compressed row format */ 1038 m = l->B->rmap->n; 1039 for (i=0; i<m; i++) { 1040 n = ii[i+1] - ii[i]; 1041 aj = aij->j + ii[i]; 1042 aa = aij->a + ii[i]; 1043 for (j=0; j<n; j++) { 1044 if (PetscAbsScalar(mask[*aj])) { 1045 if (b) bb[i] -= *aa*xx[*aj]; 1046 *aa = 0.0; 1047 } 1048 aa++; 1049 aj++; 1050 } 1051 } 1052 } 1053 if (x) { 1054 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1055 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1056 } 1057 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1058 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1059 ierr = PetscFree(lrows);CHKERRQ(ierr); 1060 1061 /* wait on sends */ 1062 if (nsends) { 1063 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1064 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1065 ierr = PetscFree(send_status);CHKERRQ(ierr); 1066 } 1067 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1068 ierr = PetscFree(svalues);CHKERRQ(ierr); 1069 1070 PetscFunctionReturn(0); 1071 } 1072 1073 #undef __FUNCT__ 1074 #define __FUNCT__ "MatMult_MPIAIJ" 1075 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1076 { 1077 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1078 PetscErrorCode ierr; 1079 PetscInt nt; 1080 1081 PetscFunctionBegin; 1082 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1083 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1084 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1085 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1086 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1087 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1088 PetscFunctionReturn(0); 1089 } 1090 1091 #undef __FUNCT__ 1092 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1093 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1094 { 1095 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1096 PetscErrorCode ierr; 1097 1098 PetscFunctionBegin; 1099 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1100 PetscFunctionReturn(0); 1101 } 1102 1103 #undef __FUNCT__ 1104 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1105 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1106 { 1107 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1108 PetscErrorCode ierr; 1109 1110 PetscFunctionBegin; 1111 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1112 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1113 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1114 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1115 PetscFunctionReturn(0); 1116 } 1117 1118 #undef __FUNCT__ 1119 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1120 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1121 { 1122 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1123 PetscErrorCode ierr; 1124 PetscBool merged; 1125 1126 PetscFunctionBegin; 1127 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1128 /* do nondiagonal part */ 1129 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1130 if (!merged) { 1131 /* 
send it on its way */ 1132 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1133 /* do local part */ 1134 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1135 /* receive remote parts: note this assumes the values are not actually */ 1136 /* added in yy until the next line, */ 1137 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1138 } else { 1139 /* do local part */ 1140 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1141 /* send it on its way */ 1142 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1143 /* values actually were received in the Begin() but we need to call this nop */ 1144 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1145 } 1146 PetscFunctionReturn(0); 1147 } 1148 1149 EXTERN_C_BEGIN 1150 #undef __FUNCT__ 1151 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1152 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1153 { 1154 MPI_Comm comm; 1155 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 1156 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1157 IS Me,Notme; 1158 PetscErrorCode ierr; 1159 PetscInt M,N,first,last,*notme,i; 1160 PetscMPIInt size; 1161 1162 PetscFunctionBegin; 1163 1164 /* Easy test: symmetric diagonal block */ 1165 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 1166 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1167 if (!*f) PetscFunctionReturn(0); 1168 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1169 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1170 if (size == 1) PetscFunctionReturn(0); 1171 1172 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1173 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1174 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1175 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1176 for (i=0; i<first; i++) notme[i] = i; 1177 for (i=last; i<M; i++) notme[i-last+first] = i; 1178 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1179 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1180 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1181 Aoff = Aoffs[0]; 1182 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1183 Boff = Boffs[0]; 1184 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1185 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1186 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1187 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1188 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1189 ierr = PetscFree(notme);CHKERRQ(ierr); 1190 PetscFunctionReturn(0); 1191 } 1192 EXTERN_C_END 1193 1194 #undef __FUNCT__ 1195 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1196 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1197 { 1198 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1199 PetscErrorCode ierr; 1200 1201 PetscFunctionBegin; 1202 /* do nondiagonal part */ 1203 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1204 /* send it on its way */ 1205 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1206 /* do local part */ 1207 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1208 /* receive remote parts */ 1209 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1210 PetscFunctionReturn(0); 1211 } 
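/*
   Summary of the mult kernels above (a sketch, not additional executable code): an MPIAIJ matrix
   stores its local rows as a diagonal block A (columns owned by this process) and an off-diagonal
   block B (all remaining columns, compressed through garray), so y = M*x is computed as

       scatter x -> lvec        (VecScatterBegin/End on Mvctx, overlapped with the local product)
       y  = A * x_local         (a->A->ops->mult)
       y += B * lvec            (a->B->ops->multadd)

   while the transpose variants apply B^T and A^T locally and run the same scatter in
   SCATTER_REVERSE with ADD_VALUES to accumulate the off-process contributions.
*/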
1212 1213 /* 1214 This only works correctly for square matrices where the subblock A->A is the 1215 diagonal block 1216 */ 1217 #undef __FUNCT__ 1218 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1219 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1220 { 1221 PetscErrorCode ierr; 1222 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1223 1224 PetscFunctionBegin; 1225 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1226 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1227 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1228 PetscFunctionReturn(0); 1229 } 1230 1231 #undef __FUNCT__ 1232 #define __FUNCT__ "MatScale_MPIAIJ" 1233 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1234 { 1235 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1236 PetscErrorCode ierr; 1237 1238 PetscFunctionBegin; 1239 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1240 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1241 PetscFunctionReturn(0); 1242 } 1243 1244 #undef __FUNCT__ 1245 #define __FUNCT__ "MatDestroy_MPIAIJ" 1246 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1247 { 1248 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1249 PetscErrorCode ierr; 1250 1251 PetscFunctionBegin; 1252 #if defined(PETSC_USE_LOG) 1253 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1254 #endif 1255 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1256 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1257 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1258 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1259 #if defined (PETSC_USE_CTABLE) 1260 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1261 #else 1262 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1263 #endif 1264 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1265 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1266 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1267 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1268 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1269 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1270 1271 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1272 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1273 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1274 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1275 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1276 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1277 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1278 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1279 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1280 PetscFunctionReturn(0); 1281 } 1282 1283 #undef __FUNCT__ 1284 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1285 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1286 { 1287 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1288 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 1289 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 1290 PetscErrorCode ierr; 1291 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1292 int 
fd; 1293 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1294 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 1295 PetscScalar *column_values; 1296 PetscInt message_count,flowcontrolcount; 1297 1298 PetscFunctionBegin; 1299 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1300 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1301 nz = A->nz + B->nz; 1302 if (!rank) { 1303 header[0] = MAT_FILE_CLASSID; 1304 header[1] = mat->rmap->N; 1305 header[2] = mat->cmap->N; 1306 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1307 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1308 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1309 /* get largest number of rows any processor has */ 1310 rlen = mat->rmap->n; 1311 range = mat->rmap->range; 1312 for (i=1; i<size; i++) { 1313 rlen = PetscMax(rlen,range[i+1] - range[i]); 1314 } 1315 } else { 1316 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1317 rlen = mat->rmap->n; 1318 } 1319 1320 /* load up the local row counts */ 1321 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 1322 for (i=0; i<mat->rmap->n; i++) { 1323 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1324 } 1325 1326 /* store the row lengths to the file */ 1327 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1328 if (!rank) { 1329 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1330 for (i=1; i<size; i++) { 1331 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1332 rlen = range[i+1] - range[i]; 1333 ierr = MPILong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1334 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1335 } 1336 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1337 } else { 1338 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1339 ierr = MPILong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1340 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1341 } 1342 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1343 1344 /* load up the local column indices */ 1345 nzmax = nz; /* )th processor needs space a largest processor needs */ 1346 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1347 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1348 cnt = 0; 1349 for (i=0; i<mat->rmap->n; i++) { 1350 for (j=B->i[i]; j<B->i[i+1]; j++) { 1351 if ( (col = garray[B->j[j]]) > cstart) break; 1352 column_indices[cnt++] = col; 1353 } 1354 for (k=A->i[i]; k<A->i[i+1]; k++) { 1355 column_indices[cnt++] = A->j[k] + cstart; 1356 } 1357 for (; j<B->i[i+1]; j++) { 1358 column_indices[cnt++] = garray[B->j[j]]; 1359 } 1360 } 1361 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1362 1363 /* store the column indices to the file */ 1364 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1365 if (!rank) { 1366 MPI_Status status; 1367 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1368 for (i=1; i<size; i++) { 1369 ierr = 
PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1370 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1371 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1372 ierr = MPILong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1373 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1374 } 1375 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1376 } else { 1377 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1378 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1379 ierr = MPILong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1380 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1381 } 1382 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1383 1384 /* load up the local column values */ 1385 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1386 cnt = 0; 1387 for (i=0; i<mat->rmap->n; i++) { 1388 for (j=B->i[i]; j<B->i[i+1]; j++) { 1389 if ( garray[B->j[j]] > cstart) break; 1390 column_values[cnt++] = B->a[j]; 1391 } 1392 for (k=A->i[i]; k<A->i[i+1]; k++) { 1393 column_values[cnt++] = A->a[k]; 1394 } 1395 for (; j<B->i[i+1]; j++) { 1396 column_values[cnt++] = B->a[j]; 1397 } 1398 } 1399 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1400 1401 /* store the column values to the file */ 1402 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1403 if (!rank) { 1404 MPI_Status status; 1405 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1406 for (i=1; i<size; i++) { 1407 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1408 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1409 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1410 ierr = MPILong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1411 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1412 } 1413 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1414 } else { 1415 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1416 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1417 ierr = MPILong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1418 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1419 } 1420 ierr = PetscFree(column_values);CHKERRQ(ierr); 1421 PetscFunctionReturn(0); 1422 } 1423 1424 #undef __FUNCT__ 1425 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1426 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1427 { 1428 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1429 PetscErrorCode ierr; 1430 PetscMPIInt rank = aij->rank,size = aij->size; 1431 PetscBool isdraw,iascii,isbinary; 1432 PetscViewer sviewer; 1433 PetscViewerFormat format; 1434 1435 PetscFunctionBegin; 1436 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1437 ierr = 
PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1438 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1439 if (iascii) { 1440 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1441 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1442 MatInfo info; 1443 PetscBool inodes; 1444 1445 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1446 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1447 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1448 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1449 if (!inodes) { 1450 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1451 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1452 } else { 1453 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1454 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1455 } 1456 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1457 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1458 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1459 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1460 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1461 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1462 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1463 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1464 PetscFunctionReturn(0); 1465 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1466 PetscInt inodecount,inodelimit,*inodes; 1467 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1468 if (inodes) { 1469 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1470 } else { 1471 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1472 } 1473 PetscFunctionReturn(0); 1474 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1475 PetscFunctionReturn(0); 1476 } 1477 } else if (isbinary) { 1478 if (size == 1) { 1479 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1480 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1481 } else { 1482 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1483 } 1484 PetscFunctionReturn(0); 1485 } else if (isdraw) { 1486 PetscDraw draw; 1487 PetscBool isnull; 1488 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1489 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1490 } 1491 1492 if (size == 1) { 1493 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1494 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1495 } else { 1496 /* assemble the entire matrix onto first processor. 
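         This does not scale, so it is only attempted for small matrices: below, ASCII output is
         refused for matrices with more than 1024 rows unless -mat_ascii_output_large is given.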
*/ 1497 Mat A; 1498 Mat_SeqAIJ *Aloc; 1499 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1500 MatScalar *a; 1501 1502 if (mat->rmap->N > 1024) { 1503 PetscBool flg = PETSC_FALSE; 1504 1505 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1506 if (!flg) { 1507 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1508 } 1509 } 1510 1511 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1512 if (!rank) { 1513 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1514 } else { 1515 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1516 } 1517 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1518 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1519 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1520 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1521 1522 /* copy over the A part */ 1523 Aloc = (Mat_SeqAIJ*)aij->A->data; 1524 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1525 row = mat->rmap->rstart; 1526 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1527 for (i=0; i<m; i++) { 1528 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1529 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1530 } 1531 aj = Aloc->j; 1532 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1533 1534 /* copy over the B part */ 1535 Aloc = (Mat_SeqAIJ*)aij->B->data; 1536 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1537 row = mat->rmap->rstart; 1538 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1539 ct = cols; 1540 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1541 for (i=0; i<m; i++) { 1542 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1543 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1544 } 1545 ierr = PetscFree(ct);CHKERRQ(ierr); 1546 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1547 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1548 /* 1549 Everyone has to call to draw the matrix since the graphics waits are 1550 synchronized across all processors that share the PetscDraw object 1551 */ 1552 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1553 if (!rank) { 1554 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1555 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1556 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1557 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1558 } 1559 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1560 ierr = MatDestroy(&A);CHKERRQ(ierr); 1561 } 1562 PetscFunctionReturn(0); 1563 } 1564 1565 #undef __FUNCT__ 1566 #define __FUNCT__ "MatView_MPIAIJ" 1567 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1568 { 1569 PetscErrorCode ierr; 1570 PetscBool iascii,isdraw,issocket,isbinary; 1571 1572 PetscFunctionBegin; 1573 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1574 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1575 ierr = 
PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1576 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1577 if (iascii || isdraw || isbinary || issocket) { 1578 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1579 } else { 1580 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1581 } 1582 PetscFunctionReturn(0); 1583 } 1584 1585 #undef __FUNCT__ 1586 #define __FUNCT__ "MatSOR_MPIAIJ" 1587 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1588 { 1589 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1590 PetscErrorCode ierr; 1591 Vec bb1 = 0; 1592 PetscBool hasop; 1593 1594 PetscFunctionBegin; 1595 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1596 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1597 } 1598 1599 if (flag == SOR_APPLY_UPPER) { 1600 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1601 PetscFunctionReturn(0); 1602 } 1603 1604 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1605 if (flag & SOR_ZERO_INITIAL_GUESS) { 1606 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1607 its--; 1608 } 1609 1610 while (its--) { 1611 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1612 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1613 1614 /* update rhs: bb1 = bb - B*x */ 1615 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1616 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1617 1618 /* local sweep */ 1619 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1620 } 1621 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1622 if (flag & SOR_ZERO_INITIAL_GUESS) { 1623 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1624 its--; 1625 } 1626 while (its--) { 1627 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1628 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1629 1630 /* update rhs: bb1 = bb - B*x */ 1631 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1632 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1633 1634 /* local sweep */ 1635 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1636 } 1637 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1638 if (flag & SOR_ZERO_INITIAL_GUESS) { 1639 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1640 its--; 1641 } 1642 while (its--) { 1643 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1644 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1645 1646 /* update rhs: bb1 = bb - B*x */ 1647 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1648 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1649 1650 /* local sweep */ 1651 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1652 } 1653 } else if (flag & SOR_EISENSTAT) { 1654 Vec xx1; 1655 1656 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1657 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | 
SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1658 1659 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1660 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1661 if (!mat->diag) { 1662 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1663 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1664 } 1665 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1666 if (hasop) { 1667 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1668 } else { 1669 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1670 } 1671 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1672 1673 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1674 1675 /* local sweep */ 1676 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1677 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1678 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1679 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1680 1681 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1682 PetscFunctionReturn(0); 1683 } 1684 1685 #undef __FUNCT__ 1686 #define __FUNCT__ "MatPermute_MPIAIJ" 1687 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1688 { 1689 MPI_Comm comm,pcomm; 1690 PetscInt first,local_size,nrows; 1691 const PetscInt *rows; 1692 PetscMPIInt size; 1693 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1694 PetscErrorCode ierr; 1695 1696 PetscFunctionBegin; 1697 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1698 /* make a collective version of 'rowp' */ 1699 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 1700 if (pcomm==comm) { 1701 crowp = rowp; 1702 } else { 1703 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 1704 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 1705 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 1706 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 1707 } 1708 /* collect the global row permutation and invert it */ 1709 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1710 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1711 if (pcomm!=comm) { 1712 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 1713 } 1714 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1715 /* get the local target indices */ 1716 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1717 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 1718 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1719 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 1720 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1721 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 1722 /* the column permutation is so much easier; 1723 make a local version of 'colp' and invert it */ 1724 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 1725 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 1726 if (size==1) { 1727 lcolp = colp; 1728 } else { 1729 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 1730 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 1731 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr); 1732 } 1733 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1734 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1735 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 1736 if (size>1) { 1737 ierr = 
ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 1738 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 1739 } 1740 /* now we just get the submatrix */ 1741 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1742 /* clean up */ 1743 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 1744 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 1745 PetscFunctionReturn(0); 1746 } 1747 1748 #undef __FUNCT__ 1749 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1750 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1751 { 1752 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1753 Mat A = mat->A,B = mat->B; 1754 PetscErrorCode ierr; 1755 PetscReal isend[5],irecv[5]; 1756 1757 PetscFunctionBegin; 1758 info->block_size = 1.0; 1759 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1760 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1761 isend[3] = info->memory; isend[4] = info->mallocs; 1762 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1763 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1764 isend[3] += info->memory; isend[4] += info->mallocs; 1765 if (flag == MAT_LOCAL) { 1766 info->nz_used = isend[0]; 1767 info->nz_allocated = isend[1]; 1768 info->nz_unneeded = isend[2]; 1769 info->memory = isend[3]; 1770 info->mallocs = isend[4]; 1771 } else if (flag == MAT_GLOBAL_MAX) { 1772 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1773 info->nz_used = irecv[0]; 1774 info->nz_allocated = irecv[1]; 1775 info->nz_unneeded = irecv[2]; 1776 info->memory = irecv[3]; 1777 info->mallocs = irecv[4]; 1778 } else if (flag == MAT_GLOBAL_SUM) { 1779 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1780 info->nz_used = irecv[0]; 1781 info->nz_allocated = irecv[1]; 1782 info->nz_unneeded = irecv[2]; 1783 info->memory = irecv[3]; 1784 info->mallocs = irecv[4]; 1785 } 1786 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1787 info->fill_ratio_needed = 0; 1788 info->factor_mallocs = 0; 1789 1790 PetscFunctionReturn(0); 1791 } 1792 1793 #undef __FUNCT__ 1794 #define __FUNCT__ "MatSetOption_MPIAIJ" 1795 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1796 { 1797 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1798 PetscErrorCode ierr; 1799 1800 PetscFunctionBegin; 1801 switch (op) { 1802 case MAT_NEW_NONZERO_LOCATIONS: 1803 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1804 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1805 case MAT_KEEP_NONZERO_PATTERN: 1806 case MAT_NEW_NONZERO_LOCATION_ERR: 1807 case MAT_USE_INODES: 1808 case MAT_IGNORE_ZERO_ENTRIES: 1809 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1810 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1811 break; 1812 case MAT_ROW_ORIENTED: 1813 a->roworiented = flg; 1814 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1815 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1816 break; 1817 case MAT_NEW_DIAGONALS: 1818 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1819 break; 1820 case MAT_IGNORE_OFF_PROC_ENTRIES: 1821 a->donotstash = flg; 1822 break; 1823 case MAT_SPD: 1824 A->spd_set = PETSC_TRUE; 1825 A->spd = flg; 1826 if (flg) { 1827 A->symmetric = PETSC_TRUE; 1828 A->structurally_symmetric = PETSC_TRUE; 1829 A->symmetric_set = PETSC_TRUE; 1830 A->structurally_symmetric_set = PETSC_TRUE; 1831 } 1832 break; 1833 case MAT_SYMMETRIC: 1834 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1835 break; 1836 case MAT_STRUCTURALLY_SYMMETRIC: 
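    /* like MAT_SYMMETRIC above, these symmetry hints are only forwarded to the local diagonal block a->A */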
1837 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1838 break; 1839 case MAT_HERMITIAN: 1840 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1841 break; 1842 case MAT_SYMMETRY_ETERNAL: 1843 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1844 break; 1845 default: 1846 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1847 } 1848 PetscFunctionReturn(0); 1849 } 1850 1851 #undef __FUNCT__ 1852 #define __FUNCT__ "MatGetRow_MPIAIJ" 1853 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1854 { 1855 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1856 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1857 PetscErrorCode ierr; 1858 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1859 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1860 PetscInt *cmap,*idx_p; 1861 1862 PetscFunctionBegin; 1863 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1864 mat->getrowactive = PETSC_TRUE; 1865 1866 if (!mat->rowvalues && (idx || v)) { 1867 /* 1868 allocate enough space to hold information from the longest row. 1869 */ 1870 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1871 PetscInt max = 1,tmp; 1872 for (i=0; i<matin->rmap->n; i++) { 1873 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1874 if (max < tmp) { max = tmp; } 1875 } 1876 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1877 } 1878 1879 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1880 lrow = row - rstart; 1881 1882 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1883 if (!v) {pvA = 0; pvB = 0;} 1884 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1885 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1886 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1887 nztot = nzA + nzB; 1888 1889 cmap = mat->garray; 1890 if (v || idx) { 1891 if (nztot) { 1892 /* Sort by increasing column numbers, assuming A and B already sorted */ 1893 PetscInt imark = -1; 1894 if (v) { 1895 *v = v_p = mat->rowvalues; 1896 for (i=0; i<nzB; i++) { 1897 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1898 else break; 1899 } 1900 imark = i; 1901 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1902 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1903 } 1904 if (idx) { 1905 *idx = idx_p = mat->rowindices; 1906 if (imark > -1) { 1907 for (i=0; i<imark; i++) { 1908 idx_p[i] = cmap[cworkB[i]]; 1909 } 1910 } else { 1911 for (i=0; i<nzB; i++) { 1912 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1913 else break; 1914 } 1915 imark = i; 1916 } 1917 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1918 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1919 } 1920 } else { 1921 if (idx) *idx = 0; 1922 if (v) *v = 0; 1923 } 1924 } 1925 *nz = nztot; 1926 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1927 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1928 PetscFunctionReturn(0); 1929 } 1930 1931 #undef __FUNCT__ 1932 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1933 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1934 { 1935 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1936 1937 PetscFunctionBegin; 1938 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1939 
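  /*
     The row work arrays mat->rowvalues/mat->rowindices allocated in MatGetRow_MPIAIJ() are reused
     between calls (they are not freed here), so restoring a row only clears the active flag below.
     A minimal caller-side sketch (illustration only, not part of this file), assuming an assembled
     matrix mat: rows must be locally owned and every MatGetRow() must be paired with MatRestoreRow().

       PetscErrorCode    ierr;
       PetscInt          rstart,rend,row,ncols;
       const PetscInt    *cols;
       const PetscScalar *vals;
       ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
       for (row=rstart; row<rend; row++) {
         ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
         ... use ncols, cols[], vals[] here ...
         ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       }
  */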
aij->getrowactive = PETSC_FALSE; 1940 PetscFunctionReturn(0); 1941 } 1942 1943 #undef __FUNCT__ 1944 #define __FUNCT__ "MatNorm_MPIAIJ" 1945 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1946 { 1947 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1948 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1949 PetscErrorCode ierr; 1950 PetscInt i,j,cstart = mat->cmap->rstart; 1951 PetscReal sum = 0.0; 1952 MatScalar *v; 1953 1954 PetscFunctionBegin; 1955 if (aij->size == 1) { 1956 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1957 } else { 1958 if (type == NORM_FROBENIUS) { 1959 v = amat->a; 1960 for (i=0; i<amat->nz; i++) { 1961 #if defined(PETSC_USE_COMPLEX) 1962 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1963 #else 1964 sum += (*v)*(*v); v++; 1965 #endif 1966 } 1967 v = bmat->a; 1968 for (i=0; i<bmat->nz; i++) { 1969 #if defined(PETSC_USE_COMPLEX) 1970 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1971 #else 1972 sum += (*v)*(*v); v++; 1973 #endif 1974 } 1975 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1976 *norm = PetscSqrtReal(*norm); 1977 } else if (type == NORM_1) { /* max column norm */ 1978 PetscReal *tmp,*tmp2; 1979 PetscInt *jj,*garray = aij->garray; 1980 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1981 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1982 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1983 *norm = 0.0; 1984 v = amat->a; jj = amat->j; 1985 for (j=0; j<amat->nz; j++) { 1986 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1987 } 1988 v = bmat->a; jj = bmat->j; 1989 for (j=0; j<bmat->nz; j++) { 1990 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1991 } 1992 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1993 for (j=0; j<mat->cmap->N; j++) { 1994 if (tmp2[j] > *norm) *norm = tmp2[j]; 1995 } 1996 ierr = PetscFree(tmp);CHKERRQ(ierr); 1997 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1998 } else if (type == NORM_INFINITY) { /* max row norm */ 1999 PetscReal ntemp = 0.0; 2000 for (j=0; j<aij->A->rmap->n; j++) { 2001 v = amat->a + amat->i[j]; 2002 sum = 0.0; 2003 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2004 sum += PetscAbsScalar(*v); v++; 2005 } 2006 v = bmat->a + bmat->i[j]; 2007 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2008 sum += PetscAbsScalar(*v); v++; 2009 } 2010 if (sum > ntemp) ntemp = sum; 2011 } 2012 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 2013 } else { 2014 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 2015 } 2016 } 2017 PetscFunctionReturn(0); 2018 } 2019 2020 #undef __FUNCT__ 2021 #define __FUNCT__ "MatTranspose_MPIAIJ" 2022 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2023 { 2024 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2025 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2026 PetscErrorCode ierr; 2027 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 2028 PetscInt cstart=A->cmap->rstart,ncol; 2029 Mat B; 2030 MatScalar *array; 2031 2032 PetscFunctionBegin; 2033 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2034 2035 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 2036 ai = Aloc->i; aj = Aloc->j; 2037 bi = Bloc->i; bj = Bloc->j; 2038 if (reuse == 
MAT_INITIAL_MATRIX || *matout == A) { 2039 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 2040 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2041 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 2042 for (i=0; i<ai[ma]; i++){ 2043 d_nnz[aj[i]] ++; 2044 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2045 } 2046 2047 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2048 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2049 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2050 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 2051 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2052 } else { 2053 B = *matout; 2054 } 2055 2056 /* copy over the A part */ 2057 array = Aloc->a; 2058 row = A->rmap->rstart; 2059 for (i=0; i<ma; i++) { 2060 ncol = ai[i+1]-ai[i]; 2061 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2062 row++; array += ncol; aj += ncol; 2063 } 2064 aj = Aloc->j; 2065 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2066 2067 /* copy over the B part */ 2068 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2069 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2070 array = Bloc->a; 2071 row = A->rmap->rstart; 2072 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 2073 cols_tmp = cols; 2074 for (i=0; i<mb; i++) { 2075 ncol = bi[i+1]-bi[i]; 2076 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2077 row++; array += ncol; cols_tmp += ncol; 2078 } 2079 ierr = PetscFree(cols);CHKERRQ(ierr); 2080 2081 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2082 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2083 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2084 *matout = B; 2085 } else { 2086 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2087 } 2088 PetscFunctionReturn(0); 2089 } 2090 2091 #undef __FUNCT__ 2092 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2093 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2094 { 2095 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2096 Mat a = aij->A,b = aij->B; 2097 PetscErrorCode ierr; 2098 PetscInt s1,s2,s3; 2099 2100 PetscFunctionBegin; 2101 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2102 if (rr) { 2103 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2104 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2105 /* Overlap communication with computation. 
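The scatter of rr into lvec is started here, the diagonal block is scaled while the messages are in flight, and the scatter is completed just before the off-diagonal block is right-scaled below.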
*/ 2106 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2107 } 2108 if (ll) { 2109 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2110 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2111 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2112 } 2113 /* scale the diagonal block */ 2114 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2115 2116 if (rr) { 2117 /* Do a scatter end and then right scale the off-diagonal block */ 2118 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2119 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2120 } 2121 2122 PetscFunctionReturn(0); 2123 } 2124 2125 #undef __FUNCT__ 2126 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 2127 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 2128 { 2129 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2130 PetscErrorCode ierr; 2131 2132 PetscFunctionBegin; 2133 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 2134 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 2135 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 2136 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 2137 PetscFunctionReturn(0); 2138 } 2139 #undef __FUNCT__ 2140 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2141 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2142 { 2143 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2144 PetscErrorCode ierr; 2145 2146 PetscFunctionBegin; 2147 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2148 PetscFunctionReturn(0); 2149 } 2150 2151 #undef __FUNCT__ 2152 #define __FUNCT__ "MatEqual_MPIAIJ" 2153 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2154 { 2155 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2156 Mat a,b,c,d; 2157 PetscBool flg; 2158 PetscErrorCode ierr; 2159 2160 PetscFunctionBegin; 2161 a = matA->A; b = matA->B; 2162 c = matB->A; d = matB->B; 2163 2164 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2165 if (flg) { 2166 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2167 } 2168 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2169 PetscFunctionReturn(0); 2170 } 2171 2172 #undef __FUNCT__ 2173 #define __FUNCT__ "MatCopy_MPIAIJ" 2174 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2175 { 2176 PetscErrorCode ierr; 2177 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2178 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2179 2180 PetscFunctionBegin; 2181 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2182 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2183 /* because of the column compression in the off-processor part of the matrix a->B, 2184 the number of columns in a->B and b->B may be different, hence we cannot call 2185 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2186 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2187 then copying the submatrices */ 2188 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2189 } else { 2190 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2191 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2192 } 2193 PetscFunctionReturn(0); 2194 } 2195 2196 #undef __FUNCT__ 2197 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 2198 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 2199 { 2200 PetscErrorCode ierr; 2201 2202 PetscFunctionBegin; 2203 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2204 PetscFunctionReturn(0); 2205 } 2206 2207 #undef __FUNCT__ 2208 #define __FUNCT__ "MatAXPY_MPIAIJ" 2209 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2210 { 2211 PetscErrorCode ierr; 2212 PetscInt i; 2213 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2214 PetscBLASInt bnz,one=1; 2215 Mat_SeqAIJ *x,*y; 2216 2217 PetscFunctionBegin; 2218 if (str == SAME_NONZERO_PATTERN) { 2219 PetscScalar alpha = a; 2220 x = (Mat_SeqAIJ *)xx->A->data; 2221 y = (Mat_SeqAIJ *)yy->A->data; 2222 bnz = PetscBLASIntCast(x->nz); 2223 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2224 x = (Mat_SeqAIJ *)xx->B->data; 2225 y = (Mat_SeqAIJ *)yy->B->data; 2226 bnz = PetscBLASIntCast(x->nz); 2227 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2228 } else if (str == SUBSET_NONZERO_PATTERN) { 2229 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2230 2231 x = (Mat_SeqAIJ *)xx->B->data; 2232 y = (Mat_SeqAIJ *)yy->B->data; 2233 if (y->xtoy && y->XtoY != xx->B) { 2234 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2235 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2236 } 2237 if (!y->xtoy) { /* get xtoy */ 2238 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2239 y->XtoY = xx->B; 2240 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2241 } 2242 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2243 } else { 2244 Mat B; 2245 PetscInt *nnz_d,*nnz_o; 2246 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2247 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2248 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2249 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2250 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2251 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2252 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2253 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr); 2254 ierr = MatMPIAIJSetPreallocation(B,PETSC_NULL,nnz_d,PETSC_NULL,nnz_o);CHKERRQ(ierr); 2255 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2256 ierr = MatHeaderReplace(Y,B); 2257 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2258 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2259 } 2260 PetscFunctionReturn(0); 2261 } 2262 2263 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2264 2265 #undef __FUNCT__ 2266 #define __FUNCT__ "MatConjugate_MPIAIJ" 2267 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2268 { 2269 #if defined(PETSC_USE_COMPLEX) 2270 PetscErrorCode ierr; 2271 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2272 2273 PetscFunctionBegin; 2274 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2275 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2276 #else 2277 PetscFunctionBegin; 2278 #endif 2279 PetscFunctionReturn(0); 
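  /* (conjugation is a no-op when PETSc is configured with real scalars) */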
2280 } 2281 2282 #undef __FUNCT__ 2283 #define __FUNCT__ "MatRealPart_MPIAIJ" 2284 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2285 { 2286 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2287 PetscErrorCode ierr; 2288 2289 PetscFunctionBegin; 2290 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2291 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2292 PetscFunctionReturn(0); 2293 } 2294 2295 #undef __FUNCT__ 2296 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2297 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2298 { 2299 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2300 PetscErrorCode ierr; 2301 2302 PetscFunctionBegin; 2303 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2304 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2305 PetscFunctionReturn(0); 2306 } 2307 2308 #ifdef PETSC_HAVE_PBGL 2309 2310 #include <boost/parallel/mpi/bsp_process_group.hpp> 2311 #include <boost/graph/distributed/ilu_default_graph.hpp> 2312 #include <boost/graph/distributed/ilu_0_block.hpp> 2313 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2314 #include <boost/graph/distributed/petsc/interface.hpp> 2315 #include <boost/multi_array.hpp> 2316 #include <boost/parallel/distributed_property_map.hpp> 2317 2318 #undef __FUNCT__ 2319 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2320 /* 2321 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2322 */ 2323 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2324 { 2325 namespace petsc = boost::distributed::petsc; 2326 2327 namespace graph_dist = boost::graph::distributed; 2328 using boost::graph::distributed::ilu_default::process_group_type; 2329 using boost::graph::ilu_permuted; 2330 2331 PetscBool row_identity, col_identity; 2332 PetscContainer c; 2333 PetscInt m, n, M, N; 2334 PetscErrorCode ierr; 2335 2336 PetscFunctionBegin; 2337 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2338 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2339 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2340 if (!row_identity || !col_identity) { 2341 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2342 } 2343 2344 process_group_type pg; 2345 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2346 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2347 lgraph_type& level_graph = *lgraph_p; 2348 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2349 2350 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2351 ilu_permuted(level_graph); 2352 2353 /* put together the new matrix */ 2354 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2355 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2356 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2357 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2358 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2359 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2360 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2361 2362 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr); 2363 ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr); 2364 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr); 2365 ierr = PetscContainerDestroy(&c);CHKERRQ(ierr); 2366 PetscFunctionReturn(0); 2367 } 2368 2369 #undef __FUNCT__ 2370 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2371 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat 
B,Mat A, const MatFactorInfo *info) 2372 { 2373 PetscFunctionBegin; 2374 PetscFunctionReturn(0); 2375 } 2376 2377 #undef __FUNCT__ 2378 #define __FUNCT__ "MatSolve_MPIAIJ" 2379 /* 2380 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2381 */ 2382 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2383 { 2384 namespace graph_dist = boost::graph::distributed; 2385 2386 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2387 lgraph_type* lgraph_p; 2388 PetscContainer c; 2389 PetscErrorCode ierr; 2390 2391 PetscFunctionBegin; 2392 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2393 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2394 ierr = VecCopy(b, x);CHKERRQ(ierr); 2395 2396 PetscScalar* array_x; 2397 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2398 PetscInt sx; 2399 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2400 2401 PetscScalar* array_b; 2402 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2403 PetscInt sb; 2404 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2405 2406 lgraph_type& level_graph = *lgraph_p; 2407 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2408 2409 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2410 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2411 ref_x(array_x, boost::extents[num_vertices(graph)]); 2412 2413 typedef boost::iterator_property_map<array_ref_type::iterator, 2414 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2415 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2416 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2417 2418 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2419 ierr = VecRestoreArray(x, &array_x);CHKERRQ(ierr); ierr = VecRestoreArray(b, &array_b);CHKERRQ(ierr); 2420 PetscFunctionReturn(0); 2421 } 2422 #endif 2423 2424 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2425 PetscInt nzlocal,nsends,nrecvs; 2426 PetscMPIInt *send_rank,*recv_rank; 2427 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2428 PetscScalar *sbuf_a,**rbuf_a; 2429 PetscErrorCode (*Destroy)(Mat); 2430 } Mat_Redundant; 2431 2432 #undef __FUNCT__ 2433 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2434 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2435 { 2436 PetscErrorCode ierr; 2437 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2438 PetscInt i; 2439 2440 PetscFunctionBegin; 2441 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2442 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2443 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2444 for (i=0; i<redund->nrecvs; i++){ 2445 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2446 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2447 } 2448 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2449 ierr = PetscFree(redund);CHKERRQ(ierr); 2450 PetscFunctionReturn(0); 2451 } 2452 2453 #undef __FUNCT__ 2454 #define __FUNCT__ "MatDestroy_MatRedundant" 2455 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2456 { 2457 PetscErrorCode ierr; 2458 PetscContainer container; 2459 Mat_Redundant *redund=PETSC_NULL; 2460 2461 PetscFunctionBegin; 2462 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2463 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist"); 2464 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2465 A->ops->destroy = redund->Destroy; 2466 ierr = 
PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2467 if (A->ops->destroy) { 2468 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2469 } 2470 PetscFunctionReturn(0); 2471 } 2472 2473 #undef __FUNCT__ 2474 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2475 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2476 { 2477 PetscMPIInt rank,size; 2478 MPI_Comm comm=((PetscObject)mat)->comm; 2479 PetscErrorCode ierr; 2480 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2481 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2482 PetscInt *rowrange=mat->rmap->range; 2483 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2484 Mat A=aij->A,B=aij->B,C=*matredundant; 2485 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2486 PetscScalar *sbuf_a; 2487 PetscInt nzlocal=a->nz+b->nz; 2488 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2489 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2490 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2491 MatScalar *aworkA,*aworkB; 2492 PetscScalar *vals; 2493 PetscMPIInt tag1,tag2,tag3,imdex; 2494 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2495 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2496 MPI_Status recv_status,*send_status; 2497 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2498 PetscInt **rbuf_j=PETSC_NULL; 2499 PetscScalar **rbuf_a=PETSC_NULL; 2500 Mat_Redundant *redund=PETSC_NULL; 2501 PetscContainer container; 2502 2503 PetscFunctionBegin; 2504 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2505 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2506 2507 if (reuse == MAT_REUSE_MATRIX) { 2508 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2509 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2510 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2511 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2512 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2513 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist"); 2514 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2515 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2516 2517 nsends = redund->nsends; 2518 nrecvs = redund->nrecvs; 2519 send_rank = redund->send_rank; 2520 recv_rank = redund->recv_rank; 2521 sbuf_nz = redund->sbuf_nz; 2522 rbuf_nz = redund->rbuf_nz; 2523 sbuf_j = redund->sbuf_j; 2524 sbuf_a = redund->sbuf_a; 2525 rbuf_j = redund->rbuf_j; 2526 rbuf_a = redund->rbuf_a; 2527 } 2528 2529 if (reuse == MAT_INITIAL_MATRIX){ 2530 PetscMPIInt subrank,subsize; 2531 PetscInt nleftover,np_subcomm; 2532 /* get the destination processors' id send_rank, nsends and nrecvs */ 2533 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2534 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2535 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2536 np_subcomm = size/nsubcomm; 2537 nleftover = size - nsubcomm*np_subcomm; 2538 nsends = 0; nrecvs = 0; 2539 for (i=0; i<size; i++){ /* i=rank*/ 2540 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2541 send_rank[nsends] = i; nsends++; 2542 recv_rank[nrecvs++] = i; 2543 } 2544 } 2545 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2546 i = size-nleftover-1; 2547 j = 0; 2548 while (j < nsubcomm - nleftover){ 2549 send_rank[nsends++] = i; 2550 i--; j++; 2551 } 2552 } 2553 2554 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2555 for (i=0; i<nleftover; i++){ 2556 recv_rank[nrecvs++] = size-nleftover+i; 2557 } 2558 } 2559 2560 /* allocate sbuf_j, sbuf_a */ 2561 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2562 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2563 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2564 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2565 2566 /* copy mat's local entries into the buffers */ 2567 if (reuse == MAT_INITIAL_MATRIX){ 2568 rownz_max = 0; 2569 rptr = sbuf_j; 2570 cols = sbuf_j + rend-rstart + 1; 2571 vals = sbuf_a; 2572 rptr[0] = 0; 2573 for (i=0; i<rend-rstart; i++){ 2574 row = i + rstart; 2575 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2576 ncols = nzA + nzB; 2577 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2578 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2579 /* load the column indices for this row into cols */ 2580 lwrite = 0; 2581 for (l=0; l<nzB; l++) { 2582 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2583 vals[lwrite] = aworkB[l]; 2584 cols[lwrite++] = ctmp; 2585 } 2586 } 2587 for (l=0; l<nzA; l++){ 2588 vals[lwrite] = aworkA[l]; 2589 cols[lwrite++] = cstart + cworkA[l]; 2590 } 2591 for (l=0; l<nzB; l++) { 2592 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2593 vals[lwrite] = aworkB[l]; 2594 cols[lwrite++] = ctmp; 2595 } 2596 } 2597 vals += ncols; 2598 cols += ncols; 2599 rptr[i+1] = rptr[i] + ncols; 2600 if (rownz_max < ncols) rownz_max = ncols; 2601 } 2602 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2603 } else { /* only copy matrix values into sbuf_a */ 2604 rptr = sbuf_j; 2605 vals = sbuf_a; 2606 rptr[0] = 0; 2607 for (i=0; i<rend-rstart; i++){ 2608 row = i + rstart; 2609 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2610 ncols = nzA + nzB; 2611 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2612 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2613 lwrite = 0; 2614 for (l=0; l<nzB; l++) { 2615 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2616 } 2617 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2618 for 
(l=0; l<nzB; l++) { 2619 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2620 } 2621 vals += ncols; 2622 rptr[i+1] = rptr[i] + ncols; 2623 } 2624 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2625 2626 /* send nzlocal to others, and recv other's nzlocal */ 2627 /*--------------------------------------------------*/ 2628 if (reuse == MAT_INITIAL_MATRIX){ 2629 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2630 s_waits2 = s_waits3 + nsends; 2631 s_waits1 = s_waits2 + nsends; 2632 r_waits1 = s_waits1 + nsends; 2633 r_waits2 = r_waits1 + nrecvs; 2634 r_waits3 = r_waits2 + nrecvs; 2635 } else { 2636 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2637 r_waits3 = s_waits3 + nsends; 2638 } 2639 2640 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2641 if (reuse == MAT_INITIAL_MATRIX){ 2642 /* get new tags to keep the communication clean */ 2643 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2644 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2645 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2646 2647 /* post receives of other's nzlocal */ 2648 for (i=0; i<nrecvs; i++){ 2649 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2650 } 2651 /* send nzlocal to others */ 2652 for (i=0; i<nsends; i++){ 2653 sbuf_nz[i] = nzlocal; 2654 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2655 } 2656 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2657 count = nrecvs; 2658 while (count) { 2659 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2660 recv_rank[imdex] = recv_status.MPI_SOURCE; 2661 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2662 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2663 2664 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2665 rbuf_nz[imdex] += i + 2; 2666 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2667 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2668 count--; 2669 } 2670 /* wait on sends of nzlocal */ 2671 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2672 /* send mat->i,j to others, and recv from other's */ 2673 /*------------------------------------------------*/ 2674 for (i=0; i<nsends; i++){ 2675 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2676 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2677 } 2678 /* wait on receives of mat->i,j */ 2679 /*------------------------------*/ 2680 count = nrecvs; 2681 while (count) { 2682 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2683 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2684 count--; 2685 } 2686 /* wait on sends of mat->i,j */ 2687 /*---------------------------*/ 2688 if (nsends) { 2689 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2690 } 2691 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2692 2693 /* post receives, send and receive mat->a */ 2694 
/*----------------------------------------*/ 2695 for (imdex=0; imdex<nrecvs; imdex++) { 2696 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2697 } 2698 for (i=0; i<nsends; i++){ 2699 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2700 } 2701 count = nrecvs; 2702 while (count) { 2703 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2704 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2705 count--; 2706 } 2707 if (nsends) { 2708 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2709 } 2710 2711 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2712 2713 /* create redundant matrix */ 2714 /*-------------------------*/ 2715 if (reuse == MAT_INITIAL_MATRIX){ 2716 /* compute rownz_max for preallocation */ 2717 for (imdex=0; imdex<nrecvs; imdex++){ 2718 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2719 rptr = rbuf_j[imdex]; 2720 for (i=0; i<j; i++){ 2721 ncols = rptr[i+1] - rptr[i]; 2722 if (rownz_max < ncols) rownz_max = ncols; 2723 } 2724 } 2725 2726 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2727 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2728 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2729 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2730 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2731 } else { 2732 C = *matredundant; 2733 } 2734 2735 /* insert local matrix entries */ 2736 rptr = sbuf_j; 2737 cols = sbuf_j + rend-rstart + 1; 2738 vals = sbuf_a; 2739 for (i=0; i<rend-rstart; i++){ 2740 row = i + rstart; 2741 ncols = rptr[i+1] - rptr[i]; 2742 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2743 vals += ncols; 2744 cols += ncols; 2745 } 2746 /* insert received matrix entries */ 2747 for (imdex=0; imdex<nrecvs; imdex++){ 2748 rstart = rowrange[recv_rank[imdex]]; 2749 rend = rowrange[recv_rank[imdex]+1]; 2750 rptr = rbuf_j[imdex]; 2751 cols = rbuf_j[imdex] + rend-rstart + 1; 2752 vals = rbuf_a[imdex]; 2753 for (i=0; i<rend-rstart; i++){ 2754 row = i + rstart; 2755 ncols = rptr[i+1] - rptr[i]; 2756 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2757 vals += ncols; 2758 cols += ncols; 2759 } 2760 } 2761 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2762 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2763 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2764 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2765 if (reuse == MAT_INITIAL_MATRIX) { 2766 PetscContainer container; 2767 *matredundant = C; 2768 /* create a supporting struct and attach it to C for reuse */ 2769 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2770 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2771 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2772 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2773 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2774 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2775 2776 redund->nzlocal = nzlocal; 2777 redund->nsends = nsends; 2778 redund->nrecvs = nrecvs; 2779 
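    /* ownership of the send/recv rank lists and message buffers moves to redund; they are released in PetscContainerDestroy_MatRedundant() */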
redund->send_rank = send_rank; 2780 redund->recv_rank = recv_rank; 2781 redund->sbuf_nz = sbuf_nz; 2782 redund->rbuf_nz = rbuf_nz; 2783 redund->sbuf_j = sbuf_j; 2784 redund->sbuf_a = sbuf_a; 2785 redund->rbuf_j = rbuf_j; 2786 redund->rbuf_a = rbuf_a; 2787 2788 redund->Destroy = C->ops->destroy; 2789 C->ops->destroy = MatDestroy_MatRedundant; 2790 } 2791 PetscFunctionReturn(0); 2792 } 2793 2794 #undef __FUNCT__ 2795 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2796 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2797 { 2798 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2799 PetscErrorCode ierr; 2800 PetscInt i,*idxb = 0; 2801 PetscScalar *va,*vb; 2802 Vec vtmp; 2803 2804 PetscFunctionBegin; 2805 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2806 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2807 if (idx) { 2808 for (i=0; i<A->rmap->n; i++) { 2809 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2810 } 2811 } 2812 2813 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2814 if (idx) { 2815 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2816 } 2817 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2818 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2819 2820 for (i=0; i<A->rmap->n; i++){ 2821 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2822 va[i] = vb[i]; 2823 if (idx) idx[i] = a->garray[idxb[i]]; 2824 } 2825 } 2826 2827 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2828 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2829 ierr = PetscFree(idxb);CHKERRQ(ierr); 2830 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2831 PetscFunctionReturn(0); 2832 } 2833 2834 #undef __FUNCT__ 2835 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2836 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2837 { 2838 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2839 PetscErrorCode ierr; 2840 PetscInt i,*idxb = 0; 2841 PetscScalar *va,*vb; 2842 Vec vtmp; 2843 2844 PetscFunctionBegin; 2845 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2846 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2847 if (idx) { 2848 for (i=0; i<A->rmap->n; i++) { 2849 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2850 } 2851 } 2852 2853 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2854 if (idx) { 2855 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2856 } 2857 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2858 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2859 2860 for (i=0; i<A->rmap->n; i++){ 2861 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2862 va[i] = vb[i]; 2863 if (idx) idx[i] = a->garray[idxb[i]]; 2864 } 2865 } 2866 2867 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2868 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2869 ierr = PetscFree(idxb);CHKERRQ(ierr); 2870 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2871 PetscFunctionReturn(0); 2872 } 2873 2874 #undef __FUNCT__ 2875 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2876 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2877 { 2878 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2879 PetscInt n = A->rmap->n; 2880 PetscInt cstart = A->cmap->rstart; 2881 PetscInt *cmap = mat->garray; 2882 PetscInt *diagIdx, *offdiagIdx; 2883 Vec diagV, offdiagV; 2884 PetscScalar *a, *diagA, *offdiagA; 2885 PetscInt r; 2886 PetscErrorCode ierr; 2887 2888 PetscFunctionBegin; 2889 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2890 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2891 ierr = VecCreateSeq(((PetscObject)A)->comm, n, 
&offdiagV);CHKERRQ(ierr); 2892 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2893 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2894 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2895 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2896 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2897 for(r = 0; r < n; ++r) { 2898 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2899 a[r] = diagA[r]; 2900 idx[r] = cstart + diagIdx[r]; 2901 } else { 2902 a[r] = offdiagA[r]; 2903 idx[r] = cmap[offdiagIdx[r]]; 2904 } 2905 } 2906 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2907 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2908 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2909 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2910 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2911 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2912 PetscFunctionReturn(0); 2913 } 2914 2915 #undef __FUNCT__ 2916 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2917 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2918 { 2919 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2920 PetscInt n = A->rmap->n; 2921 PetscInt cstart = A->cmap->rstart; 2922 PetscInt *cmap = mat->garray; 2923 PetscInt *diagIdx, *offdiagIdx; 2924 Vec diagV, offdiagV; 2925 PetscScalar *a, *diagA, *offdiagA; 2926 PetscInt r; 2927 PetscErrorCode ierr; 2928 2929 PetscFunctionBegin; 2930 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2931 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2932 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2933 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2934 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2935 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2936 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2937 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2938 for(r = 0; r < n; ++r) { 2939 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2940 a[r] = diagA[r]; 2941 idx[r] = cstart + diagIdx[r]; 2942 } else { 2943 a[r] = offdiagA[r]; 2944 idx[r] = cmap[offdiagIdx[r]]; 2945 } 2946 } 2947 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2948 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2949 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2950 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2951 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2952 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2953 PetscFunctionReturn(0); 2954 } 2955 2956 #undef __FUNCT__ 2957 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2958 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2959 { 2960 PetscErrorCode ierr; 2961 Mat *dummy; 2962 2963 PetscFunctionBegin; 2964 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2965 *newmat = *dummy; 2966 ierr = PetscFree(dummy);CHKERRQ(ierr); 2967 PetscFunctionReturn(0); 2968 } 2969 2970 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2971 2972 #undef __FUNCT__ 2973 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 2974 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,PetscScalar **values) 2975 { 2976 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2977 PetscErrorCode ierr; 2978 2979 PetscFunctionBegin; 2980 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2981 PetscFunctionReturn(0); 2982 } 2983 2984 2985 /* -------------------------------------------------------------------*/ 2986 static 
struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2987 MatGetRow_MPIAIJ, 2988 MatRestoreRow_MPIAIJ, 2989 MatMult_MPIAIJ, 2990 /* 4*/ MatMultAdd_MPIAIJ, 2991 MatMultTranspose_MPIAIJ, 2992 MatMultTransposeAdd_MPIAIJ, 2993 #ifdef PETSC_HAVE_PBGL 2994 MatSolve_MPIAIJ, 2995 #else 2996 0, 2997 #endif 2998 0, 2999 0, 3000 /*10*/ 0, 3001 0, 3002 0, 3003 MatSOR_MPIAIJ, 3004 MatTranspose_MPIAIJ, 3005 /*15*/ MatGetInfo_MPIAIJ, 3006 MatEqual_MPIAIJ, 3007 MatGetDiagonal_MPIAIJ, 3008 MatDiagonalScale_MPIAIJ, 3009 MatNorm_MPIAIJ, 3010 /*20*/ MatAssemblyBegin_MPIAIJ, 3011 MatAssemblyEnd_MPIAIJ, 3012 MatSetOption_MPIAIJ, 3013 MatZeroEntries_MPIAIJ, 3014 /*24*/ MatZeroRows_MPIAIJ, 3015 0, 3016 #ifdef PETSC_HAVE_PBGL 3017 0, 3018 #else 3019 0, 3020 #endif 3021 0, 3022 0, 3023 /*29*/ MatSetUpPreallocation_MPIAIJ, 3024 #ifdef PETSC_HAVE_PBGL 3025 0, 3026 #else 3027 0, 3028 #endif 3029 0, 3030 0, 3031 0, 3032 /*34*/ MatDuplicate_MPIAIJ, 3033 0, 3034 0, 3035 0, 3036 0, 3037 /*39*/ MatAXPY_MPIAIJ, 3038 MatGetSubMatrices_MPIAIJ, 3039 MatIncreaseOverlap_MPIAIJ, 3040 MatGetValues_MPIAIJ, 3041 MatCopy_MPIAIJ, 3042 /*44*/ MatGetRowMax_MPIAIJ, 3043 MatScale_MPIAIJ, 3044 0, 3045 0, 3046 MatZeroRowsColumns_MPIAIJ, 3047 /*49*/ MatSetBlockSize_MPIAIJ, 3048 0, 3049 0, 3050 0, 3051 0, 3052 /*54*/ MatFDColoringCreate_MPIAIJ, 3053 0, 3054 MatSetUnfactored_MPIAIJ, 3055 MatPermute_MPIAIJ, 3056 0, 3057 /*59*/ MatGetSubMatrix_MPIAIJ, 3058 MatDestroy_MPIAIJ, 3059 MatView_MPIAIJ, 3060 0, 3061 0, 3062 /*64*/ 0, 3063 0, 3064 0, 3065 0, 3066 0, 3067 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3068 MatGetRowMinAbs_MPIAIJ, 3069 0, 3070 MatSetColoring_MPIAIJ, 3071 #if defined(PETSC_HAVE_ADIC) 3072 MatSetValuesAdic_MPIAIJ, 3073 #else 3074 0, 3075 #endif 3076 MatSetValuesAdifor_MPIAIJ, 3077 /*75*/ MatFDColoringApply_AIJ, 3078 0, 3079 0, 3080 0, 3081 0, 3082 /*80*/ 0, 3083 0, 3084 0, 3085 /*83*/ MatLoad_MPIAIJ, 3086 0, 3087 0, 3088 0, 3089 0, 3090 0, 3091 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3092 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3093 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3094 MatPtAP_Basic, 3095 MatPtAPSymbolic_MPIAIJ, 3096 /*94*/ MatPtAPNumeric_MPIAIJ, 3097 0, 3098 0, 3099 0, 3100 0, 3101 /*99*/ 0, 3102 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3103 MatPtAPNumeric_MPIAIJ_MPIAIJ, 3104 MatConjugate_MPIAIJ, 3105 0, 3106 /*104*/MatSetValuesRow_MPIAIJ, 3107 MatRealPart_MPIAIJ, 3108 MatImaginaryPart_MPIAIJ, 3109 0, 3110 0, 3111 /*109*/0, 3112 MatGetRedundantMatrix_MPIAIJ, 3113 MatGetRowMin_MPIAIJ, 3114 0, 3115 0, 3116 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3117 0, 3118 0, 3119 0, 3120 0, 3121 /*119*/0, 3122 0, 3123 0, 3124 0, 3125 MatGetMultiProcBlock_MPIAIJ, 3126 /*124*/MatFindNonZeroRows_MPIAIJ, 3127 MatGetColumnNorms_MPIAIJ, 3128 MatInvertBlockDiagonal_MPIAIJ, 3129 0, 3130 MatGetSubMatricesParallel_MPIAIJ, 3131 /*129*/0 3132 }; 3133 3134 /* ----------------------------------------------------------------------------------------*/ 3135 3136 EXTERN_C_BEGIN 3137 #undef __FUNCT__ 3138 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3139 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3140 { 3141 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3142 PetscErrorCode ierr; 3143 3144 PetscFunctionBegin; 3145 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3146 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3147 PetscFunctionReturn(0); 3148 } 3149 EXTERN_C_END 3150 3151 EXTERN_C_BEGIN 3152 #undef __FUNCT__ 3153 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3154 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3155 { 3156 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3157 PetscErrorCode ierr; 3158 3159 
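  /* restore the numerical values of both the diagonal (A) and off-diagonal (B) blocks
     that were saved earlier by MatStoreValues_MPIAIJ() */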
PetscFunctionBegin; 3160 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3161 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3162 PetscFunctionReturn(0); 3163 } 3164 EXTERN_C_END 3165 3166 EXTERN_C_BEGIN 3167 #undef __FUNCT__ 3168 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3169 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3170 { 3171 Mat_MPIAIJ *b; 3172 PetscErrorCode ierr; 3173 PetscInt i; 3174 3175 PetscFunctionBegin; 3176 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3177 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3178 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3179 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3180 3181 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3182 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3183 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3184 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3185 if (d_nnz) { 3186 for (i=0; i<B->rmap->n; i++) { 3187 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3188 } 3189 } 3190 if (o_nnz) { 3191 for (i=0; i<B->rmap->n; i++) { 3192 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3193 } 3194 } 3195 b = (Mat_MPIAIJ*)B->data; 3196 3197 if (!B->preallocated) { 3198 /* Explicitly create 2 MATSEQAIJ matrices. */ 3199 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3200 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3201 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3202 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3203 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3204 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3205 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3206 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3207 } 3208 3209 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3210 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3211 B->preallocated = PETSC_TRUE; 3212 PetscFunctionReturn(0); 3213 } 3214 EXTERN_C_END 3215 3216 #undef __FUNCT__ 3217 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3218 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3219 { 3220 Mat mat; 3221 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3222 PetscErrorCode ierr; 3223 3224 PetscFunctionBegin; 3225 *newmat = 0; 3226 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3227 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3228 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3229 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3230 a = (Mat_MPIAIJ*)mat->data; 3231 3232 mat->factortype = matin->factortype; 3233 mat->rmap->bs = matin->rmap->bs; 3234 mat->assembled = PETSC_TRUE; 3235 mat->insertmode = NOT_SET_VALUES; 3236 mat->preallocated = PETSC_TRUE; 3237 3238 a->size = oldmat->size; 3239 a->rank = oldmat->rank; 3240 a->donotstash = oldmat->donotstash; 3241 a->roworiented = oldmat->roworiented; 3242 a->rowindices = 0; 3243 a->rowvalues = 0; 3244 a->getrowactive = PETSC_FALSE; 3245 3246 ierr = 
PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3247 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3248 3249 if (oldmat->colmap) { 3250 #if defined (PETSC_USE_CTABLE) 3251 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3252 #else 3253 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3254 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3255 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3256 #endif 3257 } else a->colmap = 0; 3258 if (oldmat->garray) { 3259 PetscInt len; 3260 len = oldmat->B->cmap->n; 3261 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3262 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3263 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3264 } else a->garray = 0; 3265 3266 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3267 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3268 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3269 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3270 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3271 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3272 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3273 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3274 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3275 *newmat = mat; 3276 PetscFunctionReturn(0); 3277 } 3278 3279 3280 3281 #undef __FUNCT__ 3282 #define __FUNCT__ "MatLoad_MPIAIJ" 3283 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3284 { 3285 PetscScalar *vals,*svals; 3286 MPI_Comm comm = ((PetscObject)viewer)->comm; 3287 PetscErrorCode ierr; 3288 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3289 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3290 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3291 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 3292 PetscInt cend,cstart,n,*rowners,sizesset=1; 3293 int fd; 3294 3295 PetscFunctionBegin; 3296 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3297 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3298 if (!rank) { 3299 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3300 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3301 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3302 } 3303 3304 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3305 3306 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3307 M = header[1]; N = header[2]; 3308 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3309 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3310 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3311 3312 /* If global sizes are set, check if they are consistent with that given in the file */ 3313 if (sizesset) { 3314 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3315 } 3316 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3317 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of 
cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3318 3319 /* determine ownership of all rows */ 3320 if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */ 3321 else m = newMat->rmap->n; /* Set by user */ 3322 3323 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3324 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3325 3326 /* First process needs enough room for process with most rows */ 3327 if (!rank) { 3328 mmax = rowners[1]; 3329 for (i=2; i<size; i++) { 3330 mmax = PetscMax(mmax,rowners[i]); 3331 } 3332 } else mmax = m; 3333 3334 rowners[0] = 0; 3335 for (i=2; i<=size; i++) { 3336 rowners[i] += rowners[i-1]; 3337 } 3338 rstart = rowners[rank]; 3339 rend = rowners[rank+1]; 3340 3341 /* distribute row lengths to all processors */ 3342 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3343 if (!rank) { 3344 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3345 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3346 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3347 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3348 for (j=0; j<m; j++) { 3349 procsnz[0] += ourlens[j]; 3350 } 3351 for (i=1; i<size; i++) { 3352 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3353 /* calculate the number of nonzeros on each processor */ 3354 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3355 procsnz[i] += rowlengths[j]; 3356 } 3357 ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3358 } 3359 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3360 } else { 3361 ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3362 } 3363 3364 if (!rank) { 3365 /* determine max buffer needed and allocate it */ 3366 maxnz = 0; 3367 for (i=0; i<size; i++) { 3368 maxnz = PetscMax(maxnz,procsnz[i]); 3369 } 3370 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3371 3372 /* read in my part of the matrix column indices */ 3373 nz = procsnz[0]; 3374 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3375 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3376 3377 /* read in every one elses and ship off */ 3378 for (i=1; i<size; i++) { 3379 nz = procsnz[i]; 3380 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3381 ierr = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3382 } 3383 ierr = PetscFree(cols);CHKERRQ(ierr); 3384 } else { 3385 /* determine buffer space needed for message */ 3386 nz = 0; 3387 for (i=0; i<m; i++) { 3388 nz += ourlens[i]; 3389 } 3390 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3391 3392 /* receive message of column indices*/ 3393 ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3394 } 3395 3396 /* determine column ownership if matrix is not square */ 3397 if (N != M) { 3398 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3399 else n = newMat->cmap->n; 3400 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3401 cstart = cend - n; 3402 } else { 3403 cstart = rstart; 3404 cend = rend; 3405 n = cend - cstart; 3406 } 3407 3408 /* loop over local rows, determining number of off diagonal entries */ 3409 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3410 jj = 0; 3411 for (i=0; i<m; i++) { 3412 for (j=0; j<ourlens[i]; j++) { 3413 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3414 jj++; 3415 } 
3416 } 3417 3418 for (i=0; i<m; i++) { 3419 ourlens[i] -= offlens[i]; 3420 } 3421 if (!sizesset) { 3422 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3423 } 3424 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3425 3426 for (i=0; i<m; i++) { 3427 ourlens[i] += offlens[i]; 3428 } 3429 3430 if (!rank) { 3431 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3432 3433 /* read in my part of the matrix numerical values */ 3434 nz = procsnz[0]; 3435 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3436 3437 /* insert into matrix */ 3438 jj = rstart; 3439 smycols = mycols; 3440 svals = vals; 3441 for (i=0; i<m; i++) { 3442 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3443 smycols += ourlens[i]; 3444 svals += ourlens[i]; 3445 jj++; 3446 } 3447 3448 /* read in other processors and ship out */ 3449 for (i=1; i<size; i++) { 3450 nz = procsnz[i]; 3451 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3452 ierr = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3453 } 3454 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3455 } else { 3456 /* receive numeric values */ 3457 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3458 3459 /* receive message of values*/ 3460 ierr = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3461 3462 /* insert into matrix */ 3463 jj = rstart; 3464 smycols = mycols; 3465 svals = vals; 3466 for (i=0; i<m; i++) { 3467 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3468 smycols += ourlens[i]; 3469 svals += ourlens[i]; 3470 jj++; 3471 } 3472 } 3473 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3474 ierr = PetscFree(vals);CHKERRQ(ierr); 3475 ierr = PetscFree(mycols);CHKERRQ(ierr); 3476 ierr = PetscFree(rowners);CHKERRQ(ierr); 3477 3478 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3479 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3480 PetscFunctionReturn(0); 3481 } 3482 3483 #undef __FUNCT__ 3484 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3485 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3486 { 3487 PetscErrorCode ierr; 3488 IS iscol_local; 3489 PetscInt csize; 3490 3491 PetscFunctionBegin; 3492 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3493 if (call == MAT_REUSE_MATRIX) { 3494 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3495 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3496 } else { 3497 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3498 } 3499 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3500 if (call == MAT_INITIAL_MATRIX) { 3501 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3502 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3503 } 3504 PetscFunctionReturn(0); 3505 } 3506 3507 #undef __FUNCT__ 3508 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3509 /* 3510 Not great since it makes two copies of the submatrix, first an SeqAIJ 3511 in local and then by concatenating the local matrices the end result. 3512 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3513 3514 Note: This requires a sequential iscol with all indices. 
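         For example, the caller MatGetSubMatrix_MPIAIJ() above satisfies this by gathering the
   column index set before the call (a sketch, error checking omitted):

      ISAllGather(iscol,&iscol_local);
      MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);

   so each process passes the complete list of requested columns.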
3515 */ 3516 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3517 { 3518 PetscErrorCode ierr; 3519 PetscMPIInt rank,size; 3520 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3521 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3522 Mat *local,M,Mreuse; 3523 MatScalar *vwork,*aa; 3524 MPI_Comm comm = ((PetscObject)mat)->comm; 3525 Mat_SeqAIJ *aij; 3526 3527 3528 PetscFunctionBegin; 3529 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3530 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3531 3532 if (call == MAT_REUSE_MATRIX) { 3533 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3534 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3535 local = &Mreuse; 3536 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3537 } else { 3538 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3539 Mreuse = *local; 3540 ierr = PetscFree(local);CHKERRQ(ierr); 3541 } 3542 3543 /* 3544 m - number of local rows 3545 n - number of columns (same on all processors) 3546 rstart - first row in new global matrix generated 3547 */ 3548 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3549 if (call == MAT_INITIAL_MATRIX) { 3550 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3551 ii = aij->i; 3552 jj = aij->j; 3553 3554 /* 3555 Determine the number of non-zeros in the diagonal and off-diagonal 3556 portions of the matrix in order to do correct preallocation 3557 */ 3558 3559 /* first get start and end of "diagonal" columns */ 3560 if (csize == PETSC_DECIDE) { 3561 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3562 if (mglobal == n) { /* square matrix */ 3563 nlocal = m; 3564 } else { 3565 nlocal = n/size + ((n % size) > rank); 3566 } 3567 } else { 3568 nlocal = csize; 3569 } 3570 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3571 rstart = rend - nlocal; 3572 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3573 3574 /* next, compute all the lengths */ 3575 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3576 olens = dlens + m; 3577 for (i=0; i<m; i++) { 3578 jend = ii[i+1] - ii[i]; 3579 olen = 0; 3580 dlen = 0; 3581 for (j=0; j<jend; j++) { 3582 if (*jj < rstart || *jj >= rend) olen++; 3583 else dlen++; 3584 jj++; 3585 } 3586 olens[i] = olen; 3587 dlens[i] = dlen; 3588 } 3589 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3590 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3591 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3592 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3593 ierr = PetscFree(dlens);CHKERRQ(ierr); 3594 } else { 3595 PetscInt ml,nl; 3596 3597 M = *newmat; 3598 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3599 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3600 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3601 /* 3602 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3603 rather than the slower MatSetValues(). 
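      (Presumably, marking the matrix as previously assembled while clearing the assembled flag
      re-opens it for insertion into the existing, already-preallocated parallel nonzero structure.)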
*/
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij = (Mat_SeqAIJ*)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart,cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscScalar    *values;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);

  ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);

#if defined(PETSC_USE_DEBUG)
  for (i=0; i<m; i++) {
    nnz = Ii[i+1] - Ii[i];
    JJ  = J + Ii[i];
    if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
    if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
    if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
  }
#endif

  for (i=0; i<m; i++) {
    nnz     = Ii[i+1] - Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    d       = 0;
    for (j=0; j<nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  if (v) values = (PetscScalar*)v;
  else {
    ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    ii   = i + rstart;
    nnz  = Ii[i+1] - Ii[i];
    ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3693 } 3694 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3695 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3696 3697 if (!v) { 3698 ierr = PetscFree(values);CHKERRQ(ierr); 3699 } 3700 PetscFunctionReturn(0); 3701 } 3702 EXTERN_C_END 3703 3704 #undef __FUNCT__ 3705 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3706 /*@ 3707 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3708 (the default parallel PETSc format). 3709 3710 Collective on MPI_Comm 3711 3712 Input Parameters: 3713 + B - the matrix 3714 . i - the indices into j for the start of each local row (starts with zero) 3715 . j - the column indices for each local row (starts with zero) 3716 - v - optional values in the matrix 3717 3718 Level: developer 3719 3720 Notes: 3721 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3722 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3723 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3724 3725 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3726 3727 The format which is used for the sparse matrix input, is equivalent to a 3728 row-major ordering.. i.e for the following matrix, the input data expected is 3729 as shown: 3730 3731 1 0 0 3732 2 0 3 P0 3733 ------- 3734 4 5 6 P1 3735 3736 Process0 [P0]: rows_owned=[0,1] 3737 i = {0,1,3} [size = nrow+1 = 2+1] 3738 j = {0,0,2} [size = nz = 6] 3739 v = {1,2,3} [size = nz = 6] 3740 3741 Process1 [P1]: rows_owned=[2] 3742 i = {0,3} [size = nrow+1 = 1+1] 3743 j = {0,1,2} [size = nz = 6] 3744 v = {4,5,6} [size = nz = 6] 3745 3746 .keywords: matrix, aij, compressed row, sparse, parallel 3747 3748 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3749 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3750 @*/ 3751 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3752 { 3753 PetscErrorCode ierr; 3754 3755 PetscFunctionBegin; 3756 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3757 PetscFunctionReturn(0); 3758 } 3759 3760 #undef __FUNCT__ 3761 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3762 /*@C 3763 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3764 (the default parallel PETSc format). For good matrix assembly performance 3765 the user should preallocate the matrix storage by setting the parameters 3766 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3767 performance can be increased by more than a factor of 50. 3768 3769 Collective on MPI_Comm 3770 3771 Input Parameters: 3772 + A - the matrix 3773 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3774 (same value is used for all local rows) 3775 . d_nnz - array containing the number of nonzeros in the various rows of the 3776 DIAGONAL portion of the local submatrix (possibly different for each row) 3777 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3778 The size of this array is equal to the number of local rows, i.e 'm'. 3779 You must leave room for the diagonal entry even if it is zero. 3780 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3781 submatrix (same value is used for all local rows). 3782 - o_nnz - array containing the number of nonzeros in the various rows of the 3783 OFF-DIAGONAL portion of the local submatrix (possibly different for 3784 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3785 structure. The size of this array is equal to the number 3786 of local rows, i.e 'm'. 3787 3788 If the *_nnz parameter is given then the *_nz parameter is ignored 3789 3790 The AIJ format (also called the Yale sparse matrix format or 3791 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3792 storage. The stored row and column indices begin with zero. 3793 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3794 3795 The parallel matrix is partitioned such that the first m0 rows belong to 3796 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3797 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3798 3799 The DIAGONAL portion of the local submatrix of a processor can be defined 3800 as the submatrix which is obtained by extraction the part corresponding to 3801 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3802 first row that belongs to the processor, r2 is the last row belonging to 3803 the this processor, and c1-c2 is range of indices of the local part of a 3804 vector suitable for applying the matrix to. This is an mxn matrix. In the 3805 common case of a square matrix, the row and column ranges are the same and 3806 the DIAGONAL part is also square. The remaining portion of the local 3807 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3808 3809 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3810 3811 You can call MatGetInfo() to get information on how effective the preallocation was; 3812 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3813 You can also run with the option -info and look for messages with the string 3814 malloc in them to see if additional memory allocation was needed. 3815 3816 Example usage: 3817 3818 Consider the following 8x8 matrix with 34 non-zero values, that is 3819 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3820 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3821 as follows: 3822 3823 .vb 3824 1 2 0 | 0 3 0 | 0 4 3825 Proc0 0 5 6 | 7 0 0 | 8 0 3826 9 0 10 | 11 0 0 | 12 0 3827 ------------------------------------- 3828 13 0 14 | 15 16 17 | 0 0 3829 Proc1 0 18 0 | 19 20 21 | 0 0 3830 0 0 0 | 22 23 0 | 24 0 3831 ------------------------------------- 3832 Proc2 25 26 27 | 0 0 28 | 29 0 3833 30 0 0 | 31 32 33 | 0 34 3834 .ve 3835 3836 This can be represented as a collection of submatrices as: 3837 3838 .vb 3839 A B C 3840 D E F 3841 G H I 3842 .ve 3843 3844 Where the submatrices A,B,C are owned by proc0, D,E,F are 3845 owned by proc1, G,H,I are owned by proc2. 3846 3847 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3848 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3849 The 'M','N' parameters are 8,8, and have the same values on all procs. 3850 3851 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3852 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3853 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1] and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatGetInfo()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithArrays"
/*@
   MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
   in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
     The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

     The i and j indices are 0 based, and the i indices are indices corresponding to the local j array.

     The format used for the sparse matrix input is equivalent to a
     row-major ordering;
i.e for the following matrix, the input data expected is 3935 as shown: 3936 3937 1 0 0 3938 2 0 3 P0 3939 ------- 3940 4 5 6 P1 3941 3942 Process0 [P0]: rows_owned=[0,1] 3943 i = {0,1,3} [size = nrow+1 = 2+1] 3944 j = {0,0,2} [size = nz = 6] 3945 v = {1,2,3} [size = nz = 6] 3946 3947 Process1 [P1]: rows_owned=[2] 3948 i = {0,3} [size = nrow+1 = 1+1] 3949 j = {0,1,2} [size = nz = 6] 3950 v = {4,5,6} [size = nz = 6] 3951 3952 .keywords: matrix, aij, compressed row, sparse, parallel 3953 3954 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3955 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3956 @*/ 3957 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3958 { 3959 PetscErrorCode ierr; 3960 3961 PetscFunctionBegin; 3962 if (i[0]) { 3963 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3964 } 3965 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3966 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3967 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3968 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3969 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3970 PetscFunctionReturn(0); 3971 } 3972 3973 #undef __FUNCT__ 3974 #define __FUNCT__ "MatCreateMPIAIJ" 3975 /*@C 3976 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3977 (the default parallel PETSc format). For good matrix assembly performance 3978 the user should preallocate the matrix storage by setting the parameters 3979 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3980 performance can be increased by more than a factor of 50. 3981 3982 Collective on MPI_Comm 3983 3984 Input Parameters: 3985 + comm - MPI communicator 3986 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3987 This value should be the same as the local size used in creating the 3988 y vector for the matrix-vector product y = Ax. 3989 . n - This value should be the same as the local size used in creating the 3990 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3991 calculated if N is given) For square matrices n is almost always m. 3992 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3993 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3994 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3995 (same value is used for all local rows) 3996 . d_nnz - array containing the number of nonzeros in the various rows of the 3997 DIAGONAL portion of the local submatrix (possibly different for each row) 3998 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3999 The size of this array is equal to the number of local rows, i.e 'm'. 4000 You must leave room for the diagonal entry even if it is zero. 4001 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4002 submatrix (same value is used for all local rows). 4003 - o_nnz - array containing the number of nonzeros in the various rows of the 4004 OFF-DIAGONAL portion of the local submatrix (possibly different for 4005 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 4006 structure. The size of this array is equal to the number 4007 of local rows, i.e 'm'. 
4008 4009 Output Parameter: 4010 . A - the matrix 4011 4012 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4013 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4014 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4015 4016 Notes: 4017 If the *_nnz parameter is given then the *_nz parameter is ignored 4018 4019 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4020 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4021 storage requirements for this matrix. 4022 4023 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4024 processor than it must be used on all processors that share the object for 4025 that argument. 4026 4027 The user MUST specify either the local or global matrix dimensions 4028 (possibly both). 4029 4030 The parallel matrix is partitioned across processors such that the 4031 first m0 rows belong to process 0, the next m1 rows belong to 4032 process 1, the next m2 rows belong to process 2 etc.. where 4033 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4034 values corresponding to [m x N] submatrix. 4035 4036 The columns are logically partitioned with the n0 columns belonging 4037 to 0th partition, the next n1 columns belonging to the next 4038 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4039 4040 The DIAGONAL portion of the local submatrix on any given processor 4041 is the submatrix corresponding to the rows and columns m,n 4042 corresponding to the given processor. i.e diagonal matrix on 4043 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4044 etc. The remaining portion of the local submatrix [m x (N-n)] 4045 constitute the OFF-DIAGONAL portion. The example below better 4046 illustrates this concept. 4047 4048 For a square global matrix we define each processor's diagonal portion 4049 to be its local rows and the corresponding columns (a square submatrix); 4050 each processor's off-diagonal portion encompasses the remainder of the 4051 local matrix (a rectangular submatrix). 4052 4053 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4054 4055 When calling this routine with a single process communicator, a matrix of 4056 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4057 type of communicator, use the construction mechanism: 4058 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4059 4060 By default, this format uses inodes (identical nodes) when possible. 4061 We search for consecutive rows with the same nonzero structure, thereby 4062 reusing matrix information to achieve increased efficiency. 4063 4064 Options Database Keys: 4065 + -mat_no_inode - Do not use inodes 4066 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4067 - -mat_aij_oneindex - Internally use indexing starting at 1 4068 rather than 0. Note that when calling MatSetValues(), 4069 the user still MUST index entries starting at 0! 4070 4071 4072 Example usage: 4073 4074 Consider the following 8x8 matrix with 34 non-zero values, that is 4075 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4076 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 4077 as follows: 4078 4079 .vb 4080 1 2 0 | 0 3 0 | 0 4 4081 Proc0 0 5 6 | 7 0 0 | 8 0 4082 9 0 10 | 11 0 0 | 12 0 4083 ------------------------------------- 4084 13 0 14 | 15 16 17 | 0 0 4085 Proc1 0 18 0 | 19 20 21 | 0 0 4086 0 0 0 | 22 23 0 | 24 0 4087 ------------------------------------- 4088 Proc2 25 26 27 | 0 0 28 | 29 0 4089 30 0 0 | 31 32 33 | 0 34 4090 .ve 4091 4092 This can be represented as a collection of submatrices as: 4093 4094 .vb 4095 A B C 4096 D E F 4097 G H I 4098 .ve 4099 4100 Where the submatrices A,B,C are owned by proc0, D,E,F are 4101 owned by proc1, G,H,I are owned by proc2. 4102 4103 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4104 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4105 The 'M','N' parameters are 8,8, and have the same values on all procs. 4106 4107 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4108 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4109 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4110 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4111 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4112 matrix, ans [DF] as another SeqAIJ matrix. 4113 4114 When d_nz, o_nz parameters are specified, d_nz storage elements are 4115 allocated for every row of the local diagonal submatrix, and o_nz 4116 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4117 One way to choose d_nz and o_nz is to use the max nonzerors per local 4118 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4119 In this case, the values of d_nz,o_nz are: 4120 .vb 4121 proc0 : dnz = 2, o_nz = 2 4122 proc1 : dnz = 3, o_nz = 2 4123 proc2 : dnz = 1, o_nz = 4 4124 .ve 4125 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4126 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4127 for proc3. i.e we are using 12+15+10=37 storage locations to store 4128 34 values. 4129 4130 When d_nnz, o_nnz parameters are specified, the storage is specified 4131 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4132 In the above case the values for d_nnz,o_nnz are: 4133 .vb 4134 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4135 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4136 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4137 .ve 4138 Here the space allocated is sum of all the above values i.e 34, and 4139 hence pre-allocation is perfect. 
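
   As a concrete sketch of the recommended MatCreate()/MatSetType()/MatMPIAIJSetPreallocation()
   calling sequence mentioned above, proc0 in the example could create its share of the 8x8
   matrix as follows (error checking omitted; each process passes its own local sizes and
   nnz arrays, and comm stands for the shared communicator):

.vb
      Mat      A;
      PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};

      MatCreate(comm,&A);
      MatSetSizes(A,3,3,8,8);
      MatSetType(A,MATMPIAIJ);
      MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve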
4140 4141 Level: intermediate 4142 4143 .keywords: matrix, aij, compressed row, sparse, parallel 4144 4145 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4146 MPIAIJ, MatCreateMPIAIJWithArrays() 4147 @*/ 4148 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4149 { 4150 PetscErrorCode ierr; 4151 PetscMPIInt size; 4152 4153 PetscFunctionBegin; 4154 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4155 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4156 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4157 if (size > 1) { 4158 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4159 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4160 } else { 4161 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4162 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4163 } 4164 PetscFunctionReturn(0); 4165 } 4166 4167 #undef __FUNCT__ 4168 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4169 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 4170 { 4171 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4172 4173 PetscFunctionBegin; 4174 *Ad = a->A; 4175 *Ao = a->B; 4176 *colmap = a->garray; 4177 PetscFunctionReturn(0); 4178 } 4179 4180 #undef __FUNCT__ 4181 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4182 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4183 { 4184 PetscErrorCode ierr; 4185 PetscInt i; 4186 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4187 4188 PetscFunctionBegin; 4189 if (coloring->ctype == IS_COLORING_GLOBAL) { 4190 ISColoringValue *allcolors,*colors; 4191 ISColoring ocoloring; 4192 4193 /* set coloring for diagonal portion */ 4194 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4195 4196 /* set coloring for off-diagonal portion */ 4197 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4198 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4199 for (i=0; i<a->B->cmap->n; i++) { 4200 colors[i] = allcolors[a->garray[i]]; 4201 } 4202 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4203 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4204 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4205 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4206 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4207 ISColoringValue *colors; 4208 PetscInt *larray; 4209 ISColoring ocoloring; 4210 4211 /* set coloring for diagonal portion */ 4212 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4213 for (i=0; i<a->A->cmap->n; i++) { 4214 larray[i] = i + A->cmap->rstart; 4215 } 4216 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4217 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4218 for (i=0; i<a->A->cmap->n; i++) { 4219 colors[i] = coloring->colors[larray[i]]; 4220 } 4221 ierr = PetscFree(larray);CHKERRQ(ierr); 4222 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4223 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4224 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4225 4226 /* set coloring for off-diagonal portion */ 4227 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4228 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4229 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4230 for (i=0; i<a->B->cmap->n; i++) { 4231 colors[i] = coloring->colors[larray[i]]; 4232 } 4233 ierr = PetscFree(larray);CHKERRQ(ierr); 4234 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4235 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4236 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4237 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4238 4239 PetscFunctionReturn(0); 4240 } 4241 4242 #if defined(PETSC_HAVE_ADIC) 4243 #undef __FUNCT__ 4244 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 4245 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 4246 { 4247 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4248 PetscErrorCode ierr; 4249 4250 PetscFunctionBegin; 4251 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 4252 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 4253 PetscFunctionReturn(0); 4254 } 4255 #endif 4256 4257 #undef __FUNCT__ 4258 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4259 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4260 { 4261 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4262 PetscErrorCode ierr; 4263 4264 PetscFunctionBegin; 4265 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4266 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4267 PetscFunctionReturn(0); 4268 } 4269 4270 #undef __FUNCT__ 4271 #define __FUNCT__ "MatMerge" 4272 /*@ 4273 MatMerge - Creates a single large PETSc matrix by concatinating sequential 4274 matrices from each processor 4275 4276 Collective on MPI_Comm 4277 4278 Input Parameters: 4279 + comm - the communicators the parallel matrix will live on 4280 . inmat - the input sequential matrices 4281 . n - number of local columns (or PETSC_DECIDE) 4282 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4283 4284 Output Parameter: 4285 . outmat - the parallel matrix generated 4286 4287 Level: advanced 4288 4289 Notes: The number of columns of the matrix in EACH processor MUST be the same. 
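
   Example usage (a sketch; A_seq and A_mpi are placeholder names and error checking is
   omitted; note that, as coded below, the input sequential matrix is destroyed while the
   parallel matrix is being formed):

.vb
      Mat A_seq,A_mpi;
      ... assemble A_seq on every process, all with the same number of columns ...
      MatMerge(PETSC_COMM_WORLD,A_seq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&A_mpi);
.ve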
4290 4291 @*/ 4292 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4293 { 4294 PetscErrorCode ierr; 4295 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 4296 PetscInt *indx; 4297 PetscScalar *values; 4298 4299 PetscFunctionBegin; 4300 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4301 if (scall == MAT_INITIAL_MATRIX){ 4302 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 4303 if (n == PETSC_DECIDE){ 4304 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4305 } 4306 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4307 rstart -= m; 4308 4309 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4310 for (i=0;i<m;i++) { 4311 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4312 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4313 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4314 } 4315 /* This routine will ONLY return MPIAIJ type matrix */ 4316 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4317 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4318 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4319 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4320 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4321 4322 } else if (scall == MAT_REUSE_MATRIX){ 4323 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4324 } else { 4325 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4326 } 4327 4328 for (i=0;i<m;i++) { 4329 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4330 Ii = i + rstart; 4331 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4332 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4333 } 4334 ierr = MatDestroy(&inmat);CHKERRQ(ierr); 4335 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4336 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4337 4338 PetscFunctionReturn(0); 4339 } 4340 4341 #undef __FUNCT__ 4342 #define __FUNCT__ "MatFileSplit" 4343 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4344 { 4345 PetscErrorCode ierr; 4346 PetscMPIInt rank; 4347 PetscInt m,N,i,rstart,nnz; 4348 size_t len; 4349 const PetscInt *indx; 4350 PetscViewer out; 4351 char *name; 4352 Mat B; 4353 const PetscScalar *values; 4354 4355 PetscFunctionBegin; 4356 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4357 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4358 /* Should this be the type of the diagonal block of A? 
*/ 4359 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4360 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4361 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4362 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4363 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4364 for (i=0;i<m;i++) { 4365 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4366 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4367 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4368 } 4369 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4370 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4371 4372 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4373 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4374 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4375 sprintf(name,"%s.%d",outfile,rank); 4376 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4377 ierr = PetscFree(name); 4378 ierr = MatView(B,out);CHKERRQ(ierr); 4379 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4380 ierr = MatDestroy(&B);CHKERRQ(ierr); 4381 PetscFunctionReturn(0); 4382 } 4383 4384 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4385 #undef __FUNCT__ 4386 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4387 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4388 { 4389 PetscErrorCode ierr; 4390 Mat_Merge_SeqsToMPI *merge; 4391 PetscContainer container; 4392 4393 PetscFunctionBegin; 4394 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4395 if (container) { 4396 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4397 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4398 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4399 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4400 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4401 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4402 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4403 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4404 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4405 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4406 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4407 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4408 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4409 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4410 ierr = PetscFree(merge);CHKERRQ(ierr); 4411 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4412 } 4413 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4414 PetscFunctionReturn(0); 4415 } 4416 4417 #include <../src/mat/utils/freespace.h> 4418 #include <petscbt.h> 4419 4420 #undef __FUNCT__ 4421 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4422 /*@C 4423 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 4424 matrices from each processor 4425 4426 Collective on MPI_Comm 4427 4428 Input Parameters: 4429 + comm - the communicators the parallel matrix will live on 4430 . seqmat - the input sequential matrices 4431 . m - number of local rows (or PETSC_DECIDE) 4432 . n - number of local columns (or PETSC_DECIDE) 4433 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4434 4435 Output Parameter: 4436 . mpimat - the parallel matrix generated 4437 4438 Level: advanced 4439 4440 Notes: 4441 The dimensions of the sequential matrix in each processor MUST be the same. 4442 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4443 destroyed when mpimat is destroyed. 
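   For instance, the numeric phase below retrieves the merge data with a query of the form
   (a sketch, error checking omitted):
.vb
      PetscContainer      container;
      Mat_Merge_SeqsToMPI *merge;
      PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);
      PetscContainerGetPointer(container,(void**)&merge);
.ve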
Call PetscObjectQuery() to access seqmat. 4444 @*/ 4445 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4446 { 4447 PetscErrorCode ierr; 4448 MPI_Comm comm=((PetscObject)mpimat)->comm; 4449 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4450 PetscMPIInt size,rank,taga,*len_s; 4451 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4452 PetscInt proc,m; 4453 PetscInt **buf_ri,**buf_rj; 4454 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4455 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4456 MPI_Request *s_waits,*r_waits; 4457 MPI_Status *status; 4458 MatScalar *aa=a->a; 4459 MatScalar **abuf_r,*ba_i; 4460 Mat_Merge_SeqsToMPI *merge; 4461 PetscContainer container; 4462 4463 PetscFunctionBegin; 4464 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4465 4466 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4467 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4468 4469 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4470 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4471 4472 bi = merge->bi; 4473 bj = merge->bj; 4474 buf_ri = merge->buf_ri; 4475 buf_rj = merge->buf_rj; 4476 4477 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4478 owners = merge->rowmap->range; 4479 len_s = merge->len_s; 4480 4481 /* send and recv matrix values */ 4482 /*-----------------------------*/ 4483 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4484 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4485 4486 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4487 for (proc=0,k=0; proc<size; proc++){ 4488 if (!len_s[proc]) continue; 4489 i = owners[proc]; 4490 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4491 k++; 4492 } 4493 4494 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4495 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4496 ierr = PetscFree(status);CHKERRQ(ierr); 4497 4498 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4499 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4500 4501 /* insert mat values of mpimat */ 4502 /*----------------------------*/ 4503 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4504 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4505 4506 for (k=0; k<merge->nrecv; k++){ 4507 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4508 nrows = *(buf_ri_k[k]); 4509 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4510 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4511 } 4512 4513 /* set values of ba */ 4514 m = merge->rowmap->n; 4515 for (i=0; i<m; i++) { 4516 arow = owners[rank] + i; 4517 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4518 bnzi = bi[i+1] - bi[i]; 4519 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4520 4521 /* add local non-zero vals of this proc's seqmat into ba */ 4522 anzi = ai[arow+1] - ai[arow]; 4523 aj = a->j + ai[arow]; 4524 aa = a->a + ai[arow]; 4525 nextaj = 0; 4526 for (j=0; nextaj<anzi; j++){ 4527 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4528 ba_i[j] += aa[nextaj++]; 4529 } 4530 } 4531 4532 /* add received vals into ba */ 4533 for (k=0; k<merge->nrecv; k++){ /* k-th 
received message */ 4534 /* i-th row */ 4535 if (i == *nextrow[k]) { 4536 anzi = *(nextai[k]+1) - *nextai[k]; 4537 aj = buf_rj[k] + *(nextai[k]); 4538 aa = abuf_r[k] + *(nextai[k]); 4539 nextaj = 0; 4540 for (j=0; nextaj<anzi; j++){ 4541 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4542 ba_i[j] += aa[nextaj++]; 4543 } 4544 } 4545 nextrow[k]++; nextai[k]++; 4546 } 4547 } 4548 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4549 } 4550 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4551 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4552 4553 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4554 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4555 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4556 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4557 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4558 PetscFunctionReturn(0); 4559 } 4560 4561 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4562 4563 #undef __FUNCT__ 4564 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4565 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4566 { 4567 PetscErrorCode ierr; 4568 Mat B_mpi; 4569 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4570 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4571 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4572 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4573 PetscInt len,proc,*dnz,*onz; 4574 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4575 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4576 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4577 MPI_Status *status; 4578 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4579 PetscBT lnkbt; 4580 Mat_Merge_SeqsToMPI *merge; 4581 PetscContainer container; 4582 4583 PetscFunctionBegin; 4584 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4585 4586 /* make sure it is a PETSc comm */ 4587 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4588 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4589 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4590 4591 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4592 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4593 4594 /* determine row ownership */ 4595 /*---------------------------------------------------------*/ 4596 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4597 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4598 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4599 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4600 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4601 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4602 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4603 4604 m = merge->rowmap->n; 4605 M = merge->rowmap->N; 4606 owners = merge->rowmap->range; 4607 4608 /* determine the number of messages to send, their lengths */ 4609 /*---------------------------------------------------------*/ 4610 len_s = merge->len_s; 4611 4612 len = 0; /* length of buf_si[] */ 4613 merge->nsend = 0; 4614 for (proc=0; proc<size; proc++){ 4615 len_si[proc] = 0; 4616 if (proc == rank){ 4617 len_s[proc] = 0; 4618 } else { 4619 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4620 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4621 } 4622 if (len_s[proc]) { 4623 
merge->nsend++; 4624 nrows = 0; 4625 for (i=owners[proc]; i<owners[proc+1]; i++){ 4626 if (ai[i+1] > ai[i]) nrows++; 4627 } 4628 len_si[proc] = 2*(nrows+1); 4629 len += len_si[proc]; 4630 } 4631 } 4632 4633 /* determine the number and length of messages to receive for ij-structure */ 4634 /*-------------------------------------------------------------------------*/ 4635 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4636 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4637 4638 /* post the Irecv of j-structure */ 4639 /*-------------------------------*/ 4640 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4641 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4642 4643 /* post the Isend of j-structure */ 4644 /*--------------------------------*/ 4645 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4646 4647 for (proc=0, k=0; proc<size; proc++){ 4648 if (!len_s[proc]) continue; 4649 i = owners[proc]; 4650 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4651 k++; 4652 } 4653 4654 /* receives and sends of j-structure are complete */ 4655 /*------------------------------------------------*/ 4656 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4657 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4658 4659 /* send and recv i-structure */ 4660 /*---------------------------*/ 4661 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4662 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4663 4664 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4665 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4666 for (proc=0,k=0; proc<size; proc++){ 4667 if (!len_s[proc]) continue; 4668 /* form outgoing message for i-structure: 4669 buf_si[0]: nrows to be sent 4670 [1:nrows]: row index (global) 4671 [nrows+1:2*nrows+1]: i-structure index 4672 */ 4673 /*-------------------------------------------*/ 4674 nrows = len_si[proc]/2 - 1; 4675 buf_si_i = buf_si + nrows+1; 4676 buf_si[0] = nrows; 4677 buf_si_i[0] = 0; 4678 nrows = 0; 4679 for (i=owners[proc]; i<owners[proc+1]; i++){ 4680 anzi = ai[i+1] - ai[i]; 4681 if (anzi) { 4682 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4683 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4684 nrows++; 4685 } 4686 } 4687 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4688 k++; 4689 buf_si += len_si[proc]; 4690 } 4691 4692 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4693 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4694 4695 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4696 for (i=0; i<merge->nrecv; i++){ 4697 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4698 } 4699 4700 ierr = PetscFree(len_si);CHKERRQ(ierr); 4701 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4702 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4703 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4704 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4705 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4706 ierr = 
PetscFree(status);CHKERRQ(ierr); 4707 4708 /* compute a local seq matrix in each processor */ 4709 /*----------------------------------------------*/ 4710 /* allocate bi array and free space for accumulating nonzero column info */ 4711 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4712 bi[0] = 0; 4713 4714 /* create and initialize a linked list */ 4715 nlnk = N+1; 4716 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4717 4718 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4719 len = 0; 4720 len = ai[owners[rank+1]] - ai[owners[rank]]; 4721 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4722 current_space = free_space; 4723 4724 /* determine symbolic info for each local row */ 4725 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4726 4727 for (k=0; k<merge->nrecv; k++){ 4728 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 4729 nrows = *buf_ri_k[k]; 4730 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */ 4731 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */ 4732 } 4733 4734 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4735 len = 0; 4736 for (i=0;i<m;i++) { 4737 bnzi = 0; 4738 /* add local non-zero cols of this proc's seqmat into lnk */ 4739 arow = owners[rank] + i; 4740 anzi = ai[arow+1] - ai[arow]; 4741 aj = a->j + ai[arow]; 4742 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4743 bnzi += nlnk; 4744 /* add received col data into lnk */ 4745 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4746 if (i == *nextrow[k]) { /* i-th row */ 4747 anzi = *(nextai[k]+1) - *nextai[k]; 4748 aj = buf_rj[k] + *nextai[k]; 4749 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4750 bnzi += nlnk; 4751 nextrow[k]++; nextai[k]++; 4752 } 4753 } 4754 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4755 4756 /* if free space is not available, make more free space */ 4757 if (current_space->local_remaining<bnzi) { 4758 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr); 4759 nspacedouble++; 4760 } 4761 /* copy data into free space, then initialize lnk */ 4762 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4763 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4764 4765 current_space->array += bnzi; 4766 current_space->local_used += bnzi; 4767 current_space->local_remaining -= bnzi; 4768 4769 bi[i+1] = bi[i] + bnzi; 4770 } 4771 4772 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4773 4774 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4775 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4776 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4777 4778 /* create symbolic parallel matrix B_mpi */ 4779 /*---------------------------------------*/ 4780 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4781 if (n==PETSC_DECIDE) { 4782 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4783 } else { 4784 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4785 } 4786 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4787 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4788 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4789 4790 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4791 
B_mpi->assembled = PETSC_FALSE; 4792 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4793 merge->bi = bi; 4794 merge->bj = bj; 4795 merge->buf_ri = buf_ri; 4796 merge->buf_rj = buf_rj; 4797 merge->coi = PETSC_NULL; 4798 merge->coj = PETSC_NULL; 4799 merge->owners_co = PETSC_NULL; 4800 4801 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4802 4803 /* attach the supporting struct to B_mpi for reuse */ 4804 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4805 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4806 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4807 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4808 *mpimat = B_mpi; 4809 4810 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4811 PetscFunctionReturn(0); 4812 } 4813 4814 #undef __FUNCT__ 4815 #define __FUNCT__ "MatMerge_SeqsToMPI" 4816 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4817 { 4818 PetscErrorCode ierr; 4819 4820 PetscFunctionBegin; 4821 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4822 if (scall == MAT_INITIAL_MATRIX){ 4823 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4824 } 4825 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4826 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4827 PetscFunctionReturn(0); 4828 } 4829 4830 #undef __FUNCT__ 4831 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4832 /*@ 4833 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4834 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4835 with MatGetSize() 4836 4837 Not Collective 4838 4839 Input Parameters: 4840 + A - the matrix 4841 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4842 4843 Output Parameter: 4844 . 
A_loc - the local sequential matrix generated 4845 4846 Level: developer 4847 4848 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4849 4850 @*/ 4851 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4852 { 4853 PetscErrorCode ierr; 4854 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4855 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4856 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4857 MatScalar *aa=a->a,*ba=b->a,*cam; 4858 PetscScalar *ca; 4859 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4860 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4861 PetscBool match; 4862 4863 PetscFunctionBegin; 4864 ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4865 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4866 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4867 if (scall == MAT_INITIAL_MATRIX){ 4868 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4869 ci[0] = 0; 4870 for (i=0; i<am; i++){ 4871 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4872 } 4873 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4874 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4875 k = 0; 4876 for (i=0; i<am; i++) { 4877 ncols_o = bi[i+1] - bi[i]; 4878 ncols_d = ai[i+1] - ai[i]; 4879 /* off-diagonal portion of A */ 4880 for (jo=0; jo<ncols_o; jo++) { 4881 col = cmap[*bj]; 4882 if (col >= cstart) break; 4883 cj[k] = col; bj++; 4884 ca[k++] = *ba++; 4885 } 4886 /* diagonal portion of A */ 4887 for (j=0; j<ncols_d; j++) { 4888 cj[k] = cstart + *aj++; 4889 ca[k++] = *aa++; 4890 } 4891 /* off-diagonal portion of A */ 4892 for (j=jo; j<ncols_o; j++) { 4893 cj[k] = cmap[*bj++]; 4894 ca[k++] = *ba++; 4895 } 4896 } 4897 /* put together the new matrix */ 4898 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4899 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4900 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4901 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4902 mat->free_a = PETSC_TRUE; 4903 mat->free_ij = PETSC_TRUE; 4904 mat->nonew = 0; 4905 } else if (scall == MAT_REUSE_MATRIX){ 4906 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4907 ci = mat->i; cj = mat->j; cam = mat->a; 4908 for (i=0; i<am; i++) { 4909 /* off-diagonal portion of A */ 4910 ncols_o = bi[i+1] - bi[i]; 4911 for (jo=0; jo<ncols_o; jo++) { 4912 col = cmap[*bj]; 4913 if (col >= cstart) break; 4914 *cam++ = *ba++; bj++; 4915 } 4916 /* diagonal portion of A */ 4917 ncols_d = ai[i+1] - ai[i]; 4918 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4919 /* off-diagonal portion of A */ 4920 for (j=jo; j<ncols_o; j++) { 4921 *cam++ = *ba++; bj++; 4922 } 4923 } 4924 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4925 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4926 PetscFunctionReturn(0); 4927 } 4928 4929 #undef __FUNCT__ 4930 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 4931 /*@C 4932 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 4933 4934 Not Collective 4935 4936 Input Parameters: 4937 + A - the matrix 4938 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4939 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4940 4941 Output Parameter: 4942 . 
A_loc - the local sequential matrix generated 4943 4944 Level: developer 4945 4946 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 4947 4948 @*/ 4949 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4950 { 4951 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4952 PetscErrorCode ierr; 4953 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4954 IS isrowa,iscola; 4955 Mat *aloc; 4956 PetscBool match; 4957 4958 PetscFunctionBegin; 4959 ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4960 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4961 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4962 if (!row){ 4963 start = A->rmap->rstart; end = A->rmap->rend; 4964 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4965 } else { 4966 isrowa = *row; 4967 } 4968 if (!col){ 4969 start = A->cmap->rstart; 4970 cmap = a->garray; 4971 nzA = a->A->cmap->n; 4972 nzB = a->B->cmap->n; 4973 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4974 ncols = 0; 4975 for (i=0; i<nzB; i++) { 4976 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4977 else break; 4978 } 4979 imark = i; 4980 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4981 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4982 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 4983 } else { 4984 iscola = *col; 4985 } 4986 if (scall != MAT_INITIAL_MATRIX){ 4987 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4988 aloc[0] = *A_loc; 4989 } 4990 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4991 *A_loc = aloc[0]; 4992 ierr = PetscFree(aloc);CHKERRQ(ierr); 4993 if (!row){ 4994 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 4995 } 4996 if (!col){ 4997 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 4998 } 4999 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5000 PetscFunctionReturn(0); 5001 } 5002 5003 #undef __FUNCT__ 5004 #define __FUNCT__ "MatGetBrowsOfAcols" 5005 /*@C 5006 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5007 5008 Collective on Mat 5009 5010 Input Parameters: 5011 + A,B - the matrices in mpiaij format 5012 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5013 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 5014 5015 Output Parameter: 5016 + rowb, colb - index sets of rows and columns of B to extract 5017 . 
brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows 5018 - B_seq - the sequential matrix generated 5019 5020 Level: developer 5021 5022 @*/ 5023 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 5024 { 5025 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5026 PetscErrorCode ierr; 5027 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5028 IS isrowb,iscolb; 5029 Mat *bseq; 5030 5031 PetscFunctionBegin; 5032 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5033 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5034 } 5035 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5036 5037 if (scall == MAT_INITIAL_MATRIX){ 5038 start = A->cmap->rstart; 5039 cmap = a->garray; 5040 nzA = a->A->cmap->n; 5041 nzB = a->B->cmap->n; 5042 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5043 ncols = 0; 5044 for (i=0; i<nzB; i++) { /* row < local row index */ 5045 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5046 else break; 5047 } 5048 imark = i; 5049 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5050 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5051 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5052 *brstart = imark; 5053 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5054 } else { 5055 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5056 isrowb = *rowb; iscolb = *colb; 5057 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5058 bseq[0] = *B_seq; 5059 } 5060 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5061 *B_seq = bseq[0]; 5062 ierr = PetscFree(bseq);CHKERRQ(ierr); 5063 if (!rowb){ 5064 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5065 } else { 5066 *rowb = isrowb; 5067 } 5068 if (!colb){ 5069 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5070 } else { 5071 *colb = iscolb; 5072 } 5073 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5074 PetscFunctionReturn(0); 5075 } 5076 5077 #undef __FUNCT__ 5078 #define __FUNCT__ "MatGetBrowsOfAoCols" 5079 /*@C 5080 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5081 of the OFF-DIAGONAL portion of local A 5082 5083 Collective on Mat 5084 5085 Input Parameters: 5086 + A,B - the matrices in mpiaij format 5087 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5088 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5089 . 
startsj_r - similar to startsj for receives 5090 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 5091 5092 Output Parameter: 5093 + B_oth - the sequential matrix generated 5094 5095 Level: developer 5096 5097 @*/ 5098 PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5099 { 5100 VecScatter_MPI_General *gen_to,*gen_from; 5101 PetscErrorCode ierr; 5102 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5103 Mat_SeqAIJ *b_oth; 5104 VecScatter ctx=a->Mvctx; 5105 MPI_Comm comm=((PetscObject)ctx)->comm; 5106 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5107 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5108 PetscScalar *rvalues,*svalues; 5109 MatScalar *b_otha,*bufa,*bufA; 5110 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5111 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 5112 MPI_Status *sstatus,rstatus; 5113 PetscMPIInt jj; 5114 PetscInt *cols,sbs,rbs; 5115 PetscScalar *vals; 5116 5117 PetscFunctionBegin; 5118 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5119 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5120 } 5121 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5122 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5123 5124 gen_to = (VecScatter_MPI_General*)ctx->todata; 5125 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5126 rvalues = gen_from->values; /* holds the length of receiving row */ 5127 svalues = gen_to->values; /* holds the length of sending row */ 5128 nrecvs = gen_from->n; 5129 nsends = gen_to->n; 5130 5131 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5132 srow = gen_to->indices; /* local row index to be sent */ 5133 sstarts = gen_to->starts; 5134 sprocs = gen_to->procs; 5135 sstatus = gen_to->sstatus; 5136 sbs = gen_to->bs; 5137 rstarts = gen_from->starts; 5138 rprocs = gen_from->procs; 5139 rbs = gen_from->bs; 5140 5141 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5142 if (scall == MAT_INITIAL_MATRIX){ 5143 /* i-array */ 5144 /*---------*/ 5145 /* post receives */ 5146 for (i=0; i<nrecvs; i++){ 5147 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5148 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5149 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5150 } 5151 5152 /* pack the outgoing message */ 5153 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5154 sstartsj[0] = 0; rstartsj[0] = 0; 5155 len = 0; /* total length of j or a array to be sent */ 5156 k = 0; 5157 for (i=0; i<nsends; i++){ 5158 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5159 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5160 for (j=0; j<nrows; j++) { 5161 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5162 for (l=0; l<sbs; l++){ 5163 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5164 rowlen[j*sbs+l] = ncols; 5165 len += ncols; 5166 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5167 } 5168 k++; 5169 } 5170 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5171 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and 
bufa */ 5172 } 5173 /* recvs and sends of i-array are completed */ 5174 i = nrecvs; 5175 while (i--) { 5176 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5177 } 5178 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5179 5180 /* allocate buffers for sending j and a arrays */ 5181 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5182 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5183 5184 /* create i-array of B_oth */ 5185 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5186 b_othi[0] = 0; 5187 len = 0; /* total length of j or a array to be received */ 5188 k = 0; 5189 for (i=0; i<nrecvs; i++){ 5190 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5191 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 5192 for (j=0; j<nrows; j++) { 5193 b_othi[k+1] = b_othi[k] + rowlen[j]; 5194 len += rowlen[j]; k++; 5195 } 5196 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5197 } 5198 5199 /* allocate space for j and a arrays of B_oth */ 5200 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5201 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5202 5203 /* j-array */ 5204 /*---------*/ 5205 /* post receives of j-array */ 5206 for (i=0; i<nrecvs; i++){ 5207 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5208 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5209 } 5210 5211 /* pack the outgoing message j-array */ 5212 k = 0; 5213 for (i=0; i<nsends; i++){ 5214 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5215 bufJ = bufj+sstartsj[i]; 5216 for (j=0; j<nrows; j++) { 5217 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5218 for (ll=0; ll<sbs; ll++){ 5219 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5220 for (l=0; l<ncols; l++){ 5221 *bufJ++ = cols[l]; 5222 } 5223 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5224 } 5225 } 5226 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5227 } 5228 5229 /* recvs and sends of j-array are completed */ 5230 i = nrecvs; 5231 while (i--) { 5232 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5233 } 5234 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5235 } else if (scall == MAT_REUSE_MATRIX){ 5236 sstartsj = *startsj; 5237 rstartsj = *startsj_r; 5238 bufa = *bufa_ptr; 5239 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5240 b_otha = b_oth->a; 5241 } else { 5242 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5243 } 5244 5245 /* a-array */ 5246 /*---------*/ 5247 /* post receives of a-array */ 5248 for (i=0; i<nrecvs; i++){ 5249 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5250 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5251 } 5252 5253 /* pack the outgoing message a-array */ 5254 k = 0; 5255 for (i=0; i<nsends; i++){ 5256 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5257 bufA = bufa+sstartsj[i]; 5258 for (j=0; j<nrows; j++) { 5259 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5260 for (ll=0; ll<sbs; ll++){ 5261 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5262 for (l=0; l<ncols; l++){ 5263 *bufA++ = vals[l]; 5264 } 5265 ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5266 } 5267 } 5268 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5269 } 5270 /* recvs and sends of a-array are completed */ 5271 i = nrecvs; 5272 while (i--) { 5273 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5274 } 5275 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5276 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5277 5278 if (scall == MAT_INITIAL_MATRIX){ 5279 /* put together the new matrix */ 5280 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5281 5282 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5283 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5284 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5285 b_oth->free_a = PETSC_TRUE; 5286 b_oth->free_ij = PETSC_TRUE; 5287 b_oth->nonew = 0; 5288 5289 ierr = PetscFree(bufj);CHKERRQ(ierr); 5290 if (!startsj || !bufa_ptr){ 5291 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5292 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5293 } else { 5294 *startsj = sstartsj; 5295 *startsj_r = rstartsj; 5296 *bufa_ptr = bufa; 5297 } 5298 } 5299 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5300 PetscFunctionReturn(0); 5301 } 5302 5303 #undef __FUNCT__ 5304 #define __FUNCT__ "MatGetCommunicationStructs" 5305 /*@C 5306 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5307 5308 Not Collective 5309 5310 Input Parameters: 5311 . A - The matrix in mpiaij format 5312 5313 Output Parameter: 5314 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5315 . 
colmap - A map from global column index to local index into lvec 5316 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5317 5318 Level: developer 5319 5320 @*/ 5321 #if defined (PETSC_USE_CTABLE) 5322 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5323 #else 5324 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5325 #endif 5326 { 5327 Mat_MPIAIJ *a; 5328 5329 PetscFunctionBegin; 5330 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5331 PetscValidPointer(lvec, 2); 5332 PetscValidPointer(colmap, 3); 5333 PetscValidPointer(multScatter, 4); 5334 a = (Mat_MPIAIJ *) A->data; 5335 if (lvec) *lvec = a->lvec; 5336 if (colmap) *colmap = a->colmap; 5337 if (multScatter) *multScatter = a->Mvctx; 5338 PetscFunctionReturn(0); 5339 } 5340 5341 EXTERN_C_BEGIN 5342 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*); 5343 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*); 5344 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 5345 EXTERN_C_END 5346 5347 #undef __FUNCT__ 5348 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5349 /* 5350 Computes (B'*A')' since computing B*A directly is untenable 5351 5352 n p p 5353 ( ) ( ) ( ) 5354 m ( A ) * n ( B ) = m ( C ) 5355 ( ) ( ) ( ) 5356 5357 */ 5358 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5359 { 5360 PetscErrorCode ierr; 5361 Mat At,Bt,Ct; 5362 5363 PetscFunctionBegin; 5364 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5365 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5366 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5367 ierr = MatDestroy(&At);CHKERRQ(ierr); 5368 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5369 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5370 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5371 PetscFunctionReturn(0); 5372 } 5373 5374 #undef __FUNCT__ 5375 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5376 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5377 { 5378 PetscErrorCode ierr; 5379 PetscInt m=A->rmap->n,n=B->cmap->n; 5380 Mat Cmat; 5381 5382 PetscFunctionBegin; 5383 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5384 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5385 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5386 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5387 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5388 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5389 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5390 *C = Cmat; 5391 PetscFunctionReturn(0); 5392 } 5393 5394 /* ----------------------------------------------------------------*/ 5395 #undef __FUNCT__ 5396 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5397 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5398 { 5399 PetscErrorCode ierr; 5400 5401 PetscFunctionBegin; 5402 if (scall == MAT_INITIAL_MATRIX){ 5403 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5404 } 5405 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5406 PetscFunctionReturn(0); 5407 } 5408 5409 EXTERN_C_BEGIN 5410 #if defined(PETSC_HAVE_MUMPS) 5411 extern PetscErrorCode 
MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5412 #endif 5413 #if defined(PETSC_HAVE_PASTIX) 5414 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5415 #endif 5416 #if defined(PETSC_HAVE_SUPERLU_DIST) 5417 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5418 #endif 5419 #if defined(PETSC_HAVE_SPOOLES) 5420 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5421 #endif 5422 EXTERN_C_END 5423 5424 /*MC 5425 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5426 5427 Options Database Keys: 5428 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5429 5430 Level: beginner 5431 5432 .seealso: MatCreateMPIAIJ() 5433 M*/ 5434 5435 EXTERN_C_BEGIN 5436 #undef __FUNCT__ 5437 #define __FUNCT__ "MatCreate_MPIAIJ" 5438 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5439 { 5440 Mat_MPIAIJ *b; 5441 PetscErrorCode ierr; 5442 PetscMPIInt size; 5443 5444 PetscFunctionBegin; 5445 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5446 5447 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5448 B->data = (void*)b; 5449 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5450 B->rmap->bs = 1; 5451 B->assembled = PETSC_FALSE; 5452 5453 B->insertmode = NOT_SET_VALUES; 5454 b->size = size; 5455 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5456 5457 /* build cache for off array entries formed */ 5458 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5459 b->donotstash = PETSC_FALSE; 5460 b->colmap = 0; 5461 b->garray = 0; 5462 b->roworiented = PETSC_TRUE; 5463 5464 /* stuff used for matrix vector multiply */ 5465 b->lvec = PETSC_NULL; 5466 b->Mvctx = PETSC_NULL; 5467 5468 /* stuff for MatGetRow() */ 5469 b->rowindices = 0; 5470 b->rowvalues = 0; 5471 b->getrowactive = PETSC_FALSE; 5472 5473 #if defined(PETSC_HAVE_SPOOLES) 5474 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5475 "MatGetFactor_mpiaij_spooles", 5476 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5477 #endif 5478 #if defined(PETSC_HAVE_MUMPS) 5479 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5480 "MatGetFactor_aij_mumps", 5481 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5482 #endif 5483 #if defined(PETSC_HAVE_PASTIX) 5484 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5485 "MatGetFactor_mpiaij_pastix", 5486 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5487 #endif 5488 #if defined(PETSC_HAVE_SUPERLU_DIST) 5489 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5490 "MatGetFactor_mpiaij_superlu_dist", 5491 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5492 #endif 5493 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5494 "MatStoreValues_MPIAIJ", 5495 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5496 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5497 "MatRetrieveValues_MPIAIJ", 5498 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5499 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5500 "MatGetDiagonalBlock_MPIAIJ", 5501 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5502 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5503 "MatIsTranspose_MPIAIJ", 5504 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5505 ierr = 
PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5506 "MatMPIAIJSetPreallocation_MPIAIJ", 5507 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5508 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5509 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5510 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5511 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5512 "MatDiagonalScaleLocal_MPIAIJ", 5513 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5514 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5515 "MatConvert_MPIAIJ_MPIAIJPERM", 5516 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5517 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5518 "MatConvert_MPIAIJ_MPIAIJCRL", 5519 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5520 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5521 "MatConvert_MPIAIJ_MPISBAIJ", 5522 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5523 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5524 "MatMatMult_MPIDense_MPIAIJ", 5525 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5526 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5527 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5528 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5529 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5530 "MatMatMultNumeric_MPIDense_MPIAIJ", 5531 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5532 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5533 PetscFunctionReturn(0); 5534 } 5535 EXTERN_C_END 5536 5537 #undef __FUNCT__ 5538 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5539 /*@ 5540 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5541 and "off-diagonal" part of the matrix in CSR format. 5542 5543 Collective on MPI_Comm 5544 5545 Input Parameters: 5546 + comm - MPI communicator 5547 . m - number of local rows (Cannot be PETSC_DECIDE) 5548 . n - This value should be the same as the local size used in creating the 5549 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5550 calculated if N is given) For square matrices n is almost always m. 5551 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5552 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5553 . i - row indices for "diagonal" portion of matrix 5554 . j - column indices 5555 . a - matrix values 5556 . oi - row indices for "off-diagonal" portion of matrix 5557 . oj - column indices 5558 - oa - matrix values 5559 5560 Output Parameter: 5561 . mat - the matrix 5562 5563 Level: advanced 5564 5565 Notes: 5566 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5567 must free the arrays once the matrix has been destroyed and not before. 5568 5569 The i and j indices are 0 based 5570 5571 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5572 5573 This sets local rows and cannot be used to set off-processor values. 5574 5575 You cannot later use MatSetValues() to change values in this matrix. 
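   A minimal usage sketch (illustrative only; d_i,d_j,d_a and o_i,o_j,o_a are placeholder names for CSR arrays the caller has already built for the "diagonal" and "off-diagonal" blocks, the off-diagonal column indices being global since they are passed directly to a SeqAIJ created with the full global column width):
.vb
      Mat A;
      ierr = MatCreateMPIAIJWithSplitArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,d_i,d_j,d_a,o_i,o_j,o_a,&A);CHKERRQ(ierr);
      ... use A, e.g. in MatMult() or a solver ...
      ierr = MatDestroy(&A);CHKERRQ(ierr);
.ve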
5576 5577 .keywords: matrix, aij, compressed row, sparse, parallel 5578 5579 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5580 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5581 @*/ 5582 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5583 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5584 { 5585 PetscErrorCode ierr; 5586 Mat_MPIAIJ *maij; 5587 5588 PetscFunctionBegin; 5589 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5590 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5591 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5592 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5593 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5594 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5595 maij = (Mat_MPIAIJ*) (*mat)->data; 5596 maij->donotstash = PETSC_TRUE; 5597 (*mat)->preallocated = PETSC_TRUE; 5598 5599 ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5600 ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5601 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5602 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5603 5604 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5605 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5606 5607 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5608 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5609 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5610 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5611 5612 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5613 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5614 PetscFunctionReturn(0); 5615 } 5616 5617 /* 5618 Special version for direct calls from Fortran 5619 */ 5620 #include <private/fortranimpl.h> 5621 5622 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5623 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5624 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5625 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5626 #endif 5627 5628 /* Change these macros so can be used in void function */ 5629 #undef CHKERRQ 5630 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5631 #undef SETERRQ2 5632 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5633 #undef SETERRQ 5634 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5635 5636 EXTERN_C_BEGIN 5637 #undef __FUNCT__ 5638 #define __FUNCT__ "matsetvaluesmpiaij_" 5639 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5640 { 5641 Mat mat = *mmat; 5642 PetscInt m = *mm, n = *mn; 5643 InsertMode addv = *maddv; 5644 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5645 PetscScalar value; 5646 PetscErrorCode ierr; 5647 5648 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5649 if (mat->insertmode == NOT_SET_VALUES) { 5650 mat->insertmode = addv; 5651 } 5652 #if defined(PETSC_USE_DEBUG) 5653 else if (mat->insertmode != addv) { 5654 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5655 } 5656 #endif 5657 { 5658 PetscInt i,j,rstart = 
mat->rmap->rstart,rend = mat->rmap->rend; 5659 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5660 PetscBool roworiented = aij->roworiented; 5661 5662 /* Some Variables required in the macro */ 5663 Mat A = aij->A; 5664 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5665 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5666 MatScalar *aa = a->a; 5667 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5668 Mat B = aij->B; 5669 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5670 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5671 MatScalar *ba = b->a; 5672 5673 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5674 PetscInt nonew = a->nonew; 5675 MatScalar *ap1,*ap2; 5676 5677 PetscFunctionBegin; 5678 for (i=0; i<m; i++) { 5679 if (im[i] < 0) continue; 5680 #if defined(PETSC_USE_DEBUG) 5681 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5682 #endif 5683 if (im[i] >= rstart && im[i] < rend) { 5684 row = im[i] - rstart; 5685 lastcol1 = -1; 5686 rp1 = aj + ai[row]; 5687 ap1 = aa + ai[row]; 5688 rmax1 = aimax[row]; 5689 nrow1 = ailen[row]; 5690 low1 = 0; 5691 high1 = nrow1; 5692 lastcol2 = -1; 5693 rp2 = bj + bi[row]; 5694 ap2 = ba + bi[row]; 5695 rmax2 = bimax[row]; 5696 nrow2 = bilen[row]; 5697 low2 = 0; 5698 high2 = nrow2; 5699 5700 for (j=0; j<n; j++) { 5701 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5702 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5703 if (in[j] >= cstart && in[j] < cend){ 5704 col = in[j] - cstart; 5705 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5706 } else if (in[j] < 0) continue; 5707 #if defined(PETSC_USE_DEBUG) 5708 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5709 #endif 5710 else { 5711 if (mat->was_assembled) { 5712 if (!aij->colmap) { 5713 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5714 } 5715 #if defined (PETSC_USE_CTABLE) 5716 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5717 col--; 5718 #else 5719 col = aij->colmap[in[j]] - 1; 5720 #endif 5721 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5722 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5723 col = in[j]; 5724 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5725 B = aij->B; 5726 b = (Mat_SeqAIJ*)B->data; 5727 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5728 rp2 = bj + bi[row]; 5729 ap2 = ba + bi[row]; 5730 rmax2 = bimax[row]; 5731 nrow2 = bilen[row]; 5732 low2 = 0; 5733 high2 = nrow2; 5734 bm = aij->B->rmap->n; 5735 ba = b->a; 5736 } 5737 } else col = in[j]; 5738 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5739 } 5740 } 5741 } else { 5742 if (!aij->donotstash) { 5743 if (roworiented) { 5744 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5745 } else { 5746 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5747 } 5748 } 5749 } 5750 }} 5751 PetscFunctionReturnVoid(); 5752 } 5753 EXTERN_C_END 5754 5755
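/*
   Usage sketch (not part of the library source above; an illustrative, hedged example of how the
   MatMerge_SeqsToMPISymbolic()/MatMerge_SeqsToMPINumeric() pair is typically driven through
   MatMerge_SeqsToMPI(). Here 'seqmat', 'mloc' and 'nloc' are placeholder names for a caller-owned
   sequential AIJ matrix and the desired local row/column sizes; the second call assumes seqmat
   keeps the same nonzero pattern and only its numerical values have changed, since the stored
   merge information is reused.

      Mat C;
      ierr = MatMerge_SeqsToMPI(comm,seqmat,mloc,nloc,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);   symbolic + numeric merge
      ... recompute the entries of seqmat, keeping the same nonzero pattern ...
      ierr = MatMerge_SeqsToMPI(comm,seqmat,mloc,nloc,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);     numeric phase only
      ierr = MatDestroy(&C);CHKERRQ(ierr);
*/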