#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petscblaslapack.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to use inodes when
   enough exist.

   Level: beginner

.seealso: MatCreateMPIAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
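/*
   A minimal sketch of the create/set-type/preallocate sequence recommended in the
   MATAIJ notes above.  The local sizes m and n and the fill estimates (5 diagonal
   and 2 off-diagonal nonzeros per row) are placeholder values, not recommendations.

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);            (used when the communicator has one process)
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr); (used otherwise)
*/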
#undef __FUNCT__
#define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia = a->i;
  ib = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
    ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    ok2:;
  }
  ierr = ISCreateGeneral(PETSC_COMM_WORLD,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = sqrt(norms[i]);
  }
  PetscFunctionReturn(0);
}
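/*
   A minimal usage sketch for the public interface MatGetColumnNorms() whose MPIAIJ
   implementation appears above: every process passes an array long enough for all
   global columns and receives the same result.  N is assumed to hold the global
   column count obtained from MatGetSize().

     PetscInt  N;
     PetscReal *norms;
     ierr = MatGetSize(A,PETSC_NULL,&N);CHKERRQ(ierr);
     ierr = PetscMalloc(N*sizeof(PetscReal),&norms);CHKERRQ(ierr);
     ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
     ierr = PetscFree(norms);CHKERRQ(ierr);
*/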
/*
  Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

   Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine number of diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine number of diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];  ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i];  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                    ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it, it is not scalable (each
  processor has an order N integer array) but is fast to access.
*/
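/*
   Illustration of the mapping built below (hypothetical numbers, not taken from the
   code): if the off-diagonal block references global columns garray = {3,7,12}, the
   non-CTABLE branch stores colmap[3] = 1, colmap[7] = 2, colmap[12] = 3, i.e. the
   local column index plus one, so that a zero entry can mean "column not present".
   A lookup such as the one in MatSetValues_MPIAIJ() then recovers the local index as

     col = aij->colmap[global_col] - 1;                          (array version)

   or, with PETSC_USE_CTABLE,

     ierr = PetscTableFind(aij->colmap,global_col+1,&col);CHKERRQ(ierr);
     col--;
*/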
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
  if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
  lastcol1 = col;\
  while (high1-low1 > 5) { \
    t = (low1+high1)/2; \
    if (rp1[t] > col) high1 = t; \
    else              low1  = t; \
  } \
  for (_i=low1; _i<high1; _i++) { \
    if (rp1[_i] > col) break; \
    if (rp1[_i] == col) { \
      if (addv == ADD_VALUES) ap1[_i] += value; \
      else                    ap1[_i]  = value; \
      goto a_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
  if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
  N = nrow1++ - 1; a->nz++; high1++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp1[ii+1] = rp1[ii]; \
    ap1[ii+1] = ap1[ii]; \
  } \
  rp1[_i] = col; \
  ap1[_i] = value; \
  a_noinsert: ; \
  ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
  if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
  lastcol2 = col;\
  while (high2-low2 > 5) { \
    t = (low2+high2)/2; \
    if (rp2[t] > col) high2 = t; \
    else              low2  = t; \
  } \
  for (_i=low2; _i<high2; _i++) { \
    if (rp2[_i] > col) break; \
    if (rp2[_i] == col) { \
      if (addv == ADD_VALUES) ap2[_i] += value; \
      else                    ap2[_i]  = value; \
      goto b_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
  if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
  N = nrow2++ - 1; b->nz++; high2++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp2[ii+1] = rp2[ii]; \
    ap2[ii+1] = ap2[ii]; \
  } \
  rp2[_i] = col; \
  ap2[_i] = value; \
  b_noinsert: ; \
  bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew = a->nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
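/*
   Why the MPI_BOR reduction above detects inconsistent insert modes (a sketch,
   relying on the usual enum values INSERT_VALUES == 1 and ADD_VALUES == 2): if every
   process uses the same mode the bitwise OR reproduces that mode, but if one process
   passed INSERT_VALUES and another ADD_VALUES the result is

     1 | 2 == 3 == (ADD_VALUES | INSERT_VALUES)

   which is exactly what the SETERRQ() test above checks for.
*/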
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends,row;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb;
#if defined(PETSC_DEBUG)
  PetscBool found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  if (A->nooffproczerorows) {
    if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
    nrecvs = nsends;
    nmax   = N;
  } else {
    /* inform other processors of number of messages and max length*/
    ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
  }

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry (which would
     allocate new memory, causing lots of mallocs and slowing things down).
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
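/*
   A minimal usage sketch for the row-zeroing interface implemented above; the row
   list, the value 1.0 placed on the diagonal, and the vectors x and b (used to fix
   up the right-hand side as described in the comment above) are placeholders.

     PetscInt rows[] = {0,5,7};                  (three hypothetical global rows)
     ierr = MatZeroRows(A,3,rows,1.0,x,b);CHKERRQ(ierr);

   Passing PETSC_NULL for x and b skips the right-hand-side fix-up.
*/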
#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,m;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj,*ii,*ridx;
  PetscScalar       *aa;
#if defined(PETSC_DEBUG)
  PetscBool found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);
  /* lrows are the local rows to be zeroed, slen is the number of local rows */

  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);

  /* handle off diagonal part of matrix */
  ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<slen; i++) {
    bb[lrows[i]] = 1;
  }
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecGetArray(b,&bb);CHKERRQ(ierr);

  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<slen; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use){
    m  = aij->compressedrow.nrows;
    ii = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++){
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          bb[*ridx] -= *aa*xx[*aj];
          *aa        = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          bb[i] -= *aa*xx[*aj];
          *aa    = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added in yy until the next line, */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
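/*
   A minimal usage sketch for MatIsTranspose(), whose MPIAIJ implementation appears
   above; A and B are assumed to be already assembled matrices of compatible sizes,
   and the tolerance 1.e-10 is a placeholder.

     PetscBool flg;
     ierr = MatIsTranspose(A,B,1.e-10,&flg);CHKERRQ(ierr);
     if (flg) { ... B is the transpose of A to within the given tolerance ... }
*/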
#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
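/*
   Summary of the binary layout produced by MatView_MPIAIJ_Binary() above, gathered
   from the writes it performs (a reading aid, not a formal file-format specification):

     header[4]                 MAT_FILE_CLASSID, global rows M, global columns N, total nonzero count
     row_lengths[M]            number of nonzeros in each global row
     column_indices[total nz]  global column index of every nonzero, row by row, in ascending column order
     column_values[total nz]   the corresponding numerical values
*/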
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
*/ 1494 Mat A; 1495 Mat_SeqAIJ *Aloc; 1496 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1497 MatScalar *a; 1498 1499 if (mat->rmap->N > 1024) { 1500 PetscBool flg = PETSC_FALSE; 1501 1502 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1503 if (!flg) { 1504 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1505 } 1506 } 1507 1508 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1509 if (!rank) { 1510 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1511 } else { 1512 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1513 } 1514 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1515 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1516 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1517 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1518 1519 /* copy over the A part */ 1520 Aloc = (Mat_SeqAIJ*)aij->A->data; 1521 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1522 row = mat->rmap->rstart; 1523 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1524 for (i=0; i<m; i++) { 1525 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1526 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1527 } 1528 aj = Aloc->j; 1529 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1530 1531 /* copy over the B part */ 1532 Aloc = (Mat_SeqAIJ*)aij->B->data; 1533 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1534 row = mat->rmap->rstart; 1535 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1536 ct = cols; 1537 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1538 for (i=0; i<m; i++) { 1539 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1540 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1541 } 1542 ierr = PetscFree(ct);CHKERRQ(ierr); 1543 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1544 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1545 /* 1546 Everyone has to call to draw the matrix since the graphics waits are 1547 synchronized across all processors that share the PetscDraw object 1548 */ 1549 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1550 if (!rank) { 1551 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1552 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1553 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1554 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1555 } 1556 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1557 ierr = MatDestroy(&A);CHKERRQ(ierr); 1558 } 1559 PetscFunctionReturn(0); 1560 } 1561 1562 #undef __FUNCT__ 1563 #define __FUNCT__ "MatView_MPIAIJ" 1564 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1565 { 1566 PetscErrorCode ierr; 1567 PetscBool iascii,isdraw,issocket,isbinary; 1568 1569 PetscFunctionBegin; 1570 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1571 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1572 ierr = 
PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1573 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1574 if (iascii || isdraw || isbinary || issocket) { 1575 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1576 } else { 1577 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1578 } 1579 PetscFunctionReturn(0); 1580 } 1581 1582 #undef __FUNCT__ 1583 #define __FUNCT__ "MatSOR_MPIAIJ" 1584 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1585 { 1586 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1587 PetscErrorCode ierr; 1588 Vec bb1 = 0; 1589 PetscBool hasop; 1590 1591 PetscFunctionBegin; 1592 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1593 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1594 } 1595 1596 if (flag == SOR_APPLY_UPPER) { 1597 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1598 PetscFunctionReturn(0); 1599 } 1600 1601 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1602 if (flag & SOR_ZERO_INITIAL_GUESS) { 1603 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1604 its--; 1605 } 1606 1607 while (its--) { 1608 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1609 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1610 1611 /* update rhs: bb1 = bb - B*x */ 1612 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1613 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1614 1615 /* local sweep */ 1616 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1617 } 1618 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1619 if (flag & SOR_ZERO_INITIAL_GUESS) { 1620 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1621 its--; 1622 } 1623 while (its--) { 1624 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1625 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1626 1627 /* update rhs: bb1 = bb - B*x */ 1628 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1629 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1630 1631 /* local sweep */ 1632 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1633 } 1634 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1635 if (flag & SOR_ZERO_INITIAL_GUESS) { 1636 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1637 its--; 1638 } 1639 while (its--) { 1640 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1641 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1642 1643 /* update rhs: bb1 = bb - B*x */ 1644 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1645 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1646 1647 /* local sweep */ 1648 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1649 } 1650 } else if (flag & SOR_EISENSTAT) { 1651 Vec xx1; 1652 1653 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1654 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | 
SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1655 1656 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1657 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1658 if (!mat->diag) { 1659 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1660 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1661 } 1662 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1663 if (hasop) { 1664 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1665 } else { 1666 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1667 } 1668 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1669 1670 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1671 1672 /* local sweep */ 1673 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1674 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1675 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1676 } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1677 1678 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1679 PetscFunctionReturn(0); 1680 } 1681 1682 #undef __FUNCT__ 1683 #define __FUNCT__ "MatPermute_MPIAIJ" 1684 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1685 { 1686 MPI_Comm comm,pcomm; 1687 PetscInt first,local_size,nrows; 1688 const PetscInt *rows; 1689 PetscMPIInt size; 1690 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1691 PetscErrorCode ierr; 1692 1693 PetscFunctionBegin; 1694 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1695 /* make a collective version of 'rowp' */ 1696 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 1697 if (pcomm==comm) { 1698 crowp = rowp; 1699 } else { 1700 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 1701 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 1702 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 1703 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 1704 } 1705 /* collect the global row permutation and invert it */ 1706 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1707 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1708 if (pcomm!=comm) { 1709 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 1710 } 1711 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1712 /* get the local target indices */ 1713 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1714 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 1715 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1716 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 1717 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1718 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 1719 /* the column permutation is so much easier; 1720 make a local version of 'colp' and invert it */ 1721 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 1722 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 1723 if (size==1) { 1724 lcolp = colp; 1725 } else { 1726 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 1727 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 1728 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr); 1729 } 1730 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1731 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1732 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 1733 if (size>1) { 1734 ierr = 
ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 1735 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 1736 } 1737 /* now we just get the submatrix */ 1738 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1739 /* clean up */ 1740 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 1741 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 1742 PetscFunctionReturn(0); 1743 } 1744 1745 #undef __FUNCT__ 1746 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1747 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1748 { 1749 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1750 Mat A = mat->A,B = mat->B; 1751 PetscErrorCode ierr; 1752 PetscReal isend[5],irecv[5]; 1753 1754 PetscFunctionBegin; 1755 info->block_size = 1.0; 1756 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1757 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1758 isend[3] = info->memory; isend[4] = info->mallocs; 1759 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1760 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1761 isend[3] += info->memory; isend[4] += info->mallocs; 1762 if (flag == MAT_LOCAL) { 1763 info->nz_used = isend[0]; 1764 info->nz_allocated = isend[1]; 1765 info->nz_unneeded = isend[2]; 1766 info->memory = isend[3]; 1767 info->mallocs = isend[4]; 1768 } else if (flag == MAT_GLOBAL_MAX) { 1769 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1770 info->nz_used = irecv[0]; 1771 info->nz_allocated = irecv[1]; 1772 info->nz_unneeded = irecv[2]; 1773 info->memory = irecv[3]; 1774 info->mallocs = irecv[4]; 1775 } else if (flag == MAT_GLOBAL_SUM) { 1776 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1777 info->nz_used = irecv[0]; 1778 info->nz_allocated = irecv[1]; 1779 info->nz_unneeded = irecv[2]; 1780 info->memory = irecv[3]; 1781 info->mallocs = irecv[4]; 1782 } 1783 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1784 info->fill_ratio_needed = 0; 1785 info->factor_mallocs = 0; 1786 1787 PetscFunctionReturn(0); 1788 } 1789 1790 #undef __FUNCT__ 1791 #define __FUNCT__ "MatSetOption_MPIAIJ" 1792 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1793 { 1794 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1795 PetscErrorCode ierr; 1796 1797 PetscFunctionBegin; 1798 switch (op) { 1799 case MAT_NEW_NONZERO_LOCATIONS: 1800 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1801 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1802 case MAT_KEEP_NONZERO_PATTERN: 1803 case MAT_NEW_NONZERO_LOCATION_ERR: 1804 case MAT_USE_INODES: 1805 case MAT_IGNORE_ZERO_ENTRIES: 1806 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1807 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1808 break; 1809 case MAT_ROW_ORIENTED: 1810 a->roworiented = flg; 1811 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1812 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1813 break; 1814 case MAT_NEW_DIAGONALS: 1815 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1816 break; 1817 case MAT_IGNORE_OFF_PROC_ENTRIES: 1818 a->donotstash = flg; 1819 break; 1820 case MAT_SPD: 1821 A->spd_set = PETSC_TRUE; 1822 A->spd = flg; 1823 if (flg) { 1824 A->symmetric = PETSC_TRUE; 1825 A->structurally_symmetric = PETSC_TRUE; 1826 A->symmetric_set = PETSC_TRUE; 1827 A->structurally_symmetric_set = PETSC_TRUE; 1828 } 1829 break; 1830 case MAT_SYMMETRIC: 1831 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1832 break; 1833 case MAT_STRUCTURALLY_SYMMETRIC: 
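    /*
       As with MAT_SYMMETRIC above, the symmetry-type options (this case, MAT_HERMITIAN,
       MAT_SYMMETRY_ETERNAL) are only recorded on the sequential diagonal block a->A;
       nothing needs to be set on the off-diagonal block.  A caller-side sketch
       (an illustration only, assuming an MPIAIJ matrix A created elsewhere):

          ierr = MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
          ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
    */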
1834 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1835 break; 1836 case MAT_HERMITIAN: 1837 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1838 break; 1839 case MAT_SYMMETRY_ETERNAL: 1840 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1841 break; 1842 default: 1843 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1844 } 1845 PetscFunctionReturn(0); 1846 } 1847 1848 #undef __FUNCT__ 1849 #define __FUNCT__ "MatGetRow_MPIAIJ" 1850 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1851 { 1852 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1853 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1854 PetscErrorCode ierr; 1855 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1856 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1857 PetscInt *cmap,*idx_p; 1858 1859 PetscFunctionBegin; 1860 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1861 mat->getrowactive = PETSC_TRUE; 1862 1863 if (!mat->rowvalues && (idx || v)) { 1864 /* 1865 allocate enough space to hold information from the longest row. 1866 */ 1867 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1868 PetscInt max = 1,tmp; 1869 for (i=0; i<matin->rmap->n; i++) { 1870 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1871 if (max < tmp) { max = tmp; } 1872 } 1873 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1874 } 1875 1876 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1877 lrow = row - rstart; 1878 1879 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1880 if (!v) {pvA = 0; pvB = 0;} 1881 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1882 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1883 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1884 nztot = nzA + nzB; 1885 1886 cmap = mat->garray; 1887 if (v || idx) { 1888 if (nztot) { 1889 /* Sort by increasing column numbers, assuming A and B already sorted */ 1890 PetscInt imark = -1; 1891 if (v) { 1892 *v = v_p = mat->rowvalues; 1893 for (i=0; i<nzB; i++) { 1894 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1895 else break; 1896 } 1897 imark = i; 1898 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1899 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1900 } 1901 if (idx) { 1902 *idx = idx_p = mat->rowindices; 1903 if (imark > -1) { 1904 for (i=0; i<imark; i++) { 1905 idx_p[i] = cmap[cworkB[i]]; 1906 } 1907 } else { 1908 for (i=0; i<nzB; i++) { 1909 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1910 else break; 1911 } 1912 imark = i; 1913 } 1914 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1915 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1916 } 1917 } else { 1918 if (idx) *idx = 0; 1919 if (v) *v = 0; 1920 } 1921 } 1922 *nz = nztot; 1923 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1924 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1925 PetscFunctionReturn(0); 1926 } 1927 1928 #undef __FUNCT__ 1929 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1930 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1931 { 1932 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1933 1934 PetscFunctionBegin; 1935 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1936 
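  /*
     The getrowactive flag enforces the pairing: each MatGetRow() on this matrix must be
     matched by a MatRestoreRow() before another row is requested, because both calls share
     the single rowvalues/rowindices work buffer allocated in MatGetRow_MPIAIJ().  A minimal
     caller-side sketch (an illustration only; it assumes an assembled MPIAIJ matrix A and
     touches only locally owned rows, as required above):

        PetscInt          row,rstart,rend,ncols;
        const PetscInt    *cols;
        const PetscScalar *vals;
        ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
        for (row=rstart; row<rend; row++) {
          ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
          ... use ncols, cols and vals for this row ...
          ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
        }
  */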
aij->getrowactive = PETSC_FALSE; 1937 PetscFunctionReturn(0); 1938 } 1939 1940 #undef __FUNCT__ 1941 #define __FUNCT__ "MatNorm_MPIAIJ" 1942 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1943 { 1944 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1945 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1946 PetscErrorCode ierr; 1947 PetscInt i,j,cstart = mat->cmap->rstart; 1948 PetscReal sum = 0.0; 1949 MatScalar *v; 1950 1951 PetscFunctionBegin; 1952 if (aij->size == 1) { 1953 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1954 } else { 1955 if (type == NORM_FROBENIUS) { 1956 v = amat->a; 1957 for (i=0; i<amat->nz; i++) { 1958 #if defined(PETSC_USE_COMPLEX) 1959 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1960 #else 1961 sum += (*v)*(*v); v++; 1962 #endif 1963 } 1964 v = bmat->a; 1965 for (i=0; i<bmat->nz; i++) { 1966 #if defined(PETSC_USE_COMPLEX) 1967 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1968 #else 1969 sum += (*v)*(*v); v++; 1970 #endif 1971 } 1972 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1973 *norm = sqrt(*norm); 1974 } else if (type == NORM_1) { /* max column norm */ 1975 PetscReal *tmp,*tmp2; 1976 PetscInt *jj,*garray = aij->garray; 1977 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1978 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1979 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1980 *norm = 0.0; 1981 v = amat->a; jj = amat->j; 1982 for (j=0; j<amat->nz; j++) { 1983 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1984 } 1985 v = bmat->a; jj = bmat->j; 1986 for (j=0; j<bmat->nz; j++) { 1987 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1988 } 1989 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1990 for (j=0; j<mat->cmap->N; j++) { 1991 if (tmp2[j] > *norm) *norm = tmp2[j]; 1992 } 1993 ierr = PetscFree(tmp);CHKERRQ(ierr); 1994 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1995 } else if (type == NORM_INFINITY) { /* max row norm */ 1996 PetscReal ntemp = 0.0; 1997 for (j=0; j<aij->A->rmap->n; j++) { 1998 v = amat->a + amat->i[j]; 1999 sum = 0.0; 2000 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2001 sum += PetscAbsScalar(*v); v++; 2002 } 2003 v = bmat->a + bmat->i[j]; 2004 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2005 sum += PetscAbsScalar(*v); v++; 2006 } 2007 if (sum > ntemp) ntemp = sum; 2008 } 2009 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 2010 } else { 2011 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 2012 } 2013 } 2014 PetscFunctionReturn(0); 2015 } 2016 2017 #undef __FUNCT__ 2018 #define __FUNCT__ "MatTranspose_MPIAIJ" 2019 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2020 { 2021 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2022 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2023 PetscErrorCode ierr; 2024 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 2025 PetscInt cstart=A->cmap->rstart,ncol; 2026 Mat B; 2027 MatScalar *array; 2028 2029 PetscFunctionBegin; 2030 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2031 2032 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 2033 ai = Aloc->i; aj = Aloc->j; 2034 bi = Bloc->i; bj = Bloc->j; 2035 if (reuse == 
MAT_INITIAL_MATRIX || *matout == A) { 2036 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 2037 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2038 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 2039 for (i=0; i<ai[ma]; i++){ 2040 d_nnz[aj[i]] ++; 2041 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2042 } 2043 2044 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2045 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2046 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2047 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 2048 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2049 } else { 2050 B = *matout; 2051 } 2052 2053 /* copy over the A part */ 2054 array = Aloc->a; 2055 row = A->rmap->rstart; 2056 for (i=0; i<ma; i++) { 2057 ncol = ai[i+1]-ai[i]; 2058 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2059 row++; array += ncol; aj += ncol; 2060 } 2061 aj = Aloc->j; 2062 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2063 2064 /* copy over the B part */ 2065 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2066 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2067 array = Bloc->a; 2068 row = A->rmap->rstart; 2069 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 2070 cols_tmp = cols; 2071 for (i=0; i<mb; i++) { 2072 ncol = bi[i+1]-bi[i]; 2073 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2074 row++; array += ncol; cols_tmp += ncol; 2075 } 2076 ierr = PetscFree(cols);CHKERRQ(ierr); 2077 2078 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2079 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2080 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2081 *matout = B; 2082 } else { 2083 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2084 } 2085 PetscFunctionReturn(0); 2086 } 2087 2088 #undef __FUNCT__ 2089 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2090 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2091 { 2092 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2093 Mat a = aij->A,b = aij->B; 2094 PetscErrorCode ierr; 2095 PetscInt s1,s2,s3; 2096 2097 PetscFunctionBegin; 2098 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2099 if (rr) { 2100 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2101 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2102 /* Overlap communication with computation. 
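        The begin/end split of the scatter lets the communication of rr into aij->lvec
        proceed while the purely local scaling below runs.  Schematically, this is the
        pattern used in the rest of this routine (a sketch, not additional code to execute):

           VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);
           ... local work that does not need lvec: left-scale the off-diagonal block b
               and diagonally scale the diagonal block a ...
           VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);
           ... lvec now holds the ghosted entries of rr; right-scale the off-diagonal block b ...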
*/ 2103 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2104 } 2105 if (ll) { 2106 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2107 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2108 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2109 } 2110 /* scale the diagonal block */ 2111 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2112 2113 if (rr) { 2114 /* Do a scatter end and then right scale the off-diagonal block */ 2115 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2116 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2117 } 2118 2119 PetscFunctionReturn(0); 2120 } 2121 2122 #undef __FUNCT__ 2123 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 2124 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 2125 { 2126 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2127 PetscErrorCode ierr; 2128 2129 PetscFunctionBegin; 2130 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 2131 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 2132 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 2133 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 2134 PetscFunctionReturn(0); 2135 } 2136 #undef __FUNCT__ 2137 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2138 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2139 { 2140 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2141 PetscErrorCode ierr; 2142 2143 PetscFunctionBegin; 2144 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2145 PetscFunctionReturn(0); 2146 } 2147 2148 #undef __FUNCT__ 2149 #define __FUNCT__ "MatEqual_MPIAIJ" 2150 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2151 { 2152 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2153 Mat a,b,c,d; 2154 PetscBool flg; 2155 PetscErrorCode ierr; 2156 2157 PetscFunctionBegin; 2158 a = matA->A; b = matA->B; 2159 c = matB->A; d = matB->B; 2160 2161 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2162 if (flg) { 2163 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2164 } 2165 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2166 PetscFunctionReturn(0); 2167 } 2168 2169 #undef __FUNCT__ 2170 #define __FUNCT__ "MatCopy_MPIAIJ" 2171 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2172 { 2173 PetscErrorCode ierr; 2174 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2175 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2176 2177 PetscFunctionBegin; 2178 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2179 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2180 /* because of the column compression in the off-processor part of the matrix a->B, 2181 the number of columns in a->B and b->B may be different, hence we cannot call 2182 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2183 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2184 then copying the submatrices */ 2185 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2186 } else { 2187 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2188 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2189 } 2190 PetscFunctionReturn(0); 2191 } 2192 2193 #undef __FUNCT__ 2194 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 2195 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 2196 { 2197 PetscErrorCode ierr; 2198 2199 PetscFunctionBegin; 2200 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2201 PetscFunctionReturn(0); 2202 } 2203 2204 #undef __FUNCT__ 2205 #define __FUNCT__ "MatAXPY_MPIAIJ" 2206 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2207 { 2208 PetscErrorCode ierr; 2209 PetscInt i; 2210 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2211 PetscBLASInt bnz,one=1; 2212 Mat_SeqAIJ *x,*y; 2213 2214 PetscFunctionBegin; 2215 if (str == SAME_NONZERO_PATTERN) { 2216 PetscScalar alpha = a; 2217 x = (Mat_SeqAIJ *)xx->A->data; 2218 y = (Mat_SeqAIJ *)yy->A->data; 2219 bnz = PetscBLASIntCast(x->nz); 2220 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2221 x = (Mat_SeqAIJ *)xx->B->data; 2222 y = (Mat_SeqAIJ *)yy->B->data; 2223 bnz = PetscBLASIntCast(x->nz); 2224 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2225 } else if (str == SUBSET_NONZERO_PATTERN) { 2226 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2227 2228 x = (Mat_SeqAIJ *)xx->B->data; 2229 y = (Mat_SeqAIJ *)yy->B->data; 2230 if (y->xtoy && y->XtoY != xx->B) { 2231 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2232 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2233 } 2234 if (!y->xtoy) { /* get xtoy */ 2235 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2236 y->XtoY = xx->B; 2237 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2238 } 2239 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2240 } else { 2241 Mat B; 2242 PetscInt *nnz_d,*nnz_o; 2243 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2244 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2245 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2246 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2247 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2248 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2249 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2250 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr); 2251 ierr = MatMPIAIJSetPreallocation(B,PETSC_NULL,nnz_d,PETSC_NULL,nnz_o);CHKERRQ(ierr); 2252 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2253 ierr = MatHeaderReplace(Y,B); 2254 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2255 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2256 } 2257 PetscFunctionReturn(0); 2258 } 2259 2260 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2261 2262 #undef __FUNCT__ 2263 #define __FUNCT__ "MatConjugate_MPIAIJ" 2264 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2265 { 2266 #if defined(PETSC_USE_COMPLEX) 2267 PetscErrorCode ierr; 2268 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2269 2270 PetscFunctionBegin; 2271 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2272 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2273 #else 2274 PetscFunctionBegin; 2275 #endif 2276 PetscFunctionReturn(0); 
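  /*
     For complex builds the two calls above conjugate the diagonal block aij->A and the
     off-diagonal block aij->B in place; for real builds the routine is a no-op.  Caller-side
     usage is simply (an illustration only, for some assembled MPIAIJ matrix A):

        ierr = MatConjugate(A);CHKERRQ(ierr);
  */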
2277 } 2278 2279 #undef __FUNCT__ 2280 #define __FUNCT__ "MatRealPart_MPIAIJ" 2281 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2282 { 2283 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2284 PetscErrorCode ierr; 2285 2286 PetscFunctionBegin; 2287 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2288 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2289 PetscFunctionReturn(0); 2290 } 2291 2292 #undef __FUNCT__ 2293 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2294 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2295 { 2296 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2297 PetscErrorCode ierr; 2298 2299 PetscFunctionBegin; 2300 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2301 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2302 PetscFunctionReturn(0); 2303 } 2304 2305 #ifdef PETSC_HAVE_PBGL 2306 2307 #include <boost/parallel/mpi/bsp_process_group.hpp> 2308 #include <boost/graph/distributed/ilu_default_graph.hpp> 2309 #include <boost/graph/distributed/ilu_0_block.hpp> 2310 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2311 #include <boost/graph/distributed/petsc/interface.hpp> 2312 #include <boost/multi_array.hpp> 2313 #include <boost/parallel/distributed_property_map->hpp> 2314 2315 #undef __FUNCT__ 2316 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2317 /* 2318 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2319 */ 2320 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2321 { 2322 namespace petsc = boost::distributed::petsc; 2323 2324 namespace graph_dist = boost::graph::distributed; 2325 using boost::graph::distributed::ilu_default::process_group_type; 2326 using boost::graph::ilu_permuted; 2327 2328 PetscBool row_identity, col_identity; 2329 PetscContainer c; 2330 PetscInt m, n, M, N; 2331 PetscErrorCode ierr; 2332 2333 PetscFunctionBegin; 2334 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2335 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2336 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2337 if (!row_identity || !col_identity) { 2338 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2339 } 2340 2341 process_group_type pg; 2342 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2343 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2344 lgraph_type& level_graph = *lgraph_p; 2345 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2346 2347 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2348 ilu_permuted(level_graph); 2349 2350 /* put together the new matrix */ 2351 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2352 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2353 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2354 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2355 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2356 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2357 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2358 2359 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 2360 ierr = PetscContainerSetPointer(c, lgraph_p); 2361 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2362 ierr = PetscContainerDestroy(&c); 2363 PetscFunctionReturn(0); 2364 } 2365 2366 #undef __FUNCT__ 2367 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2368 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat 
B,Mat A, const MatFactorInfo *info) 2369 { 2370 PetscFunctionBegin; 2371 PetscFunctionReturn(0); 2372 } 2373 2374 #undef __FUNCT__ 2375 #define __FUNCT__ "MatSolve_MPIAIJ" 2376 /* 2377 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2378 */ 2379 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2380 { 2381 namespace graph_dist = boost::graph::distributed; 2382 2383 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2384 lgraph_type* lgraph_p; 2385 PetscContainer c; 2386 PetscErrorCode ierr; 2387 2388 PetscFunctionBegin; 2389 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2390 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2391 ierr = VecCopy(b, x);CHKERRQ(ierr); 2392 2393 PetscScalar* array_x; 2394 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2395 PetscInt sx; 2396 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2397 2398 PetscScalar* array_b; 2399 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2400 PetscInt sb; 2401 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2402 2403 lgraph_type& level_graph = *lgraph_p; 2404 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2405 2406 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2407 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2408 ref_x(array_x, boost::extents[num_vertices(graph)]); 2409 2410 typedef boost::iterator_property_map<array_ref_type::iterator, 2411 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2412 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2413 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2414 2415 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2416 2417 PetscFunctionReturn(0); 2418 } 2419 #endif 2420 2421 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2422 PetscInt nzlocal,nsends,nrecvs; 2423 PetscMPIInt *send_rank,*recv_rank; 2424 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2425 PetscScalar *sbuf_a,**rbuf_a; 2426 PetscErrorCode (*Destroy)(Mat); 2427 } Mat_Redundant; 2428 2429 #undef __FUNCT__ 2430 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2431 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2432 { 2433 PetscErrorCode ierr; 2434 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2435 PetscInt i; 2436 2437 PetscFunctionBegin; 2438 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2439 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2440 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2441 for (i=0; i<redund->nrecvs; i++){ 2442 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2443 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2444 } 2445 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2446 ierr = PetscFree(redund);CHKERRQ(ierr); 2447 PetscFunctionReturn(0); 2448 } 2449 2450 #undef __FUNCT__ 2451 #define __FUNCT__ "MatDestroy_MatRedundant" 2452 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2453 { 2454 PetscErrorCode ierr; 2455 PetscContainer container; 2456 Mat_Redundant *redund=PETSC_NULL; 2457 2458 PetscFunctionBegin; 2459 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2460 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2461 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2462 A->ops->destroy = redund->Destroy; 2463 ierr = 
PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2464 if (A->ops->destroy) { 2465 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2466 } 2467 PetscFunctionReturn(0); 2468 } 2469 2470 #undef __FUNCT__ 2471 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2472 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2473 { 2474 PetscMPIInt rank,size; 2475 MPI_Comm comm=((PetscObject)mat)->comm; 2476 PetscErrorCode ierr; 2477 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2478 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2479 PetscInt *rowrange=mat->rmap->range; 2480 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2481 Mat A=aij->A,B=aij->B,C=*matredundant; 2482 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2483 PetscScalar *sbuf_a; 2484 PetscInt nzlocal=a->nz+b->nz; 2485 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2486 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2487 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2488 MatScalar *aworkA,*aworkB; 2489 PetscScalar *vals; 2490 PetscMPIInt tag1,tag2,tag3,imdex; 2491 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2492 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2493 MPI_Status recv_status,*send_status; 2494 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2495 PetscInt **rbuf_j=PETSC_NULL; 2496 PetscScalar **rbuf_a=PETSC_NULL; 2497 Mat_Redundant *redund=PETSC_NULL; 2498 PetscContainer container; 2499 2500 PetscFunctionBegin; 2501 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2502 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2503 2504 if (reuse == MAT_REUSE_MATRIX) { 2505 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2506 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2507 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2508 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2509 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2510 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2511 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2512 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2513 2514 nsends = redund->nsends; 2515 nrecvs = redund->nrecvs; 2516 send_rank = redund->send_rank; 2517 recv_rank = redund->recv_rank; 2518 sbuf_nz = redund->sbuf_nz; 2519 rbuf_nz = redund->rbuf_nz; 2520 sbuf_j = redund->sbuf_j; 2521 sbuf_a = redund->sbuf_a; 2522 rbuf_j = redund->rbuf_j; 2523 rbuf_a = redund->rbuf_a; 2524 } 2525 2526 if (reuse == MAT_INITIAL_MATRIX){ 2527 PetscMPIInt subrank,subsize; 2528 PetscInt nleftover,np_subcomm; 2529 /* get the destination processors' id send_rank, nsends and nrecvs */ 2530 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2531 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2532 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2533 np_subcomm = size/nsubcomm; 2534 nleftover = size - nsubcomm*np_subcomm; 2535 nsends = 0; nrecvs = 0; 2536 for (i=0; i<size; i++){ /* i=rank*/ 2537 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2538 send_rank[nsends] = i; nsends++; 2539 recv_rank[nrecvs++] = i; 2540 } 2541 } 2542 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2543 i = size-nleftover-1; 2544 j = 0; 2545 while (j < nsubcomm - nleftover){ 2546 send_rank[nsends++] = i; 2547 i--; j++; 2548 } 2549 } 2550 2551 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2552 for (i=0; i<nleftover; i++){ 2553 recv_rank[nrecvs++] = size-nleftover+i; 2554 } 2555 } 2556 2557 /* allocate sbuf_j, sbuf_a */ 2558 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2559 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2560 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2561 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2562 2563 /* copy mat's local entries into the buffers */ 2564 if (reuse == MAT_INITIAL_MATRIX){ 2565 rownz_max = 0; 2566 rptr = sbuf_j; 2567 cols = sbuf_j + rend-rstart + 1; 2568 vals = sbuf_a; 2569 rptr[0] = 0; 2570 for (i=0; i<rend-rstart; i++){ 2571 row = i + rstart; 2572 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2573 ncols = nzA + nzB; 2574 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2575 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2576 /* load the column indices for this row into cols */ 2577 lwrite = 0; 2578 for (l=0; l<nzB; l++) { 2579 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2580 vals[lwrite] = aworkB[l]; 2581 cols[lwrite++] = ctmp; 2582 } 2583 } 2584 for (l=0; l<nzA; l++){ 2585 vals[lwrite] = aworkA[l]; 2586 cols[lwrite++] = cstart + cworkA[l]; 2587 } 2588 for (l=0; l<nzB; l++) { 2589 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2590 vals[lwrite] = aworkB[l]; 2591 cols[lwrite++] = ctmp; 2592 } 2593 } 2594 vals += ncols; 2595 cols += ncols; 2596 rptr[i+1] = rptr[i] + ncols; 2597 if (rownz_max < ncols) rownz_max = ncols; 2598 } 2599 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2600 } else { /* only copy matrix values into sbuf_a */ 2601 rptr = sbuf_j; 2602 vals = sbuf_a; 2603 rptr[0] = 0; 2604 for (i=0; i<rend-rstart; i++){ 2605 row = i + rstart; 2606 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2607 ncols = nzA + nzB; 2608 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2609 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2610 lwrite = 0; 2611 for (l=0; l<nzB; l++) { 2612 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2613 } 2614 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2615 for 
(l=0; l<nzB; l++) { 2616 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2617 } 2618 vals += ncols; 2619 rptr[i+1] = rptr[i] + ncols; 2620 } 2621 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2622 2623 /* send nzlocal to others, and recv other's nzlocal */ 2624 /*--------------------------------------------------*/ 2625 if (reuse == MAT_INITIAL_MATRIX){ 2626 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2627 s_waits2 = s_waits3 + nsends; 2628 s_waits1 = s_waits2 + nsends; 2629 r_waits1 = s_waits1 + nsends; 2630 r_waits2 = r_waits1 + nrecvs; 2631 r_waits3 = r_waits2 + nrecvs; 2632 } else { 2633 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2634 r_waits3 = s_waits3 + nsends; 2635 } 2636 2637 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2638 if (reuse == MAT_INITIAL_MATRIX){ 2639 /* get new tags to keep the communication clean */ 2640 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2641 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2642 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2643 2644 /* post receives of other's nzlocal */ 2645 for (i=0; i<nrecvs; i++){ 2646 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2647 } 2648 /* send nzlocal to others */ 2649 for (i=0; i<nsends; i++){ 2650 sbuf_nz[i] = nzlocal; 2651 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2652 } 2653 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2654 count = nrecvs; 2655 while (count) { 2656 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2657 recv_rank[imdex] = recv_status.MPI_SOURCE; 2658 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2659 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2660 2661 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2662 rbuf_nz[imdex] += i + 2; 2663 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2664 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2665 count--; 2666 } 2667 /* wait on sends of nzlocal */ 2668 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2669 /* send mat->i,j to others, and recv from other's */ 2670 /*------------------------------------------------*/ 2671 for (i=0; i<nsends; i++){ 2672 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2673 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2674 } 2675 /* wait on receives of mat->i,j */ 2676 /*------------------------------*/ 2677 count = nrecvs; 2678 while (count) { 2679 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2680 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2681 count--; 2682 } 2683 /* wait on sends of mat->i,j */ 2684 /*---------------------------*/ 2685 if (nsends) { 2686 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2687 } 2688 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2689 2690 /* post receives, send and receive mat->a */ 2691 
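  /*
     The value exchange below repeats the nonblocking pattern already used for the index
     exchange: post every MPI_Irecv first, start the MPI_Isend's, then drain completions
     with MPI_Waitany so each receive buffer can be checked as it arrives.  In outline
     (a sketch of the loops below, not additional code to execute):

        for (imdex=0; imdex<nrecvs; imdex++) MPI_Irecv(rbuf_a[imdex],...,tag3,comm,r_waits3+imdex);
        for (i=0; i<nsends; i++)             MPI_Isend(sbuf_a,nzlocal,...,tag3,comm,s_waits3+i);
        while (count--)                      MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);
  */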
/*----------------------------------------*/ 2692 for (imdex=0; imdex<nrecvs; imdex++) { 2693 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2694 } 2695 for (i=0; i<nsends; i++){ 2696 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2697 } 2698 count = nrecvs; 2699 while (count) { 2700 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2701 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2702 count--; 2703 } 2704 if (nsends) { 2705 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2706 } 2707 2708 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2709 2710 /* create redundant matrix */ 2711 /*-------------------------*/ 2712 if (reuse == MAT_INITIAL_MATRIX){ 2713 /* compute rownz_max for preallocation */ 2714 for (imdex=0; imdex<nrecvs; imdex++){ 2715 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2716 rptr = rbuf_j[imdex]; 2717 for (i=0; i<j; i++){ 2718 ncols = rptr[i+1] - rptr[i]; 2719 if (rownz_max < ncols) rownz_max = ncols; 2720 } 2721 } 2722 2723 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2724 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2725 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2726 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2727 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2728 } else { 2729 C = *matredundant; 2730 } 2731 2732 /* insert local matrix entries */ 2733 rptr = sbuf_j; 2734 cols = sbuf_j + rend-rstart + 1; 2735 vals = sbuf_a; 2736 for (i=0; i<rend-rstart; i++){ 2737 row = i + rstart; 2738 ncols = rptr[i+1] - rptr[i]; 2739 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2740 vals += ncols; 2741 cols += ncols; 2742 } 2743 /* insert received matrix entries */ 2744 for (imdex=0; imdex<nrecvs; imdex++){ 2745 rstart = rowrange[recv_rank[imdex]]; 2746 rend = rowrange[recv_rank[imdex]+1]; 2747 rptr = rbuf_j[imdex]; 2748 cols = rbuf_j[imdex] + rend-rstart + 1; 2749 vals = rbuf_a[imdex]; 2750 for (i=0; i<rend-rstart; i++){ 2751 row = i + rstart; 2752 ncols = rptr[i+1] - rptr[i]; 2753 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2754 vals += ncols; 2755 cols += ncols; 2756 } 2757 } 2758 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2759 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2760 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2761 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2762 if (reuse == MAT_INITIAL_MATRIX) { 2763 PetscContainer container; 2764 *matredundant = C; 2765 /* create a supporting struct and attach it to C for reuse */ 2766 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2767 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2768 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2769 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2770 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2771 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2772 2773 redund->nzlocal = nzlocal; 2774 redund->nsends = nsends; 2775 redund->nrecvs = nrecvs; 2776 
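  /*
     The Mat_Redundant struct filled in here caches the communication pattern and the
     send/receive buffers on C (via the PetscContainer composed above) so that a later call
     with MAT_REUSE_MATRIX can skip the setup and index exchange and only re-send the
     numerical values.  A caller-side sketch (an illustration only; nsubcomm, subcomm and
     mlocal are assumed to be provided by the caller):

        Mat Cred = PETSC_NULL;
        ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal,MAT_INITIAL_MATRIX,&Cred);CHKERRQ(ierr);
        ... change numerical values of mat without changing its nonzero pattern ...
        ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal,MAT_REUSE_MATRIX,&Cred);CHKERRQ(ierr);
  */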
redund->send_rank = send_rank; 2777 redund->recv_rank = recv_rank; 2778 redund->sbuf_nz = sbuf_nz; 2779 redund->rbuf_nz = rbuf_nz; 2780 redund->sbuf_j = sbuf_j; 2781 redund->sbuf_a = sbuf_a; 2782 redund->rbuf_j = rbuf_j; 2783 redund->rbuf_a = rbuf_a; 2784 2785 redund->Destroy = C->ops->destroy; 2786 C->ops->destroy = MatDestroy_MatRedundant; 2787 } 2788 PetscFunctionReturn(0); 2789 } 2790 2791 #undef __FUNCT__ 2792 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2793 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2794 { 2795 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2796 PetscErrorCode ierr; 2797 PetscInt i,*idxb = 0; 2798 PetscScalar *va,*vb; 2799 Vec vtmp; 2800 2801 PetscFunctionBegin; 2802 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2803 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2804 if (idx) { 2805 for (i=0; i<A->rmap->n; i++) { 2806 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2807 } 2808 } 2809 2810 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2811 if (idx) { 2812 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2813 } 2814 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2815 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2816 2817 for (i=0; i<A->rmap->n; i++){ 2818 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2819 va[i] = vb[i]; 2820 if (idx) idx[i] = a->garray[idxb[i]]; 2821 } 2822 } 2823 2824 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2825 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2826 ierr = PetscFree(idxb);CHKERRQ(ierr); 2827 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2828 PetscFunctionReturn(0); 2829 } 2830 2831 #undef __FUNCT__ 2832 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2833 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2834 { 2835 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2836 PetscErrorCode ierr; 2837 PetscInt i,*idxb = 0; 2838 PetscScalar *va,*vb; 2839 Vec vtmp; 2840 2841 PetscFunctionBegin; 2842 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2843 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2844 if (idx) { 2845 for (i=0; i<A->rmap->n; i++) { /* loop over the locally owned rows, as in MatGetRowMaxAbs_MPIAIJ() above */ 2846 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2847 } 2848 } 2849 2850 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2851 if (idx) { 2852 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2853 } 2854 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2855 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2856 2857 for (i=0; i<A->rmap->n; i++){ 2858 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2859 va[i] = vb[i]; 2860 if (idx) idx[i] = a->garray[idxb[i]]; 2861 } 2862 } 2863 2864 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2865 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2866 ierr = PetscFree(idxb);CHKERRQ(ierr); 2867 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2868 PetscFunctionReturn(0); 2869 } 2870 2871 #undef __FUNCT__ 2872 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2873 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2874 { 2875 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2876 PetscInt n = A->rmap->n; 2877 PetscInt cstart = A->cmap->rstart; 2878 PetscInt *cmap = mat->garray; 2879 PetscInt *diagIdx, *offdiagIdx; 2880 Vec diagV, offdiagV; 2881 PetscScalar *a, *diagA, *offdiagA; 2882 PetscInt r; 2883 PetscErrorCode ierr; 2884 2885 PetscFunctionBegin; 2886 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2887 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2888 ierr = VecCreateSeq(((PetscObject)A)->comm, n, 
&offdiagV);CHKERRQ(ierr); 2889 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2890 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2891 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2892 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2893 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2894 for(r = 0; r < n; ++r) { 2895 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2896 a[r] = diagA[r]; 2897 idx[r] = cstart + diagIdx[r]; 2898 } else { 2899 a[r] = offdiagA[r]; 2900 idx[r] = cmap[offdiagIdx[r]]; 2901 } 2902 } 2903 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2904 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2905 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2906 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2907 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2908 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2909 PetscFunctionReturn(0); 2910 } 2911 2912 #undef __FUNCT__ 2913 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2914 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2915 { 2916 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2917 PetscInt n = A->rmap->n; 2918 PetscInt cstart = A->cmap->rstart; 2919 PetscInt *cmap = mat->garray; 2920 PetscInt *diagIdx, *offdiagIdx; 2921 Vec diagV, offdiagV; 2922 PetscScalar *a, *diagA, *offdiagA; 2923 PetscInt r; 2924 PetscErrorCode ierr; 2925 2926 PetscFunctionBegin; 2927 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2928 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2929 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2930 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2931 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2932 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2933 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2934 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2935 for(r = 0; r < n; ++r) { 2936 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2937 a[r] = diagA[r]; 2938 idx[r] = cstart + diagIdx[r]; 2939 } else { 2940 a[r] = offdiagA[r]; 2941 idx[r] = cmap[offdiagIdx[r]]; 2942 } 2943 } 2944 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2945 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2946 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2947 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2948 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2949 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2950 PetscFunctionReturn(0); 2951 } 2952 2953 #undef __FUNCT__ 2954 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2955 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat) 2956 { 2957 PetscErrorCode ierr; 2958 Mat *dummy; 2959 2960 PetscFunctionBegin; 2961 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2962 *newmat = *dummy; 2963 ierr = PetscFree(dummy);CHKERRQ(ierr); 2964 PetscFunctionReturn(0); 2965 } 2966 2967 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2968 /* -------------------------------------------------------------------*/ 2969 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2970 MatGetRow_MPIAIJ, 2971 MatRestoreRow_MPIAIJ, 2972 MatMult_MPIAIJ, 2973 /* 4*/ MatMultAdd_MPIAIJ, 2974 MatMultTranspose_MPIAIJ, 2975 MatMultTransposeAdd_MPIAIJ, 2976 #ifdef PETSC_HAVE_PBGL 2977 MatSolve_MPIAIJ, 2978 #else 2979 0, 2980 #endif 2981 0, 2982 0, 2983 /*10*/ 0, 2984 0, 2985 0, 2986 MatSOR_MPIAIJ, 2987 
MatTranspose_MPIAIJ, 2988 /*15*/ MatGetInfo_MPIAIJ, 2989 MatEqual_MPIAIJ, 2990 MatGetDiagonal_MPIAIJ, 2991 MatDiagonalScale_MPIAIJ, 2992 MatNorm_MPIAIJ, 2993 /*20*/ MatAssemblyBegin_MPIAIJ, 2994 MatAssemblyEnd_MPIAIJ, 2995 MatSetOption_MPIAIJ, 2996 MatZeroEntries_MPIAIJ, 2997 /*24*/ MatZeroRows_MPIAIJ, 2998 0, 2999 #ifdef PETSC_HAVE_PBGL 3000 0, 3001 #else 3002 0, 3003 #endif 3004 0, 3005 0, 3006 /*29*/ MatSetUpPreallocation_MPIAIJ, 3007 #ifdef PETSC_HAVE_PBGL 3008 0, 3009 #else 3010 0, 3011 #endif 3012 0, 3013 0, 3014 0, 3015 /*34*/ MatDuplicate_MPIAIJ, 3016 0, 3017 0, 3018 0, 3019 0, 3020 /*39*/ MatAXPY_MPIAIJ, 3021 MatGetSubMatrices_MPIAIJ, 3022 MatIncreaseOverlap_MPIAIJ, 3023 MatGetValues_MPIAIJ, 3024 MatCopy_MPIAIJ, 3025 /*44*/ MatGetRowMax_MPIAIJ, 3026 MatScale_MPIAIJ, 3027 0, 3028 0, 3029 MatZeroRowsColumns_MPIAIJ, 3030 /*49*/ MatSetBlockSize_MPIAIJ, 3031 0, 3032 0, 3033 0, 3034 0, 3035 /*54*/ MatFDColoringCreate_MPIAIJ, 3036 0, 3037 MatSetUnfactored_MPIAIJ, 3038 MatPermute_MPIAIJ, 3039 0, 3040 /*59*/ MatGetSubMatrix_MPIAIJ, 3041 MatDestroy_MPIAIJ, 3042 MatView_MPIAIJ, 3043 0, 3044 0, 3045 /*64*/ 0, 3046 0, 3047 0, 3048 0, 3049 0, 3050 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3051 MatGetRowMinAbs_MPIAIJ, 3052 0, 3053 MatSetColoring_MPIAIJ, 3054 #if defined(PETSC_HAVE_ADIC) 3055 MatSetValuesAdic_MPIAIJ, 3056 #else 3057 0, 3058 #endif 3059 MatSetValuesAdifor_MPIAIJ, 3060 /*75*/ MatFDColoringApply_AIJ, 3061 0, 3062 0, 3063 0, 3064 0, 3065 /*80*/ 0, 3066 0, 3067 0, 3068 /*83*/ MatLoad_MPIAIJ, 3069 0, 3070 0, 3071 0, 3072 0, 3073 0, 3074 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3075 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3076 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3077 MatPtAP_Basic, 3078 MatPtAPSymbolic_MPIAIJ, 3079 /*94*/ MatPtAPNumeric_MPIAIJ, 3080 0, 3081 0, 3082 0, 3083 0, 3084 /*99*/ 0, 3085 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3086 MatPtAPNumeric_MPIAIJ_MPIAIJ, 3087 MatConjugate_MPIAIJ, 3088 0, 3089 /*104*/MatSetValuesRow_MPIAIJ, 3090 MatRealPart_MPIAIJ, 3091 MatImaginaryPart_MPIAIJ, 3092 0, 3093 0, 3094 /*109*/0, 3095 MatGetRedundantMatrix_MPIAIJ, 3096 MatGetRowMin_MPIAIJ, 3097 0, 3098 0, 3099 /*114*/MatGetSeqNonzerostructure_MPIAIJ, 3100 0, 3101 0, 3102 0, 3103 0, 3104 /*119*/0, 3105 0, 3106 0, 3107 0, 3108 MatGetMultiProcBlock_MPIAIJ, 3109 /*124*/MatFindNonZeroRows_MPIAIJ, 3110 MatGetColumnNorms_MPIAIJ, 3111 0, 3112 0, 3113 MatGetSubMatricesParallel_MPIAIJ 3114 }; 3115 3116 /* ----------------------------------------------------------------------------------------*/ 3117 3118 EXTERN_C_BEGIN 3119 #undef __FUNCT__ 3120 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3121 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3122 { 3123 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3124 PetscErrorCode ierr; 3125 3126 PetscFunctionBegin; 3127 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3128 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3129 PetscFunctionReturn(0); 3130 } 3131 EXTERN_C_END 3132 3133 EXTERN_C_BEGIN 3134 #undef __FUNCT__ 3135 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3136 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3137 { 3138 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3139 PetscErrorCode ierr; 3140 3141 PetscFunctionBegin; 3142 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3143 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3144 PetscFunctionReturn(0); 3145 } 3146 EXTERN_C_END 3147 3148 EXTERN_C_BEGIN 3149 #undef __FUNCT__ 3150 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3151 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt 
o_nnz[]) 3152 { 3153 Mat_MPIAIJ *b; 3154 PetscErrorCode ierr; 3155 PetscInt i; 3156 3157 PetscFunctionBegin; 3158 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3159 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3160 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3161 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3162 3163 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3164 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3165 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3166 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3167 if (d_nnz) { 3168 for (i=0; i<B->rmap->n; i++) { 3169 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3170 } 3171 } 3172 if (o_nnz) { 3173 for (i=0; i<B->rmap->n; i++) { 3174 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3175 } 3176 } 3177 b = (Mat_MPIAIJ*)B->data; 3178 3179 if (!B->preallocated) { 3180 /* Explicitly create 2 MATSEQAIJ matrices. */ 3181 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3182 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3183 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3184 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3185 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3186 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3187 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3188 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3189 } 3190 3191 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3192 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3193 B->preallocated = PETSC_TRUE; 3194 PetscFunctionReturn(0); 3195 } 3196 EXTERN_C_END 3197 3198 #undef __FUNCT__ 3199 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3200 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3201 { 3202 Mat mat; 3203 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3204 PetscErrorCode ierr; 3205 3206 PetscFunctionBegin; 3207 *newmat = 0; 3208 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3209 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3210 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3211 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3212 a = (Mat_MPIAIJ*)mat->data; 3213 3214 mat->factortype = matin->factortype; 3215 mat->rmap->bs = matin->rmap->bs; 3216 mat->assembled = PETSC_TRUE; 3217 mat->insertmode = NOT_SET_VALUES; 3218 mat->preallocated = PETSC_TRUE; 3219 3220 a->size = oldmat->size; 3221 a->rank = oldmat->rank; 3222 a->donotstash = oldmat->donotstash; 3223 a->roworiented = oldmat->roworiented; 3224 a->rowindices = 0; 3225 a->rowvalues = 0; 3226 a->getrowactive = PETSC_FALSE; 3227 3228 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3229 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3230 3231 if (oldmat->colmap) { 3232 #if defined (PETSC_USE_CTABLE) 3233 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3234 #else 3235 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3236 ierr = 
PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3237 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3238 #endif 3239 } else a->colmap = 0; 3240 if (oldmat->garray) { 3241 PetscInt len; 3242 len = oldmat->B->cmap->n; 3243 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3244 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3245 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3246 } else a->garray = 0; 3247 3248 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3249 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3250 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3251 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3252 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3253 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3254 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3255 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3256 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3257 *newmat = mat; 3258 PetscFunctionReturn(0); 3259 } 3260 3261 3262 3263 #undef __FUNCT__ 3264 #define __FUNCT__ "MatLoad_MPIAIJ" 3265 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3266 { 3267 PetscScalar *vals,*svals; 3268 MPI_Comm comm = ((PetscObject)viewer)->comm; 3269 PetscErrorCode ierr; 3270 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3271 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3272 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3273 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 3274 PetscInt cend,cstart,n,*rowners,sizesset=1; 3275 int fd; 3276 3277 PetscFunctionBegin; 3278 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3279 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3280 if (!rank) { 3281 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3282 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3283 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3284 } 3285 3286 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3287 3288 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3289 M = header[1]; N = header[2]; 3290 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3291 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3292 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3293 3294 /* If global sizes are set, check if they are consistent with that given in the file */ 3295 if (sizesset) { 3296 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3297 } 3298 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3299 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3300 3301 /* determine ownership of all rows */ 3302 if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */ 3303 else m = newMat->rmap->n; /* Set by user */ 3304 3305 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3306 ierr = 
MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3307 3308 /* First process needs enough room for process with most rows */ 3309 if (!rank) { 3310 mmax = rowners[1]; 3311 for (i=2; i<size; i++) { 3312 mmax = PetscMax(mmax,rowners[i]); 3313 } 3314 } else mmax = m; 3315 3316 rowners[0] = 0; 3317 for (i=2; i<=size; i++) { 3318 rowners[i] += rowners[i-1]; 3319 } 3320 rstart = rowners[rank]; 3321 rend = rowners[rank+1]; 3322 3323 /* distribute row lengths to all processors */ 3324 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3325 if (!rank) { 3326 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3327 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3328 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3329 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3330 for (j=0; j<m; j++) { 3331 procsnz[0] += ourlens[j]; 3332 } 3333 for (i=1; i<size; i++) { 3334 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3335 /* calculate the number of nonzeros on each processor */ 3336 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3337 procsnz[i] += rowlengths[j]; 3338 } 3339 ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3340 } 3341 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3342 } else { 3343 ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3344 } 3345 3346 if (!rank) { 3347 /* determine max buffer needed and allocate it */ 3348 maxnz = 0; 3349 for (i=0; i<size; i++) { 3350 maxnz = PetscMax(maxnz,procsnz[i]); 3351 } 3352 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3353 3354 /* read in my part of the matrix column indices */ 3355 nz = procsnz[0]; 3356 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3357 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3358 3359 /* read in every one elses and ship off */ 3360 for (i=1; i<size; i++) { 3361 nz = procsnz[i]; 3362 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3363 ierr = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3364 } 3365 ierr = PetscFree(cols);CHKERRQ(ierr); 3366 } else { 3367 /* determine buffer space needed for message */ 3368 nz = 0; 3369 for (i=0; i<m; i++) { 3370 nz += ourlens[i]; 3371 } 3372 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3373 3374 /* receive message of column indices*/ 3375 ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3376 } 3377 3378 /* determine column ownership if matrix is not square */ 3379 if (N != M) { 3380 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3381 else n = newMat->cmap->n; 3382 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3383 cstart = cend - n; 3384 } else { 3385 cstart = rstart; 3386 cend = rend; 3387 n = cend - cstart; 3388 } 3389 3390 /* loop over local rows, determining number of off diagonal entries */ 3391 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3392 jj = 0; 3393 for (i=0; i<m; i++) { 3394 for (j=0; j<ourlens[i]; j++) { 3395 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3396 jj++; 3397 } 3398 } 3399 3400 for (i=0; i<m; i++) { 3401 ourlens[i] -= offlens[i]; 3402 } 3403 if (!sizesset) { 3404 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3405 } 3406 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3407 3408 for (i=0; i<m; i++) { 3409 ourlens[i] += offlens[i]; 3410 } 3411 3412 if (!rank) { 3413 
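    /* rank 0 reads the numerical values for its own rows, inserts them, and then
       reads and ships each remaining process its block of values; every other
       rank receives its values in the else branch below */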
ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3414 3415 /* read in my part of the matrix numerical values */ 3416 nz = procsnz[0]; 3417 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3418 3419 /* insert into matrix */ 3420 jj = rstart; 3421 smycols = mycols; 3422 svals = vals; 3423 for (i=0; i<m; i++) { 3424 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3425 smycols += ourlens[i]; 3426 svals += ourlens[i]; 3427 jj++; 3428 } 3429 3430 /* read in other processors and ship out */ 3431 for (i=1; i<size; i++) { 3432 nz = procsnz[i]; 3433 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3434 ierr = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3435 } 3436 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3437 } else { 3438 /* receive numeric values */ 3439 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3440 3441 /* receive message of values*/ 3442 ierr = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3443 3444 /* insert into matrix */ 3445 jj = rstart; 3446 smycols = mycols; 3447 svals = vals; 3448 for (i=0; i<m; i++) { 3449 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3450 smycols += ourlens[i]; 3451 svals += ourlens[i]; 3452 jj++; 3453 } 3454 } 3455 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3456 ierr = PetscFree(vals);CHKERRQ(ierr); 3457 ierr = PetscFree(mycols);CHKERRQ(ierr); 3458 ierr = PetscFree(rowners);CHKERRQ(ierr); 3459 3460 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3461 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3462 PetscFunctionReturn(0); 3463 } 3464 3465 #undef __FUNCT__ 3466 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3467 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3468 { 3469 PetscErrorCode ierr; 3470 IS iscol_local; 3471 PetscInt csize; 3472 3473 PetscFunctionBegin; 3474 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3475 if (call == MAT_REUSE_MATRIX) { 3476 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3477 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3478 } else { 3479 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3480 } 3481 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3482 if (call == MAT_INITIAL_MATRIX) { 3483 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3484 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3485 } 3486 PetscFunctionReturn(0); 3487 } 3488 3489 #undef __FUNCT__ 3490 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3491 /* 3492 Not great since it makes two copies of the submatrix, first an SeqAIJ 3493 in local and then by concatenating the local matrices the end result. 3494 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3495 3496 Note: This requires a sequential iscol with all indices. 
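      For reference, the calling pattern used by MatGetSubMatrix_MPIAIJ() above is
      roughly (an illustrative sketch of the existing calls, not additional API):

         ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
         ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);   /* every process gets ALL column indices */
         ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);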
3497 */ 3498 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3499 { 3500 PetscErrorCode ierr; 3501 PetscMPIInt rank,size; 3502 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3503 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3504 Mat *local,M,Mreuse; 3505 MatScalar *vwork,*aa; 3506 MPI_Comm comm = ((PetscObject)mat)->comm; 3507 Mat_SeqAIJ *aij; 3508 3509 3510 PetscFunctionBegin; 3511 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3512 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3513 3514 if (call == MAT_REUSE_MATRIX) { 3515 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3516 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3517 local = &Mreuse; 3518 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3519 } else { 3520 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3521 Mreuse = *local; 3522 ierr = PetscFree(local);CHKERRQ(ierr); 3523 } 3524 3525 /* 3526 m - number of local rows 3527 n - number of columns (same on all processors) 3528 rstart - first row in new global matrix generated 3529 */ 3530 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3531 if (call == MAT_INITIAL_MATRIX) { 3532 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3533 ii = aij->i; 3534 jj = aij->j; 3535 3536 /* 3537 Determine the number of non-zeros in the diagonal and off-diagonal 3538 portions of the matrix in order to do correct preallocation 3539 */ 3540 3541 /* first get start and end of "diagonal" columns */ 3542 if (csize == PETSC_DECIDE) { 3543 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3544 if (mglobal == n) { /* square matrix */ 3545 nlocal = m; 3546 } else { 3547 nlocal = n/size + ((n % size) > rank); 3548 } 3549 } else { 3550 nlocal = csize; 3551 } 3552 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3553 rstart = rend - nlocal; 3554 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3555 3556 /* next, compute all the lengths */ 3557 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3558 olens = dlens + m; 3559 for (i=0; i<m; i++) { 3560 jend = ii[i+1] - ii[i]; 3561 olen = 0; 3562 dlen = 0; 3563 for (j=0; j<jend; j++) { 3564 if (*jj < rstart || *jj >= rend) olen++; 3565 else dlen++; 3566 jj++; 3567 } 3568 olens[i] = olen; 3569 dlens[i] = dlen; 3570 } 3571 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3572 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3573 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3574 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3575 ierr = PetscFree(dlens);CHKERRQ(ierr); 3576 } else { 3577 PetscInt ml,nl; 3578 3579 M = *newmat; 3580 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3581 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3582 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3583 /* 3584 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3585 rather than the slower MatSetValues(). 
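      Presumably this works because, with was_assembled set and assembled cleared,
      MatSetValues_MPIAIJ() inserts directly into the already-preallocated diagonal
      and off-diagonal blocks instead of triggering a fresh assembly.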
 */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row = rstart + i;
    nz  = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart,cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscScalar    *values;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);

  ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);

#if defined(PETSC_USE_DEBUG)
  for (i=0; i<m; i++) {
    nnz = Ii[i+1] - Ii[i];
    JJ  = J + Ii[i];
    if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
    if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
    if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
  }
#endif

  for (i=0; i<m; i++) {
    nnz     = Ii[i+1] - Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    d       = 0;
    for (j=0; j<nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  if (v) values = (PetscScalar*)v;
  else {
    ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    ii   = i + rstart;
    nnz  = Ii[i+1] - Ii[i];
    ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3675 } 3676 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3677 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3678 3679 if (!v) { 3680 ierr = PetscFree(values);CHKERRQ(ierr); 3681 } 3682 PetscFunctionReturn(0); 3683 } 3684 EXTERN_C_END 3685 3686 #undef __FUNCT__ 3687 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3688 /*@ 3689 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3690 (the default parallel PETSc format). 3691 3692 Collective on MPI_Comm 3693 3694 Input Parameters: 3695 + B - the matrix 3696 . i - the indices into j for the start of each local row (starts with zero) 3697 . j - the column indices for each local row (starts with zero) 3698 - v - optional values in the matrix 3699 3700 Level: developer 3701 3702 Notes: 3703 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3704 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3705 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3706 3707 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3708 3709 The format which is used for the sparse matrix input, is equivalent to a 3710 row-major ordering.. i.e for the following matrix, the input data expected is 3711 as shown: 3712 3713 1 0 0 3714 2 0 3 P0 3715 ------- 3716 4 5 6 P1 3717 3718 Process0 [P0]: rows_owned=[0,1] 3719 i = {0,1,3} [size = nrow+1 = 2+1] 3720 j = {0,0,2} [size = nz = 6] 3721 v = {1,2,3} [size = nz = 6] 3722 3723 Process1 [P1]: rows_owned=[2] 3724 i = {0,3} [size = nrow+1 = 1+1] 3725 j = {0,1,2} [size = nz = 6] 3726 v = {4,5,6} [size = nz = 6] 3727 3728 .keywords: matrix, aij, compressed row, sparse, parallel 3729 3730 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3731 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3732 @*/ 3733 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3734 { 3735 PetscErrorCode ierr; 3736 3737 PetscFunctionBegin; 3738 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3739 PetscFunctionReturn(0); 3740 } 3741 3742 #undef __FUNCT__ 3743 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3744 /*@C 3745 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3746 (the default parallel PETSc format). For good matrix assembly performance 3747 the user should preallocate the matrix storage by setting the parameters 3748 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3749 performance can be increased by more than a factor of 50. 3750 3751 Collective on MPI_Comm 3752 3753 Input Parameters: 3754 + A - the matrix 3755 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3756 (same value is used for all local rows) 3757 . d_nnz - array containing the number of nonzeros in the various rows of the 3758 DIAGONAL portion of the local submatrix (possibly different for each row) 3759 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3760 The size of this array is equal to the number of local rows, i.e 'm'. 3761 You must leave room for the diagonal entry even if it is zero. 3762 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3763 submatrix (same value is used for all local rows). 3764 - o_nnz - array containing the number of nonzeros in the various rows of the 3765 OFF-DIAGONAL portion of the local submatrix (possibly different for 3766 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3767 structure. The size of this array is equal to the number 3768 of local rows, i.e 'm'. 3769 3770 If the *_nnz parameter is given then the *_nz parameter is ignored 3771 3772 The AIJ format (also called the Yale sparse matrix format or 3773 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3774 storage. The stored row and column indices begin with zero. 3775 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3776 3777 The parallel matrix is partitioned such that the first m0 rows belong to 3778 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3779 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3780 3781 The DIAGONAL portion of the local submatrix of a processor can be defined 3782 as the submatrix which is obtained by extraction the part corresponding to 3783 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3784 first row that belongs to the processor, r2 is the last row belonging to 3785 the this processor, and c1-c2 is range of indices of the local part of a 3786 vector suitable for applying the matrix to. This is an mxn matrix. In the 3787 common case of a square matrix, the row and column ranges are the same and 3788 the DIAGONAL part is also square. The remaining portion of the local 3789 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3790 3791 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3792 3793 You can call MatGetInfo() to get information on how effective the preallocation was; 3794 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3795 You can also run with the option -info and look for messages with the string 3796 malloc in them to see if additional memory allocation was needed. 3797 3798 Example usage: 3799 3800 Consider the following 8x8 matrix with 34 non-zero values, that is 3801 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3802 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3803 as follows: 3804 3805 .vb 3806 1 2 0 | 0 3 0 | 0 4 3807 Proc0 0 5 6 | 7 0 0 | 8 0 3808 9 0 10 | 11 0 0 | 12 0 3809 ------------------------------------- 3810 13 0 14 | 15 16 17 | 0 0 3811 Proc1 0 18 0 | 19 20 21 | 0 0 3812 0 0 0 | 22 23 0 | 24 0 3813 ------------------------------------- 3814 Proc2 25 26 27 | 0 0 28 | 29 0 3815 30 0 0 | 31 32 33 | 0 34 3816 .ve 3817 3818 This can be represented as a collection of submatrices as: 3819 3820 .vb 3821 A B C 3822 D E F 3823 G H I 3824 .ve 3825 3826 Where the submatrices A,B,C are owned by proc0, D,E,F are 3827 owned by proc1, G,H,I are owned by proc2. 3828 3829 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3830 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3831 The 'M','N' parameters are 8,8, and have the same values on all procs. 3832 3833 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3834 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3835 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
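   In code, the preallocation is supplied through the recommended MatCreate()/MatSetType()
   construction path (an illustrative sketch; error checking omitted, and d_nnz/o_nnz are
   the per-row counts discussed below):
.vb
      MatCreate(comm,&A);
      MatSetSizes(A,m,n,M,N);
      MatSetType(A,MATMPIAIJ);
      MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve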
3836 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3837 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3838 matrix, ans [DF] as another SeqAIJ matrix. 3839 3840 When d_nz, o_nz parameters are specified, d_nz storage elements are 3841 allocated for every row of the local diagonal submatrix, and o_nz 3842 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3843 One way to choose d_nz and o_nz is to use the max nonzerors per local 3844 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3845 In this case, the values of d_nz,o_nz are: 3846 .vb 3847 proc0 : dnz = 2, o_nz = 2 3848 proc1 : dnz = 3, o_nz = 2 3849 proc2 : dnz = 1, o_nz = 4 3850 .ve 3851 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3852 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3853 for proc3. i.e we are using 12+15+10=37 storage locations to store 3854 34 values. 3855 3856 When d_nnz, o_nnz parameters are specified, the storage is specified 3857 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3858 In the above case the values for d_nnz,o_nnz are: 3859 .vb 3860 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3861 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3862 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3863 .ve 3864 Here the space allocated is sum of all the above values i.e 34, and 3865 hence pre-allocation is perfect. 3866 3867 Level: intermediate 3868 3869 .keywords: matrix, aij, compressed row, sparse, parallel 3870 3871 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3872 MPIAIJ, MatGetInfo() 3873 @*/ 3874 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3875 { 3876 PetscErrorCode ierr; 3877 3878 PetscFunctionBegin; 3879 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3880 PetscFunctionReturn(0); 3881 } 3882 3883 #undef __FUNCT__ 3884 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3885 /*@ 3886 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3887 CSR format the local rows. 3888 3889 Collective on MPI_Comm 3890 3891 Input Parameters: 3892 + comm - MPI communicator 3893 . m - number of local rows (Cannot be PETSC_DECIDE) 3894 . n - This value should be the same as the local size used in creating the 3895 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3896 calculated if N is given) For square matrices n is almost always m. 3897 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3898 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3899 . i - row indices 3900 . j - column indices 3901 - a - matrix values 3902 3903 Output Parameter: 3904 . mat - the matrix 3905 3906 Level: intermediate 3907 3908 Notes: 3909 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3910 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3911 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3912 3913 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3914 3915 The format which is used for the sparse matrix input, is equivalent to a 3916 row-major ordering.. 
i.e for the following matrix, the input data expected is 3917 as shown: 3918 3919 1 0 0 3920 2 0 3 P0 3921 ------- 3922 4 5 6 P1 3923 3924 Process0 [P0]: rows_owned=[0,1] 3925 i = {0,1,3} [size = nrow+1 = 2+1] 3926 j = {0,0,2} [size = nz = 6] 3927 v = {1,2,3} [size = nz = 6] 3928 3929 Process1 [P1]: rows_owned=[2] 3930 i = {0,3} [size = nrow+1 = 1+1] 3931 j = {0,1,2} [size = nz = 6] 3932 v = {4,5,6} [size = nz = 6] 3933 3934 .keywords: matrix, aij, compressed row, sparse, parallel 3935 3936 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3937 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3938 @*/ 3939 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3940 { 3941 PetscErrorCode ierr; 3942 3943 PetscFunctionBegin; 3944 if (i[0]) { 3945 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3946 } 3947 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3948 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3949 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3950 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3951 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3952 PetscFunctionReturn(0); 3953 } 3954 3955 #undef __FUNCT__ 3956 #define __FUNCT__ "MatCreateMPIAIJ" 3957 /*@C 3958 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3959 (the default parallel PETSc format). For good matrix assembly performance 3960 the user should preallocate the matrix storage by setting the parameters 3961 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3962 performance can be increased by more than a factor of 50. 3963 3964 Collective on MPI_Comm 3965 3966 Input Parameters: 3967 + comm - MPI communicator 3968 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3969 This value should be the same as the local size used in creating the 3970 y vector for the matrix-vector product y = Ax. 3971 . n - This value should be the same as the local size used in creating the 3972 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3973 calculated if N is given) For square matrices n is almost always m. 3974 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3975 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3976 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3977 (same value is used for all local rows) 3978 . d_nnz - array containing the number of nonzeros in the various rows of the 3979 DIAGONAL portion of the local submatrix (possibly different for each row) 3980 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3981 The size of this array is equal to the number of local rows, i.e 'm'. 3982 You must leave room for the diagonal entry even if it is zero. 3983 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3984 submatrix (same value is used for all local rows). 3985 - o_nnz - array containing the number of nonzeros in the various rows of the 3986 OFF-DIAGONAL portion of the local submatrix (possibly different for 3987 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3988 structure. The size of this array is equal to the number 3989 of local rows, i.e 'm'. 
3990 3991 Output Parameter: 3992 . A - the matrix 3993 3994 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3995 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3996 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3997 3998 Notes: 3999 If the *_nnz parameter is given then the *_nz parameter is ignored 4000 4001 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4002 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4003 storage requirements for this matrix. 4004 4005 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4006 processor than it must be used on all processors that share the object for 4007 that argument. 4008 4009 The user MUST specify either the local or global matrix dimensions 4010 (possibly both). 4011 4012 The parallel matrix is partitioned across processors such that the 4013 first m0 rows belong to process 0, the next m1 rows belong to 4014 process 1, the next m2 rows belong to process 2 etc.. where 4015 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4016 values corresponding to [m x N] submatrix. 4017 4018 The columns are logically partitioned with the n0 columns belonging 4019 to 0th partition, the next n1 columns belonging to the next 4020 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4021 4022 The DIAGONAL portion of the local submatrix on any given processor 4023 is the submatrix corresponding to the rows and columns m,n 4024 corresponding to the given processor. i.e diagonal matrix on 4025 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4026 etc. The remaining portion of the local submatrix [m x (N-n)] 4027 constitute the OFF-DIAGONAL portion. The example below better 4028 illustrates this concept. 4029 4030 For a square global matrix we define each processor's diagonal portion 4031 to be its local rows and the corresponding columns (a square submatrix); 4032 each processor's off-diagonal portion encompasses the remainder of the 4033 local matrix (a rectangular submatrix). 4034 4035 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4036 4037 When calling this routine with a single process communicator, a matrix of 4038 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4039 type of communicator, use the construction mechanism: 4040 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4041 4042 By default, this format uses inodes (identical nodes) when possible. 4043 We search for consecutive rows with the same nonzero structure, thereby 4044 reusing matrix information to achieve increased efficiency. 4045 4046 Options Database Keys: 4047 + -mat_no_inode - Do not use inodes 4048 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4049 - -mat_aij_oneindex - Internally use indexing starting at 1 4050 rather than 0. Note that when calling MatSetValues(), 4051 the user still MUST index entries starting at 0! 4052 4053 4054 Example usage: 4055 4056 Consider the following 8x8 matrix with 34 non-zero values, that is 4057 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4058 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 4059 as follows: 4060 4061 .vb 4062 1 2 0 | 0 3 0 | 0 4 4063 Proc0 0 5 6 | 7 0 0 | 8 0 4064 9 0 10 | 11 0 0 | 12 0 4065 ------------------------------------- 4066 13 0 14 | 15 16 17 | 0 0 4067 Proc1 0 18 0 | 19 20 21 | 0 0 4068 0 0 0 | 22 23 0 | 24 0 4069 ------------------------------------- 4070 Proc2 25 26 27 | 0 0 28 | 29 0 4071 30 0 0 | 31 32 33 | 0 34 4072 .ve 4073 4074 This can be represented as a collection of submatrices as: 4075 4076 .vb 4077 A B C 4078 D E F 4079 G H I 4080 .ve 4081 4082 Where the submatrices A,B,C are owned by proc0, D,E,F are 4083 owned by proc1, G,H,I are owned by proc2. 4084 4085 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4086 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4087 The 'M','N' parameters are 8,8, and have the same values on all procs. 4088 4089 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4090 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4091 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4092 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4093 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4094 matrix, ans [DF] as another SeqAIJ matrix. 4095 4096 When d_nz, o_nz parameters are specified, d_nz storage elements are 4097 allocated for every row of the local diagonal submatrix, and o_nz 4098 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4099 One way to choose d_nz and o_nz is to use the max nonzerors per local 4100 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4101 In this case, the values of d_nz,o_nz are: 4102 .vb 4103 proc0 : dnz = 2, o_nz = 2 4104 proc1 : dnz = 3, o_nz = 2 4105 proc2 : dnz = 1, o_nz = 4 4106 .ve 4107 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4108 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4109 for proc3. i.e we are using 12+15+10=37 storage locations to store 4110 34 values. 4111 4112 When d_nnz, o_nnz parameters are specified, the storage is specified 4113 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4114 In the above case the values for d_nnz,o_nnz are: 4115 .vb 4116 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4117 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4118 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4119 .ve 4120 Here the space allocated is sum of all the above values i.e 34, and 4121 hence pre-allocation is perfect. 
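   For the example above the same matrix could be obtained with a single call on every
   process (an illustrative sketch; m, n and the d_nnz, o_nnz arrays hold the per-process
   values listed above):
.vb
      MatCreateMPIAIJ(comm,m,n,8,8,0,d_nnz,0,o_nnz,&A);
.ve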
4122 4123 Level: intermediate 4124 4125 .keywords: matrix, aij, compressed row, sparse, parallel 4126 4127 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4128 MPIAIJ, MatCreateMPIAIJWithArrays() 4129 @*/ 4130 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4131 { 4132 PetscErrorCode ierr; 4133 PetscMPIInt size; 4134 4135 PetscFunctionBegin; 4136 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4137 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4138 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4139 if (size > 1) { 4140 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4141 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4142 } else { 4143 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4144 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4145 } 4146 PetscFunctionReturn(0); 4147 } 4148 4149 #undef __FUNCT__ 4150 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4151 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 4152 { 4153 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4154 4155 PetscFunctionBegin; 4156 *Ad = a->A; 4157 *Ao = a->B; 4158 *colmap = a->garray; 4159 PetscFunctionReturn(0); 4160 } 4161 4162 #undef __FUNCT__ 4163 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4164 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4165 { 4166 PetscErrorCode ierr; 4167 PetscInt i; 4168 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4169 4170 PetscFunctionBegin; 4171 if (coloring->ctype == IS_COLORING_GLOBAL) { 4172 ISColoringValue *allcolors,*colors; 4173 ISColoring ocoloring; 4174 4175 /* set coloring for diagonal portion */ 4176 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4177 4178 /* set coloring for off-diagonal portion */ 4179 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4180 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4181 for (i=0; i<a->B->cmap->n; i++) { 4182 colors[i] = allcolors[a->garray[i]]; 4183 } 4184 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4185 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4186 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4187 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4188 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4189 ISColoringValue *colors; 4190 PetscInt *larray; 4191 ISColoring ocoloring; 4192 4193 /* set coloring for diagonal portion */ 4194 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4195 for (i=0; i<a->A->cmap->n; i++) { 4196 larray[i] = i + A->cmap->rstart; 4197 } 4198 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4199 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4200 for (i=0; i<a->A->cmap->n; i++) { 4201 colors[i] = coloring->colors[larray[i]]; 4202 } 4203 ierr = PetscFree(larray);CHKERRQ(ierr); 4204 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4205 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4206 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4207 4208 /* set coloring for off-diagonal portion */ 4209 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4210 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4211 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4212 for (i=0; i<a->B->cmap->n; i++) { 4213 colors[i] = coloring->colors[larray[i]]; 4214 } 4215 ierr = PetscFree(larray);CHKERRQ(ierr); 4216 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4217 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4218 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4219 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4220 4221 PetscFunctionReturn(0); 4222 } 4223 4224 #if defined(PETSC_HAVE_ADIC) 4225 #undef __FUNCT__ 4226 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 4227 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 4228 { 4229 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4230 PetscErrorCode ierr; 4231 4232 PetscFunctionBegin; 4233 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 4234 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 4235 PetscFunctionReturn(0); 4236 } 4237 #endif 4238 4239 #undef __FUNCT__ 4240 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4241 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4242 { 4243 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4244 PetscErrorCode ierr; 4245 4246 PetscFunctionBegin; 4247 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4248 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4249 PetscFunctionReturn(0); 4250 } 4251 4252 #undef __FUNCT__ 4253 #define __FUNCT__ "MatMerge" 4254 /*@ 4255 MatMerge - Creates a single large PETSc matrix by concatinating sequential 4256 matrices from each processor 4257 4258 Collective on MPI_Comm 4259 4260 Input Parameters: 4261 + comm - the communicators the parallel matrix will live on 4262 . inmat - the input sequential matrices 4263 . n - number of local columns (or PETSC_DECIDE) 4264 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4265 4266 Output Parameter: 4267 . outmat - the parallel matrix generated 4268 4269 Level: advanced 4270 4271 Notes: The number of columns of the matrix in EACH processor MUST be the same. 
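   Typical usage (an illustrative sketch; Aloc is the MATSEQAIJ matrix each process has
   already assembled):
.vb
      MatMerge(comm,Aloc,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Apar);
.ve
   Note that inmat is destroyed by this routine once its rows have been copied into outmat.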
4272 4273 @*/ 4274 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4275 { 4276 PetscErrorCode ierr; 4277 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 4278 PetscInt *indx; 4279 PetscScalar *values; 4280 4281 PetscFunctionBegin; 4282 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4283 if (scall == MAT_INITIAL_MATRIX){ 4284 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 4285 if (n == PETSC_DECIDE){ 4286 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4287 } 4288 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4289 rstart -= m; 4290 4291 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4292 for (i=0;i<m;i++) { 4293 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4294 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4295 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4296 } 4297 /* This routine will ONLY return MPIAIJ type matrix */ 4298 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4299 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4300 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4301 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4302 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4303 4304 } else if (scall == MAT_REUSE_MATRIX){ 4305 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4306 } else { 4307 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4308 } 4309 4310 for (i=0;i<m;i++) { 4311 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4312 Ii = i + rstart; 4313 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4314 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4315 } 4316 ierr = MatDestroy(&inmat);CHKERRQ(ierr); 4317 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4318 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4319 4320 PetscFunctionReturn(0); 4321 } 4322 4323 #undef __FUNCT__ 4324 #define __FUNCT__ "MatFileSplit" 4325 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4326 { 4327 PetscErrorCode ierr; 4328 PetscMPIInt rank; 4329 PetscInt m,N,i,rstart,nnz; 4330 size_t len; 4331 const PetscInt *indx; 4332 PetscViewer out; 4333 char *name; 4334 Mat B; 4335 const PetscScalar *values; 4336 4337 PetscFunctionBegin; 4338 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4339 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4340 /* Should this be the type of the diagonal block of A? 
*/ 4341 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4342 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4343 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4344 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4345 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4346 for (i=0;i<m;i++) { 4347 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4348 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4349 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4350 } 4351 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4352 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4353 4354 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4355 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4356 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4357 sprintf(name,"%s.%d",outfile,rank); 4358 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4359 ierr = PetscFree(name); 4360 ierr = MatView(B,out);CHKERRQ(ierr); 4361 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4362 ierr = MatDestroy(&B);CHKERRQ(ierr); 4363 PetscFunctionReturn(0); 4364 } 4365 4366 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4367 #undef __FUNCT__ 4368 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4369 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4370 { 4371 PetscErrorCode ierr; 4372 Mat_Merge_SeqsToMPI *merge; 4373 PetscContainer container; 4374 4375 PetscFunctionBegin; 4376 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4377 if (container) { 4378 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4379 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4380 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4381 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4382 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4383 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4384 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4385 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4386 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4387 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4388 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4389 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4390 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4391 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4392 ierr = PetscFree(merge);CHKERRQ(ierr); 4393 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4394 } 4395 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4396 PetscFunctionReturn(0); 4397 } 4398 4399 #include <../src/mat/utils/freespace.h> 4400 #include <petscbt.h> 4401 4402 #undef __FUNCT__ 4403 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4404 /*@C 4405 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 4406 matrices from each processor 4407 4408 Collective on MPI_Comm 4409 4410 Input Parameters: 4411 + comm - the communicators the parallel matrix will live on 4412 . seqmat - the input sequential matrices 4413 . m - number of local rows (or PETSC_DECIDE) 4414 . n - number of local columns (or PETSC_DECIDE) 4415 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4416 4417 Output Parameter: 4418 . mpimat - the parallel matrix generated 4419 4420 Level: advanced 4421 4422 Notes: 4423 The dimensions of the sequential matrix in each processor MUST be the same. 4424 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4425 destroyed when mpimat is destroyed. 
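   The numeric routine below is normally preceded by the matching symbolic routine; an
   illustrative pairing of the two functions defined in this file is
.vb
      MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,&mpimat);
      MatMerge_SeqsToMPINumeric(seqmat,mpimat);
.ve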
Call PetscObjectQuery() to access seqmat. 4426 @*/ 4427 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4428 { 4429 PetscErrorCode ierr; 4430 MPI_Comm comm=((PetscObject)mpimat)->comm; 4431 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4432 PetscMPIInt size,rank,taga,*len_s; 4433 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4434 PetscInt proc,m; 4435 PetscInt **buf_ri,**buf_rj; 4436 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4437 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4438 MPI_Request *s_waits,*r_waits; 4439 MPI_Status *status; 4440 MatScalar *aa=a->a; 4441 MatScalar **abuf_r,*ba_i; 4442 Mat_Merge_SeqsToMPI *merge; 4443 PetscContainer container; 4444 4445 PetscFunctionBegin; 4446 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4447 4448 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4449 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4450 4451 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4452 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4453 4454 bi = merge->bi; 4455 bj = merge->bj; 4456 buf_ri = merge->buf_ri; 4457 buf_rj = merge->buf_rj; 4458 4459 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4460 owners = merge->rowmap->range; 4461 len_s = merge->len_s; 4462 4463 /* send and recv matrix values */ 4464 /*-----------------------------*/ 4465 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4466 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4467 4468 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4469 for (proc=0,k=0; proc<size; proc++){ 4470 if (!len_s[proc]) continue; 4471 i = owners[proc]; 4472 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4473 k++; 4474 } 4475 4476 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4477 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4478 ierr = PetscFree(status);CHKERRQ(ierr); 4479 4480 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4481 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4482 4483 /* insert mat values of mpimat */ 4484 /*----------------------------*/ 4485 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4486 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4487 4488 for (k=0; k<merge->nrecv; k++){ 4489 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4490 nrows = *(buf_ri_k[k]); 4491 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4492 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4493 } 4494 4495 /* set values of ba */ 4496 m = merge->rowmap->n; 4497 for (i=0; i<m; i++) { 4498 arow = owners[rank] + i; 4499 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4500 bnzi = bi[i+1] - bi[i]; 4501 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4502 4503 /* add local non-zero vals of this proc's seqmat into ba */ 4504 anzi = ai[arow+1] - ai[arow]; 4505 aj = a->j + ai[arow]; 4506 aa = a->a + ai[arow]; 4507 nextaj = 0; 4508 for (j=0; nextaj<anzi; j++){ 4509 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4510 ba_i[j] += aa[nextaj++]; 4511 } 4512 } 4513 4514 /* add received vals into ba */ 4515 for (k=0; k<merge->nrecv; k++){ /* k-th 
received message */ 4516 /* i-th row */ 4517 if (i == *nextrow[k]) { 4518 anzi = *(nextai[k]+1) - *nextai[k]; 4519 aj = buf_rj[k] + *(nextai[k]); 4520 aa = abuf_r[k] + *(nextai[k]); 4521 nextaj = 0; 4522 for (j=0; nextaj<anzi; j++){ 4523 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4524 ba_i[j] += aa[nextaj++]; 4525 } 4526 } 4527 nextrow[k]++; nextai[k]++; 4528 } 4529 } 4530 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4531 } 4532 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4533 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4534 4535 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4536 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4537 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4538 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4539 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4540 PetscFunctionReturn(0); 4541 } 4542 4543 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4544 4545 #undef __FUNCT__ 4546 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4547 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4548 { 4549 PetscErrorCode ierr; 4550 Mat B_mpi; 4551 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4552 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4553 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4554 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4555 PetscInt len,proc,*dnz,*onz; 4556 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4557 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4558 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4559 MPI_Status *status; 4560 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4561 PetscBT lnkbt; 4562 Mat_Merge_SeqsToMPI *merge; 4563 PetscContainer container; 4564 4565 PetscFunctionBegin; 4566 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4567 4568 /* make sure it is a PETSc comm */ 4569 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4570 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4571 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4572 4573 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4574 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4575 4576 /* determine row ownership */ 4577 /*---------------------------------------------------------*/ 4578 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4579 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4580 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4581 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4582 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4583 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4584 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4585 4586 m = merge->rowmap->n; 4587 M = merge->rowmap->N; 4588 owners = merge->rowmap->range; 4589 4590 /* determine the number of messages to send, their lengths */ 4591 /*---------------------------------------------------------*/ 4592 len_s = merge->len_s; 4593 4594 len = 0; /* length of buf_si[] */ 4595 merge->nsend = 0; 4596 for (proc=0; proc<size; proc++){ 4597 len_si[proc] = 0; 4598 if (proc == rank){ 4599 len_s[proc] = 0; 4600 } else { 4601 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4602 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4603 } 4604 if (len_s[proc]) { 4605 
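/* this rank will send at least one nonzero to proc: count proc as a send target and
   size proc's i-structure message as 2*(nrows+1) entries, where nrows is the number of
   nonempty local rows that fall in proc's row range (see the buf_si layout described below) */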
merge->nsend++;
4606 nrows = 0;
4607 for (i=owners[proc]; i<owners[proc+1]; i++){
4608 if (ai[i+1] > ai[i]) nrows++;
4609 }
4610 len_si[proc] = 2*(nrows+1);
4611 len += len_si[proc];
4612 }
4613 }
4614
4615 /* determine the number and length of messages to receive for ij-structure */
4616 /*-------------------------------------------------------------------------*/
4617 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4618 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4619
4620 /* post the Irecv of j-structure */
4621 /*-------------------------------*/
4622 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4623 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4624
4625 /* post the Isend of j-structure */
4626 /*--------------------------------*/
4627 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4628
4629 for (proc=0, k=0; proc<size; proc++){
4630 if (!len_s[proc]) continue;
4631 i = owners[proc];
4632 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4633 k++;
4634 }
4635
4636 /* receives and sends of j-structure are complete */
4637 /*------------------------------------------------*/
4638 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4639 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4640
4641 /* send and recv i-structure */
4642 /*---------------------------*/
4643 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4644 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4645
4646 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4647 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4648 for (proc=0,k=0; proc<size; proc++){
4649 if (!len_s[proc]) continue;
4650 /* form outgoing message for i-structure:
4651 buf_si[0]: nrows to be sent
4652 [1:nrows]: row index (local to the receiving process)
4653 [nrows+1:2*nrows+1]: i-structure index
4654 */
4655 /*-------------------------------------------*/
4656 nrows = len_si[proc]/2 - 1;
4657 buf_si_i = buf_si + nrows+1;
4658 buf_si[0] = nrows;
4659 buf_si_i[0] = 0;
4660 nrows = 0;
4661 for (i=owners[proc]; i<owners[proc+1]; i++){
4662 anzi = ai[i+1] - ai[i];
4663 if (anzi) {
4664 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4665 buf_si[nrows+1] = i-owners[proc]; /* local row index */
4666 nrows++;
4667 }
4668 }
4669 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4670 k++;
4671 buf_si += len_si[proc];
4672 }
4673
4674 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4675 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4676
4677 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4678 for (i=0; i<merge->nrecv; i++){
4679 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4680 }
4681
4682 ierr = PetscFree(len_si);CHKERRQ(ierr);
4683 ierr = PetscFree(len_ri);CHKERRQ(ierr);
4684 ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4685 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4686 ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4687 ierr = PetscFree(buf_s);CHKERRQ(ierr);
4688 ierr =
PetscFree(status);CHKERRQ(ierr);
4689
4690 /* compute a local seq matrix on each processor */
4691 /*----------------------------------------------*/
4692 /* allocate bi array and free space for accumulating nonzero column info */
4693 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4694 bi[0] = 0;
4695
4696 /* create and initialize a linked list */
4697 nlnk = N+1;
4698 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4699
4700 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4701 len = 0;
4702 len = ai[owners[rank+1]] - ai[owners[rank]];
4703 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4704 current_space = free_space;
4705
4706 /* determine symbolic info for each local row */
4707 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4708
4709 for (k=0; k<merge->nrecv; k++){
4710 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4711 nrows = *buf_ri_k[k];
4712 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */
4713 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure */
4714 }
4715
4716 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4717 len = 0;
4718 for (i=0;i<m;i++) {
4719 bnzi = 0;
4720 /* add local non-zero cols of this proc's seqmat into lnk */
4721 arow = owners[rank] + i;
4722 anzi = ai[arow+1] - ai[arow];
4723 aj = a->j + ai[arow];
4724 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4725 bnzi += nlnk;
4726 /* add received col data into lnk */
4727 for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4728 if (i == *nextrow[k]) { /* i-th row */
4729 anzi = *(nextai[k]+1) - *nextai[k];
4730 aj = buf_rj[k] + *nextai[k];
4731 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4732 bnzi += nlnk;
4733 nextrow[k]++; nextai[k]++;
4734 }
4735 }
4736 if (len < bnzi) len = bnzi; /* =max(bnzi) */
4737
4738 /* if free space is not available, make more free space */
4739 if (current_space->local_remaining<bnzi) {
4740 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4741 nspacedouble++;
4742 }
4743 /* copy data into free space, then initialize lnk */
4744 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4745 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4746
4747 current_space->array += bnzi;
4748 current_space->local_used += bnzi;
4749 current_space->local_remaining -= bnzi;
4750
4751 bi[i+1] = bi[i] + bnzi;
4752 }
4753
4754 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4755
4756 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4757 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4758 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4759
4760 /* create symbolic parallel matrix B_mpi */
4761 /*---------------------------------------*/
4762 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4763 if (n==PETSC_DECIDE) {
4764 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4765 } else {
4766 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4767 }
4768 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4769 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4770 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4771
4772 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4773
B_mpi->assembled = PETSC_FALSE;
4774 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4775 merge->bi = bi;
4776 merge->bj = bj;
4777 merge->buf_ri = buf_ri;
4778 merge->buf_rj = buf_rj;
4779 merge->coi = PETSC_NULL;
4780 merge->coj = PETSC_NULL;
4781 merge->owners_co = PETSC_NULL;
4782
4783 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4784
4785 /* attach the supporting struct to B_mpi for reuse */
4786 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4787 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4788 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4789 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4790 *mpimat = B_mpi;
4791
4792 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4793 PetscFunctionReturn(0);
4794 }
4795
4796 #undef __FUNCT__
4797 #define __FUNCT__ "MatMerge_SeqsToMPI"
4798 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4799 {
4800 PetscErrorCode ierr;
4801
4802 PetscFunctionBegin;
4803 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4804 if (scall == MAT_INITIAL_MATRIX){
4805 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4806 }
4807 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4808 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4809 PetscFunctionReturn(0);
4810 }
4811
4812 #undef __FUNCT__
4813 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4814 /*@
4815 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4816 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4817 with MatGetSize()
4818
4819 Not Collective
4820
4821 Input Parameters:
4822 + A - the matrix
4823 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4824
4825 Output Parameter:
4826 .
A_loc - the local sequential matrix generated 4827 4828 Level: developer 4829 4830 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4831 4832 @*/ 4833 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4834 { 4835 PetscErrorCode ierr; 4836 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4837 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4838 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4839 MatScalar *aa=a->a,*ba=b->a,*cam; 4840 PetscScalar *ca; 4841 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4842 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4843 PetscBool match; 4844 4845 PetscFunctionBegin; 4846 ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4847 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4848 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4849 if (scall == MAT_INITIAL_MATRIX){ 4850 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4851 ci[0] = 0; 4852 for (i=0; i<am; i++){ 4853 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4854 } 4855 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4856 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4857 k = 0; 4858 for (i=0; i<am; i++) { 4859 ncols_o = bi[i+1] - bi[i]; 4860 ncols_d = ai[i+1] - ai[i]; 4861 /* off-diagonal portion of A */ 4862 for (jo=0; jo<ncols_o; jo++) { 4863 col = cmap[*bj]; 4864 if (col >= cstart) break; 4865 cj[k] = col; bj++; 4866 ca[k++] = *ba++; 4867 } 4868 /* diagonal portion of A */ 4869 for (j=0; j<ncols_d; j++) { 4870 cj[k] = cstart + *aj++; 4871 ca[k++] = *aa++; 4872 } 4873 /* off-diagonal portion of A */ 4874 for (j=jo; j<ncols_o; j++) { 4875 cj[k] = cmap[*bj++]; 4876 ca[k++] = *ba++; 4877 } 4878 } 4879 /* put together the new matrix */ 4880 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4881 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4882 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4883 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4884 mat->free_a = PETSC_TRUE; 4885 mat->free_ij = PETSC_TRUE; 4886 mat->nonew = 0; 4887 } else if (scall == MAT_REUSE_MATRIX){ 4888 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4889 ci = mat->i; cj = mat->j; cam = mat->a; 4890 for (i=0; i<am; i++) { 4891 /* off-diagonal portion of A */ 4892 ncols_o = bi[i+1] - bi[i]; 4893 for (jo=0; jo<ncols_o; jo++) { 4894 col = cmap[*bj]; 4895 if (col >= cstart) break; 4896 *cam++ = *ba++; bj++; 4897 } 4898 /* diagonal portion of A */ 4899 ncols_d = ai[i+1] - ai[i]; 4900 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4901 /* off-diagonal portion of A */ 4902 for (j=jo; j<ncols_o; j++) { 4903 *cam++ = *ba++; bj++; 4904 } 4905 } 4906 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4907 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4908 PetscFunctionReturn(0); 4909 } 4910 4911 #undef __FUNCT__ 4912 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 4913 /*@C 4914 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 4915 4916 Not Collective 4917 4918 Input Parameters: 4919 + A - the matrix 4920 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4921 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4922 4923 Output Parameter: 4924 . 
A_loc - the local sequential matrix generated 4925 4926 Level: developer 4927 4928 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 4929 4930 @*/ 4931 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4932 { 4933 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4934 PetscErrorCode ierr; 4935 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4936 IS isrowa,iscola; 4937 Mat *aloc; 4938 PetscBool match; 4939 4940 PetscFunctionBegin; 4941 ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4942 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4943 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4944 if (!row){ 4945 start = A->rmap->rstart; end = A->rmap->rend; 4946 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4947 } else { 4948 isrowa = *row; 4949 } 4950 if (!col){ 4951 start = A->cmap->rstart; 4952 cmap = a->garray; 4953 nzA = a->A->cmap->n; 4954 nzB = a->B->cmap->n; 4955 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4956 ncols = 0; 4957 for (i=0; i<nzB; i++) { 4958 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4959 else break; 4960 } 4961 imark = i; 4962 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4963 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4964 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 4965 } else { 4966 iscola = *col; 4967 } 4968 if (scall != MAT_INITIAL_MATRIX){ 4969 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4970 aloc[0] = *A_loc; 4971 } 4972 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4973 *A_loc = aloc[0]; 4974 ierr = PetscFree(aloc);CHKERRQ(ierr); 4975 if (!row){ 4976 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 4977 } 4978 if (!col){ 4979 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 4980 } 4981 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4982 PetscFunctionReturn(0); 4983 } 4984 4985 #undef __FUNCT__ 4986 #define __FUNCT__ "MatGetBrowsOfAcols" 4987 /*@C 4988 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4989 4990 Collective on Mat 4991 4992 Input Parameters: 4993 + A,B - the matrices in mpiaij format 4994 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4995 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4996 4997 Output Parameter: 4998 + rowb, colb - index sets of rows and columns of B to extract 4999 . 
brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows 5000 - B_seq - the sequential matrix generated 5001 5002 Level: developer 5003 5004 @*/ 5005 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 5006 { 5007 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5008 PetscErrorCode ierr; 5009 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5010 IS isrowb,iscolb; 5011 Mat *bseq; 5012 5013 PetscFunctionBegin; 5014 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5015 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5016 } 5017 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5018 5019 if (scall == MAT_INITIAL_MATRIX){ 5020 start = A->cmap->rstart; 5021 cmap = a->garray; 5022 nzA = a->A->cmap->n; 5023 nzB = a->B->cmap->n; 5024 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5025 ncols = 0; 5026 for (i=0; i<nzB; i++) { /* row < local row index */ 5027 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5028 else break; 5029 } 5030 imark = i; 5031 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5032 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5033 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5034 *brstart = imark; 5035 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5036 } else { 5037 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5038 isrowb = *rowb; iscolb = *colb; 5039 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5040 bseq[0] = *B_seq; 5041 } 5042 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5043 *B_seq = bseq[0]; 5044 ierr = PetscFree(bseq);CHKERRQ(ierr); 5045 if (!rowb){ 5046 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5047 } else { 5048 *rowb = isrowb; 5049 } 5050 if (!colb){ 5051 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5052 } else { 5053 *colb = iscolb; 5054 } 5055 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5056 PetscFunctionReturn(0); 5057 } 5058 5059 #undef __FUNCT__ 5060 #define __FUNCT__ "MatGetBrowsOfAoCols" 5061 /*@C 5062 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5063 of the OFF-DIAGONAL portion of local A 5064 5065 Collective on Mat 5066 5067 Input Parameters: 5068 + A,B - the matrices in mpiaij format 5069 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5070 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5071 . 
startsj_r - similar to startsj for receives 5072 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 5073 5074 Output Parameter: 5075 + B_oth - the sequential matrix generated 5076 5077 Level: developer 5078 5079 @*/ 5080 PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5081 { 5082 VecScatter_MPI_General *gen_to,*gen_from; 5083 PetscErrorCode ierr; 5084 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5085 Mat_SeqAIJ *b_oth; 5086 VecScatter ctx=a->Mvctx; 5087 MPI_Comm comm=((PetscObject)ctx)->comm; 5088 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5089 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5090 PetscScalar *rvalues,*svalues; 5091 MatScalar *b_otha,*bufa,*bufA; 5092 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5093 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 5094 MPI_Status *sstatus,rstatus; 5095 PetscMPIInt jj; 5096 PetscInt *cols,sbs,rbs; 5097 PetscScalar *vals; 5098 5099 PetscFunctionBegin; 5100 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5101 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5102 } 5103 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5104 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5105 5106 gen_to = (VecScatter_MPI_General*)ctx->todata; 5107 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5108 rvalues = gen_from->values; /* holds the length of receiving row */ 5109 svalues = gen_to->values; /* holds the length of sending row */ 5110 nrecvs = gen_from->n; 5111 nsends = gen_to->n; 5112 5113 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5114 srow = gen_to->indices; /* local row index to be sent */ 5115 sstarts = gen_to->starts; 5116 sprocs = gen_to->procs; 5117 sstatus = gen_to->sstatus; 5118 sbs = gen_to->bs; 5119 rstarts = gen_from->starts; 5120 rprocs = gen_from->procs; 5121 rbs = gen_from->bs; 5122 5123 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5124 if (scall == MAT_INITIAL_MATRIX){ 5125 /* i-array */ 5126 /*---------*/ 5127 /* post receives */ 5128 for (i=0; i<nrecvs; i++){ 5129 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5130 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5131 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5132 } 5133 5134 /* pack the outgoing message */ 5135 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5136 sstartsj[0] = 0; rstartsj[0] = 0; 5137 len = 0; /* total length of j or a array to be sent */ 5138 k = 0; 5139 for (i=0; i<nsends; i++){ 5140 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5141 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5142 for (j=0; j<nrows; j++) { 5143 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5144 for (l=0; l<sbs; l++){ 5145 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5146 rowlen[j*sbs+l] = ncols; 5147 len += ncols; 5148 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5149 } 5150 k++; 5151 } 5152 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5153 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and 
bufa */
5154 }
5155 /* recvs and sends of i-array are completed */
5156 i = nrecvs;
5157 while (i--) {
5158 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5159 }
5160 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5161
5162 /* allocate buffers for sending j and a arrays */
5163 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5164 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5165
5166 /* create i-array of B_oth */
5167 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5168 b_othi[0] = 0;
5169 len = 0; /* total length of j or a array to be received */
5170 k = 0;
5171 for (i=0; i<nrecvs; i++){
5172 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5173 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5174 for (j=0; j<nrows; j++) {
5175 b_othi[k+1] = b_othi[k] + rowlen[j];
5176 len += rowlen[j]; k++;
5177 }
5178 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5179 }
5180
5181 /* allocate space for j and a arrays of B_oth */
5182 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5183 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5184
5185 /* j-array */
5186 /*---------*/
5187 /* post receives of j-array */
5188 for (i=0; i<nrecvs; i++){
5189 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5190 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5191 }
5192
5193 /* pack the outgoing message j-array */
5194 k = 0;
5195 for (i=0; i<nsends; i++){
5196 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5197 bufJ = bufj+sstartsj[i];
5198 for (j=0; j<nrows; j++) {
5199 row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5200 for (ll=0; ll<sbs; ll++){
5201 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5202 for (l=0; l<ncols; l++){
5203 *bufJ++ = cols[l];
5204 }
5205 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5206 }
5207 }
5208 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5209 }
5210
5211 /* recvs and sends of j-array are completed */
5212 i = nrecvs;
5213 while (i--) {
5214 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5215 }
5216 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5217 } else if (scall == MAT_REUSE_MATRIX){
5218 sstartsj = *startsj;
5219 rstartsj = *startsj_r;
5220 bufa = *bufa_ptr;
5221 b_oth = (Mat_SeqAIJ*)(*B_oth)->data;
5222 b_otha = b_oth->a;
5223 } else {
5224 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Unsupported MatReuse option; must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5225 }
5226
5227 /* a-array */
5228 /*---------*/
5229 /* post receives of a-array */
5230 for (i=0; i<nrecvs; i++){
5231 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5232 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5233 }
5234
5235 /* pack the outgoing message a-array */
5236 k = 0;
5237 for (i=0; i<nsends; i++){
5238 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5239 bufA = bufa+sstartsj[i];
5240 for (j=0; j<nrows; j++) {
5241 row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5242 for (ll=0; ll<sbs; ll++){
5243 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5244 for (l=0; l<ncols; l++){
5245 *bufA++ = vals[l];
5246 }
5247 ierr =
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5248 } 5249 } 5250 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5251 } 5252 /* recvs and sends of a-array are completed */ 5253 i = nrecvs; 5254 while (i--) { 5255 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5256 } 5257 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5258 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5259 5260 if (scall == MAT_INITIAL_MATRIX){ 5261 /* put together the new matrix */ 5262 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5263 5264 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5265 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5266 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5267 b_oth->free_a = PETSC_TRUE; 5268 b_oth->free_ij = PETSC_TRUE; 5269 b_oth->nonew = 0; 5270 5271 ierr = PetscFree(bufj);CHKERRQ(ierr); 5272 if (!startsj || !bufa_ptr){ 5273 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5274 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5275 } else { 5276 *startsj = sstartsj; 5277 *startsj_r = rstartsj; 5278 *bufa_ptr = bufa; 5279 } 5280 } 5281 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5282 PetscFunctionReturn(0); 5283 } 5284 5285 #undef __FUNCT__ 5286 #define __FUNCT__ "MatGetCommunicationStructs" 5287 /*@C 5288 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5289 5290 Not Collective 5291 5292 Input Parameters: 5293 . A - The matrix in mpiaij format 5294 5295 Output Parameter: 5296 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5297 . 
colmap - A map from global column index to local index into lvec 5298 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5299 5300 Level: developer 5301 5302 @*/ 5303 #if defined (PETSC_USE_CTABLE) 5304 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5305 #else 5306 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5307 #endif 5308 { 5309 Mat_MPIAIJ *a; 5310 5311 PetscFunctionBegin; 5312 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5313 PetscValidPointer(lvec, 2); 5314 PetscValidPointer(colmap, 3); 5315 PetscValidPointer(multScatter, 4); 5316 a = (Mat_MPIAIJ *) A->data; 5317 if (lvec) *lvec = a->lvec; 5318 if (colmap) *colmap = a->colmap; 5319 if (multScatter) *multScatter = a->Mvctx; 5320 PetscFunctionReturn(0); 5321 } 5322 5323 EXTERN_C_BEGIN 5324 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*); 5325 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*); 5326 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 5327 EXTERN_C_END 5328 5329 #undef __FUNCT__ 5330 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5331 /* 5332 Computes (B'*A')' since computing B*A directly is untenable 5333 5334 n p p 5335 ( ) ( ) ( ) 5336 m ( A ) * n ( B ) = m ( C ) 5337 ( ) ( ) ( ) 5338 5339 */ 5340 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5341 { 5342 PetscErrorCode ierr; 5343 Mat At,Bt,Ct; 5344 5345 PetscFunctionBegin; 5346 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5347 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5348 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5349 ierr = MatDestroy(&At);CHKERRQ(ierr); 5350 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5351 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5352 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5353 PetscFunctionReturn(0); 5354 } 5355 5356 #undef __FUNCT__ 5357 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5358 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5359 { 5360 PetscErrorCode ierr; 5361 PetscInt m=A->rmap->n,n=B->cmap->n; 5362 Mat Cmat; 5363 5364 PetscFunctionBegin; 5365 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5366 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5367 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5368 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5369 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5370 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5371 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5372 *C = Cmat; 5373 PetscFunctionReturn(0); 5374 } 5375 5376 /* ----------------------------------------------------------------*/ 5377 #undef __FUNCT__ 5378 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5379 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5380 { 5381 PetscErrorCode ierr; 5382 5383 PetscFunctionBegin; 5384 if (scall == MAT_INITIAL_MATRIX){ 5385 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5386 } 5387 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5388 PetscFunctionReturn(0); 5389 } 5390 5391 EXTERN_C_BEGIN 5392 #if defined(PETSC_HAVE_MUMPS) 5393 extern PetscErrorCode 
MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5394 #endif 5395 #if defined(PETSC_HAVE_PASTIX) 5396 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5397 #endif 5398 #if defined(PETSC_HAVE_SUPERLU_DIST) 5399 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5400 #endif 5401 #if defined(PETSC_HAVE_SPOOLES) 5402 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5403 #endif 5404 EXTERN_C_END 5405 5406 /*MC 5407 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5408 5409 Options Database Keys: 5410 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5411 5412 Level: beginner 5413 5414 .seealso: MatCreateMPIAIJ() 5415 M*/ 5416 5417 EXTERN_C_BEGIN 5418 #undef __FUNCT__ 5419 #define __FUNCT__ "MatCreate_MPIAIJ" 5420 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5421 { 5422 Mat_MPIAIJ *b; 5423 PetscErrorCode ierr; 5424 PetscMPIInt size; 5425 5426 PetscFunctionBegin; 5427 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5428 5429 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5430 B->data = (void*)b; 5431 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5432 B->rmap->bs = 1; 5433 B->assembled = PETSC_FALSE; 5434 5435 B->insertmode = NOT_SET_VALUES; 5436 b->size = size; 5437 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5438 5439 /* build cache for off array entries formed */ 5440 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5441 b->donotstash = PETSC_FALSE; 5442 b->colmap = 0; 5443 b->garray = 0; 5444 b->roworiented = PETSC_TRUE; 5445 5446 /* stuff used for matrix vector multiply */ 5447 b->lvec = PETSC_NULL; 5448 b->Mvctx = PETSC_NULL; 5449 5450 /* stuff for MatGetRow() */ 5451 b->rowindices = 0; 5452 b->rowvalues = 0; 5453 b->getrowactive = PETSC_FALSE; 5454 5455 #if defined(PETSC_HAVE_SPOOLES) 5456 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5457 "MatGetFactor_mpiaij_spooles", 5458 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5459 #endif 5460 #if defined(PETSC_HAVE_MUMPS) 5461 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5462 "MatGetFactor_aij_mumps", 5463 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5464 #endif 5465 #if defined(PETSC_HAVE_PASTIX) 5466 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5467 "MatGetFactor_mpiaij_pastix", 5468 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5469 #endif 5470 #if defined(PETSC_HAVE_SUPERLU_DIST) 5471 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5472 "MatGetFactor_mpiaij_superlu_dist", 5473 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5474 #endif 5475 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5476 "MatStoreValues_MPIAIJ", 5477 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5478 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5479 "MatRetrieveValues_MPIAIJ", 5480 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5481 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5482 "MatGetDiagonalBlock_MPIAIJ", 5483 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5484 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5485 "MatIsTranspose_MPIAIJ", 5486 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5487 ierr = 
PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5488 "MatMPIAIJSetPreallocation_MPIAIJ", 5489 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5490 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5491 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5492 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5493 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5494 "MatDiagonalScaleLocal_MPIAIJ", 5495 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5496 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5497 "MatConvert_MPIAIJ_MPIAIJPERM", 5498 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5499 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5500 "MatConvert_MPIAIJ_MPIAIJCRL", 5501 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5502 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5503 "MatConvert_MPIAIJ_MPISBAIJ", 5504 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5505 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5506 "MatMatMult_MPIDense_MPIAIJ", 5507 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5508 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5509 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5510 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5511 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5512 "MatMatMultNumeric_MPIDense_MPIAIJ", 5513 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5514 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5515 PetscFunctionReturn(0); 5516 } 5517 EXTERN_C_END 5518 5519 #undef __FUNCT__ 5520 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5521 /*@ 5522 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5523 and "off-diagonal" part of the matrix in CSR format. 5524 5525 Collective on MPI_Comm 5526 5527 Input Parameters: 5528 + comm - MPI communicator 5529 . m - number of local rows (Cannot be PETSC_DECIDE) 5530 . n - This value should be the same as the local size used in creating the 5531 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5532 calculated if N is given) For square matrices n is almost always m. 5533 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5534 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5535 . i - row indices for "diagonal" portion of matrix 5536 . j - column indices 5537 . a - matrix values 5538 . oi - row indices for "off-diagonal" portion of matrix 5539 . oj - column indices 5540 - oa - matrix values 5541 5542 Output Parameter: 5543 . mat - the matrix 5544 5545 Level: advanced 5546 5547 Notes: 5548 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5549 must free the arrays once the matrix has been destroyed and not before. 5550 5551 The i and j indices are 0 based 5552 5553 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5554 5555 This sets local rows and cannot be used to set off-processor values. 5556 5557 You cannot later use MatSetValues() to change values in this matrix. 
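   Example usage (an illustrative sketch, assuming exactly two MPI processes, each owning one row of the
   2 x 2 global matrix whose rows are [1 2] and [3 4]): each "diagonal" block is then 1 by 1 and each
   "off-diagonal" block holds the single remote column. Note that the j indices are local to the diagonal
   block while the oj indices are global column indices, matching how the two sequential blocks are
   created in the code below.

       Mat            A;
       PetscErrorCode ierr;
       PetscMPIInt    rank;
       PetscInt       i[2]  = {0,1}, j[1], oi[2] = {0,1}, oj[1];
       PetscScalar    a[1], oa[1];

       ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
       if (!rank) { j[0] = 0; a[0] = 1.0; oj[0] = 1; oa[0] = 2.0; }
       else       { j[0] = 0; a[0] = 4.0; oj[0] = 0; oa[0] = 3.0; }
       ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,2,2,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);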
5558 5559 .keywords: matrix, aij, compressed row, sparse, parallel 5560 5561 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5562 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5563 @*/ 5564 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5565 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5566 { 5567 PetscErrorCode ierr; 5568 Mat_MPIAIJ *maij; 5569 5570 PetscFunctionBegin; 5571 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5572 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5573 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5574 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5575 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5576 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5577 maij = (Mat_MPIAIJ*) (*mat)->data; 5578 maij->donotstash = PETSC_TRUE; 5579 (*mat)->preallocated = PETSC_TRUE; 5580 5581 ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5582 ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5583 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5584 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5585 5586 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5587 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5588 5589 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5590 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5591 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5592 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5593 5594 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5595 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5596 PetscFunctionReturn(0); 5597 } 5598 5599 /* 5600 Special version for direct calls from Fortran 5601 */ 5602 #include <private/fortranimpl.h> 5603 5604 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5605 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5606 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5607 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5608 #endif 5609 5610 /* Change these macros so can be used in void function */ 5611 #undef CHKERRQ 5612 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5613 #undef SETERRQ2 5614 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5615 #undef SETERRQ 5616 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5617 5618 EXTERN_C_BEGIN 5619 #undef __FUNCT__ 5620 #define __FUNCT__ "matsetvaluesmpiaij_" 5621 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5622 { 5623 Mat mat = *mmat; 5624 PetscInt m = *mm, n = *mn; 5625 InsertMode addv = *maddv; 5626 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5627 PetscScalar value; 5628 PetscErrorCode ierr; 5629 5630 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5631 if (mat->insertmode == NOT_SET_VALUES) { 5632 mat->insertmode = addv; 5633 } 5634 #if defined(PETSC_USE_DEBUG) 5635 else if (mat->insertmode != addv) { 5636 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5637 } 5638 #endif 5639 { 5640 PetscInt i,j,rstart = 
mat->rmap->rstart,rend = mat->rmap->rend; 5641 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5642 PetscBool roworiented = aij->roworiented; 5643 5644 /* Some Variables required in the macro */ 5645 Mat A = aij->A; 5646 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5647 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5648 MatScalar *aa = a->a; 5649 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5650 Mat B = aij->B; 5651 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5652 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5653 MatScalar *ba = b->a; 5654 5655 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5656 PetscInt nonew = a->nonew; 5657 MatScalar *ap1,*ap2; 5658 5659 PetscFunctionBegin; 5660 for (i=0; i<m; i++) { 5661 if (im[i] < 0) continue; 5662 #if defined(PETSC_USE_DEBUG) 5663 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5664 #endif 5665 if (im[i] >= rstart && im[i] < rend) { 5666 row = im[i] - rstart; 5667 lastcol1 = -1; 5668 rp1 = aj + ai[row]; 5669 ap1 = aa + ai[row]; 5670 rmax1 = aimax[row]; 5671 nrow1 = ailen[row]; 5672 low1 = 0; 5673 high1 = nrow1; 5674 lastcol2 = -1; 5675 rp2 = bj + bi[row]; 5676 ap2 = ba + bi[row]; 5677 rmax2 = bimax[row]; 5678 nrow2 = bilen[row]; 5679 low2 = 0; 5680 high2 = nrow2; 5681 5682 for (j=0; j<n; j++) { 5683 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5684 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5685 if (in[j] >= cstart && in[j] < cend){ 5686 col = in[j] - cstart; 5687 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5688 } else if (in[j] < 0) continue; 5689 #if defined(PETSC_USE_DEBUG) 5690 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5691 #endif 5692 else { 5693 if (mat->was_assembled) { 5694 if (!aij->colmap) { 5695 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5696 } 5697 #if defined (PETSC_USE_CTABLE) 5698 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5699 col--; 5700 #else 5701 col = aij->colmap[in[j]] - 1; 5702 #endif 5703 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5704 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5705 col = in[j]; 5706 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5707 B = aij->B; 5708 b = (Mat_SeqAIJ*)B->data; 5709 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5710 rp2 = bj + bi[row]; 5711 ap2 = ba + bi[row]; 5712 rmax2 = bimax[row]; 5713 nrow2 = bilen[row]; 5714 low2 = 0; 5715 high2 = nrow2; 5716 bm = aij->B->rmap->n; 5717 ba = b->a; 5718 } 5719 } else col = in[j]; 5720 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5721 } 5722 } 5723 } else { 5724 if (!aij->donotstash) { 5725 if (roworiented) { 5726 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5727 } else { 5728 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5729 } 5730 } 5731 } 5732 }} 5733 PetscFunctionReturnVoid(); 5734 } 5735 EXTERN_C_END 5736 5737