1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petscblaslapack.h> 4 5 /*MC 6 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 7 8 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 9 and MATMPIAIJ otherwise. As a result, for single process communicators, 10 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 11 for communicators controlling multiple processes. It is recommended that you call both of 12 the above preallocation routines for simplicity. 13 14 Options Database Keys: 15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 16 17 Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when 18 enough exist. 19 20 Level: beginner 21 22 .seealso: MatCreateMPIAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ 23 M*/ 24 25 /*MC 26 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 27 28 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 29 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 30 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 31 for communicators controlling multiple processes. It is recommended that you call both of 32 the above preallocation routines for simplicity. 33 34 Options Database Keys: 35 . 
-mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 36 37 Level: beginner 38 39 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 40 M*/ 41 42 #undef __FUNCT__ 43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ" 44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows) 45 { 46 PetscErrorCode ierr; 47 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 48 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 49 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 50 const PetscInt *ia,*ib; 51 const MatScalar *aa,*bb; 52 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 53 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 54 55 PetscFunctionBegin; 56 *keptrows = 0; 57 ia = a->i; 58 ib = b->i; 59 for (i=0; i<m; i++) { 60 na = ia[i+1] - ia[i]; 61 nb = ib[i+1] - ib[i]; 62 if (!na && !nb) { 63 cnt++; 64 goto ok1; 65 } 66 aa = a->a + ia[i]; 67 for (j=0; j<na; j++) { 68 if (aa[j] != 0.0) goto ok1; 69 } 70 bb = b->a + ib[i]; 71 for (j=0; j <nb; j++) { 72 if (bb[j] != 0.0) goto ok1; 73 } 74 cnt++; 75 ok1:; 76 } 77 ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr); 78 if (!n0rows) PetscFunctionReturn(0); 79 ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 80 cnt = 0; 81 for (i=0; i<m; i++) { 82 na = ia[i+1] - ia[i]; 83 nb = ib[i+1] - ib[i]; 84 if (!na && !nb) continue; 85 aa = a->a + ia[i]; 86 for(j=0; j<na;j++) { 87 if (aa[j] != 0.0) { 88 rows[cnt++] = rstart + i; 89 goto ok2; 90 } 91 } 92 bb = b->a + ib[i]; 93 for (j=0; j<nb; j++) { 94 if (bb[j] != 0.0) { 95 rows[cnt++] = rstart + i; 96 goto ok2; 97 } 98 } 99 ok2:; 100 } 101 ierr = ISCreateGeneral(PETSC_COMM_WORLD,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 102 PetscFunctionReturn(0); 103 } 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 108 { 109 PetscErrorCode ierr; 110 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 111 
PetscInt i,n,*garray = aij->garray; 112 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 113 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 114 PetscReal *work; 115 116 PetscFunctionBegin; 117 ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr); 118 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 119 ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr); 120 if (type == NORM_2) { 121 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 122 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 123 } 124 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 125 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 126 } 127 } else if (type == NORM_1) { 128 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 129 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 130 } 131 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 132 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 133 } 134 } else if (type == NORM_INFINITY) { 135 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 136 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 137 } 138 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 139 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 140 } 141 142 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType"); 143 if (type == NORM_INFINITY) { 144 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 145 } else { 146 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 147 } 148 ierr = PetscFree(work);CHKERRQ(ierr); 149 if (type == NORM_2) { 150 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 151 } 152 PetscFunctionReturn(0); 153 } 154 155 #undef __FUNCT__ 156 #define __FUNCT__ "MatDistribute_MPIAIJ" 157 /* 158 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 159 MatLoad_MPIAIJ(). Horrible lack of reuse. 
   Should be a routine for each matrix type.

   Only for square matrices

   Protocol: rank 0 owns the global SeqAIJ matrix gmat and point-to-point
   sends each other rank its row lengths, column indices, and values.
   With MAT_REUSE_MATRIX only the numerical values are shipped and copied
   directly into the existing A (diagonal) and B (off-diagonal) storage.
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    /* only rank 0 holds gmat, so only it can check the type */
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    /* build the row-ownership ranges from every rank's local row count m */
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine number diagonal and off-diagonal counts; ld[i] counts the
         entries of row i that lie strictly left of the diagonal block */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      /* rank 0 uses gmat's own arrays directly (not freed below) */
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation: dlens currently holds total row lengths, convert to
       diagonal-block lengths before preallocating */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    /* restore total row lengths, then insert rows with global indices */
    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    /* stash ld in the new matrix: needed by the MAT_REUSE_MATRIX branch below */
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0*/
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off diagonal B parts of mat:
       the incoming row is ordered by global column, i.e. [left-of-diag B | A | right-of-diag B],
       and ld[] gives the left-of-diagonal count for each row */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      /* right-of-diag tail of row i-1 plus left-of-diag head of row i */
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      /* right-of-diag tail of the last row */
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}

/*
    Local utility routine that creates a mapping from the global column
    number to the local number in the off-diagonal part of the local
    storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
    a slightly higher hash table cost; without it it is not scalable (each processor
    has an order N integer array but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;   /* number of off-diagonal (B) columns */

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  /* scalable: hash table mapping global column + 1 -> local B column + 1
     (the +1 shifts avoid storing 0, which the table treats as "absent") */
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  /* not scalable: dense integer array of global length; 0 means "not a local
     off-diagonal column", otherwise local B column + 1 */
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

/*
   Inserts or adds (row,col,value) into the diagonal block A.  All the scratch
   variables used here (rp1,ap1,low1,high1,nrow1,lastcol1,...) are locals of the
   caller, MatSetValues_MPIAIJ(), which must set them up per row before use.
   Binary-search narrowing followed by a linear scan; new entries shift the row
   tail up and may reallocate via MatSeqXAIJReallocateAIJ.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

/*
   Same as MatSetValues_SeqAIJ_A_Private() but for the off-diagonal block B,
   using the caller's rp2/ap2/low2/high2/nrow2/lastcol2 scratch variables.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
/*
   Copies an entire locally owned row of values v (ordered by global column)
   directly into the existing A (diagonal) and B (off-diagonal) storage; no new
   nonzeros may be created.  Only works for square matrices (see note below).
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
/*
   Inserts/adds an m x n logically dense block of values.  Locally owned rows go
   straight into A or B via the *_Private macros above; off-process rows are
   stashed for communication during assembly (unless donotstash is set).
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew = a->nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;   /* negative row indices are silently ignored */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: prime the macro scratch state for both blocks */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              /* column not present in assembled B: fall back to global
                 numbering by disassembling the parallel structure */
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      /* off-process row: stash it for assembly-time communication */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
/*
   Retrieves an m x n block of values; only locally owned rows are supported.
   Columns not stored locally (neither in A nor B) return 0.0.
*/
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          /* column lies in the diagonal block */
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* column lies in the off-diagonal block: translate via colmap */
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
/*
   Starts assembly: verifies all ranks agree on ADD vs INSERT mode and begins
   scattering the stashed off-process entries to their owners.
*/
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
/*
   Finishes assembly: drains the stash into local storage via
   MatSetValues_MPIAIJ(), assembles the A and B blocks, handles the case where
   any rank disassembled, and (on first assembly) sets up the parallel multiply.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ *)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode
MatZeroEntries_MPIAIJ(Mat A) 697 { 698 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 699 PetscErrorCode ierr; 700 701 PetscFunctionBegin; 702 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 703 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 704 PetscFunctionReturn(0); 705 } 706 707 #undef __FUNCT__ 708 #define __FUNCT__ "MatZeroRows_MPIAIJ" 709 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 710 { 711 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 712 PetscErrorCode ierr; 713 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 714 PetscInt i,*owners = A->rmap->range; 715 PetscInt *nprocs,j,idx,nsends,row; 716 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 717 PetscInt *rvalues,count,base,slen,*source; 718 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 719 MPI_Comm comm = ((PetscObject)A)->comm; 720 MPI_Request *send_waits,*recv_waits; 721 MPI_Status recv_status,*send_status; 722 const PetscScalar *xx; 723 PetscScalar *bb; 724 #if defined(PETSC_DEBUG) 725 PetscBool found = PETSC_FALSE; 726 #endif 727 728 PetscFunctionBegin; 729 /* first count number of contributors to each processor */ 730 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 731 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 732 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 733 j = 0; 734 for (i=0; i<N; i++) { 735 if (lastidx > (idx = rows[i])) j = 0; 736 lastidx = idx; 737 for (; j<size; j++) { 738 if (idx >= owners[j] && idx < owners[j+1]) { 739 nprocs[2*j]++; 740 nprocs[2*j+1] = 1; 741 owner[i] = j; 742 #if defined(PETSC_DEBUG) 743 found = PETSC_TRUE; 744 #endif 745 break; 746 } 747 } 748 #if defined(PETSC_DEBUG) 749 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 750 found = PETSC_FALSE; 751 #endif 752 } 753 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 754 755 if (A->nooffproczerorows) { 756 if (nsends 
> 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 757 nrecvs = nsends; 758 nmax = N; 759 } else { 760 /* inform other processors of number of messages and max length*/ 761 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 762 } 763 764 /* post receives: */ 765 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 766 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 767 for (i=0; i<nrecvs; i++) { 768 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 769 } 770 771 /* do sends: 772 1) starts[i] gives the starting index in svalues for stuff going to 773 the ith processor 774 */ 775 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 776 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 777 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 778 starts[0] = 0; 779 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 780 for (i=0; i<N; i++) { 781 svalues[starts[owner[i]]++] = rows[i]; 782 } 783 784 starts[0] = 0; 785 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 786 count = 0; 787 for (i=0; i<size; i++) { 788 if (nprocs[2*i+1]) { 789 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 790 } 791 } 792 ierr = PetscFree(starts);CHKERRQ(ierr); 793 794 base = owners[rank]; 795 796 /* wait on receives */ 797 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 798 count = nrecvs; slen = 0; 799 while (count) { 800 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 801 /* unpack receives into our local space */ 802 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 803 source[imdex] = recv_status.MPI_SOURCE; 804 lens[imdex] = n; 805 slen += n; 806 count--; 807 } 
808 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 809 810 /* move the data into the send scatter */ 811 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 812 count = 0; 813 for (i=0; i<nrecvs; i++) { 814 values = rvalues + i*nmax; 815 for (j=0; j<lens[i]; j++) { 816 lrows[count++] = values[j] - base; 817 } 818 } 819 ierr = PetscFree(rvalues);CHKERRQ(ierr); 820 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 821 ierr = PetscFree(owner);CHKERRQ(ierr); 822 ierr = PetscFree(nprocs);CHKERRQ(ierr); 823 824 /* fix right hand side if needed */ 825 if (x && b) { 826 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 827 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 828 for (i=0; i<slen; i++) { 829 bb[lrows[i]] = diag*xx[lrows[i]]; 830 } 831 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 832 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 833 } 834 /* 835 Zero the required rows. If the "diagonal block" of the matrix 836 is square and the user wishes to set the diagonal we use separate 837 code so that MatSetValues() is not called for each diagonal allocating 838 new memory, thus calling lots of mallocs and slowing things down. 
839 840 */ 841 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 842 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 843 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 844 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 845 } else if (diag != 0.0) { 846 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 847 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 848 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 849 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 850 } 851 for (i = 0; i < slen; i++) { 852 row = lrows[i] + rstart; 853 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 854 } 855 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 856 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 857 } else { 858 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 859 } 860 ierr = PetscFree(lrows);CHKERRQ(ierr); 861 862 /* wait on sends */ 863 if (nsends) { 864 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 865 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 866 ierr = PetscFree(send_status);CHKERRQ(ierr); 867 } 868 ierr = PetscFree(send_waits);CHKERRQ(ierr); 869 ierr = PetscFree(svalues);CHKERRQ(ierr); 870 PetscFunctionReturn(0); 871 } 872 873 #undef __FUNCT__ 874 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 875 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 876 { 877 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 878 PetscErrorCode ierr; 879 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 880 PetscInt i,*owners = A->rmap->range; 881 PetscInt *nprocs,j,idx,nsends; 882 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 883 PetscInt *rvalues,count,base,slen,*source; 884 PetscInt *lens,*lrows,*values,m; 885 MPI_Comm 
comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;    /* off-diagonal (coupling) block */
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;
#if defined(PETSC_DEBUG)
  PetscBool         found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  /* nprocs[2*j] = #rows destined for rank j, nprocs[2*j+1] = 1 iff any message goes to rank j */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
  j = 0;
  for (i=0; i<N; i++) {
    /* linear ownership search resuming from the previous hit; restart only when
       the input list is not sorted (common case: sorted rows => O(N+size) total) */
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: nmax is an upper bound on any incoming message length */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  /* pack: this pass advances starts[], so it is rebuilt below before sending */
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];    /* first globally owned row; used to localize received indices */

  /* wait on receives */
  ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;    /* convert global row -> local row */
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);
  /* lrows are the local rows to be zeroed, slen is the number of local rows */

  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);

  /* handle off diagonal part of matrix: scatter a 0/1 mask of zeroed rows so every
     process learns which of its ghost columns must be eliminated */
  ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<slen; i++) {
    bb[lrows[i]] = 1;
  }
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    /* bring ghost values of x over so eliminated columns can be moved into b */
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);

  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<slen; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use){
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;    /* maps compressed row -> actual local row */
    for (i=0; i<m; i++){
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          /* column is being eliminated: fold a_ij * x_j into b, then zero the entry */
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr =
PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends; without this the svalues buffer could be freed while in flight */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
/*
   y = A x for the parallel AIJ matrix.  The ghost-value scatter is started, the
   local (diagonal-block) product is computed to overlap communication with
   computation, then the off-diagonal contribution is added once the scatter ends.
*/
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);                /* overlapped with the scatter */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);     /* yy += B * (ghost values) */
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
/* Applies only the local diagonal block: xx = diag-block(A) * bb */
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
/* zz = yy + A xx, with the same communication/computation overlap as MatMult */
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr =
VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
/*
   yy = A^T xx.  The off-diagonal transpose product is computed first and its
   contributions are sent back to their owners with a reverse scatter; the order
   of the Begin/End calls relative to the local product depends on whether the
   scatter context merges its begin/end phases.
*/
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added in yy until the next line, */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
/*
   Tests whether Bmat equals Amat^T (to tolerance tol).  First the cheap local
   test on the diagonal blocks; only if that passes is the expensive
   off-diagonal comparison via MatGetSubMatrices() performed.
*/
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test:
symmetric diagonal block */
  Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);    /* uniprocessor: diagonal block is the whole matrix */

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  /* notme = all global indices outside this process's ownership range [first,last) */
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  /* compare A(Me,Notme) against B(Notme,Me): these are each other's transposes iff B = A^T */
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
/* zz = yy + A^T xx; reverse scatter returns off-diagonal contributions to owners */
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr =
(*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  /* both checks guarantee the true diagonal lies entirely within the local diagonal block */
  if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
/* Scales the whole parallel matrix by aa: both the diagonal and off-diagonal blocks */
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
/* Frees all resources of the MPIAIJ matrix and detaches its composed functions */
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr
= PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  /* remove every function composed on this object during MatCreate_MPIAIJ() */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
/*
   Writes the parallel matrix to a binary viewer: rank 0 owns the file descriptor
   and gathers row lengths, column indices and values from all other ranks in turn
   (flow-controlled), writing them in global row order.
*/
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;      /* local diagonal block */
  Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;      /* local off-diagonal block */
  PetscErrorCode    ierr;
  PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
  int               fd;
  PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar       *column_values;
  PetscInt
message_count,flowcontrolcount; 1297 1298 PetscFunctionBegin; 1299 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1300 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1301 nz = A->nz + B->nz; 1302 if (!rank) { 1303 header[0] = MAT_FILE_CLASSID; 1304 header[1] = mat->rmap->N; 1305 header[2] = mat->cmap->N; 1306 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1307 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1308 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1309 /* get largest number of rows any processor has */ 1310 rlen = mat->rmap->n; 1311 range = mat->rmap->range; 1312 for (i=1; i<size; i++) { 1313 rlen = PetscMax(rlen,range[i+1] - range[i]); 1314 } 1315 } else { 1316 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1317 rlen = mat->rmap->n; 1318 } 1319 1320 /* load up the local row counts */ 1321 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 1322 for (i=0; i<mat->rmap->n; i++) { 1323 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1324 } 1325 1326 /* store the row lengths to the file */ 1327 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1328 if (!rank) { 1329 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1330 for (i=1; i<size; i++) { 1331 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1332 rlen = range[i+1] - range[i]; 1333 ierr = MPILong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1334 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1335 } 1336 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1337 } else { 1338 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1339 ierr = 
MPILong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1340 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1341 } 1342 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1343 1344 /* load up the local column indices */ 1345 nzmax = nz; /* )th processor needs space a largest processor needs */ 1346 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1347 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1348 cnt = 0; 1349 for (i=0; i<mat->rmap->n; i++) { 1350 for (j=B->i[i]; j<B->i[i+1]; j++) { 1351 if ( (col = garray[B->j[j]]) > cstart) break; 1352 column_indices[cnt++] = col; 1353 } 1354 for (k=A->i[i]; k<A->i[i+1]; k++) { 1355 column_indices[cnt++] = A->j[k] + cstart; 1356 } 1357 for (; j<B->i[i+1]; j++) { 1358 column_indices[cnt++] = garray[B->j[j]]; 1359 } 1360 } 1361 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1362 1363 /* store the column indices to the file */ 1364 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1365 if (!rank) { 1366 MPI_Status status; 1367 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1368 for (i=1; i<size; i++) { 1369 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1370 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1371 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1372 ierr = MPILong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1373 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1374 } 1375 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1376 } else { 1377 ierr = 
PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1378 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1379 ierr = MPILong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1380 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1381 } 1382 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1383 1384 /* load up the local column values */ 1385 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1386 cnt = 0; 1387 for (i=0; i<mat->rmap->n; i++) { 1388 for (j=B->i[i]; j<B->i[i+1]; j++) { 1389 if ( garray[B->j[j]] > cstart) break; 1390 column_values[cnt++] = B->a[j]; 1391 } 1392 for (k=A->i[i]; k<A->i[i+1]; k++) { 1393 column_values[cnt++] = A->a[k]; 1394 } 1395 for (; j<B->i[i+1]; j++) { 1396 column_values[cnt++] = B->a[j]; 1397 } 1398 } 1399 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1400 1401 /* store the column values to the file */ 1402 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1403 if (!rank) { 1404 MPI_Status status; 1405 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1406 for (i=1; i<size; i++) { 1407 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1408 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1409 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1410 ierr = MPILong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1411 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1412 } 1413 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1414 } else { 1415 ierr = 
PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPILong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
/*
   Views the parallel matrix on ASCII, draw or socket viewers.  Info formats are
   handled directly; otherwise the entire matrix is assembled onto process 0
   (bounded to 1024 rows for ASCII unless overridden) and viewed sequentially.
*/
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-process summary of storage and I-node usage */
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    if (mat->rmap->N > 1024) {
      PetscBool flg = PETSC_FALSE;

      ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
      if (!flg) {
        SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
      }
    }

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);    /* rank 0 holds everything */
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part; column indices are temporarily shifted to global
       numbering in place and restored afterwards */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}

    /* copy over the B part; garray translates local ghost columns to global */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct = cols;    /* keep the base pointer: cols is advanced in the loop below */
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
      PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
/* Dispatches MatView to the handler above for the supported viewer types */
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary ||
issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPIAIJ"
/*
   Local (block Jacobi) SOR: each iteration scatters the current ghost values,
   forms bb1 = bb - B*x for the coupling to other processes, then runs the
   requested sequential SOR sweep on the local diagonal block.  True parallel
   SOR is not supported; Eisenstat's trick is handled as a special case.
*/
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;
  PetscBool      hasop;

  PetscFunctionBegin;
  /* bb1 is only needed when more than one pass over the matrix will occur */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with a zero guess the ghost values are zero: skip the first scatter */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mat->diag) {
      /* lazily cache the diagonal; reused on subsequent Eisenstat applications */
      ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    ierr =
VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1672 1673 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1674 1675 /* local sweep */ 1676 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1677 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1678 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1679 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1680 1681 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1682 PetscFunctionReturn(0); 1683 } 1684 1685 #undef __FUNCT__ 1686 #define __FUNCT__ "MatPermute_MPIAIJ" 1687 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1688 { 1689 MPI_Comm comm; 1690 PetscInt first,local_rowsize,local_colsize; 1691 const PetscInt *rows; 1692 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1693 PetscErrorCode ierr; 1694 1695 PetscFunctionBegin; 1696 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1697 /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */ 1698 ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr); 1699 /* collect the global row permutation and invert it */ 1700 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1701 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1702 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 1703 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1704 ierr = ISDestroy(&growp);CHKERRQ(ierr); 1705 /* get the local target indices */ 1706 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1707 ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr); 1708 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1709 ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 1710 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1711 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 1712 /* the column permutation is so much easier; 1713 make a local 
version of 'colp' and invert it */ 1714 ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr); 1715 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1716 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1717 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 1718 /* now we just get the submatrix */ 1719 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1720 /* clean up */ 1721 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 1722 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 1723 PetscFunctionReturn(0); 1724 } 1725 1726 #undef __FUNCT__ 1727 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1728 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1729 { 1730 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1731 Mat A = mat->A,B = mat->B; 1732 PetscErrorCode ierr; 1733 PetscReal isend[5],irecv[5]; 1734 1735 PetscFunctionBegin; 1736 info->block_size = 1.0; 1737 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1738 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1739 isend[3] = info->memory; isend[4] = info->mallocs; 1740 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1741 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1742 isend[3] += info->memory; isend[4] += info->mallocs; 1743 if (flag == MAT_LOCAL) { 1744 info->nz_used = isend[0]; 1745 info->nz_allocated = isend[1]; 1746 info->nz_unneeded = isend[2]; 1747 info->memory = isend[3]; 1748 info->mallocs = isend[4]; 1749 } else if (flag == MAT_GLOBAL_MAX) { 1750 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1751 info->nz_used = irecv[0]; 1752 info->nz_allocated = irecv[1]; 1753 info->nz_unneeded = irecv[2]; 1754 info->memory = irecv[3]; 1755 info->mallocs = irecv[4]; 1756 } else if (flag == MAT_GLOBAL_SUM) { 1757 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 
1758 info->nz_used = irecv[0]; 1759 info->nz_allocated = irecv[1]; 1760 info->nz_unneeded = irecv[2]; 1761 info->memory = irecv[3]; 1762 info->mallocs = irecv[4]; 1763 } 1764 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1765 info->fill_ratio_needed = 0; 1766 info->factor_mallocs = 0; 1767 1768 PetscFunctionReturn(0); 1769 } 1770 1771 #undef __FUNCT__ 1772 #define __FUNCT__ "MatSetOption_MPIAIJ" 1773 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1774 { 1775 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1776 PetscErrorCode ierr; 1777 1778 PetscFunctionBegin; 1779 switch (op) { 1780 case MAT_NEW_NONZERO_LOCATIONS: 1781 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1782 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1783 case MAT_KEEP_NONZERO_PATTERN: 1784 case MAT_NEW_NONZERO_LOCATION_ERR: 1785 case MAT_USE_INODES: 1786 case MAT_IGNORE_ZERO_ENTRIES: 1787 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1788 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1789 break; 1790 case MAT_ROW_ORIENTED: 1791 a->roworiented = flg; 1792 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1793 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1794 break; 1795 case MAT_NEW_DIAGONALS: 1796 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1797 break; 1798 case MAT_IGNORE_OFF_PROC_ENTRIES: 1799 a->donotstash = flg; 1800 break; 1801 case MAT_SPD: 1802 A->spd_set = PETSC_TRUE; 1803 A->spd = flg; 1804 if (flg) { 1805 A->symmetric = PETSC_TRUE; 1806 A->structurally_symmetric = PETSC_TRUE; 1807 A->symmetric_set = PETSC_TRUE; 1808 A->structurally_symmetric_set = PETSC_TRUE; 1809 } 1810 break; 1811 case MAT_SYMMETRIC: 1812 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1813 break; 1814 case MAT_STRUCTURALLY_SYMMETRIC: 1815 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1816 break; 1817 case MAT_HERMITIAN: 1818 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1819 break; 1820 case MAT_SYMMETRY_ETERNAL: 1821 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1822 
break; 1823 default: 1824 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1825 } 1826 PetscFunctionReturn(0); 1827 } 1828 1829 #undef __FUNCT__ 1830 #define __FUNCT__ "MatGetRow_MPIAIJ" 1831 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1832 { 1833 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1834 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1835 PetscErrorCode ierr; 1836 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1837 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1838 PetscInt *cmap,*idx_p; 1839 1840 PetscFunctionBegin; 1841 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1842 mat->getrowactive = PETSC_TRUE; 1843 1844 if (!mat->rowvalues && (idx || v)) { 1845 /* 1846 allocate enough space to hold information from the longest row. 1847 */ 1848 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1849 PetscInt max = 1,tmp; 1850 for (i=0; i<matin->rmap->n; i++) { 1851 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1852 if (max < tmp) { max = tmp; } 1853 } 1854 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1855 } 1856 1857 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1858 lrow = row - rstart; 1859 1860 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1861 if (!v) {pvA = 0; pvB = 0;} 1862 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1863 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1864 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1865 nztot = nzA + nzB; 1866 1867 cmap = mat->garray; 1868 if (v || idx) { 1869 if (nztot) { 1870 /* Sort by increasing column numbers, assuming A and B already sorted */ 1871 PetscInt imark = -1; 1872 if (v) { 1873 *v = v_p = mat->rowvalues; 1874 for (i=0; i<nzB; i++) { 1875 if 
(cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* split point already found while copying values */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
/*
   MatRestoreRow_MPIAIJ - Releases the row obtained with MatGetRow_MPIAIJ()
   by clearing the getrowactive guard; the work buffers are reused.
*/
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
/*
   MatNorm_MPIAIJ - Frobenius, 1- and infinity-norms of an MPIAIJ matrix,
   combining local contributions of the A and B blocks with an Allreduce.
   NORM_2 is not supported.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap->rstart;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    /* uniprocessor: delegate to the sequential norm */
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = PetscSqrtReal(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      /* accumulate per-global-column absolute sums, then reduce */
      ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        v = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
/*
   MatTranspose_MPIAIJ - Forms B = A^T by inserting each local row of A
   (and of the off-diagonal block) as a column of B with MatSetValues().
   o_nnz is approximated by d_nnz to avoid communication during
   preallocation.
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
  PetscInt       cstart=A->cmap->rstart,ncol;
  Mat            B;
  MatScalar      *array;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");

  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
    ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
    ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++){
      d_nnz[aj[i]] ++;
      aj[i] += cstart; /* global col index to be used by MatSetValues() */
    }

    ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
    ierr = PetscFree(d_nnz);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  /* copy over the A part: row i of A becomes column i of B */
  array = Aloc->a;
  row = A->rmap->rstart;
  for (i=0; i<ma; i++) {
    ncol = ai[i+1]-ai[i];
    ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; aj += ncol;
  }
  aj = Aloc->j;
  for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2044 2045 /* copy over the B part */ 2046 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2047 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2048 array = Bloc->a; 2049 row = A->rmap->rstart; 2050 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 2051 cols_tmp = cols; 2052 for (i=0; i<mb; i++) { 2053 ncol = bi[i+1]-bi[i]; 2054 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2055 row++; array += ncol; cols_tmp += ncol; 2056 } 2057 ierr = PetscFree(cols);CHKERRQ(ierr); 2058 2059 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2060 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2061 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2062 *matout = B; 2063 } else { 2064 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2065 } 2066 PetscFunctionReturn(0); 2067 } 2068 2069 #undef __FUNCT__ 2070 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2071 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2072 { 2073 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2074 Mat a = aij->A,b = aij->B; 2075 PetscErrorCode ierr; 2076 PetscInt s1,s2,s3; 2077 2078 PetscFunctionBegin; 2079 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2080 if (rr) { 2081 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2082 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2083 /* Overlap communication with computation. 
*/ 2084 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2085 } 2086 if (ll) { 2087 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2088 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2089 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2090 } 2091 /* scale the diagonal block */ 2092 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2093 2094 if (rr) { 2095 /* Do a scatter end and then right scale the off-diagonal block */ 2096 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2097 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2098 } 2099 2100 PetscFunctionReturn(0); 2101 } 2102 2103 #undef __FUNCT__ 2104 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 2105 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 2106 { 2107 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2108 PetscErrorCode ierr; 2109 2110 PetscFunctionBegin; 2111 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 2112 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 2113 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 2114 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 2115 PetscFunctionReturn(0); 2116 } 2117 #undef __FUNCT__ 2118 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2119 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2120 { 2121 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2122 PetscErrorCode ierr; 2123 2124 PetscFunctionBegin; 2125 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2126 PetscFunctionReturn(0); 2127 } 2128 2129 #undef __FUNCT__ 2130 #define __FUNCT__ "MatEqual_MPIAIJ" 2131 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2132 { 2133 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2134 Mat a,b,c,d; 2135 PetscBool flg; 2136 PetscErrorCode ierr; 2137 2138 PetscFunctionBegin; 2139 a = matA->A; b = matA->B; 2140 c = matB->A; d = matB->B; 2141 2142 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2143 if 
(flg) { 2144 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2145 } 2146 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2147 PetscFunctionReturn(0); 2148 } 2149 2150 #undef __FUNCT__ 2151 #define __FUNCT__ "MatCopy_MPIAIJ" 2152 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2153 { 2154 PetscErrorCode ierr; 2155 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2156 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2157 2158 PetscFunctionBegin; 2159 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2160 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2161 /* because of the column compression in the off-processor part of the matrix a->B, 2162 the number of columns in a->B and b->B may be different, hence we cannot call 2163 the MatCopy() directly on the two parts. If need be, we can provide a more 2164 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2165 then copying the submatrices */ 2166 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2167 } else { 2168 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2169 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2170 } 2171 PetscFunctionReturn(0); 2172 } 2173 2174 #undef __FUNCT__ 2175 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 2176 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 2177 { 2178 PetscErrorCode ierr; 2179 2180 PetscFunctionBegin; 2181 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2182 PetscFunctionReturn(0); 2183 } 2184 2185 #undef __FUNCT__ 2186 #define __FUNCT__ "MatAXPY_MPIAIJ" 2187 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2188 { 2189 PetscErrorCode ierr; 2190 PetscInt i; 2191 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2192 PetscBLASInt bnz,one=1; 2193 Mat_SeqAIJ *x,*y; 2194 2195 PetscFunctionBegin; 2196 if (str == SAME_NONZERO_PATTERN) { 2197 PetscScalar alpha = a; 2198 x = 
(Mat_SeqAIJ *)xx->A->data; 2199 y = (Mat_SeqAIJ *)yy->A->data; 2200 bnz = PetscBLASIntCast(x->nz); 2201 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2202 x = (Mat_SeqAIJ *)xx->B->data; 2203 y = (Mat_SeqAIJ *)yy->B->data; 2204 bnz = PetscBLASIntCast(x->nz); 2205 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2206 } else if (str == SUBSET_NONZERO_PATTERN) { 2207 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2208 2209 x = (Mat_SeqAIJ *)xx->B->data; 2210 y = (Mat_SeqAIJ *)yy->B->data; 2211 if (y->xtoy && y->XtoY != xx->B) { 2212 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2213 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2214 } 2215 if (!y->xtoy) { /* get xtoy */ 2216 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2217 y->XtoY = xx->B; 2218 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2219 } 2220 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2221 } else { 2222 Mat B; 2223 PetscInt *nnz_d,*nnz_o; 2224 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2225 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2226 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2227 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2228 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2229 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2230 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2231 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr); 2232 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2233 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2234 ierr = MatHeaderReplace(Y,B); 2235 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2236 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2237 } 2238 PetscFunctionReturn(0); 2239 } 2240 2241 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2242 2243 #undef __FUNCT__ 2244 
#define __FUNCT__ "MatConjugate_MPIAIJ" 2245 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2246 { 2247 #if defined(PETSC_USE_COMPLEX) 2248 PetscErrorCode ierr; 2249 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2250 2251 PetscFunctionBegin; 2252 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2253 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2254 #else 2255 PetscFunctionBegin; 2256 #endif 2257 PetscFunctionReturn(0); 2258 } 2259 2260 #undef __FUNCT__ 2261 #define __FUNCT__ "MatRealPart_MPIAIJ" 2262 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2263 { 2264 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2265 PetscErrorCode ierr; 2266 2267 PetscFunctionBegin; 2268 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2269 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2270 PetscFunctionReturn(0); 2271 } 2272 2273 #undef __FUNCT__ 2274 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2275 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2276 { 2277 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2278 PetscErrorCode ierr; 2279 2280 PetscFunctionBegin; 2281 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2282 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2283 PetscFunctionReturn(0); 2284 } 2285 2286 #ifdef PETSC_HAVE_PBGL 2287 2288 #include <boost/parallel/mpi/bsp_process_group.hpp> 2289 #include <boost/graph/distributed/ilu_default_graph.hpp> 2290 #include <boost/graph/distributed/ilu_0_block.hpp> 2291 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2292 #include <boost/graph/distributed/petsc/interface.hpp> 2293 #include <boost/multi_array.hpp> 2294 #include <boost/parallel/distributed_property_map->hpp> 2295 2296 #undef __FUNCT__ 2297 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2298 /* 2299 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2300 */ 2301 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2302 { 2303 namespace petsc = boost::distributed::petsc; 2304 2305 namespace graph_dist = boost::graph::distributed; 
2306 using boost::graph::distributed::ilu_default::process_group_type; 2307 using boost::graph::ilu_permuted; 2308 2309 PetscBool row_identity, col_identity; 2310 PetscContainer c; 2311 PetscInt m, n, M, N; 2312 PetscErrorCode ierr; 2313 2314 PetscFunctionBegin; 2315 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2316 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2317 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2318 if (!row_identity || !col_identity) { 2319 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2320 } 2321 2322 process_group_type pg; 2323 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2324 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2325 lgraph_type& level_graph = *lgraph_p; 2326 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2327 2328 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2329 ilu_permuted(level_graph); 2330 2331 /* put together the new matrix */ 2332 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2333 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2334 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2335 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2336 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2337 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2338 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2339 2340 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 2341 ierr = PetscContainerSetPointer(c, lgraph_p); 2342 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2343 ierr = PetscContainerDestroy(&c); 2344 PetscFunctionReturn(0); 2345 } 2346 2347 #undef __FUNCT__ 2348 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2349 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const 
MatFactorInfo *info) 2350 { 2351 PetscFunctionBegin; 2352 PetscFunctionReturn(0); 2353 } 2354 2355 #undef __FUNCT__ 2356 #define __FUNCT__ "MatSolve_MPIAIJ" 2357 /* 2358 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2359 */ 2360 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2361 { 2362 namespace graph_dist = boost::graph::distributed; 2363 2364 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2365 lgraph_type* lgraph_p; 2366 PetscContainer c; 2367 PetscErrorCode ierr; 2368 2369 PetscFunctionBegin; 2370 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2371 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2372 ierr = VecCopy(b, x);CHKERRQ(ierr); 2373 2374 PetscScalar* array_x; 2375 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2376 PetscInt sx; 2377 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2378 2379 PetscScalar* array_b; 2380 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2381 PetscInt sb; 2382 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2383 2384 lgraph_type& level_graph = *lgraph_p; 2385 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2386 2387 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2388 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2389 ref_x(array_x, boost::extents[num_vertices(graph)]); 2390 2391 typedef boost::iterator_property_map<array_ref_type::iterator, 2392 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2393 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2394 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2395 2396 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2397 2398 PetscFunctionReturn(0); 2399 } 2400 #endif 2401 2402 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2403 PetscInt nzlocal,nsends,nrecvs; 2404 PetscMPIInt *send_rank,*recv_rank; 2405 PetscInt 
*sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2406 PetscScalar *sbuf_a,**rbuf_a; 2407 PetscErrorCode (*Destroy)(Mat); 2408 } Mat_Redundant; 2409 2410 #undef __FUNCT__ 2411 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2412 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2413 { 2414 PetscErrorCode ierr; 2415 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2416 PetscInt i; 2417 2418 PetscFunctionBegin; 2419 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2420 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2421 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2422 for (i=0; i<redund->nrecvs; i++){ 2423 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2424 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2425 } 2426 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2427 ierr = PetscFree(redund);CHKERRQ(ierr); 2428 PetscFunctionReturn(0); 2429 } 2430 2431 #undef __FUNCT__ 2432 #define __FUNCT__ "MatDestroy_MatRedundant" 2433 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2434 { 2435 PetscErrorCode ierr; 2436 PetscContainer container; 2437 Mat_Redundant *redund=PETSC_NULL; 2438 2439 PetscFunctionBegin; 2440 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2441 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2442 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2443 A->ops->destroy = redund->Destroy; 2444 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2445 if (A->ops->destroy) { 2446 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2447 } 2448 PetscFunctionReturn(0); 2449 } 2450 2451 #undef __FUNCT__ 2452 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2453 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2454 { 2455 PetscMPIInt rank,size; 2456 MPI_Comm 
comm=((PetscObject)mat)->comm; 2457 PetscErrorCode ierr; 2458 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2459 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2460 PetscInt *rowrange=mat->rmap->range; 2461 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2462 Mat A=aij->A,B=aij->B,C=*matredundant; 2463 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2464 PetscScalar *sbuf_a; 2465 PetscInt nzlocal=a->nz+b->nz; 2466 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2467 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2468 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2469 MatScalar *aworkA,*aworkB; 2470 PetscScalar *vals; 2471 PetscMPIInt tag1,tag2,tag3,imdex; 2472 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2473 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2474 MPI_Status recv_status,*send_status; 2475 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2476 PetscInt **rbuf_j=PETSC_NULL; 2477 PetscScalar **rbuf_a=PETSC_NULL; 2478 Mat_Redundant *redund=PETSC_NULL; 2479 PetscContainer container; 2480 2481 PetscFunctionBegin; 2482 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2483 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2484 2485 if (reuse == MAT_REUSE_MATRIX) { 2486 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2487 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2488 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2489 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong local size"); 2490 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2491 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2492 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2493 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2494 2495 nsends = redund->nsends; 2496 nrecvs = redund->nrecvs; 2497 send_rank = redund->send_rank; 2498 recv_rank = redund->recv_rank; 2499 sbuf_nz = redund->sbuf_nz; 2500 rbuf_nz = redund->rbuf_nz; 2501 sbuf_j = redund->sbuf_j; 2502 sbuf_a = redund->sbuf_a; 2503 rbuf_j = redund->rbuf_j; 2504 rbuf_a = redund->rbuf_a; 2505 } 2506 2507 if (reuse == MAT_INITIAL_MATRIX){ 2508 PetscMPIInt subrank,subsize; 2509 PetscInt nleftover,np_subcomm; 2510 /* get the destination processors' id send_rank, nsends and nrecvs */ 2511 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2512 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2513 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2514 np_subcomm = size/nsubcomm; 2515 nleftover = size - nsubcomm*np_subcomm; 2516 nsends = 0; nrecvs = 0; 2517 for (i=0; i<size; i++){ /* i=rank*/ 2518 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2519 send_rank[nsends] = i; nsends++; 2520 recv_rank[nrecvs++] = i; 2521 } 2522 } 2523 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2524 i = size-nleftover-1; 2525 j = 0; 2526 while (j < nsubcomm - nleftover){ 2527 send_rank[nsends++] = i; 2528 i--; j++; 2529 } 2530 } 2531 2532 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2533 for (i=0; i<nleftover; i++){ 2534 recv_rank[nrecvs++] = size-nleftover+i; 2535 } 2536 } 2537 2538 /* allocate sbuf_j, sbuf_a */ 2539 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2540 ierr = 
PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2541 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2542 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2543 2544 /* copy mat's local entries into the buffers */ 2545 if (reuse == MAT_INITIAL_MATRIX){ 2546 rownz_max = 0; 2547 rptr = sbuf_j; 2548 cols = sbuf_j + rend-rstart + 1; 2549 vals = sbuf_a; 2550 rptr[0] = 0; 2551 for (i=0; i<rend-rstart; i++){ 2552 row = i + rstart; 2553 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2554 ncols = nzA + nzB; 2555 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2556 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2557 /* load the column indices for this row into cols */ 2558 lwrite = 0; 2559 for (l=0; l<nzB; l++) { 2560 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2561 vals[lwrite] = aworkB[l]; 2562 cols[lwrite++] = ctmp; 2563 } 2564 } 2565 for (l=0; l<nzA; l++){ 2566 vals[lwrite] = aworkA[l]; 2567 cols[lwrite++] = cstart + cworkA[l]; 2568 } 2569 for (l=0; l<nzB; l++) { 2570 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2571 vals[lwrite] = aworkB[l]; 2572 cols[lwrite++] = ctmp; 2573 } 2574 } 2575 vals += ncols; 2576 cols += ncols; 2577 rptr[i+1] = rptr[i] + ncols; 2578 if (rownz_max < ncols) rownz_max = ncols; 2579 } 2580 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2581 } else { /* only copy matrix values into sbuf_a */ 2582 rptr = sbuf_j; 2583 vals = sbuf_a; 2584 rptr[0] = 0; 2585 for (i=0; i<rend-rstart; i++){ 2586 row = i + rstart; 2587 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2588 ncols = nzA + nzB; 2589 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2590 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2591 lwrite = 0; 2592 for (l=0; l<nzB; l++) { 2593 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2594 } 2595 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2596 for (l=0; l<nzB; l++) { 2597 if 
((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2598 } 2599 vals += ncols; 2600 rptr[i+1] = rptr[i] + ncols; 2601 } 2602 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2603 2604 /* send nzlocal to others, and recv other's nzlocal */ 2605 /*--------------------------------------------------*/ 2606 if (reuse == MAT_INITIAL_MATRIX){ 2607 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2608 s_waits2 = s_waits3 + nsends; 2609 s_waits1 = s_waits2 + nsends; 2610 r_waits1 = s_waits1 + nsends; 2611 r_waits2 = r_waits1 + nrecvs; 2612 r_waits3 = r_waits2 + nrecvs; 2613 } else { 2614 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2615 r_waits3 = s_waits3 + nsends; 2616 } 2617 2618 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2619 if (reuse == MAT_INITIAL_MATRIX){ 2620 /* get new tags to keep the communication clean */ 2621 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2622 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2623 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2624 2625 /* post receives of other's nzlocal */ 2626 for (i=0; i<nrecvs; i++){ 2627 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2628 } 2629 /* send nzlocal to others */ 2630 for (i=0; i<nsends; i++){ 2631 sbuf_nz[i] = nzlocal; 2632 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2633 } 2634 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2635 count = nrecvs; 2636 while (count) { 2637 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2638 recv_rank[imdex] = recv_status.MPI_SOURCE; 2639 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2640 ierr = 
PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2641 2642 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2643 rbuf_nz[imdex] += i + 2; 2644 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2645 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2646 count--; 2647 } 2648 /* wait on sends of nzlocal */ 2649 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2650 /* send mat->i,j to others, and recv from other's */ 2651 /*------------------------------------------------*/ 2652 for (i=0; i<nsends; i++){ 2653 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2654 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2655 } 2656 /* wait on receives of mat->i,j */ 2657 /*------------------------------*/ 2658 count = nrecvs; 2659 while (count) { 2660 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2661 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2662 count--; 2663 } 2664 /* wait on sends of mat->i,j */ 2665 /*---------------------------*/ 2666 if (nsends) { 2667 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2668 } 2669 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2670 2671 /* post receives, send and receive mat->a */ 2672 /*----------------------------------------*/ 2673 for (imdex=0; imdex<nrecvs; imdex++) { 2674 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2675 } 2676 for (i=0; i<nsends; i++){ 2677 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2678 } 2679 count = nrecvs; 2680 while (count) { 2681 ierr = 
MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2682 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2683 count--; 2684 } 2685 if (nsends) { 2686 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2687 } 2688 2689 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2690 2691 /* create redundant matrix */ 2692 /*-------------------------*/ 2693 if (reuse == MAT_INITIAL_MATRIX){ 2694 /* compute rownz_max for preallocation */ 2695 for (imdex=0; imdex<nrecvs; imdex++){ 2696 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2697 rptr = rbuf_j[imdex]; 2698 for (i=0; i<j; i++){ 2699 ncols = rptr[i+1] - rptr[i]; 2700 if (rownz_max < ncols) rownz_max = ncols; 2701 } 2702 } 2703 2704 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2705 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2706 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2707 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2708 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2709 } else { 2710 C = *matredundant; 2711 } 2712 2713 /* insert local matrix entries */ 2714 rptr = sbuf_j; 2715 cols = sbuf_j + rend-rstart + 1; 2716 vals = sbuf_a; 2717 for (i=0; i<rend-rstart; i++){ 2718 row = i + rstart; 2719 ncols = rptr[i+1] - rptr[i]; 2720 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2721 vals += ncols; 2722 cols += ncols; 2723 } 2724 /* insert received matrix entries */ 2725 for (imdex=0; imdex<nrecvs; imdex++){ 2726 rstart = rowrange[recv_rank[imdex]]; 2727 rend = rowrange[recv_rank[imdex]+1]; 2728 rptr = rbuf_j[imdex]; 2729 cols = rbuf_j[imdex] + rend-rstart + 1; 2730 vals = rbuf_a[imdex]; 2731 for (i=0; i<rend-rstart; i++){ 2732 row = i + rstart; 2733 ncols = rptr[i+1] - rptr[i]; 2734 ierr = 
MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2735 vals += ncols; 2736 cols += ncols; 2737 } 2738 } 2739 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2740 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2741 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2742 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2743 if (reuse == MAT_INITIAL_MATRIX) { 2744 PetscContainer container; 2745 *matredundant = C; 2746 /* create a supporting struct and attach it to C for reuse */ 2747 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2748 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2749 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2750 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2751 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2752 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2753 2754 redund->nzlocal = nzlocal; 2755 redund->nsends = nsends; 2756 redund->nrecvs = nrecvs; 2757 redund->send_rank = send_rank; 2758 redund->recv_rank = recv_rank; 2759 redund->sbuf_nz = sbuf_nz; 2760 redund->rbuf_nz = rbuf_nz; 2761 redund->sbuf_j = sbuf_j; 2762 redund->sbuf_a = sbuf_a; 2763 redund->rbuf_j = rbuf_j; 2764 redund->rbuf_a = rbuf_a; 2765 2766 redund->Destroy = C->ops->destroy; 2767 C->ops->destroy = MatDestroy_MatRedundant; 2768 } 2769 PetscFunctionReturn(0); 2770 } 2771 2772 #undef __FUNCT__ 2773 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2774 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2775 { 2776 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2777 PetscErrorCode ierr; 2778 PetscInt i,*idxb = 0; 2779 PetscScalar *va,*vb; 2780 Vec vtmp; 2781 2782 PetscFunctionBegin; 2783 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2784 ierr = 
VecGetArray(v,&va);CHKERRQ(ierr); 2785 if (idx) { 2786 for (i=0; i<A->rmap->n; i++) { 2787 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2788 } 2789 } 2790 2791 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2792 if (idx) { 2793 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2794 } 2795 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2796 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2797 2798 for (i=0; i<A->rmap->n; i++){ 2799 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2800 va[i] = vb[i]; 2801 if (idx) idx[i] = a->garray[idxb[i]]; 2802 } 2803 } 2804 2805 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2806 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2807 ierr = PetscFree(idxb);CHKERRQ(ierr); 2808 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2809 PetscFunctionReturn(0); 2810 } 2811 2812 #undef __FUNCT__ 2813 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2814 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2815 { 2816 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2817 PetscErrorCode ierr; 2818 PetscInt i,*idxb = 0; 2819 PetscScalar *va,*vb; 2820 Vec vtmp; 2821 2822 PetscFunctionBegin; 2823 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2824 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2825 if (idx) { 2826 for (i=0; i<A->cmap->n; i++) { 2827 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2828 } 2829 } 2830 2831 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2832 if (idx) { 2833 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2834 } 2835 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2836 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2837 2838 for (i=0; i<A->rmap->n; i++){ 2839 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2840 va[i] = vb[i]; 2841 if (idx) idx[i] = a->garray[idxb[i]]; 2842 } 2843 } 2844 2845 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2846 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2847 ierr = 
PetscFree(idxb);CHKERRQ(ierr); 2848 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2849 PetscFunctionReturn(0); 2850 } 2851 2852 #undef __FUNCT__ 2853 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2854 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2855 { 2856 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2857 PetscInt n = A->rmap->n; 2858 PetscInt cstart = A->cmap->rstart; 2859 PetscInt *cmap = mat->garray; 2860 PetscInt *diagIdx, *offdiagIdx; 2861 Vec diagV, offdiagV; 2862 PetscScalar *a, *diagA, *offdiagA; 2863 PetscInt r; 2864 PetscErrorCode ierr; 2865 2866 PetscFunctionBegin; 2867 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2868 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2869 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2870 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2871 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2872 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2873 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2874 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2875 for(r = 0; r < n; ++r) { 2876 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2877 a[r] = diagA[r]; 2878 idx[r] = cstart + diagIdx[r]; 2879 } else { 2880 a[r] = offdiagA[r]; 2881 idx[r] = cmap[offdiagIdx[r]]; 2882 } 2883 } 2884 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2885 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2886 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2887 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2888 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2889 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2890 PetscFunctionReturn(0); 2891 } 2892 2893 #undef __FUNCT__ 2894 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2895 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2896 { 2897 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2898 PetscInt n = A->rmap->n; 2899 PetscInt cstart = A->cmap->rstart; 2900 
PetscInt *cmap = mat->garray; 2901 PetscInt *diagIdx, *offdiagIdx; 2902 Vec diagV, offdiagV; 2903 PetscScalar *a, *diagA, *offdiagA; 2904 PetscInt r; 2905 PetscErrorCode ierr; 2906 2907 PetscFunctionBegin; 2908 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2909 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2910 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2911 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2912 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2913 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2914 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2915 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2916 for(r = 0; r < n; ++r) { 2917 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2918 a[r] = diagA[r]; 2919 idx[r] = cstart + diagIdx[r]; 2920 } else { 2921 a[r] = offdiagA[r]; 2922 idx[r] = cmap[offdiagIdx[r]]; 2923 } 2924 } 2925 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2926 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2927 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2928 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2929 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2930 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2931 PetscFunctionReturn(0); 2932 } 2933 2934 #undef __FUNCT__ 2935 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2936 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2937 { 2938 PetscErrorCode ierr; 2939 Mat *dummy; 2940 2941 PetscFunctionBegin; 2942 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2943 *newmat = *dummy; 2944 ierr = PetscFree(dummy);CHKERRQ(ierr); 2945 PetscFunctionReturn(0); 2946 } 2947 2948 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2949 2950 #undef __FUNCT__ 2951 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 2952 PetscErrorCode 
MatInvertBlockDiagonal_MPIAIJ(Mat A,PetscScalar **values) 2953 { 2954 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2955 PetscErrorCode ierr; 2956 2957 PetscFunctionBegin; 2958 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2959 PetscFunctionReturn(0); 2960 } 2961 2962 2963 /* -------------------------------------------------------------------*/ 2964 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2965 MatGetRow_MPIAIJ, 2966 MatRestoreRow_MPIAIJ, 2967 MatMult_MPIAIJ, 2968 /* 4*/ MatMultAdd_MPIAIJ, 2969 MatMultTranspose_MPIAIJ, 2970 MatMultTransposeAdd_MPIAIJ, 2971 #ifdef PETSC_HAVE_PBGL 2972 MatSolve_MPIAIJ, 2973 #else 2974 0, 2975 #endif 2976 0, 2977 0, 2978 /*10*/ 0, 2979 0, 2980 0, 2981 MatSOR_MPIAIJ, 2982 MatTranspose_MPIAIJ, 2983 /*15*/ MatGetInfo_MPIAIJ, 2984 MatEqual_MPIAIJ, 2985 MatGetDiagonal_MPIAIJ, 2986 MatDiagonalScale_MPIAIJ, 2987 MatNorm_MPIAIJ, 2988 /*20*/ MatAssemblyBegin_MPIAIJ, 2989 MatAssemblyEnd_MPIAIJ, 2990 MatSetOption_MPIAIJ, 2991 MatZeroEntries_MPIAIJ, 2992 /*24*/ MatZeroRows_MPIAIJ, 2993 0, 2994 #ifdef PETSC_HAVE_PBGL 2995 0, 2996 #else 2997 0, 2998 #endif 2999 0, 3000 0, 3001 /*29*/ MatSetUpPreallocation_MPIAIJ, 3002 #ifdef PETSC_HAVE_PBGL 3003 0, 3004 #else 3005 0, 3006 #endif 3007 0, 3008 0, 3009 0, 3010 /*34*/ MatDuplicate_MPIAIJ, 3011 0, 3012 0, 3013 0, 3014 0, 3015 /*39*/ MatAXPY_MPIAIJ, 3016 MatGetSubMatrices_MPIAIJ, 3017 MatIncreaseOverlap_MPIAIJ, 3018 MatGetValues_MPIAIJ, 3019 MatCopy_MPIAIJ, 3020 /*44*/ MatGetRowMax_MPIAIJ, 3021 MatScale_MPIAIJ, 3022 0, 3023 0, 3024 MatZeroRowsColumns_MPIAIJ, 3025 /*49*/ MatSetBlockSize_MPIAIJ, 3026 0, 3027 0, 3028 0, 3029 0, 3030 /*54*/ MatFDColoringCreate_MPIAIJ, 3031 0, 3032 MatSetUnfactored_MPIAIJ, 3033 MatPermute_MPIAIJ, 3034 0, 3035 /*59*/ MatGetSubMatrix_MPIAIJ, 3036 MatDestroy_MPIAIJ, 3037 MatView_MPIAIJ, 3038 0, 3039 0, 3040 /*64*/ 0, 3041 0, 3042 0, 3043 0, 3044 0, 3045 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3046 MatGetRowMinAbs_MPIAIJ, 3047 0, 3048 MatSetColoring_MPIAIJ, 3049 
#if defined(PETSC_HAVE_ADIC) 3050 MatSetValuesAdic_MPIAIJ, 3051 #else 3052 0, 3053 #endif 3054 MatSetValuesAdifor_MPIAIJ, 3055 /*75*/ MatFDColoringApply_AIJ, 3056 0, 3057 0, 3058 0, 3059 0, 3060 /*80*/ 0, 3061 0, 3062 0, 3063 /*83*/ MatLoad_MPIAIJ, 3064 0, 3065 0, 3066 0, 3067 0, 3068 0, 3069 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3070 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3071 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3072 MatPtAP_Basic, 3073 MatPtAPSymbolic_MPIAIJ, 3074 /*94*/ MatPtAPNumeric_MPIAIJ, 3075 0, 3076 0, 3077 0, 3078 0, 3079 /*99*/ 0, 3080 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3081 MatPtAPNumeric_MPIAIJ_MPIAIJ, 3082 MatConjugate_MPIAIJ, 3083 0, 3084 /*104*/MatSetValuesRow_MPIAIJ, 3085 MatRealPart_MPIAIJ, 3086 MatImaginaryPart_MPIAIJ, 3087 0, 3088 0, 3089 /*109*/0, 3090 MatGetRedundantMatrix_MPIAIJ, 3091 MatGetRowMin_MPIAIJ, 3092 0, 3093 0, 3094 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3095 0, 3096 0, 3097 0, 3098 0, 3099 /*119*/0, 3100 0, 3101 0, 3102 0, 3103 MatGetMultiProcBlock_MPIAIJ, 3104 /*124*/MatFindNonZeroRows_MPIAIJ, 3105 MatGetColumnNorms_MPIAIJ, 3106 MatInvertBlockDiagonal_MPIAIJ, 3107 0, 3108 MatGetSubMatricesParallel_MPIAIJ, 3109 /*129*/0, 3110 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3111 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3112 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3113 0, 3114 /*134*/0, 3115 0, 3116 0, 3117 0, 3118 0 3119 }; 3120 3121 /* ----------------------------------------------------------------------------------------*/ 3122 3123 EXTERN_C_BEGIN 3124 #undef __FUNCT__ 3125 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3126 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3127 { 3128 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3129 PetscErrorCode ierr; 3130 3131 PetscFunctionBegin; 3132 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3133 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3134 PetscFunctionReturn(0); 3135 } 3136 EXTERN_C_END 3137 3138 EXTERN_C_BEGIN 3139 #undef __FUNCT__ 3140 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3141 PetscErrorCode 
MatRetrieveValues_MPIAIJ(Mat mat) 3142 { 3143 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3144 PetscErrorCode ierr; 3145 3146 PetscFunctionBegin; 3147 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3148 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3149 PetscFunctionReturn(0); 3150 } 3151 EXTERN_C_END 3152 3153 EXTERN_C_BEGIN 3154 #undef __FUNCT__ 3155 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3156 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3157 { 3158 Mat_MPIAIJ *b; 3159 PetscErrorCode ierr; 3160 PetscInt i; 3161 3162 PetscFunctionBegin; 3163 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3164 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3165 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3166 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3167 3168 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3169 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3170 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3171 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3172 if (d_nnz) { 3173 for (i=0; i<B->rmap->n; i++) { 3174 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3175 } 3176 } 3177 if (o_nnz) { 3178 for (i=0; i<B->rmap->n; i++) { 3179 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3180 } 3181 } 3182 b = (Mat_MPIAIJ*)B->data; 3183 3184 if (!B->preallocated) { 3185 /* Explicitly create 2 MATSEQAIJ matrices. 
*/ 3186 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3187 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3188 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3189 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3190 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3191 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3192 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3193 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3194 } 3195 3196 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3197 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3198 B->preallocated = PETSC_TRUE; 3199 PetscFunctionReturn(0); 3200 } 3201 EXTERN_C_END 3202 3203 #undef __FUNCT__ 3204 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3205 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3206 { 3207 Mat mat; 3208 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3209 PetscErrorCode ierr; 3210 3211 PetscFunctionBegin; 3212 *newmat = 0; 3213 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3214 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3215 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3216 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3217 a = (Mat_MPIAIJ*)mat->data; 3218 3219 mat->factortype = matin->factortype; 3220 mat->rmap->bs = matin->rmap->bs; 3221 mat->assembled = PETSC_TRUE; 3222 mat->insertmode = NOT_SET_VALUES; 3223 mat->preallocated = PETSC_TRUE; 3224 3225 a->size = oldmat->size; 3226 a->rank = oldmat->rank; 3227 a->donotstash = oldmat->donotstash; 3228 a->roworiented = oldmat->roworiented; 3229 a->rowindices = 0; 3230 a->rowvalues = 0; 3231 a->getrowactive = PETSC_FALSE; 3232 3233 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3234 ierr = 
PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3235 3236 if (oldmat->colmap) { 3237 #if defined (PETSC_USE_CTABLE) 3238 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3239 #else 3240 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3241 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3242 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3243 #endif 3244 } else a->colmap = 0; 3245 if (oldmat->garray) { 3246 PetscInt len; 3247 len = oldmat->B->cmap->n; 3248 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3249 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3250 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3251 } else a->garray = 0; 3252 3253 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3254 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3255 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3256 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3257 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3258 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3259 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3260 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3261 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3262 *newmat = mat; 3263 PetscFunctionReturn(0); 3264 } 3265 3266 3267 3268 #undef __FUNCT__ 3269 #define __FUNCT__ "MatLoad_MPIAIJ" 3270 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3271 { 3272 PetscScalar *vals,*svals; 3273 MPI_Comm comm = ((PetscObject)viewer)->comm; 3274 PetscErrorCode ierr; 3275 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3276 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3277 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3278 PetscInt *ourlens = 
PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 3279 PetscInt cend,cstart,n,*rowners,sizesset=1; 3280 int fd; 3281 3282 PetscFunctionBegin; 3283 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3284 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3285 if (!rank) { 3286 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3287 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3288 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3289 } 3290 3291 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3292 3293 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3294 M = header[1]; N = header[2]; 3295 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3296 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3297 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3298 3299 /* If global sizes are set, check if they are consistent with that given in the file */ 3300 if (sizesset) { 3301 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3302 } 3303 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3304 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3305 3306 /* determine ownership of all rows */ 3307 if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */ 3308 else m = newMat->rmap->n; /* Set by user */ 3309 3310 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3311 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3312 3313 /* First process needs enough room for process with most rows */ 3314 if (!rank) { 3315 mmax = 
rowners[1]; 3316 for (i=2; i<size; i++) { 3317 mmax = PetscMax(mmax,rowners[i]); 3318 } 3319 } else mmax = m; 3320 3321 rowners[0] = 0; 3322 for (i=2; i<=size; i++) { 3323 rowners[i] += rowners[i-1]; 3324 } 3325 rstart = rowners[rank]; 3326 rend = rowners[rank+1]; 3327 3328 /* distribute row lengths to all processors */ 3329 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3330 if (!rank) { 3331 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3332 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3333 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3334 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3335 for (j=0; j<m; j++) { 3336 procsnz[0] += ourlens[j]; 3337 } 3338 for (i=1; i<size; i++) { 3339 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3340 /* calculate the number of nonzeros on each processor */ 3341 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3342 procsnz[i] += rowlengths[j]; 3343 } 3344 ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3345 } 3346 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3347 } else { 3348 ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3349 } 3350 3351 if (!rank) { 3352 /* determine max buffer needed and allocate it */ 3353 maxnz = 0; 3354 for (i=0; i<size; i++) { 3355 maxnz = PetscMax(maxnz,procsnz[i]); 3356 } 3357 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3358 3359 /* read in my part of the matrix column indices */ 3360 nz = procsnz[0]; 3361 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3362 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3363 3364 /* read in every one elses and ship off */ 3365 for (i=1; i<size; i++) { 3366 nz = procsnz[i]; 3367 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3368 ierr = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3369 } 3370 
ierr = PetscFree(cols);CHKERRQ(ierr); 3371 } else { 3372 /* determine buffer space needed for message */ 3373 nz = 0; 3374 for (i=0; i<m; i++) { 3375 nz += ourlens[i]; 3376 } 3377 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3378 3379 /* receive message of column indices*/ 3380 ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3381 } 3382 3383 /* determine column ownership if matrix is not square */ 3384 if (N != M) { 3385 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3386 else n = newMat->cmap->n; 3387 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3388 cstart = cend - n; 3389 } else { 3390 cstart = rstart; 3391 cend = rend; 3392 n = cend - cstart; 3393 } 3394 3395 /* loop over local rows, determining number of off diagonal entries */ 3396 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3397 jj = 0; 3398 for (i=0; i<m; i++) { 3399 for (j=0; j<ourlens[i]; j++) { 3400 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3401 jj++; 3402 } 3403 } 3404 3405 for (i=0; i<m; i++) { 3406 ourlens[i] -= offlens[i]; 3407 } 3408 if (!sizesset) { 3409 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3410 } 3411 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3412 3413 for (i=0; i<m; i++) { 3414 ourlens[i] += offlens[i]; 3415 } 3416 3417 if (!rank) { 3418 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3419 3420 /* read in my part of the matrix numerical values */ 3421 nz = procsnz[0]; 3422 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3423 3424 /* insert into matrix */ 3425 jj = rstart; 3426 smycols = mycols; 3427 svals = vals; 3428 for (i=0; i<m; i++) { 3429 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3430 smycols += ourlens[i]; 3431 svals += ourlens[i]; 3432 jj++; 3433 } 3434 3435 /* read in other processors and ship out */ 3436 for (i=1; i<size; i++) { 3437 nz = 
procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values*/
    ierr = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);

    /* insert into matrix; smycols/svals walk the CSR column/value streams row by row */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
/* Extract the parallel submatrix given by isrow/iscol.  iscol is gathered onto
   every process (all column indices) and cached on the result under the key
   "ISAllGather" so that MAT_REUSE_MATRIX calls can retrieve it. */
PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local;  /* sequential IS holding ALL requested columns */
  PetscInt       csize;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    /* recover the gathered column IS stashed by the MAT_INITIAL_MATRIX call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  }
  ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    /* compose takes a reference, so the local destroy leaves it alive on *newmat */
    ierr =
PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ()

  Note: This requires a sequential iscol with all indices.
*/
PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            *local,M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm = ((PetscObject)mat)->comm;
  Mat_SeqAIJ     *aij;


  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    /* the sequential per-process submatrix was stashed on *newmat by the initial call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  =
aij->i;
    jj = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        /* spread the columns as evenly as possible over the processes */
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix-sum of local column counts gives this process's column range [rstart,rend) */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
    olens = dlens + m;  /* single allocation split into diagonal/off-diagonal counts */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      /* jj walks the column indices of the whole matrix in order */
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
*/
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij = (Mat_SeqAIJ*)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  /* copy the sequential submatrix row by row into the parallel matrix */
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
/* Type-specific implementation of MatMPIAIJSetPreallocationCSR(): preallocates
   from local CSR arrays (Ii,J) and inserts the values v (or zeros if v is NULL). */
PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart, cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscScalar    *values;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);

  ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);

#if defined(PETSC_USE_DEBUGGING) /* NOTE(review): PETSC_USE_DEBUGGING appears undefined elsewhere in PETSc (debug builds define PETSC_USE_DEBUG) — confirm this guard is ever active */
  for (i=0; i<m; i++) {
    nnz = Ii[i+1]- Ii[i];
    JJ  = J + Ii[i];
3650 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3651 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3652 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3653 } 3654 #endif 3655 3656 for (i=0; i<m; i++) { 3657 nnz = Ii[i+1]- Ii[i]; 3658 JJ = J + Ii[i]; 3659 nnz_max = PetscMax(nnz_max,nnz); 3660 d = 0; 3661 for (j=0; j<nnz; j++) { 3662 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3663 } 3664 d_nnz[i] = d; 3665 o_nnz[i] = nnz - d; 3666 } 3667 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3668 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3669 3670 if (v) values = (PetscScalar*)v; 3671 else { 3672 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3673 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3674 } 3675 3676 for (i=0; i<m; i++) { 3677 ii = i + rstart; 3678 nnz = Ii[i+1]- Ii[i]; 3679 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3680 } 3681 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3682 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3683 3684 if (!v) { 3685 ierr = PetscFree(values);CHKERRQ(ierr); 3686 } 3687 PetscFunctionReturn(0); 3688 } 3689 EXTERN_C_END 3690 3691 #undef __FUNCT__ 3692 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3693 /*@ 3694 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3695 (the default parallel PETSc format). 3696 3697 Collective on MPI_Comm 3698 3699 Input Parameters: 3700 + B - the matrix 3701 . i - the indices into j for the start of each local row (starts with zero) 3702 . 
j - the column indices for each local row (starts with zero) 3703 - v - optional values in the matrix 3704 3705 Level: developer 3706 3707 Notes: 3708 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3709 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3710 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3711 3712 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3713 3714 The format which is used for the sparse matrix input, is equivalent to a 3715 row-major ordering.. i.e for the following matrix, the input data expected is 3716 as shown: 3717 3718 1 0 0 3719 2 0 3 P0 3720 ------- 3721 4 5 6 P1 3722 3723 Process0 [P0]: rows_owned=[0,1] 3724 i = {0,1,3} [size = nrow+1 = 2+1] 3725 j = {0,0,2} [size = nz = 6] 3726 v = {1,2,3} [size = nz = 6] 3727 3728 Process1 [P1]: rows_owned=[2] 3729 i = {0,3} [size = nrow+1 = 1+1] 3730 j = {0,1,2} [size = nz = 6] 3731 v = {4,5,6} [size = nz = 6] 3732 3733 .keywords: matrix, aij, compressed row, sparse, parallel 3734 3735 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3736 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3737 @*/ 3738 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3739 { 3740 PetscErrorCode ierr; 3741 3742 PetscFunctionBegin; 3743 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3744 PetscFunctionReturn(0); 3745 } 3746 3747 #undef __FUNCT__ 3748 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3749 /*@C 3750 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3751 (the default parallel PETSc format). 
For good matrix assembly performance 3752 the user should preallocate the matrix storage by setting the parameters 3753 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3754 performance can be increased by more than a factor of 50. 3755 3756 Collective on MPI_Comm 3757 3758 Input Parameters: 3759 + A - the matrix 3760 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3761 (same value is used for all local rows) 3762 . d_nnz - array containing the number of nonzeros in the various rows of the 3763 DIAGONAL portion of the local submatrix (possibly different for each row) 3764 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3765 The size of this array is equal to the number of local rows, i.e 'm'. 3766 You must leave room for the diagonal entry even if it is zero. 3767 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3768 submatrix (same value is used for all local rows). 3769 - o_nnz - array containing the number of nonzeros in the various rows of the 3770 OFF-DIAGONAL portion of the local submatrix (possibly different for 3771 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3772 structure. The size of this array is equal to the number 3773 of local rows, i.e 'm'. 3774 3775 If the *_nnz parameter is given then the *_nz parameter is ignored 3776 3777 The AIJ format (also called the Yale sparse matrix format or 3778 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3779 storage. The stored row and column indices begin with zero. 3780 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3781 3782 The parallel matrix is partitioned such that the first m0 rows belong to 3783 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3784 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 
3785 3786 The DIAGONAL portion of the local submatrix of a processor can be defined 3787 as the submatrix which is obtained by extraction the part corresponding to 3788 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3789 first row that belongs to the processor, r2 is the last row belonging to 3790 the this processor, and c1-c2 is range of indices of the local part of a 3791 vector suitable for applying the matrix to. This is an mxn matrix. In the 3792 common case of a square matrix, the row and column ranges are the same and 3793 the DIAGONAL part is also square. The remaining portion of the local 3794 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3795 3796 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3797 3798 You can call MatGetInfo() to get information on how effective the preallocation was; 3799 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3800 You can also run with the option -info and look for messages with the string 3801 malloc in them to see if additional memory allocation was needed. 3802 3803 Example usage: 3804 3805 Consider the following 8x8 matrix with 34 non-zero values, that is 3806 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3807 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3808 as follows: 3809 3810 .vb 3811 1 2 0 | 0 3 0 | 0 4 3812 Proc0 0 5 6 | 7 0 0 | 8 0 3813 9 0 10 | 11 0 0 | 12 0 3814 ------------------------------------- 3815 13 0 14 | 15 16 17 | 0 0 3816 Proc1 0 18 0 | 19 20 21 | 0 0 3817 0 0 0 | 22 23 0 | 24 0 3818 ------------------------------------- 3819 Proc2 25 26 27 | 0 0 28 | 29 0 3820 30 0 0 | 31 32 33 | 0 34 3821 .ve 3822 3823 This can be represented as a collection of submatrices as: 3824 3825 .vb 3826 A B C 3827 D E F 3828 G H I 3829 .ve 3830 3831 Where the submatrices A,B,C are owned by proc0, D,E,F are 3832 owned by proc1, G,H,I are owned by proc2. 
3833 3834 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3835 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3836 The 'M','N' parameters are 8,8, and have the same values on all procs. 3837 3838 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3839 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3840 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3841 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3842 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3843 matrix, ans [DF] as another SeqAIJ matrix. 3844 3845 When d_nz, o_nz parameters are specified, d_nz storage elements are 3846 allocated for every row of the local diagonal submatrix, and o_nz 3847 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3848 One way to choose d_nz and o_nz is to use the max nonzerors per local 3849 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3850 In this case, the values of d_nz,o_nz are: 3851 .vb 3852 proc0 : dnz = 2, o_nz = 2 3853 proc1 : dnz = 3, o_nz = 2 3854 proc2 : dnz = 1, o_nz = 4 3855 .ve 3856 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3857 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3858 for proc3. i.e we are using 12+15+10=37 storage locations to store 3859 34 values. 3860 3861 When d_nnz, o_nnz parameters are specified, the storage is specified 3862 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3863 In the above case the values for d_nnz,o_nnz are: 3864 .vb 3865 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3866 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3867 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3868 .ve 3869 Here the space allocated is sum of all the above values i.e 34, and 3870 hence pre-allocation is perfect. 
   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatGetInfo()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  /* dispatch to the type-specific implementation; silently a no-op for non-MPIAIJ types */
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithArrays"
/*@
   MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
         CSR format the local rows.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3919 3920 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3921 3922 The format which is used for the sparse matrix input, is equivalent to a 3923 row-major ordering.. i.e for the following matrix, the input data expected is 3924 as shown: 3925 3926 1 0 0 3927 2 0 3 P0 3928 ------- 3929 4 5 6 P1 3930 3931 Process0 [P0]: rows_owned=[0,1] 3932 i = {0,1,3} [size = nrow+1 = 2+1] 3933 j = {0,0,2} [size = nz = 6] 3934 v = {1,2,3} [size = nz = 6] 3935 3936 Process1 [P1]: rows_owned=[2] 3937 i = {0,3} [size = nrow+1 = 1+1] 3938 j = {0,1,2} [size = nz = 6] 3939 v = {4,5,6} [size = nz = 6] 3940 3941 .keywords: matrix, aij, compressed row, sparse, parallel 3942 3943 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3944 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3945 @*/ 3946 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3947 { 3948 PetscErrorCode ierr; 3949 3950 PetscFunctionBegin; 3951 if (i[0]) { 3952 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3953 } 3954 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3955 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3956 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3957 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3958 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3959 PetscFunctionReturn(0); 3960 } 3961 3962 #undef __FUNCT__ 3963 #define __FUNCT__ "MatCreateMPIAIJ" 3964 /*@C 3965 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3966 (the default parallel PETSc format). For good matrix assembly performance 3967 the user should preallocate the matrix storage by setting the parameters 3968 d_nz (or d_nnz) and o_nz (or o_nnz). 
By setting these parameters accurately, 3969 performance can be increased by more than a factor of 50. 3970 3971 Collective on MPI_Comm 3972 3973 Input Parameters: 3974 + comm - MPI communicator 3975 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3976 This value should be the same as the local size used in creating the 3977 y vector for the matrix-vector product y = Ax. 3978 . n - This value should be the same as the local size used in creating the 3979 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3980 calculated if N is given) For square matrices n is almost always m. 3981 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3982 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3983 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3984 (same value is used for all local rows) 3985 . d_nnz - array containing the number of nonzeros in the various rows of the 3986 DIAGONAL portion of the local submatrix (possibly different for each row) 3987 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3988 The size of this array is equal to the number of local rows, i.e 'm'. 3989 You must leave room for the diagonal entry even if it is zero. 3990 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3991 submatrix (same value is used for all local rows). 3992 - o_nnz - array containing the number of nonzeros in the various rows of the 3993 OFF-DIAGONAL portion of the local submatrix (possibly different for 3994 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3995 structure. The size of this array is equal to the number 3996 of local rows, i.e 'm'. 3997 3998 Output Parameter: 3999 . A - the matrix 4000 4001 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4002 MatXXXXSetPreallocation() paradgm instead of this routine directly. 
4003 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4004 4005 Notes: 4006 If the *_nnz parameter is given then the *_nz parameter is ignored 4007 4008 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4009 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4010 storage requirements for this matrix. 4011 4012 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4013 processor than it must be used on all processors that share the object for 4014 that argument. 4015 4016 The user MUST specify either the local or global matrix dimensions 4017 (possibly both). 4018 4019 The parallel matrix is partitioned across processors such that the 4020 first m0 rows belong to process 0, the next m1 rows belong to 4021 process 1, the next m2 rows belong to process 2 etc.. where 4022 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4023 values corresponding to [m x N] submatrix. 4024 4025 The columns are logically partitioned with the n0 columns belonging 4026 to 0th partition, the next n1 columns belonging to the next 4027 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4028 4029 The DIAGONAL portion of the local submatrix on any given processor 4030 is the submatrix corresponding to the rows and columns m,n 4031 corresponding to the given processor. i.e diagonal matrix on 4032 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4033 etc. The remaining portion of the local submatrix [m x (N-n)] 4034 constitute the OFF-DIAGONAL portion. The example below better 4035 illustrates this concept. 4036 4037 For a square global matrix we define each processor's diagonal portion 4038 to be its local rows and the corresponding columns (a square submatrix); 4039 each processor's off-diagonal portion encompasses the remainder of the 4040 local matrix (a rectangular submatrix). 
4041 4042 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4043 4044 When calling this routine with a single process communicator, a matrix of 4045 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4046 type of communicator, use the construction mechanism: 4047 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4048 4049 By default, this format uses inodes (identical nodes) when possible. 4050 We search for consecutive rows with the same nonzero structure, thereby 4051 reusing matrix information to achieve increased efficiency. 4052 4053 Options Database Keys: 4054 + -mat_no_inode - Do not use inodes 4055 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4056 - -mat_aij_oneindex - Internally use indexing starting at 1 4057 rather than 0. Note that when calling MatSetValues(), 4058 the user still MUST index entries starting at 0! 4059 4060 4061 Example usage: 4062 4063 Consider the following 8x8 matrix with 34 non-zero values, that is 4064 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4065 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4066 as follows: 4067 4068 .vb 4069 1 2 0 | 0 3 0 | 0 4 4070 Proc0 0 5 6 | 7 0 0 | 8 0 4071 9 0 10 | 11 0 0 | 12 0 4072 ------------------------------------- 4073 13 0 14 | 15 16 17 | 0 0 4074 Proc1 0 18 0 | 19 20 21 | 0 0 4075 0 0 0 | 22 23 0 | 24 0 4076 ------------------------------------- 4077 Proc2 25 26 27 | 0 0 28 | 29 0 4078 30 0 0 | 31 32 33 | 0 34 4079 .ve 4080 4081 This can be represented as a collection of submatrices as: 4082 4083 .vb 4084 A B C 4085 D E F 4086 G H I 4087 .ve 4088 4089 Where the submatrices A,B,C are owned by proc0, D,E,F are 4090 owned by proc1, G,H,I are owned by proc2. 4091 4092 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4093 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 
4094 The 'M','N' parameters are 8,8, and have the same values on all procs. 4095 4096 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4097 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4098 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4099 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4100 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4101 matrix, ans [DF] as another SeqAIJ matrix. 4102 4103 When d_nz, o_nz parameters are specified, d_nz storage elements are 4104 allocated for every row of the local diagonal submatrix, and o_nz 4105 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4106 One way to choose d_nz and o_nz is to use the max nonzerors per local 4107 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4108 In this case, the values of d_nz,o_nz are: 4109 .vb 4110 proc0 : dnz = 2, o_nz = 2 4111 proc1 : dnz = 3, o_nz = 2 4112 proc2 : dnz = 1, o_nz = 4 4113 .ve 4114 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4115 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4116 for proc3. i.e we are using 12+15+10=37 storage locations to store 4117 34 values. 4118 4119 When d_nnz, o_nnz parameters are specified, the storage is specified 4120 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4121 In the above case the values for d_nnz,o_nnz are: 4122 .vb 4123 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4124 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4125 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4126 .ve 4127 Here the space allocated is sum of all the above values i.e 34, and 4128 hence pre-allocation is perfect. 
   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* single-process communicators get a SEQAIJ matrix, not MPIAIJ (see docs above) */
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    /* sequentially there is no off-diagonal block, so o_nz/o_nnz are unused */
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetSeqAIJ"
/* Returns the diagonal (Ad) and off-diagonal (Ao) sequential blocks of an
   MPIAIJ matrix plus garray, the map from Ao's compressed local column
   numbering to global column indices.  No reference counts are changed. */
PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
/* Attach a coloring to both sequential blocks of an MPIAIJ matrix, translating
   the global (or ghosted) color indices into each block's local numbering. */
PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_GLOBAL) {
    ISColoringValue *allcolors,*colors;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr =
ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    /* pick out, via garray, the colors of the global columns present in the off-diagonal block */
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    ISColoringValue *colors;
    PetscInt        *larray;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion: map owned global columns to local ghosted numbering */
    ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      larray[i] = i + A->cmap->rstart;
    }
    /* in-place global-to-local translation (IS_GTOLM_MASK keeps the array length) */
    ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion: garray already holds the global columns */
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr =
ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);

  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ADIC)
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
/* Forward ADIC-differentiated values to both sequential blocks of the MPIAIJ matrix. */
PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
/* Forward ADIFOR-differentiated values (nl derivatives) to both sequential blocks. */
PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMergeSymbolic"
/* Symbolic phase of MatMerge(): creates and preallocates the parallel MPIAIJ
   matrix that will hold the concatenation of the per-process sequential matrices. */
PetscErrorCode MatMergeSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,*dnz,*onz;
  PetscInt       *indx;

  PetscFunctionBegin;
  /* This routine will ONLY return MPIAIJ type matrix */
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (n == PETSC_DECIDE){
    ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
  }
  /* prefix-sum of local row counts gives this process's starting global row */
  ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
  rstart -= m;

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  for (i=0;i<m;i++) {
    ierr =
MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
  }

  ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMergeNumeric"
/* Numeric phase of MatMerge(): copies the values of the local sequential
   matrix into the preallocated parallel matrix and then destroys inmat
   (inmat is consumed; callers must not use it afterwards). */
PetscErrorCode MatMergeNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
  for (i=0;i<m;i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart; /* global row index */
    ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatDestroy(&inmat);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge"
/*@
      MatMerge - Creates a single large PETSc matrix by concatenating sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicators the parallel matrix will live on
.    inmat - the input sequential matrices
.
n - number of local columns (or PETSC_DECIDE) 4327 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4328 4329 Output Parameter: 4330 . outmat - the parallel matrix generated 4331 4332 Level: advanced 4333 4334 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4335 4336 @*/ 4337 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4338 { 4339 PetscErrorCode ierr; 4340 4341 PetscFunctionBegin; 4342 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4343 if (scall == MAT_INITIAL_MATRIX){ 4344 ierr = MatMergeSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4345 } 4346 ierr = MatMergeNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4347 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4348 PetscFunctionReturn(0); 4349 } 4350 4351 #undef __FUNCT__ 4352 #define __FUNCT__ "MatFileSplit" 4353 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4354 { 4355 PetscErrorCode ierr; 4356 PetscMPIInt rank; 4357 PetscInt m,N,i,rstart,nnz; 4358 size_t len; 4359 const PetscInt *indx; 4360 PetscViewer out; 4361 char *name; 4362 Mat B; 4363 const PetscScalar *values; 4364 4365 PetscFunctionBegin; 4366 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4367 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4368 /* Should this be the type of the diagonal block of A? 
*/ 4369 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4370 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4371 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4372 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4373 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4374 for (i=0;i<m;i++) { 4375 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4376 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4377 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4378 } 4379 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4380 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4381 4382 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4383 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4384 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4385 sprintf(name,"%s.%d",outfile,rank); 4386 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4387 ierr = PetscFree(name); 4388 ierr = MatView(B,out);CHKERRQ(ierr); 4389 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4390 ierr = MatDestroy(&B);CHKERRQ(ierr); 4391 PetscFunctionReturn(0); 4392 } 4393 4394 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4395 #undef __FUNCT__ 4396 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4397 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4398 { 4399 PetscErrorCode ierr; 4400 Mat_Merge_SeqsToMPI *merge; 4401 PetscContainer container; 4402 4403 PetscFunctionBegin; 4404 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4405 if (container) { 4406 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4407 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4408 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4409 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4410 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4411 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4412 ierr = 
PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
    ierr = PetscFree(merge->coi);CHKERRQ(ierr);
    ierr = PetscFree(merge->coj);CHKERRQ(ierr);
    ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
    ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
    ierr = PetscFree(merge);CHKERRQ(ierr);
    /* detach the (now freed) support struct so it cannot be queried again */
    ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
  }
  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include <../src/mat/utils/freespace.h>
#include <petscbt.h>

#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPINumeric"
/*@C
      MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicators the parallel matrix will live on
.    seqmat - the input sequential matrices
.    m - number of local rows (or PETSC_DECIDE)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    mpimat - the parallel matrix generated

    Level: advanced

   Notes:
     The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
@*/
/* Numeric phase: assumes MatMerge_SeqsToMPISymbolic() already attached the
   Mat_Merge_SeqsToMPI support struct (row layout, send/recv patterns, merged
   sparsity bi/bj) to mpimat; exchanges values and assembles the result. */
PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
{
  PetscErrorCode      ierr;
  MPI_Comm            comm=((PetscObject)mpimat)->comm;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  MatScalar           *aa=a->a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  /* fetch the symbolic data attached by MatMerge_SeqsToMPISymbolic() */
  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
  ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i    = owners[proc]; /* first row owned by [proc]; its values are contiguous in aa */
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i; /* global row index */
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a->a + ai[arow];
    nextaj = 0;
    /* both aj and bj_i are sorted; walk bj_i, advancing nextaj on matches */
    for (j=0; nextaj<anzi; j++){
      if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++){
          if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr =
MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
/* Symbolic phase of MatMerge_SeqsToMPI(): determines the parallel row layout,
   exchanges the i/j (sparsity) structure of off-process rows, computes the
   merged sparsity pattern and preallocates the result; numeric values are
   filled in later by MatMerge_SeqsToMPINumeric(), which reuses the
   Mat_Merge_SeqsToMPI struct attached here. */
PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  PetscErrorCode      ierr;
  Mat                 B_mpi;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt            len,proc,*dnz,*onz;
  PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  PetscBT             lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);

  /* make sure it is a PETSc comm */
  ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);

  /* determine row ownership */
  /*---------------------------------------------------------*/
  ierr =
PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
  ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);

  m      = merge->rowmap->n;
  M      = merge->rowmap->N;
  owners = merge->rowmap->range;

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s = merge->len_s;

  len          = 0;  /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++){
    len_si[proc] = 0;
    if (proc == rank){
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      /* count only the non-empty rows destined for [proc] */
      for (i=owners[proc]; i<owners[proc+1]; i++){
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len         += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*--------------------------------*/
  ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);

  for (proc=0, k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i    = owners[proc];
    ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr   = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
  buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++){
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc]; /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr
= MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}

  ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++){
    ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr  = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = 0;
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0;i<m;i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr =
MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->coi          = PETSC_NULL;
  merge->coj          = PETSC_NULL;
  merge->owners_co    = PETSC_NULL;

  /* release the reference taken by PetscCommDuplicate() above */
  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPI"
/* Driver: runs the symbolic phase on the first call (MAT_INITIAL_MATRIX),
   then the numeric phase; see MatMerge_SeqsToMPINumeric() docs above. */
PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMat"
/*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with
          mlocal rows and n columns.
Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
          with MatGetSize()

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()

@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
  PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
  MatScalar      *aa=a->a,*ba=b->a,*cam;
  PetscScalar    *ca;
  PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    /* row i of the result holds all diagonal-block and off-diagonal-block entries of row i */
    ierr  = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++){
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
    k    = 0;
    /* merge in ascending global-column order: B-columns left of the diagonal
       range, then the A (diagonal) columns, then the remaining B-columns */
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX){
    /* sparsity pattern is unchanged: only refill the values, in the same order */
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    ci = mat->i; cj = mat->j; cam = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
/*@C
     MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    row, col - index sets of rows and columns to extract (or PETSC_NULL)

   Output Parameter:
.
A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()

@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row){
    /* default row set: all locally owned rows */
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col){
    /* default col set: columns with a nonzero in the local rows, in ascending
       global order (off-diag cols below the diagonal range, then local cols,
       then the rest) */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX){
    /* MatGetSubMatrices() expects the previous matrix in an array for reuse */
    ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr   = PetscFree(aloc);CHKERRQ(ierr);
  if (!row){
    ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
  }
  if (!col){
    ierr = ISDestroy(&iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)

   Output Parameter:
+    rowb, colb - index sets of rows and columns of B to extract
-    B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq=PETSC_NULL;

  PetscFunctionBegin;
  /* B's row layout must match A's column layout for A*B to make sense */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    /* rows of B to fetch = nonzero columns of local A, in ascending global order */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb  = *rowb; iscolb = *colb;
    ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr   = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb){
    ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb){
    ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
/*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
    of the OFF-DIAGONAL portion of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
+    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
.    startsj_r - similar to startsj for receives
.
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 5101 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5102 5103 Level: developer 5104 5105 */ 5106 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5107 { 5108 VecScatter_MPI_General *gen_to,*gen_from; 5109 PetscErrorCode ierr; 5110 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5111 Mat_SeqAIJ *b_oth; 5112 VecScatter ctx=a->Mvctx; 5113 MPI_Comm comm=((PetscObject)ctx)->comm; 5114 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5115 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5116 PetscScalar *rvalues,*svalues; 5117 MatScalar *b_otha,*bufa,*bufA; 5118 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5119 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 5120 MPI_Status *sstatus,rstatus; 5121 PetscMPIInt jj; 5122 PetscInt *cols,sbs,rbs; 5123 PetscScalar *vals; 5124 5125 PetscFunctionBegin; 5126 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5127 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5128 } 5129 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5130 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5131 5132 gen_to = (VecScatter_MPI_General*)ctx->todata; 5133 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5134 rvalues = gen_from->values; /* holds the length of receiving row */ 5135 svalues = gen_to->values; /* holds the length of sending row */ 5136 nrecvs = gen_from->n; 5137 nsends = gen_to->n; 5138 5139 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5140 srow = gen_to->indices; /* local row index to be sent */ 5141 sstarts = gen_to->starts; 5142 sprocs = 
gen_to->procs; 5143 sstatus = gen_to->sstatus; 5144 sbs = gen_to->bs; 5145 rstarts = gen_from->starts; 5146 rprocs = gen_from->procs; 5147 rbs = gen_from->bs; 5148 5149 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5150 if (scall == MAT_INITIAL_MATRIX){ 5151 /* i-array */ 5152 /*---------*/ 5153 /* post receives */ 5154 for (i=0; i<nrecvs; i++){ 5155 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5156 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5157 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5158 } 5159 5160 /* pack the outgoing message */ 5161 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5162 sstartsj[0] = 0; rstartsj[0] = 0; 5163 len = 0; /* total length of j or a array to be sent */ 5164 k = 0; 5165 for (i=0; i<nsends; i++){ 5166 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5167 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5168 for (j=0; j<nrows; j++) { 5169 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5170 for (l=0; l<sbs; l++){ 5171 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5172 rowlen[j*sbs+l] = ncols; 5173 len += ncols; 5174 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5175 } 5176 k++; 5177 } 5178 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5179 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5180 } 5181 /* recvs and sends of i-array are completed */ 5182 i = nrecvs; 5183 while (i--) { 5184 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5185 } 5186 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5187 5188 /* allocate buffers for sending j and a arrays */ 5189 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5190 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5191 5192 
/* create i-array of B_oth */ 5193 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5194 b_othi[0] = 0; 5195 len = 0; /* total length of j or a array to be received */ 5196 k = 0; 5197 for (i=0; i<nrecvs; i++){ 5198 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5199 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5200 for (j=0; j<nrows; j++) { 5201 b_othi[k+1] = b_othi[k] + rowlen[j]; 5202 len += rowlen[j]; k++; 5203 } 5204 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5205 } 5206 5207 /* allocate space for j and a arrrays of B_oth */ 5208 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5209 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5210 5211 /* j-array */ 5212 /*---------*/ 5213 /* post receives of j-array */ 5214 for (i=0; i<nrecvs; i++){ 5215 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5216 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5217 } 5218 5219 /* pack the outgoing message j-array */ 5220 k = 0; 5221 for (i=0; i<nsends; i++){ 5222 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5223 bufJ = bufj+sstartsj[i]; 5224 for (j=0; j<nrows; j++) { 5225 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5226 for (ll=0; ll<sbs; ll++){ 5227 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5228 for (l=0; l<ncols; l++){ 5229 *bufJ++ = cols[l]; 5230 } 5231 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5232 } 5233 } 5234 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5235 } 5236 5237 /* recvs and sends of j-array are completed */ 5238 i = nrecvs; 5239 while (i--) { 5240 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5241 } 5242 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5243 } 
else if (scall == MAT_REUSE_MATRIX){ 5244 sstartsj = *startsj; 5245 rstartsj = *startsj_r; 5246 bufa = *bufa_ptr; 5247 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5248 b_otha = b_oth->a; 5249 } else { 5250 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5251 } 5252 5253 /* a-array */ 5254 /*---------*/ 5255 /* post receives of a-array */ 5256 for (i=0; i<nrecvs; i++){ 5257 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5258 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5259 } 5260 5261 /* pack the outgoing message a-array */ 5262 k = 0; 5263 for (i=0; i<nsends; i++){ 5264 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5265 bufA = bufa+sstartsj[i]; 5266 for (j=0; j<nrows; j++) { 5267 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5268 for (ll=0; ll<sbs; ll++){ 5269 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5270 for (l=0; l<ncols; l++){ 5271 *bufA++ = vals[l]; 5272 } 5273 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5274 } 5275 } 5276 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5277 } 5278 /* recvs and sends of a-array are completed */ 5279 i = nrecvs; 5280 while (i--) { 5281 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5282 } 5283 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5284 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5285 5286 if (scall == MAT_INITIAL_MATRIX){ 5287 /* put together the new matrix */ 5288 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5289 5290 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5291 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5292 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5293 b_oth->free_a = PETSC_TRUE; 5294 b_oth->free_ij = PETSC_TRUE; 5295 b_oth->nonew = 0; 5296 5297 ierr = PetscFree(bufj);CHKERRQ(ierr); 5298 if (!startsj || !bufa_ptr){ 5299 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5300 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5301 } else { 5302 *startsj = sstartsj; 5303 *startsj_r = rstartsj; 5304 *bufa_ptr = bufa; 5305 } 5306 } 5307 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5308 PetscFunctionReturn(0); 5309 } 5310 5311 #undef __FUNCT__ 5312 #define __FUNCT__ "MatGetCommunicationStructs" 5313 /*@C 5314 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5315 5316 Not Collective 5317 5318 Input Parameters: 5319 . A - The matrix in mpiaij format 5320 5321 Output Parameter: 5322 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5323 . colmap - A map from global column index to local index into lvec 5324 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5325 5326 Level: developer 5327 5328 @*/ 5329 #if defined (PETSC_USE_CTABLE) 5330 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5331 #else 5332 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5333 #endif 5334 { 5335 Mat_MPIAIJ *a; 5336 5337 PetscFunctionBegin; 5338 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5339 PetscValidPointer(lvec, 2); 5340 PetscValidPointer(colmap, 3); 5341 PetscValidPointer(multScatter, 4); 5342 a = (Mat_MPIAIJ *) A->data; 5343 if (lvec) *lvec = a->lvec; 5344 if (colmap) *colmap = a->colmap; 5345 if (multScatter) *multScatter = a->Mvctx; 5346 PetscFunctionReturn(0); 5347 } 5348 5349 EXTERN_C_BEGIN 5350 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*); 5351 extern PetscErrorCode 
MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*); 5352 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 5353 EXTERN_C_END 5354 5355 #undef __FUNCT__ 5356 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5357 /* 5358 Computes (B'*A')' since computing B*A directly is untenable 5359 5360 n p p 5361 ( ) ( ) ( ) 5362 m ( A ) * n ( B ) = m ( C ) 5363 ( ) ( ) ( ) 5364 5365 */ 5366 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5367 { 5368 PetscErrorCode ierr; 5369 Mat At,Bt,Ct; 5370 5371 PetscFunctionBegin; 5372 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5373 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5374 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5375 ierr = MatDestroy(&At);CHKERRQ(ierr); 5376 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5377 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5378 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5379 PetscFunctionReturn(0); 5380 } 5381 5382 #undef __FUNCT__ 5383 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5384 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5385 { 5386 PetscErrorCode ierr; 5387 PetscInt m=A->rmap->n,n=B->cmap->n; 5388 Mat Cmat; 5389 5390 PetscFunctionBegin; 5391 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5392 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5393 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5394 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5395 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5396 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5397 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5398 *C = Cmat; 5399 (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ; 5400 PetscFunctionReturn(0); 5401 } 5402 5403 /* 
----------------------------------------------------------------*/ 5404 #undef __FUNCT__ 5405 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5406 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5407 { 5408 PetscErrorCode ierr; 5409 5410 PetscFunctionBegin; 5411 if (scall == MAT_INITIAL_MATRIX){ 5412 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5413 } 5414 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5415 PetscFunctionReturn(0); 5416 } 5417 5418 EXTERN_C_BEGIN 5419 #if defined(PETSC_HAVE_MUMPS) 5420 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5421 #endif 5422 #if defined(PETSC_HAVE_PASTIX) 5423 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5424 #endif 5425 #if defined(PETSC_HAVE_SUPERLU_DIST) 5426 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5427 #endif 5428 #if defined(PETSC_HAVE_SPOOLES) 5429 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5430 #endif 5431 EXTERN_C_END 5432 5433 /*MC 5434 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5435 5436 Options Database Keys: 5437 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJ()
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
/* Constructor registered for type "mpiaij": allocates the Mat_MPIAIJ data
   structure, installs the MPIAIJ function table, and composes the
   string-keyed plugin functions (factorizations, conversions, etc.) */
PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);

  /* type-specific data hangs off B->data; ops table is copied wholesale */
  ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->rmap->bs = 1;
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  b->size = size;
  ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash = PETSC_FALSE;
  b->colmap = 0;
  b->garray = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices = 0;
  b->rowvalues = 0;
  b->getrowactive = PETSC_FALSE;

  /* optional external direct solvers, compiled in only when configured */
#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
                                     "MatGetFactor_mpiaij_spooles",
                                     MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
                                     "MatGetFactor_aij_mumps",
                                     MatGetFactor_aij_mumps);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_PASTIX)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
                                     "MatGetFactor_mpiaij_pastix",
                                     MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
                                     "MatGetFactor_mpiaij_superlu_dist",
                                     MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
#endif
  /* behavior hooks looked up by string key from generic Mat code */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIAIJ",
                                     MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIAIJ",
                                     MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIAIJ",
                                     MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
                                     "MatIsTranspose_MPIAIJ",
                                     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                     "MatMPIAIJSetPreallocation_MPIAIJ",
                                     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
                                     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
                                     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                     "MatDiagonalScaleLocal_MPIAIJ",
                                     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  /* type-conversion entry points */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
                                     "MatConvert_MPIAIJ_MPIAIJPERM",
                                     MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
                                     "MatConvert_MPIAIJ_MPIAIJCRL",
                                     MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
                                     "MatConvert_MPIAIJ_MPISBAIJ",
                                     MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
  /* dense*aij product kernels */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
                                     "MatMatMult_MPIDense_MPIAIJ",
                                     MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
                                     "MatMatMultSymbolic_MPIDense_MPIAIJ",
                                     MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
                                     "MatMatMultNumeric_MPIDense_MPIAIJ",
                                     MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@
     MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
         and "off-diagonal" part of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.   i - row indices for "diagonal" portion of matrix
.   j - column indices
.   a - matrix values
.   oi - row indices for "off-diagonal" portion of matrix
.   oj - column indices
-   oa - matrix values

   Output Parameter:
.   mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
The user 5576 must free the arrays once the matrix has been destroyed and not before. 5577 5578 The i and j indices are 0 based 5579 5580 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5581 5582 This sets local rows and cannot be used to set off-processor values. 5583 5584 You cannot later use MatSetValues() to change values in this matrix. 5585 5586 .keywords: matrix, aij, compressed row, sparse, parallel 5587 5588 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5589 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5590 @*/ 5591 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5592 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5593 { 5594 PetscErrorCode ierr; 5595 Mat_MPIAIJ *maij; 5596 5597 PetscFunctionBegin; 5598 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5599 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5600 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5601 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5602 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5603 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5604 maij = (Mat_MPIAIJ*) (*mat)->data; 5605 maij->donotstash = PETSC_TRUE; 5606 (*mat)->preallocated = PETSC_TRUE; 5607 5608 ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5609 ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5610 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5611 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5612 5613 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5614 ierr = 
MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5615 5616 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5617 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5618 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5619 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5620 5621 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5622 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5623 PetscFunctionReturn(0); 5624 } 5625 5626 /* 5627 Special version for direct calls from Fortran 5628 */ 5629 #include <private/fortranimpl.h> 5630 5631 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5632 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5633 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5634 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5635 #endif 5636 5637 /* Change these macros so can be used in void function */ 5638 #undef CHKERRQ 5639 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5640 #undef SETERRQ2 5641 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5642 #undef SETERRQ 5643 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5644 5645 EXTERN_C_BEGIN 5646 #undef __FUNCT__ 5647 #define __FUNCT__ "matsetvaluesmpiaij_" 5648 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5649 { 5650 Mat mat = *mmat; 5651 PetscInt m = *mm, n = *mn; 5652 InsertMode addv = *maddv; 5653 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5654 PetscScalar value; 5655 PetscErrorCode ierr; 5656 5657 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5658 if (mat->insertmode == NOT_SET_VALUES) { 5659 mat->insertmode = addv; 5660 } 5661 #if defined(PETSC_USE_DEBUG) 5662 else if (mat->insertmode != addv) { 5663 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5664 } 5665 #endif 5666 { 5667 PetscInt 
i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5668 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5669 PetscBool roworiented = aij->roworiented; 5670 5671 /* Some Variables required in the macro */ 5672 Mat A = aij->A; 5673 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5674 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5675 MatScalar *aa = a->a; 5676 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5677 Mat B = aij->B; 5678 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5679 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5680 MatScalar *ba = b->a; 5681 5682 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5683 PetscInt nonew = a->nonew; 5684 MatScalar *ap1,*ap2; 5685 5686 PetscFunctionBegin; 5687 for (i=0; i<m; i++) { 5688 if (im[i] < 0) continue; 5689 #if defined(PETSC_USE_DEBUG) 5690 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5691 #endif 5692 if (im[i] >= rstart && im[i] < rend) { 5693 row = im[i] - rstart; 5694 lastcol1 = -1; 5695 rp1 = aj + ai[row]; 5696 ap1 = aa + ai[row]; 5697 rmax1 = aimax[row]; 5698 nrow1 = ailen[row]; 5699 low1 = 0; 5700 high1 = nrow1; 5701 lastcol2 = -1; 5702 rp2 = bj + bi[row]; 5703 ap2 = ba + bi[row]; 5704 rmax2 = bimax[row]; 5705 nrow2 = bilen[row]; 5706 low2 = 0; 5707 high2 = nrow2; 5708 5709 for (j=0; j<n; j++) { 5710 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5711 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5712 if (in[j] >= cstart && in[j] < cend){ 5713 col = in[j] - cstart; 5714 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5715 } else if (in[j] < 0) continue; 5716 #if defined(PETSC_USE_DEBUG) 5717 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max 
%D",in[j],mat->cmap->N-1); 5718 #endif 5719 else { 5720 if (mat->was_assembled) { 5721 if (!aij->colmap) { 5722 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5723 } 5724 #if defined (PETSC_USE_CTABLE) 5725 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5726 col--; 5727 #else 5728 col = aij->colmap[in[j]] - 1; 5729 #endif 5730 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5731 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5732 col = in[j]; 5733 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5734 B = aij->B; 5735 b = (Mat_SeqAIJ*)B->data; 5736 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5737 rp2 = bj + bi[row]; 5738 ap2 = ba + bi[row]; 5739 rmax2 = bimax[row]; 5740 nrow2 = bilen[row]; 5741 low2 = 0; 5742 high2 = nrow2; 5743 bm = aij->B->rmap->n; 5744 ba = b->a; 5745 } 5746 } else col = in[j]; 5747 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5748 } 5749 } 5750 } else { 5751 if (!aij->donotstash) { 5752 if (roworiented) { 5753 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5754 } else { 5755 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5756 } 5757 } 5758 } 5759 }} 5760 PetscFunctionReturnVoid(); 5761 } 5762 EXTERN_C_END 5763 5764