#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petscblaslapack.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, and MATAIJCRL; the type also
   automatically switches over to use inodes when enough exist.

   Level: beginner

.seealso: MatCreateMPIAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
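/*
   Example (not part of the original source): a minimal sketch of the preallocation
   pattern recommended in the MATAIJ documentation above. Calling both the Seq and
   MPI preallocation routines lets the same code run unchanged on one process or
   many; `comm`, `m`, `n` and the per-row estimates 5 and 2 are placeholders.

      Mat A;
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
      ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
      ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);              (takes effect on 1 process)
      ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr); (takes effect on >1 process)
*/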
#undef __FUNCT__
#define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt = 0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia = a->i;
  ib = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
    ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    ok2:;
  }
  ierr = ISCreateGeneral(PETSC_COMM_WORLD,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*)aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]),work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
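/*
   Example (not part of the original source): the routine above is normally reached
   through the public MatGetColumnNorms() interface; note that `norms` must hold one
   entry per global column on every process. A minimal sketch, with `A` a parallel
   AIJ matrix:

      PetscReal *norms;
      PetscInt  N;
      ierr = MatGetSize(A,PETSC_NULL,&N);CHKERRQ(ierr);
      ierr = PetscMalloc(N*sizeof(PetscReal),&norms);CHKERRQ(ierr);
      ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
      ierr = PetscFree(norms);CHKERRQ(ierr);
*/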
#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices.
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore = 0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*)gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the numbers of diagonal and off-diagonal entries per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the numbers of diagonal and off-diagonal entries per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else { /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*)gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}
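/*
   Illustration (not from the original source): in MatDistribute_MPIAIJ() above the
   ownership array is built by an MPI_Allgather of the local row counts followed by
   a prefix sum. With three processes contributing m = 3, 4, 3 local rows, the
   gather yields rowners = {0, 3, 4, 3} and the prefix sum turns it into
   rowners = {0, 3, 7, 10}, so process 1 owns global rows [3,7).
*/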
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each processor
  has an order N integer array) but it is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
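/*
   Illustration (not from the original source): if garray = {3, 7, 12}, the
   off-diagonal block B stores global columns 3, 7 and 12 as local columns
   0, 1 and 2. The colmap built above answers the reverse query, shifted by
   one so that 0 can mean "not present": colmap[3] = 1, colmap[7] = 2,
   colmap[12] = 3, and colmap[c] = 0 for every other global column c.
*/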
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i]  = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i]  = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}
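/*
   Note (added commentary, not in the original source): both macros above use the
   same hybrid search. Because each row's column indices are kept sorted, a binary
   search narrows [low,high) until fewer than six candidates remain, a linear scan
   then finds the exact slot, and if the column is absent a new entry is inserted
   by shifting the tail of the row up by one. For example, inserting col = 9 into
   a row holding {2, 5, 14, 30} shifts 14 and 30 up and writes 9 into slot 2.
*/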
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
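/*
   Example (not part of the original source): as the stash logic above shows,
   entries destined for rows owned by another process are cached locally and
   communicated during assembly. Callers that never generate such entries can
   assert this and skip the stash machinery entirely:

      ierr = MatSetOption(mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);

   With the option set, MatSetValues() on a non-local row is an error, as coded above.
*/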
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE;
     the bitwise OR of the modes equals ADD_VALUES|INSERT_VALUES only if
     some process inserted while another added */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
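/*
   Example (not part of the original source): the usual calling sequence that drives
   MatAssemblyBegin_MPIAIJ() above and MatAssemblyEnd_MPIAIJ() below. All processes
   must use the same InsertMode, which the Allreduce above verifies; `rstart`,
   `rend`, `ncols`, `cols` and `vals` are placeholders.

      for (i=rstart; i<rend; i++) {
        ierr = MatSetValues(mat,1,&i,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      }
      ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/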
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
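/*
   Example (not part of the original source): MatZeroRows_MPIAIJ() below is reached
   through the public MatZeroRows() interface, typically to impose Dirichlet
   boundary conditions: the listed global rows are zeroed, `diag` is placed on the
   diagonal, and when the optional vectors are given the right-hand side is fixed
   so that x keeps its boundary values. A sketch, with `nbc` and `bcrows` as
   placeholders:

      ierr = MatZeroRows(A,nbc,bcrows,1.0,x,b);CHKERRQ(ierr);
*/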
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends,row;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,rstart = A->rmap->rstart;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb;
#if defined(PETSC_DEBUG)
  PetscBool         found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += nprocs[2*i+1];

  if (A->nooffproczerorows) {
    if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
    nrecvs = nsends;
    nmax   = N;
  } else {
    /* inform other processors of number of messages and max length */
    ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
  }

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to
          the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory with lots of mallocs and slow
     things down.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i=0; i<slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,m;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj,*ii,*ridx;
  PetscScalar       *aa;
#if defined(PETSC_DEBUG)
  PetscBool         found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += nprocs[2*i+1];

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to
          the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);
  /* lrows are the local rows to be zeroed, slen is the number of local rows */

  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);

  /* handle off-diagonal part of matrix */
  ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<slen; i++) {
    bb[lrows[i]] = 1;
  }
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);

  /* remove zeroed rows of off-diagonal matrix */
  ii = aij->i;
  for (i=0; i<slen; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m  = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
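/*
   Note (added commentary, not in the original source): MatMult_MPIAIJ() and
   MatMultAdd_MPIAIJ() above overlap communication with computation. The scatter
   of the needed off-process entries of xx into a->lvec is started, the fully
   local product with the diagonal block a->A proceeds while messages are in
   flight, and only then is the scatter completed and the off-diagonal
   contribution a->B * lvec added in.
*/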
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added into yy until the VecScatterEnd() below */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this no-op */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)Amat->data,*Bij;
  Mat            Adia = Aij->A,Bdia,Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*)Bmat->data;
  Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
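/*
   Example (not part of the original source): MatIsTranspose_MPIAIJ() above is
   reached through the public interface; a minimal check that B equals the
   transpose of A to within a tolerance:

      PetscBool flg;
      ierr = MatIsTranspose(A,B,1.e-10,&flg);CHKERRQ(ierr);
*/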
/*
   This only works correctly for square matrices where the subblock A->A is the
   diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range = 0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPILong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPILong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the process with the most nonzeros */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPILong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPILong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPILong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPILong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
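/*
   Example (not part of the original source): the binary writer above is what runs
   when a parallel AIJ matrix is viewed with a binary viewer; the resulting file
   can be read back with MatLoad(). A sketch:

      PetscViewer viewer;
      ierr = PetscViewerBinaryOpen(((PetscObject)mat)->comm,"mat.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
      ierr = MatView(mat,viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/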
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt**)&inodes,PETSC_NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }
*/ 1500 Mat A; 1501 Mat_SeqAIJ *Aloc; 1502 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1503 MatScalar *a; 1504 1505 if (mat->rmap->N > 1024) { 1506 PetscBool flg = PETSC_FALSE; 1507 1508 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1509 if (!flg) { 1510 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1511 } 1512 } 1513 1514 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1515 if (!rank) { 1516 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1517 } else { 1518 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1519 } 1520 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1521 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1522 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1523 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1524 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1525 1526 /* copy over the A part */ 1527 Aloc = (Mat_SeqAIJ*)aij->A->data; 1528 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1529 row = mat->rmap->rstart; 1530 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;} 1531 for (i=0; i<m; i++) { 1532 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1533 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1534 } 1535 aj = Aloc->j; 1536 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1537 1538 /* copy over the B part */ 1539 Aloc = (Mat_SeqAIJ*)aij->B->data; 1540 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1541 row = mat->rmap->rstart; 1542 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1543 ct = cols; 1544 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1545 for (i=0; i<m; i++) { 1546 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1547 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1548 } 1549 ierr = PetscFree(ct);CHKERRQ(ierr); 1550 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1551 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1552 /* 1553 Everyone has to call to draw the matrix since the graphics waits are 1554 synchronized across all processors that share the PetscDraw object 1555 */ 1556 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1557 if (!rank) { 1558 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1559 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1560 ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr); 1561 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1562 } 1563 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1564 ierr = MatDestroy(&A);CHKERRQ(ierr); 1565 } 1566 PetscFunctionReturn(0); 1567 } 1568 1569 #undef __FUNCT__ 1570 #define __FUNCT__ "MatView_MPIAIJ" 1571 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1572 { 1573 PetscErrorCode ierr; 1574 PetscBool iascii,isdraw,issocket,isbinary; 1575 1576 PetscFunctionBegin; 1577 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1578 ierr = 
PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1579 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1580 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1581 if (iascii || isdraw || isbinary || issocket) { 1582 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1583 } else { 1584 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1585 } 1586 PetscFunctionReturn(0); 1587 } 1588 1589 #undef __FUNCT__ 1590 #define __FUNCT__ "MatSOR_MPIAIJ" 1591 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1592 { 1593 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1594 PetscErrorCode ierr; 1595 Vec bb1 = 0; 1596 PetscBool hasop; 1597 1598 PetscFunctionBegin; 1599 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1600 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1601 } 1602 1603 if (flag == SOR_APPLY_UPPER) { 1604 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1605 PetscFunctionReturn(0); 1606 } 1607 1608 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1609 if (flag & SOR_ZERO_INITIAL_GUESS) { 1610 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1611 its--; 1612 } 1613 1614 while (its--) { 1615 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1616 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1617 1618 /* update rhs: bb1 = bb - B*x */ 1619 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1620 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1621 1622 /* local sweep */ 1623 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1624 } 1625 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1626 if (flag & SOR_ZERO_INITIAL_GUESS) { 1627 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1628 its--; 1629 } 1630 while (its--) { 1631 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1632 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1633 1634 /* update rhs: bb1 = bb - B*x */ 1635 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1636 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1637 1638 /* local sweep */ 1639 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1640 } 1641 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1642 if (flag & SOR_ZERO_INITIAL_GUESS) { 1643 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1644 its--; 1645 } 1646 while (its--) { 1647 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1648 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1649 1650 /* update rhs: bb1 = bb - B*x */ 1651 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1652 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1653 1654 /* local sweep */ 1655 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1656 } 1657 } else if (flag & SOR_EISENSTAT) { 1658 Vec xx1; 1659 1660 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1661 ierr = 
(*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1662 1663 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1664 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1665 if (!mat->diag) { 1666 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1667 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1668 } 1669 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1670 if (hasop) { 1671 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1672 } else { 1673 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1674 } 1675 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1676 1677 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1678 1679 /* local sweep */ 1680 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1681 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1682 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1683 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1684 1685 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1686 PetscFunctionReturn(0); 1687 } 1688 1689 #undef __FUNCT__ 1690 #define __FUNCT__ "MatPermute_MPIAIJ" 1691 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1692 { 1693 MPI_Comm comm; 1694 PetscInt first,local_rowsize,local_colsize; 1695 const PetscInt *rows; 1696 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1697 PetscErrorCode ierr; 1698 1699 PetscFunctionBegin; 1700 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1701 /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */ 1702 ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr); 1703 /* collect the global row permutation and invert it */ 1704 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1705 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1706 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 1707 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1708 ierr = ISDestroy(&growp);CHKERRQ(ierr); 1709 /* get the local target indices */ 1710 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1711 ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr); 1712 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1713 ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 1714 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1715 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 1716 /* the column permutation is so much easier; 1717 make a local version of 'colp' and invert it */ 1718 ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr); 1719 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1720 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1721 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 1722 /* now we just get the submatrix */ 1723 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1724 /* clean up */ 1725 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 1726 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 1727 PetscFunctionReturn(0); 1728 } 1729 1730 #undef __FUNCT__ 1731 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1732 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1733 { 1734 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1735 Mat A = mat->A,B = 
mat->B; 1736 PetscErrorCode ierr; 1737 PetscReal isend[5],irecv[5]; 1738 1739 PetscFunctionBegin; 1740 info->block_size = 1.0; 1741 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1742 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1743 isend[3] = info->memory; isend[4] = info->mallocs; 1744 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1745 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1746 isend[3] += info->memory; isend[4] += info->mallocs; 1747 if (flag == MAT_LOCAL) { 1748 info->nz_used = isend[0]; 1749 info->nz_allocated = isend[1]; 1750 info->nz_unneeded = isend[2]; 1751 info->memory = isend[3]; 1752 info->mallocs = isend[4]; 1753 } else if (flag == MAT_GLOBAL_MAX) { 1754 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1755 info->nz_used = irecv[0]; 1756 info->nz_allocated = irecv[1]; 1757 info->nz_unneeded = irecv[2]; 1758 info->memory = irecv[3]; 1759 info->mallocs = irecv[4]; 1760 } else if (flag == MAT_GLOBAL_SUM) { 1761 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1762 info->nz_used = irecv[0]; 1763 info->nz_allocated = irecv[1]; 1764 info->nz_unneeded = irecv[2]; 1765 info->memory = irecv[3]; 1766 info->mallocs = irecv[4]; 1767 } 1768 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1769 info->fill_ratio_needed = 0; 1770 info->factor_mallocs = 0; 1771 1772 PetscFunctionReturn(0); 1773 } 1774 1775 #undef __FUNCT__ 1776 #define __FUNCT__ "MatSetOption_MPIAIJ" 1777 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1778 { 1779 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1780 PetscErrorCode ierr; 1781 1782 PetscFunctionBegin; 1783 switch (op) { 1784 case MAT_NEW_NONZERO_LOCATIONS: 1785 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1786 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1787 case MAT_KEEP_NONZERO_PATTERN: 1788 case MAT_NEW_NONZERO_LOCATION_ERR: 1789 case MAT_USE_INODES: 1790 case MAT_IGNORE_ZERO_ENTRIES: 1791 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1792 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1793 break; 1794 case MAT_ROW_ORIENTED: 1795 a->roworiented = flg; 1796 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1797 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1798 break; 1799 case MAT_NEW_DIAGONALS: 1800 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1801 break; 1802 case MAT_IGNORE_OFF_PROC_ENTRIES: 1803 a->donotstash = flg; 1804 break; 1805 case MAT_SPD: 1806 A->spd_set = PETSC_TRUE; 1807 A->spd = flg; 1808 if (flg) { 1809 A->symmetric = PETSC_TRUE; 1810 A->structurally_symmetric = PETSC_TRUE; 1811 A->symmetric_set = PETSC_TRUE; 1812 A->structurally_symmetric_set = PETSC_TRUE; 1813 } 1814 break; 1815 case MAT_SYMMETRIC: 1816 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1817 break; 1818 case MAT_STRUCTURALLY_SYMMETRIC: 1819 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1820 break; 1821 case MAT_HERMITIAN: 1822 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1823 break; 1824 case MAT_SYMMETRY_ETERNAL: 1825 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1826 break; 1827 default: 1828 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1829 } 1830 PetscFunctionReturn(0); 1831 } 1832 1833 #undef __FUNCT__ 1834 #define __FUNCT__ "MatGetRow_MPIAIJ" 1835 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1836 { 1837 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1838 
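  /* A row of the parallel matrix is stored in two pieces: the diagonal block mat->A
     (columns owned by this process, with local indices) and the off-diagonal block
     mat->B (compressed columns, translated to global indices through mat->garray).
     vworkA/cworkA and vworkB/cworkB below receive those two pieces, which are then
     merged into ascending global column order: the B entries that lie left of the
     diagonal block, then all of A, then the remaining B entries. */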
PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1839 PetscErrorCode ierr; 1840 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1841 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1842 PetscInt *cmap,*idx_p; 1843 1844 PetscFunctionBegin; 1845 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1846 mat->getrowactive = PETSC_TRUE; 1847 1848 if (!mat->rowvalues && (idx || v)) { 1849 /* 1850 allocate enough space to hold information from the longest row. 1851 */ 1852 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1853 PetscInt max = 1,tmp; 1854 for (i=0; i<matin->rmap->n; i++) { 1855 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1856 if (max < tmp) { max = tmp; } 1857 } 1858 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1859 } 1860 1861 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1862 lrow = row - rstart; 1863 1864 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1865 if (!v) {pvA = 0; pvB = 0;} 1866 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1867 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1868 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1869 nztot = nzA + nzB; 1870 1871 cmap = mat->garray; 1872 if (v || idx) { 1873 if (nztot) { 1874 /* Sort by increasing column numbers, assuming A and B already sorted */ 1875 PetscInt imark = -1; 1876 if (v) { 1877 *v = v_p = mat->rowvalues; 1878 for (i=0; i<nzB; i++) { 1879 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1880 else break; 1881 } 1882 imark = i; 1883 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1884 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1885 } 1886 if (idx) { 1887 *idx = idx_p = mat->rowindices; 1888 if (imark > -1) { 1889 for (i=0; i<imark; i++) { 1890 idx_p[i] = cmap[cworkB[i]]; 1891 } 1892 } else { 1893 for (i=0; i<nzB; i++) { 1894 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1895 else break; 1896 } 1897 imark = i; 1898 } 1899 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1900 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1901 } 1902 } else { 1903 if (idx) *idx = 0; 1904 if (v) *v = 0; 1905 } 1906 } 1907 *nz = nztot; 1908 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1909 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1910 PetscFunctionReturn(0); 1911 } 1912 1913 #undef __FUNCT__ 1914 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1915 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1916 { 1917 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1918 1919 PetscFunctionBegin; 1920 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1921 aij->getrowactive = PETSC_FALSE; 1922 PetscFunctionReturn(0); 1923 } 1924 1925 #undef __FUNCT__ 1926 #define __FUNCT__ "MatNorm_MPIAIJ" 1927 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1928 { 1929 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1930 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1931 PetscErrorCode ierr; 1932 PetscInt i,j,cstart = mat->cmap->rstart; 1933 PetscReal sum = 0.0; 1934 MatScalar *v; 1935 1936 PetscFunctionBegin; 1937 if (aij->size == 1) { 1938 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1939 } else { 1940 if (type == 
NORM_FROBENIUS) { 1941 v = amat->a; 1942 for (i=0; i<amat->nz; i++) { 1943 #if defined(PETSC_USE_COMPLEX) 1944 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1945 #else 1946 sum += (*v)*(*v); v++; 1947 #endif 1948 } 1949 v = bmat->a; 1950 for (i=0; i<bmat->nz; i++) { 1951 #if defined(PETSC_USE_COMPLEX) 1952 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1953 #else 1954 sum += (*v)*(*v); v++; 1955 #endif 1956 } 1957 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1958 *norm = PetscSqrtReal(*norm); 1959 } else if (type == NORM_1) { /* max column norm */ 1960 PetscReal *tmp,*tmp2; 1961 PetscInt *jj,*garray = aij->garray; 1962 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1963 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1964 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1965 *norm = 0.0; 1966 v = amat->a; jj = amat->j; 1967 for (j=0; j<amat->nz; j++) { 1968 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1969 } 1970 v = bmat->a; jj = bmat->j; 1971 for (j=0; j<bmat->nz; j++) { 1972 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1973 } 1974 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1975 for (j=0; j<mat->cmap->N; j++) { 1976 if (tmp2[j] > *norm) *norm = tmp2[j]; 1977 } 1978 ierr = PetscFree(tmp);CHKERRQ(ierr); 1979 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1980 } else if (type == NORM_INFINITY) { /* max row norm */ 1981 PetscReal ntemp = 0.0; 1982 for (j=0; j<aij->A->rmap->n; j++) { 1983 v = amat->a + amat->i[j]; 1984 sum = 0.0; 1985 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1986 sum += PetscAbsScalar(*v); v++; 1987 } 1988 v = bmat->a + bmat->i[j]; 1989 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1990 sum += PetscAbsScalar(*v); v++; 1991 } 1992 if (sum > ntemp) ntemp = sum; 1993 } 1994 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1995 } else { 1996 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 1997 } 1998 } 1999 PetscFunctionReturn(0); 2000 } 2001 2002 #undef __FUNCT__ 2003 #define __FUNCT__ "MatTranspose_MPIAIJ" 2004 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2005 { 2006 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2007 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2008 PetscErrorCode ierr; 2009 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 2010 PetscInt cstart=A->cmap->rstart,ncol; 2011 Mat B; 2012 MatScalar *array; 2013 2014 PetscFunctionBegin; 2015 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2016 2017 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 2018 ai = Aloc->i; aj = Aloc->j; 2019 bi = Bloc->i; bj = Bloc->j; 2020 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2021 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 2022 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2023 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 2024 for (i=0; i<ai[ma]; i++){ 2025 d_nnz[aj[i]] ++; 2026 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2027 } 2028 2029 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2030 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2031 ierr = 
MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2032 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 2033 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2034 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2035 } else { 2036 B = *matout; 2037 } 2038 2039 /* copy over the A part */ 2040 array = Aloc->a; 2041 row = A->rmap->rstart; 2042 for (i=0; i<ma; i++) { 2043 ncol = ai[i+1]-ai[i]; 2044 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2045 row++; array += ncol; aj += ncol; 2046 } 2047 aj = Aloc->j; 2048 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2049 2050 /* copy over the B part */ 2051 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2052 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2053 array = Bloc->a; 2054 row = A->rmap->rstart; 2055 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 2056 cols_tmp = cols; 2057 for (i=0; i<mb; i++) { 2058 ncol = bi[i+1]-bi[i]; 2059 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2060 row++; array += ncol; cols_tmp += ncol; 2061 } 2062 ierr = PetscFree(cols);CHKERRQ(ierr); 2063 2064 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2065 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2066 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2067 *matout = B; 2068 } else { 2069 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2070 } 2071 PetscFunctionReturn(0); 2072 } 2073 2074 #undef __FUNCT__ 2075 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2076 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2077 { 2078 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2079 Mat a = aij->A,b = aij->B; 2080 PetscErrorCode ierr; 2081 PetscInt s1,s2,s3; 2082 2083 PetscFunctionBegin; 2084 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2085 if (rr) { 2086 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2087 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2088 /* Overlap communication with computation. 
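       The forward scatter of rr into aij->lvec is only begun here; the left
       scaling of the off-diagonal block and the scaling of the diagonal block
       proceed while the messages are in flight, and VecScatterEnd() is posted
       below just before lvec is actually needed. The same begin/compute/end
       pattern applies to any VecScatter; a sketch, where
       local_work_not_touching_xloc() is a hypothetical placeholder:

         ierr = VecScatterBegin(ctx,x,xloc,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
         ierr = local_work_not_touching_xloc();CHKERRQ(ierr);
         ierr = VecScatterEnd(ctx,x,xloc,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);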
*/ 2089 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2090 } 2091 if (ll) { 2092 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2093 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2094 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2095 } 2096 /* scale the diagonal block */ 2097 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2098 2099 if (rr) { 2100 /* Do a scatter end and then right scale the off-diagonal block */ 2101 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2102 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2103 } 2104 2105 PetscFunctionReturn(0); 2106 } 2107 2108 #undef __FUNCT__ 2109 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 2110 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 2111 { 2112 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2113 PetscErrorCode ierr; 2114 2115 PetscFunctionBegin; 2116 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 2117 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 2118 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 2119 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 2120 PetscFunctionReturn(0); 2121 } 2122 #undef __FUNCT__ 2123 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2124 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2125 { 2126 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2127 PetscErrorCode ierr; 2128 2129 PetscFunctionBegin; 2130 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2131 PetscFunctionReturn(0); 2132 } 2133 2134 #undef __FUNCT__ 2135 #define __FUNCT__ "MatEqual_MPIAIJ" 2136 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2137 { 2138 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2139 Mat a,b,c,d; 2140 PetscBool flg; 2141 PetscErrorCode ierr; 2142 2143 PetscFunctionBegin; 2144 a = matA->A; b = matA->B; 2145 c = matB->A; d = matB->B; 2146 2147 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2148 if (flg) { 2149 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2150 } 2151 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2152 PetscFunctionReturn(0); 2153 } 2154 2155 #undef __FUNCT__ 2156 #define __FUNCT__ "MatCopy_MPIAIJ" 2157 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2158 { 2159 PetscErrorCode ierr; 2160 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2161 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2162 2163 PetscFunctionBegin; 2164 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2165 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2166 /* because of the column compression in the off-processor part of the matrix a->B, 2167 the number of columns in a->B and b->B may be different, hence we cannot call 2168 the MatCopy() directly on the two parts. 
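(The diagonal blocks a->A and b->A always share the same local column space, so it is only the differing compressed column maps of the two off-diagonal blocks that force the generic path.)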
If need be, we can provide a more 2169 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2170 then copying the submatrices */ 2171 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2172 } else { 2173 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2174 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2175 } 2176 PetscFunctionReturn(0); 2177 } 2178 2179 #undef __FUNCT__ 2180 #define __FUNCT__ "MatSetUp_MPIAIJ" 2181 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2182 { 2183 PetscErrorCode ierr; 2184 2185 PetscFunctionBegin; 2186 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2187 PetscFunctionReturn(0); 2188 } 2189 2190 #undef __FUNCT__ 2191 #define __FUNCT__ "MatAXPY_MPIAIJ" 2192 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2193 { 2194 PetscErrorCode ierr; 2195 PetscInt i; 2196 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2197 PetscBLASInt bnz,one=1; 2198 Mat_SeqAIJ *x,*y; 2199 2200 PetscFunctionBegin; 2201 if (str == SAME_NONZERO_PATTERN) { 2202 PetscScalar alpha = a; 2203 x = (Mat_SeqAIJ *)xx->A->data; 2204 y = (Mat_SeqAIJ *)yy->A->data; 2205 bnz = PetscBLASIntCast(x->nz); 2206 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2207 x = (Mat_SeqAIJ *)xx->B->data; 2208 y = (Mat_SeqAIJ *)yy->B->data; 2209 bnz = PetscBLASIntCast(x->nz); 2210 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2211 } else if (str == SUBSET_NONZERO_PATTERN) { 2212 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2213 2214 x = (Mat_SeqAIJ *)xx->B->data; 2215 y = (Mat_SeqAIJ *)yy->B->data; 2216 if (y->xtoy && y->XtoY != xx->B) { 2217 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2218 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2219 } 2220 if (!y->xtoy) { /* get xtoy */ 2221 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2222 y->XtoY = xx->B; 2223 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2224 } 2225 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2226 } else { 2227 Mat B; 2228 PetscInt *nnz_d,*nnz_o; 2229 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2230 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2231 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2232 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2233 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2234 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2235 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2236 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr); 2237 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2238 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2239 ierr = MatHeaderReplace(Y,B); 2240 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2241 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2242 } 2243 PetscFunctionReturn(0); 2244 } 2245 2246 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2247 2248 #undef __FUNCT__ 2249 #define __FUNCT__ "MatConjugate_MPIAIJ" 2250 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2251 { 2252 #if defined(PETSC_USE_COMPLEX) 2253 PetscErrorCode ierr; 2254 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2255 2256 PetscFunctionBegin; 2257 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2258 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2259 #else 2260 PetscFunctionBegin; 2261 #endif 2262 PetscFunctionReturn(0); 2263 } 2264 2265 #undef __FUNCT__ 2266 
#define __FUNCT__ "MatRealPart_MPIAIJ" 2267 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2268 { 2269 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2270 PetscErrorCode ierr; 2271 2272 PetscFunctionBegin; 2273 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2274 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2275 PetscFunctionReturn(0); 2276 } 2277 2278 #undef __FUNCT__ 2279 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2280 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2281 { 2282 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2283 PetscErrorCode ierr; 2284 2285 PetscFunctionBegin; 2286 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2287 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2288 PetscFunctionReturn(0); 2289 } 2290 2291 #ifdef PETSC_HAVE_PBGL 2292 2293 #include <boost/parallel/mpi/bsp_process_group.hpp> 2294 #include <boost/graph/distributed/ilu_default_graph.hpp> 2295 #include <boost/graph/distributed/ilu_0_block.hpp> 2296 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2297 #include <boost/graph/distributed/petsc/interface.hpp> 2298 #include <boost/multi_array.hpp> 2299 #include <boost/parallel/distributed_property_map->hpp> 2300 2301 #undef __FUNCT__ 2302 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2303 /* 2304 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2305 */ 2306 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2307 { 2308 namespace petsc = boost::distributed::petsc; 2309 2310 namespace graph_dist = boost::graph::distributed; 2311 using boost::graph::distributed::ilu_default::process_group_type; 2312 using boost::graph::ilu_permuted; 2313 2314 PetscBool row_identity, col_identity; 2315 PetscContainer c; 2316 PetscInt m, n, M, N; 2317 PetscErrorCode ierr; 2318 2319 PetscFunctionBegin; 2320 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2321 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2322 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2323 if (!row_identity || !col_identity) { 2324 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2325 } 2326 2327 process_group_type pg; 2328 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2329 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2330 lgraph_type& level_graph = *lgraph_p; 2331 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2332 2333 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2334 ilu_permuted(level_graph); 2335 2336 /* put together the new matrix */ 2337 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2338 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2339 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2340 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2341 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2342 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2343 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2344 2345 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 2346 ierr = PetscContainerSetPointer(c, lgraph_p); 2347 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2348 ierr = PetscContainerDestroy(&c); 2349 PetscFunctionReturn(0); 2350 } 2351 2352 #undef __FUNCT__ 2353 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2354 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2355 
{ 2356 PetscFunctionBegin; 2357 PetscFunctionReturn(0); 2358 } 2359 2360 #undef __FUNCT__ 2361 #define __FUNCT__ "MatSolve_MPIAIJ" 2362 /* 2363 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2364 */ 2365 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2366 { 2367 namespace graph_dist = boost::graph::distributed; 2368 2369 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2370 lgraph_type* lgraph_p; 2371 PetscContainer c; 2372 PetscErrorCode ierr; 2373 2374 PetscFunctionBegin; 2375 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2376 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2377 ierr = VecCopy(b, x);CHKERRQ(ierr); 2378 2379 PetscScalar* array_x; 2380 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2381 PetscInt sx; 2382 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2383 2384 PetscScalar* array_b; 2385 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2386 PetscInt sb; 2387 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2388 2389 lgraph_type& level_graph = *lgraph_p; 2390 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2391 2392 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2393 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2394 ref_x(array_x, boost::extents[num_vertices(graph)]); 2395 2396 typedef boost::iterator_property_map<array_ref_type::iterator, 2397 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2398 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2399 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2400 2401 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2402 2403 PetscFunctionReturn(0); 2404 } 2405 #endif 2406 2407 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2408 PetscInt nzlocal,nsends,nrecvs; 2409 PetscMPIInt *send_rank,*recv_rank; 2410 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2411 PetscScalar *sbuf_a,**rbuf_a; 2412 PetscErrorCode (*Destroy)(Mat); 2413 } Mat_Redundant; 2414 2415 #undef __FUNCT__ 2416 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2417 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2418 { 2419 PetscErrorCode ierr; 2420 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2421 PetscInt i; 2422 2423 PetscFunctionBegin; 2424 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2425 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2426 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2427 for (i=0; i<redund->nrecvs; i++){ 2428 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2429 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2430 } 2431 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2432 ierr = PetscFree(redund);CHKERRQ(ierr); 2433 PetscFunctionReturn(0); 2434 } 2435 2436 #undef __FUNCT__ 2437 #define __FUNCT__ "MatDestroy_MatRedundant" 2438 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2439 { 2440 PetscErrorCode ierr; 2441 PetscContainer container; 2442 Mat_Redundant *redund=PETSC_NULL; 2443 2444 PetscFunctionBegin; 2445 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2446 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist"); 2447 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2448 A->ops->destroy = redund->Destroy; 2449 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 
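  /* composing a null pointer removes the container from A, dropping the last reference
     so that PetscContainerDestroy_MatRedundant() frees the communication buffers; the
     original destroy routine, restored above, then disposes of the matrix itself */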
2450 if (A->ops->destroy) { 2451 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2452 } 2453 PetscFunctionReturn(0); 2454 } 2455 2456 #undef __FUNCT__ 2457 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2458 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2459 { 2460 PetscMPIInt rank,size; 2461 MPI_Comm comm=((PetscObject)mat)->comm; 2462 PetscErrorCode ierr; 2463 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2464 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2465 PetscInt *rowrange=mat->rmap->range; 2466 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2467 Mat A=aij->A,B=aij->B,C=*matredundant; 2468 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2469 PetscScalar *sbuf_a; 2470 PetscInt nzlocal=a->nz+b->nz; 2471 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2472 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2473 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2474 MatScalar *aworkA,*aworkB; 2475 PetscScalar *vals; 2476 PetscMPIInt tag1,tag2,tag3,imdex; 2477 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2478 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2479 MPI_Status recv_status,*send_status; 2480 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2481 PetscInt **rbuf_j=PETSC_NULL; 2482 PetscScalar **rbuf_a=PETSC_NULL; 2483 Mat_Redundant *redund=PETSC_NULL; 2484 PetscContainer container; 2485 2486 PetscFunctionBegin; 2487 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2488 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2489 2490 if (reuse == MAT_REUSE_MATRIX) { 2491 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2492 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2493 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2494 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2495 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2496 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist"); 2497 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2498 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2499 2500 nsends = redund->nsends; 2501 nrecvs = redund->nrecvs; 2502 send_rank = redund->send_rank; 2503 recv_rank = redund->recv_rank; 2504 sbuf_nz = redund->sbuf_nz; 2505 rbuf_nz = redund->rbuf_nz; 2506 sbuf_j = redund->sbuf_j; 2507 sbuf_a = redund->sbuf_a; 2508 rbuf_j = redund->rbuf_j; 2509 rbuf_a = redund->rbuf_a; 2510 } 2511 2512 if (reuse == MAT_INITIAL_MATRIX){ 2513 PetscMPIInt subrank,subsize; 2514 PetscInt nleftover,np_subcomm; 2515 /* get the destination processors' id send_rank, nsends and nrecvs */ 2516 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2517 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2518 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2519 np_subcomm = size/nsubcomm; 2520 nleftover = size - nsubcomm*np_subcomm; 2521 nsends = 0; nrecvs = 0; 2522 for (i=0; i<size; i++){ /* i=rank*/ 2523 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2524 send_rank[nsends] = i; nsends++; 2525 recv_rank[nrecvs++] = i; 2526 } 2527 } 2528 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2529 i = size-nleftover-1; 2530 j = 0; 2531 while (j < nsubcomm - nleftover){ 2532 send_rank[nsends++] = i; 2533 i--; j++; 2534 } 2535 } 2536 2537 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2538 for (i=0; i<nleftover; i++){ 2539 recv_rank[nrecvs++] = size-nleftover+i; 2540 } 2541 } 2542 2543 /* allocate sbuf_j, sbuf_a */ 2544 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2545 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2546 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2547 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2548 2549 /* copy mat's local entries into the buffers */ 2550 if (reuse == MAT_INITIAL_MATRIX){ 2551 rownz_max = 0; 2552 rptr = sbuf_j; 2553 cols = sbuf_j + rend-rstart + 1; 2554 vals = sbuf_a; 2555 rptr[0] = 0; 2556 for (i=0; i<rend-rstart; i++){ 2557 row = i + rstart; 2558 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2559 ncols = nzA + nzB; 2560 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2561 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2562 /* load the column indices for this row into cols */ 2563 lwrite = 0; 2564 for (l=0; l<nzB; l++) { 2565 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2566 vals[lwrite] = aworkB[l]; 2567 cols[lwrite++] = ctmp; 2568 } 2569 } 2570 for (l=0; l<nzA; l++){ 2571 vals[lwrite] = aworkA[l]; 2572 cols[lwrite++] = cstart + cworkA[l]; 2573 } 2574 for (l=0; l<nzB; l++) { 2575 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2576 vals[lwrite] = aworkB[l]; 2577 cols[lwrite++] = ctmp; 2578 } 2579 } 2580 vals += ncols; 2581 cols += ncols; 2582 rptr[i+1] = rptr[i] + ncols; 2583 if (rownz_max < ncols) rownz_max = ncols; 2584 } 2585 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2586 } else { /* only copy matrix values into sbuf_a */ 2587 rptr = sbuf_j; 2588 vals = sbuf_a; 2589 rptr[0] = 0; 2590 for (i=0; i<rend-rstart; i++){ 2591 row = i + rstart; 2592 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2593 ncols = nzA + nzB; 2594 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2595 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2596 lwrite = 0; 2597 for (l=0; l<nzB; l++) { 2598 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2599 } 2600 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2601 for 
(l=0; l<nzB; l++) { 2602 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2603 } 2604 vals += ncols; 2605 rptr[i+1] = rptr[i] + ncols; 2606 } 2607 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2608 2609 /* send nzlocal to others, and recv other's nzlocal */ 2610 /*--------------------------------------------------*/ 2611 if (reuse == MAT_INITIAL_MATRIX){ 2612 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2613 s_waits2 = s_waits3 + nsends; 2614 s_waits1 = s_waits2 + nsends; 2615 r_waits1 = s_waits1 + nsends; 2616 r_waits2 = r_waits1 + nrecvs; 2617 r_waits3 = r_waits2 + nrecvs; 2618 } else { 2619 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2620 r_waits3 = s_waits3 + nsends; 2621 } 2622 2623 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2624 if (reuse == MAT_INITIAL_MATRIX){ 2625 /* get new tags to keep the communication clean */ 2626 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2627 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2628 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2629 2630 /* post receives of other's nzlocal */ 2631 for (i=0; i<nrecvs; i++){ 2632 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2633 } 2634 /* send nzlocal to others */ 2635 for (i=0; i<nsends; i++){ 2636 sbuf_nz[i] = nzlocal; 2637 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2638 } 2639 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2640 count = nrecvs; 2641 while (count) { 2642 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2643 recv_rank[imdex] = recv_status.MPI_SOURCE; 2644 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2645 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2646 2647 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2648 rbuf_nz[imdex] += i + 2; 2649 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2650 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2651 count--; 2652 } 2653 /* wait on sends of nzlocal */ 2654 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2655 /* send mat->i,j to others, and recv from other's */ 2656 /*------------------------------------------------*/ 2657 for (i=0; i<nsends; i++){ 2658 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2659 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2660 } 2661 /* wait on receives of mat->i,j */ 2662 /*------------------------------*/ 2663 count = nrecvs; 2664 while (count) { 2665 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2666 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2667 count--; 2668 } 2669 /* wait on sends of mat->i,j */ 2670 /*---------------------------*/ 2671 if (nsends) { 2672 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2673 } 2674 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2675 2676 /* post receives, send and receive mat->a */ 2677 
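  /* only the numerical values travel on every call: the sizes (tag1) and the index
     structure (tag2) were exchanged above solely for MAT_INITIAL_MATRIX, which is
     what makes the MAT_REUSE_MATRIX path comparatively cheap */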
/*----------------------------------------*/ 2678 for (imdex=0; imdex<nrecvs; imdex++) { 2679 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2680 } 2681 for (i=0; i<nsends; i++){ 2682 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2683 } 2684 count = nrecvs; 2685 while (count) { 2686 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2687 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2688 count--; 2689 } 2690 if (nsends) { 2691 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2692 } 2693 2694 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2695 2696 /* create redundant matrix */ 2697 /*-------------------------*/ 2698 if (reuse == MAT_INITIAL_MATRIX){ 2699 /* compute rownz_max for preallocation */ 2700 for (imdex=0; imdex<nrecvs; imdex++){ 2701 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2702 rptr = rbuf_j[imdex]; 2703 for (i=0; i<j; i++){ 2704 ncols = rptr[i+1] - rptr[i]; 2705 if (rownz_max < ncols) rownz_max = ncols; 2706 } 2707 } 2708 2709 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2710 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2711 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2712 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2713 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2714 } else { 2715 C = *matredundant; 2716 } 2717 2718 /* insert local matrix entries */ 2719 rptr = sbuf_j; 2720 cols = sbuf_j + rend-rstart + 1; 2721 vals = sbuf_a; 2722 for (i=0; i<rend-rstart; i++){ 2723 row = i + rstart; 2724 ncols = rptr[i+1] - rptr[i]; 2725 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2726 vals += ncols; 2727 cols += ncols; 2728 } 2729 /* insert received matrix entries */ 2730 for (imdex=0; imdex<nrecvs; imdex++){ 2731 rstart = rowrange[recv_rank[imdex]]; 2732 rend = rowrange[recv_rank[imdex]+1]; 2733 rptr = rbuf_j[imdex]; 2734 cols = rbuf_j[imdex] + rend-rstart + 1; 2735 vals = rbuf_a[imdex]; 2736 for (i=0; i<rend-rstart; i++){ 2737 row = i + rstart; 2738 ncols = rptr[i+1] - rptr[i]; 2739 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2740 vals += ncols; 2741 cols += ncols; 2742 } 2743 } 2744 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2745 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2746 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2747 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2748 if (reuse == MAT_INITIAL_MATRIX) { 2749 PetscContainer container; 2750 *matredundant = C; 2751 /* create a supporting struct and attach it to C for reuse */ 2752 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2753 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2754 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2755 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2756 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2757 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2758 2759 redund->nzlocal = nzlocal; 2760 redund->nsends = nsends; 2761 redund->nrecvs = nrecvs; 2762 
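  /* everything recorded in redund below, buffers included, is owned by the container
     from now on and is freed in PetscContainerDestroy_MatRedundant() */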
redund->send_rank = send_rank; 2763 redund->recv_rank = recv_rank; 2764 redund->sbuf_nz = sbuf_nz; 2765 redund->rbuf_nz = rbuf_nz; 2766 redund->sbuf_j = sbuf_j; 2767 redund->sbuf_a = sbuf_a; 2768 redund->rbuf_j = rbuf_j; 2769 redund->rbuf_a = rbuf_a; 2770 2771 redund->Destroy = C->ops->destroy; 2772 C->ops->destroy = MatDestroy_MatRedundant; 2773 } 2774 PetscFunctionReturn(0); 2775 } 2776 2777 #undef __FUNCT__ 2778 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2779 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2780 { 2781 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2782 PetscErrorCode ierr; 2783 PetscInt i,*idxb = 0; 2784 PetscScalar *va,*vb; 2785 Vec vtmp; 2786 2787 PetscFunctionBegin; 2788 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2789 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2790 if (idx) { 2791 for (i=0; i<A->rmap->n; i++) { 2792 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2793 } 2794 } 2795 2796 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2797 if (idx) { 2798 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2799 } 2800 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2801 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2802 2803 for (i=0; i<A->rmap->n; i++){ 2804 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2805 va[i] = vb[i]; 2806 if (idx) idx[i] = a->garray[idxb[i]]; 2807 } 2808 } 2809 2810 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2811 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2812 ierr = PetscFree(idxb);CHKERRQ(ierr); 2813 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2814 PetscFunctionReturn(0); 2815 } 2816 2817 #undef __FUNCT__ 2818 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2819 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2820 { 2821 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2822 PetscErrorCode ierr; 2823 PetscInt i,*idxb = 0; 2824 PetscScalar *va,*vb; 2825 Vec vtmp; 2826 2827 PetscFunctionBegin; 2828 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2829 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2830 if (idx) { 2831 for (i=0; i<A->rmap->n; i++) { 2832 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2833 } 2834 } 2835 2836 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2837 if (idx) { 2838 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2839 } 2840 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2841 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2842 2843 for (i=0; i<A->rmap->n; i++){ 2844 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2845 va[i] = vb[i]; 2846 if (idx) idx[i] = a->garray[idxb[i]]; 2847 } 2848 } 2849 2850 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2851 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2852 ierr = PetscFree(idxb);CHKERRQ(ierr); 2853 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2854 PetscFunctionReturn(0); 2855 } 2856 2857 #undef __FUNCT__ 2858 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2859 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2860 { 2861 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2862 PetscInt n = A->rmap->n; 2863 PetscInt cstart = A->cmap->rstart; 2864 PetscInt *cmap = mat->garray; 2865 PetscInt *diagIdx, *offdiagIdx; 2866 Vec diagV, offdiagV; 2867 PetscScalar *a, *diagA, *offdiagA; 2868 PetscInt r; 2869 PetscErrorCode ierr; 2870 2871 PetscFunctionBegin; 2872 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2873 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2874 ierr = VecCreateSeq(PETSC_COMM_SELF, n, 
&offdiagV);CHKERRQ(ierr); 2875 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2876 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2877 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2878 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2879 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2880 for(r = 0; r < n; ++r) { 2881 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2882 a[r] = diagA[r]; 2883 idx[r] = cstart + diagIdx[r]; 2884 } else { 2885 a[r] = offdiagA[r]; 2886 idx[r] = cmap[offdiagIdx[r]]; 2887 } 2888 } 2889 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2890 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2891 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2892 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2893 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2894 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2895 PetscFunctionReturn(0); 2896 } 2897 2898 #undef __FUNCT__ 2899 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2900 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2901 { 2902 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2903 PetscInt n = A->rmap->n; 2904 PetscInt cstart = A->cmap->rstart; 2905 PetscInt *cmap = mat->garray; 2906 PetscInt *diagIdx, *offdiagIdx; 2907 Vec diagV, offdiagV; 2908 PetscScalar *a, *diagA, *offdiagA; 2909 PetscInt r; 2910 PetscErrorCode ierr; 2911 2912 PetscFunctionBegin; 2913 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2914 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2915 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2916 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2917 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2918 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2919 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2920 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2921 for(r = 0; r < n; ++r) { 2922 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2923 a[r] = diagA[r]; 2924 idx[r] = cstart + diagIdx[r]; 2925 } else { 2926 a[r] = offdiagA[r]; 2927 idx[r] = cmap[offdiagIdx[r]]; 2928 } 2929 } 2930 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2931 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2932 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2933 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2934 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2935 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2936 PetscFunctionReturn(0); 2937 } 2938 2939 #undef __FUNCT__ 2940 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2941 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2942 { 2943 PetscErrorCode ierr; 2944 Mat *dummy; 2945 2946 PetscFunctionBegin; 2947 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2948 *newmat = *dummy; 2949 ierr = PetscFree(dummy);CHKERRQ(ierr); 2950 PetscFunctionReturn(0); 2951 } 2952 2953 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2954 2955 #undef __FUNCT__ 2956 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 2957 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,PetscScalar **values) 2958 { 2959 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2960 PetscErrorCode ierr; 2961 2962 PetscFunctionBegin; 2963 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2964 PetscFunctionReturn(0); 2965 } 2966 2967 2968 /* -------------------------------------------------------------------*/ 2969 static 
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
/* 4*/ MatMultAdd_MPIAIJ,
       MatMultTranspose_MPIAIJ,
       MatMultTransposeAdd_MPIAIJ,
#ifdef PETSC_HAVE_PBGL
       MatSolve_MPIAIJ,
#else
       0,
#endif
       0,
       0,
/*10*/ 0,
       0,
       0,
       MatSOR_MPIAIJ,
       MatTranspose_MPIAIJ,
/*15*/ MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
/*20*/ MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
/*24*/ MatZeroRows_MPIAIJ,
       0,
#ifdef PETSC_HAVE_PBGL
       0,
#else
       0,
#endif
       0,
       0,
/*29*/ MatSetUp_MPIAIJ,
#ifdef PETSC_HAVE_PBGL
       0,
#else
       0,
#endif
       0,
       0,
       0,
/*34*/ MatDuplicate_MPIAIJ,
       0,
       0,
       0,
       0,
/*39*/ MatAXPY_MPIAIJ,
       MatGetSubMatrices_MPIAIJ,
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       MatCopy_MPIAIJ,
/*44*/ MatGetRowMax_MPIAIJ,
       MatScale_MPIAIJ,
       0,
       0,
       MatZeroRowsColumns_MPIAIJ,
/*49*/ MatSetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
/*54*/ MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       MatPermute_MPIAIJ,
       0,
/*59*/ MatGetSubMatrix_MPIAIJ,
       MatDestroy_MPIAIJ,
       MatView_MPIAIJ,
       0,
       0,
/*64*/ 0,
       0,
       0,
       0,
       0,
/*69*/ MatGetRowMaxAbs_MPIAIJ,
       MatGetRowMinAbs_MPIAIJ,
       0,
       MatSetColoring_MPIAIJ,
#if defined(PETSC_HAVE_ADIC)
       MatSetValuesAdic_MPIAIJ,
#else
       0,
#endif
       MatSetValuesAdifor_MPIAIJ,
/*75*/ MatFDColoringApply_AIJ,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
/*83*/ MatLoad_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
/*89*/ MatMatMult_MPIAIJ_MPIAIJ,
       MatMatMultSymbolic_MPIAIJ_MPIAIJ,
       MatMatMultNumeric_MPIAIJ_MPIAIJ,
       MatPtAP_Basic,
       MatPtAPSymbolic_MPIAIJ,
/*94*/ MatPtAPNumeric_MPIAIJ,
       0,
       0,
       0,
       0,
/*99*/ 0,
       MatPtAPSymbolic_MPIAIJ_MPIAIJ,
       MatPtAPNumeric_MPIAIJ_MPIAIJ,
       MatConjugate_MPIAIJ,
       0,
/*104*/MatSetValuesRow_MPIAIJ,
       MatRealPart_MPIAIJ,
       MatImaginaryPart_MPIAIJ,
       0,
       0,
/*109*/0,
       MatGetRedundantMatrix_MPIAIJ,
       MatGetRowMin_MPIAIJ,
       0,
       0,
/*114*/MatGetSeqNonzeroStructure_MPIAIJ,
       0,
       0,
       0,
       0,
/*119*/0,
       0,
       0,
       0,
       MatGetMultiProcBlock_MPIAIJ,
/*124*/MatFindNonZeroRows_MPIAIJ,
       MatGetColumnNorms_MPIAIJ,
       MatInvertBlockDiagonal_MPIAIJ,
       0,
       MatGetSubMatricesParallel_MPIAIJ,
/*129*/0,
       MatTransposeMatMult_MPIAIJ_MPIAIJ,
       MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
       0,
/*134*/0,
       0,
       0,
       0,
       0
};

/* ----------------------------------------------------------------------------------------*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIAIJ"
PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIAIJ"
PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscInt       i;
  PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;

  PetscFunctionBegin;
  if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
  if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);

  ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  if (d_nnz) {
    for (i=0; i<B->rmap->n; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->rmap->n; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
    }
  }
  b = (Mat_MPIAIJ*)B->data;

  if (!B->preallocated) {
    /* Explicitly create 2 MATSEQAIJ matrices. */
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
    ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
    ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
  }

  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
  /* Do not error if the user did not give real preallocation information. Ugly because this
     would overwrite a previous user call to MatSetOption().
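     Concretely: when d_nz/d_nnz or o_nz/o_nnz were left unspecified, the two
     MatSetOption() calls below turn MAT_NEW_NONZERO_ALLOCATION_ERR off on the
     corresponding block, silently overriding any earlier user setting of that option.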
  */
  if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
  if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
  B->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIAIJ"
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr    = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  a       = (Mat_MPIAIJ*)mat->data;

  mat->factortype   = matin->factortype;
  mat->rmap->bs     = matin->rmap->bs;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
  } else a->garray = 0;

  ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
  ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  ierr    = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIAIJ"
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
  PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens =
PETSC_NULL,jj,*mycols,*smycols;
  PetscInt       cend,cstart,n,*rowners,sizesset = 1;
  int            fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
  if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
  if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;

  /* If global sizes are set, check that they are consistent with those given in the file */
  if (sizesset) {
    ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
  }
  if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows);
  if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols);

  /* determine ownership of all rows */
  if (newMat->rmap->n < 0) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */
  else m = newMat->rmap->n;                                  /* Set by user */

  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* First process needs enough room for process with most rows */
  if (!rank) {
    mmax = rowners[1];
    for (i=2; i<=size; i++) {
      mmax = PetscMax(mmax,rowners[i]);
    }
  } else mmax = m;

  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += ourlens[j];
    }
    for (i=1; i<size; i++) {
      ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<rowners[i+1]-rowners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
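    /* (Rank 0 reads each process's slice from the file in turn: its own slice is
       kept in mycols below, and every other slice is shipped with MPILong_Send();
       the same root fan-out pattern is repeated later for the numerical values.) */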
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
    else n = newMat->cmap->n;
    ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  if (!sizesset) {
    ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
  }
  ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);

  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the parts for all other processors and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
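
/*
   How this routine is reached through the public interface, as a minimal sketch;
   the file name "matrix.dat" is hypothetical and assumed to have been written
   previously with MatView() on a binary viewer:

      Mat         A;
      PetscViewer viewer;
      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatLoad(A,viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

   MatLoad() dispatches to MatLoad_MPIAIJ() through the MatOps table above.
*/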
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscInt       csize;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  }
  ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
/*
    Not great since it makes two copies of the submatrix: first a SeqAIJ on each
  process, and then the end result by concatenating those local matrices.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ().

    Note: This requires a sequential iscol with all indices.
*/
PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            *local,M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm = ((PetscObject)mat)->comm;
  Mat_SeqAIJ     *aij;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
      rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart,cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscScalar    *values;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);

  ierr   = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
  ierr   = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);

#if defined(PETSC_USE_DEBUG)
  for (i=0; i<m; i++) {
    nnz = Ii[i+1] - Ii[i];
    JJ  = J + Ii[i];
    if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
    if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
    if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
  }
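  /* These checks assume the column indices within each row are sorted, since only
     the first and last index of a row are examined.  For example, for two local
     rows with entries in columns 0 and 3 (row 0) and column 1 (row 1), the input
         Ii = {0,2,3},  J = {0,3,1}
     passes the checks above. */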
#endif

  for (i=0; i<m; i++) {
    nnz     = Ii[i+1] - Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    d       = 0;
    for (j=0; j<nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  if (v) values = (PetscScalar*)v;
  else {
    ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    ii   = i + rstart;
    nnz  = Ii[i+1] - Ii[i];
    ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (!v) {
    ierr = PetscFree(values);CHKERRQ(ierr);
  }
  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
/*@
   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero)
-  v - optional values in the matrix

   Level: developer

   Notes:
       The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of v[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the i indices are indices corresponding to the local j array.

       The format used for the sparse matrix input is equivalent to a
     row-major ordering, i.e., for the following matrix, the input data expected is
     as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1 = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1 = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
          MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).
   For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
           For matrices that will be factored, you must leave room for (and set)
           the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)) is fully compatible with standard Fortran 77
   storage.  The stored row and column indices begin with zero.
   See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to.  This is an m x n matrix.  In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square.  The remaining portion of the local
   submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows.
   This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatGetInfo()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithArrays"
/*@
   MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain in standard
   CSR format the local rows.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the i indices are indices corresponding to the local j array.

       The format used for the sparse matrix input is equivalent to a
     row-major ordering, i.e., for the following matrix, the input data expected is
     as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1 = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1 = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  }
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.
       (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
   values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition, etc., where n0,n1,n2... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor, i.e., the diagonal submatrix on
   process 0 is [m0 x n0], on process 1 it is [m1 x n1],
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.
   If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism:
      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
.  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!


   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.
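
   For the 8x8 example above, the fully explicit per-row preallocation on, say,
   proc1 would then be (a sketch; A and comm as above):
.vb
     PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
     MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve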

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetSeqAIJ"
PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_GLOBAL) {
    ISColoringValue *allcolors,*colors;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    ISColoringValue *colors;
    PetscInt        *larray;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      larray[i] = i + A->cmap->rstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);

  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ADIC)
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMergeSymbolic"
PetscErrorCode MatMergeSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum;
  PetscInt       *indx;

  PetscFunctionBegin;
  /* This routine will ONLY return an MPIAIJ type matrix */
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (n == PETSC_DECIDE) {
    ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
  }
  /* Check that sum(n) == N */
  ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
  if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %D",N);

  ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
  rstart -= m;

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
  }

  ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMergeNumeric"
PetscErrorCode MatMergeNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge"
/*@
      MatMerge - Creates a single large PETSc matrix by concatenating the
                 sequential matrices supplied by each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    inmat - the input sequential matrix (one per process)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    outmat - the parallel matrix generated

    Level: advanced

   Notes: The number of columns of the matrix in EACH processor MUST be the same.

@*/
PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatMergeSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
  }
  ierr = MatMergeNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFileSplit"
PetscErrorCode MatFileSplit(Mat A,char *outfile)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  PetscInt          m,N,i,rstart,nnz;
  size_t            len;
  const PetscInt    *indx;
  PetscViewer       out;
  char              *name;
  Mat               B;
  const PetscScalar *values;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
  ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
  /* Should this be the type of the diagonal block of A?
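     As written, each per-process piece is always created as MATSEQAIJ below, so
     a more specialized type of A's diagonal block would not be preserved.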
  */
  ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
    ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
  ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
  ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
  sprintf(name,"%s.%d",outfile,rank);
  ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
  ierr = PetscFree(name);CHKERRQ(ierr);
  ierr = MatView(B,out);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
{
  PetscErrorCode      ierr;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
    ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->bi);CHKERRQ(ierr);
    ierr = PetscFree(merge->bj);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
    ierr = PetscFree(merge->coi);CHKERRQ(ierr);
    ierr = PetscFree(merge->coj);CHKERRQ(ierr);
    ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
    ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
    ierr = PetscFree(merge);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
  }
  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include <../src/mat/utils/freespace.h>
#include <petscbt.h>

#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPINumeric"
PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
{
  PetscErrorCode      ierr;
  MPI_Comm            comm = ((PetscObject)mpimat)->comm;
  Mat_SeqAIJ          *a   = (Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N = mpimat->cmap->N,i,j,*owners,*ai = a->i,*aj = a->j;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  MatScalar           *aa = a->a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

#include <../src/mat/utils/freespace.h>
#include <petscbt.h>

#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPINumeric"
PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
{
  PetscErrorCode      ierr;
  MPI_Comm            comm=((PetscObject)mpimat)->comm;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  MatScalar           *aa=a->a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
  ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i    = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k];                 /* beginning of k-th received i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;             /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a->a + ai[arow];
    nextaj = 0;
    for (j=0; nextaj<anzi; j++){
      if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++){
          if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
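/*
   The accumulation loops above rely on both column lists being sorted in
   ascending order, with each incoming row's columns (aj) a subset of the
   merged row's columns (bj_i) produced by the symbolic phase.  A standalone
   sketch of the pattern (illustration only):

      nextaj = 0;
      for (j=0; nextaj<anzi; j++) {       scan the merged (superset) columns
        if (bj_i[j] == aj[nextaj]) {      match found: fold the value in
          ba_i[j] += aa[nextaj++];
        }
      }

   Each incoming value is added exactly once and neither scan ever backs up,
   so a row is merged in time linear in its merged length.
*/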
extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  PetscErrorCode      ierr;
  Mat                 B_mpi;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt            len,proc,*dnz,*onz;
  PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  PetscBT             lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);

  /* make sure it is a PETSc comm */
  ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);

  /* determine row ownership */
  /*---------------------------------------------------------*/
  ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
  ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);

  m      = merge->rowmap->n;
  M      = merge->rowmap->N;
  owners = merge->rowmap->range;

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s = merge->len_s;

  len          = 0;  /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++){
    len_si[proc] = 0;
    if (proc == rank){
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      for (i=owners[proc]; i<owners[proc+1]; i++){
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len         += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*--------------------------------*/
  ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);

  for (proc=0, k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i    = owners[proc];
    ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr   = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
  buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++){
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc];         /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}

  ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++){
    ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr  = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k];                 /* beginning of k-th received i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0;i<m;i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow  = owners[rank] + i;
    anzi  = ai[arow+1] - ai[arow];
    aj    = a->j + ai[arow];
    ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi  = *(nextai[k]+1) - *nextai[k];
        aj    = buf_rj[k] + *nextai[k];
        ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->coi          = PETSC_NULL;
  merge->coj          = PETSC_NULL;
  merge->owners_co    = PETSC_NULL;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPI"
/*@C
      MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix (one per process)
.    m - number of local rows (or PETSC_DECIDE)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    mpimat - the parallel matrix generated

    Level: advanced

   Notes:
     The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
@*/
PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
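/*
   A usage sketch (illustration only; seqmat and mpimat are hypothetical
   names): every process holds a SeqAIJ matrix of identical global dimensions,
   and the entries from all processes are summed into one MPIAIJ matrix.  The
   symbolic data attached to mpimat makes reuse cheap when only the numerical
   values of seqmat change:

      ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
      ... change the numerical values of seqmat, keeping the same nonzero pattern ...
      ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
*/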
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMat"
/*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where mlocal is the local row count obtained with MatGetLocalSize() and n is the global column count obtained
          with MatGetSize()

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()

@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
  PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
  MatScalar      *aa=a->a,*ba=b->a,*cam;
  PetscScalar    *ca;
  PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    ierr  = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++){
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
    k    = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A, global columns below cstart */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A, global columns at or above cend */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX){
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    ci  = mat->i; cj = mat->j; cam = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A, global columns below cstart */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A, global columns at or above cend */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
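/*
   Usage sketch (illustration only): gather this process's rows of a parallel
   matrix A into one sequential matrix over the global column space, then
   refresh the same storage after A's values change:

      Mat A_loc = PETSC_NULL;
      ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
      ... update the values of A, keeping the same nonzero pattern ...
      ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
*/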
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
/*@C
     MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    row, col - index sets of rows and columns to extract (or PETSC_NULL)

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()

@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row){
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col){
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX){
    ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr   = PetscFree(aloc);CHKERRQ(ierr);
  if (!row){
    ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
  }
  if (!col){
    ierr = ISDestroy(&iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
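/*
   Usage sketch (illustration only): with row and col both PETSC_NULL the
   routine extracts every local row but only the columns in which the local
   part of A actually has nonzeros, so A_loc is much narrower than the
   uncondensed local matrix when A is sparse:

      Mat A_loc;
      ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
*/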
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)

   Output Parameters:
+    rowb, colb - index sets of rows and columns of B to extract
-    B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq=PETSC_NULL;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb  = *rowb; iscolb = *colb;
    ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr   = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb){
    ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb){
    ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
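/*
   Usage sketch (illustration only): a product-like kernel pulls in the rows
   of B matching the nonzero columns of the local part of A; the index sets
   created on the first call are returned so a later call with
   MAT_REUSE_MATRIX can use them again:

      IS  rowb = PETSC_NULL,colb = PETSC_NULL;
      Mat B_seq = PETSC_NULL;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
      ... change the values of B, keeping the same nonzero pattern ...
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
*/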
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
/*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
    of the OFF-DIAGONAL portion of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameters:
+    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
.    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
.    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
-    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

    Level: developer

*/
PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
{
  VecScatter_MPI_General *gen_to,*gen_from;
  PetscErrorCode         ierr;
  Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ             *b_oth;
  VecScatter             ctx=a->Mvctx;
  MPI_Comm               comm=((PetscObject)ctx)->comm;
  PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
  PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
  PetscScalar            *rvalues,*svalues;
  MatScalar              *b_otha,*bufa,*bufA;
  PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
  MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
  MPI_Status             *sstatus,rstatus;
  PetscMPIInt            jj;
  PetscInt               *cols,sbs,rbs;
  PetscScalar            *vals;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  gen_to   = (VecScatter_MPI_General*)ctx->todata;
  gen_from = (VecScatter_MPI_General*)ctx->fromdata;
  rvalues  = gen_from->values; /* holds the lengths of the receiving rows */
  svalues  = gen_to->values;   /* holds the lengths of the sending rows */
  nrecvs   = gen_from->n;
  nsends   = gen_to->n;

  ierr    = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
  srow    = gen_to->indices;  /* local row indices to be sent */
  sstarts = gen_to->starts;
  sprocs  = gen_to->procs;
  sstatus = gen_to->sstatus;
  sbs     = gen_to->bs;
  rstarts = gen_from->starts;
  rprocs  = gen_from->procs;
  rbs     = gen_from->bs;

  if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX){
    /* i-array */
    /*---------*/
    /* post receives */
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
      ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message */
    ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
    sstartsj[0] = 0; rstartsj[0] = 0;
    len = 0; /* total length of j or a array to be sent */
    k   = 0;
    for (i=0; i<nsends; i++){
      rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
      nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
      for (j=0; j<nrows; j++) {
        row = srow[k] + B->rmap->range[rank]; /* global row idx */
        for (l=0; l<sbs; l++){
          ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
          rowlen[j*sbs+l] = ncols;
          len += ncols;
          ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
        }
        k++;
      }
      ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
      sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr      = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
    b_othi[0] = 0;
    len = 0; /* total length of j or a array to be received */
    k   = 0;
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        len += rowlen[j]; k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++){
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    k = 0;
    for (i=0; i<nsends; i++){
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++){
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++){
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX){
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Invalid MatReuse option");
  }

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++){
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  k = 0;
  for (i=0; i<nsends; i++){
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++){
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++){
          *bufA++ = vals[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
      }
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj_s || !bufa_ptr){
      ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
    } else {
      *startsj_s = sstartsj;
      *startsj_r = rstartsj;
      *bufa_ptr  = bufa;
    }
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
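/*
   Usage sketch (illustration only; this is an internal developer routine):
   the off-diagonal variant reuses the communication pattern of A's
   matrix-vector scatter, and the staging arrays it hands back must be passed
   in again for a MAT_REUSE call:

      PetscInt  *startsj_s = PETSC_NULL,*startsj_r = PETSC_NULL;
      MatScalar *bufa = PETSC_NULL;
      Mat       B_oth = PETSC_NULL;
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
      ... new values in B, same nonzero pattern ...
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
*/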
#undef __FUNCT__
#define __FUNCT__ "MatGetCommunicationStructs"
/*@C
  MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.

  Not Collective

  Input Parameter:
. A - The matrix in mpiaij format

  Output Parameters:
+ lvec - The local vector holding off-process values from the argument to a matrix-vector product
. colmap - A map from global column index to local index into lvec
- multScatter - A scatter from the argument of a matrix-vector product to lvec

  Level: developer

@*/
#if defined (PETSC_USE_CTABLE)
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
  a = (Mat_MPIAIJ *) A->data;
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
/*
    Computes C = A*B as C = (B'*A')', since computing A*B directly with a parallel dense A is untenable

                  n           p            p
                (   )       (   )        (   )
           m    ( A )  * n  ( B )  =  m  ( C )
                (   )       (   )        (   )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  ierr = MatDestroy(&Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(&Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap->n,n=B->cmap->n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
  ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C = Cmat;
  (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
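/*
   Usage sketch (illustration only): with A of type MATMPIDENSE and B of type
   MATMPIAIJ, MatMatMult() dispatches to the routine above, which forms
   C = A*B through the identity C = (B'*A')':

      Mat C;
      ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
*/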
#if defined(PETSC_HAVE_MUMPS)
extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_PASTIX)
extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SPOOLES)
extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
#endif
EXTERN_C_END

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJ()
M*/
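/*
   A minimal creation sketch (illustration only; M, N and the preallocation
   counts are hypothetical): select the type explicitly and preallocate the
   diagonal and off-diagonal blocks before setting values:

      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/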
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);

  ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->rmap->bs  = 1;
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
                                           "MatGetFactor_mpiaij_spooles",
                                           MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
                                           "MatGetFactor_aij_mumps",
                                           MatGetFactor_aij_mumps);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_PASTIX)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
                                           "MatGetFactor_mpiaij_pastix",
                                           MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
                                           "MatGetFactor_mpiaij_superlu_dist",
                                           MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                           "MatStoreValues_MPIAIJ",
                                           MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                           "MatRetrieveValues_MPIAIJ",
                                           MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIAIJ",
                                           MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
                                           "MatIsTranspose_MPIAIJ",
                                           MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                           "MatMPIAIJSetPreallocation_MPIAIJ",
                                           MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
                                           "MatMPIAIJSetPreallocationCSR_MPIAIJ",
                                           MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                           "MatDiagonalScaleLocal_MPIAIJ",
                                           MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
                                           "MatConvert_MPIAIJ_MPIAIJPERM",
                                           MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
                                           "MatConvert_MPIAIJ_MPIAIJCRL",
                                           MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
                                           "MatConvert_MPIAIJ_MPISBAIJ",
                                           MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
                                           "MatMatMult_MPIDense_MPIAIJ",
                                           MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
                                           "MatMatMultSymbolic_MPIDense_MPIAIJ",
                                           MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
                                           "MatMatMultNumeric_MPIDense_MPIAIJ",
                                           MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@
     MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
         and "off-diagonal" part of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       it calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices for "diagonal" portion of matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
       must free the arrays once the matrix has been destroyed and not before.

       The i and j indices are 0 based

       See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

       This sets local rows and cannot be used to set off-processor values.

       You cannot later use MatSetValues() to change values in this matrix.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
                                              PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;
  maij->donotstash     = PETSC_TRUE;
  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
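/*
   Usage sketch (illustration only; values are made up, and global column
   numbering is assumed for oj since the code above hands oj to a SeqAIJ with
   N columns): each process passes its own arrays; on process 0 of a
   two-process run, a 2x2 local diagonal block plus one off-diagonal entry
   might look like

      PetscInt    i[]  = {0,1,2},  j[]  = {0,1};
      PetscScalar a[]  = {1.0,2.0};
      PetscInt    oi[] = {0,1,1},  oj[] = {2};
      PetscScalar oa[] = {3.0};
      Mat         A;
      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);

   The arrays must remain valid until A is destroyed.
*/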
values"); 5681 } 5682 #endif 5683 { 5684 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5685 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5686 PetscBool roworiented = aij->roworiented; 5687 5688 /* Some Variables required in the macro */ 5689 Mat A = aij->A; 5690 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5691 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5692 MatScalar *aa = a->a; 5693 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5694 Mat B = aij->B; 5695 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5696 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5697 MatScalar *ba = b->a; 5698 5699 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5700 PetscInt nonew = a->nonew; 5701 MatScalar *ap1,*ap2; 5702 5703 PetscFunctionBegin; 5704 for (i=0; i<m; i++) { 5705 if (im[i] < 0) continue; 5706 #if defined(PETSC_USE_DEBUG) 5707 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5708 #endif 5709 if (im[i] >= rstart && im[i] < rend) { 5710 row = im[i] - rstart; 5711 lastcol1 = -1; 5712 rp1 = aj + ai[row]; 5713 ap1 = aa + ai[row]; 5714 rmax1 = aimax[row]; 5715 nrow1 = ailen[row]; 5716 low1 = 0; 5717 high1 = nrow1; 5718 lastcol2 = -1; 5719 rp2 = bj + bi[row]; 5720 ap2 = ba + bi[row]; 5721 rmax2 = bimax[row]; 5722 nrow2 = bilen[row]; 5723 low2 = 0; 5724 high2 = nrow2; 5725 5726 for (j=0; j<n; j++) { 5727 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5728 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5729 if (in[j] >= cstart && in[j] < cend){ 5730 col = in[j] - cstart; 5731 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5732 } else if (in[j] < 0) continue; 5733 #if defined(PETSC_USE_DEBUG) 5734 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5735 #endif 5736 else { 5737 if (mat->was_assembled) { 5738 if (!aij->colmap) { 5739 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5740 } 5741 #if defined (PETSC_USE_CTABLE) 5742 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5743 col--; 5744 #else 5745 col = aij->colmap[in[j]] - 1; 5746 #endif 5747 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5748 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5749 col = in[j]; 5750 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5751 B = aij->B; 5752 b = (Mat_SeqAIJ*)B->data; 5753 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5754 rp2 = bj + bi[row]; 5755 ap2 = ba + bi[row]; 5756 rmax2 = bimax[row]; 5757 nrow2 = bilen[row]; 5758 low2 = 0; 5759 high2 = nrow2; 5760 bm = aij->B->rmap->n; 5761 ba = b->a; 5762 } 5763 } else col = in[j]; 5764 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5765 } 5766 } 5767 } else { 5768 if (!aij->donotstash) { 5769 if (roworiented) { 5770 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5771 } else { 5772 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5773 } 5774 } 5775 } 5776 }} 5777 PetscFunctionReturnVoid(); 5778 } 5779 EXTERN_C_END 5780 5781