#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petscblaslapack.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to use inodes when
   enough exist.

  Level: beginner

.seealso: MatCreateMPIAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
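/*
   A minimal usage sketch (illustrative only, guarded out of compilation; the
   sizes and per-row nonzero counts below are placeholder assumptions): an AIJ
   matrix that runs unchanged on one or many processes, created by calling both
   preallocation routines as recommended in the man pages above.
*/
#if 0
static PetscErrorCode ExampleCreateAIJ(MPI_Comm comm,PetscInt M,PetscInt N,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(*A,5,PETSC_NULL);CHKERRQ(ierr);               /* used when comm has one process */
  ierr = MatMPIAIJSetPreallocation(*A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);  /* used when comm has several processes */
  PetscFunctionReturn(0);
}
#endif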
#undef __FUNCT__
#define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
    ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    ok2:;
  }
  ierr = ISCreateGeneral(PETSC_COMM_WORLD,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }

  } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
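/*
   Added commentary: MatGetColumnNorms_MPIAIJ() above accumulates contributions
   from the diagonal (A) and off-diagonal (B) blocks into a work array of full
   global width n, then combines the per-process results with a single
   MPI_Allreduce (MPIU_MAX for NORM_INFINITY, MPIU_SUM otherwise); NORM_2 takes
   square roots afterwards.
*/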
#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine number of diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine number of diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];  ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i];  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                    ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each
  processor has an order N integer array, but access is fast).
*/
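/*
   Illustrative sketch (guarded out of compilation; it mirrors the lookups done
   in MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below): recovering the
   local off-diagonal column lcol for a hypothetical global column number gcol.
*/
#if 0
{
  PetscInt gcol = 0,lcol;         /* gcol is a placeholder global column */
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
  lcol--;                         /* stored shifted by one so that 0 can mean "not present" */
#else
  lcol = aij->colmap[gcol] - 1;   /* dense array with one entry per global column */
#endif
  /* lcol < 0 signals that gcol does not occur in the off-diagonal part B */
}
#endif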
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col;  \
    ap1[_i] = value;  \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col;  \
    ap2[_i] = value;  \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}
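/*
   Added commentary for MatSetValuesRow_MPIAIJ() below: v[] holds one complete
   local row in ascending global column order, which the three PetscMemcpy()
   calls split into contiguous pieces

     [ B part left of the diagonal block | A (diagonal block) part | B part right of the diagonal block ]

   where l counts the off-diagonal entries whose global column precedes the
   owned diagonal block.
*/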
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew = a->nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ *)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends,row;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb;
#if defined(PETSC_DEBUG)
  PetscBool found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  if (A->nooffproczerorows) {
    if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
    nrecvs = nsends;
    nmax   = N;
  } else {
    /* inform other processors of number of messages and max length */
    ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
  }

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory, incur many mallocs, and slow things down.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,m;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;
#if defined(PETSC_DEBUG)
  PetscBool found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);
  /* lrows are the local rows to be zeroed, slen is the number of local rows */

  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);

  /* handle off-diagonal part of matrix */
  ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<slen; i++) {
    bb[lrows[i]] = 1;
  }
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);

  /* remove zeroed rows of off-diagonal matrix */
  ii = aij->i;
  for (i=0; i<slen; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m  = aij->compressedrow.nrows;
    ii = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually added into yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
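/*
   Added commentary: layout of the binary file produced by
   MatView_MPIAIJ_Binary() below, as can be read off its PetscBinaryWrite()
   calls (rows appear in global order):

     header          4 PetscInt values: MAT_FILE_CLASSID, M, N, global nz
     row lengths     M PetscInt values
     column indices  nz PetscInt values, global numbering, ascending within each row
     values          nz PetscScalar values, matching the column indices
*/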
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPILong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPILong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the largest process needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPILong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPILong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPILong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPILong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
*/ 1498 Mat A; 1499 Mat_SeqAIJ *Aloc; 1500 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1501 MatScalar *a; 1502 1503 if (mat->rmap->N > 1024) { 1504 PetscBool flg = PETSC_FALSE; 1505 1506 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1507 if (!flg) { 1508 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1509 } 1510 } 1511 1512 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1513 if (!rank) { 1514 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1515 } else { 1516 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1517 } 1518 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1519 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1520 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1521 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1522 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1523 1524 /* copy over the A part */ 1525 Aloc = (Mat_SeqAIJ*)aij->A->data; 1526 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1527 row = mat->rmap->rstart; 1528 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1529 for (i=0; i<m; i++) { 1530 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1531 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1532 } 1533 aj = Aloc->j; 1534 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1535 1536 /* copy over the B part */ 1537 Aloc = (Mat_SeqAIJ*)aij->B->data; 1538 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1539 row = mat->rmap->rstart; 1540 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1541 ct = cols; 1542 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1543 for (i=0; i<m; i++) { 1544 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1545 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1546 } 1547 ierr = PetscFree(ct);CHKERRQ(ierr); 1548 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1549 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1550 /* 1551 Everyone has to call to draw the matrix since the graphics waits are 1552 synchronized across all processors that share the PetscDraw object 1553 */ 1554 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1555 if (!rank) { 1556 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1557 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1558 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1559 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1560 } 1561 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1562 ierr = MatDestroy(&A);CHKERRQ(ierr); 1563 } 1564 PetscFunctionReturn(0); 1565 } 1566 1567 #undef __FUNCT__ 1568 #define __FUNCT__ "MatView_MPIAIJ" 1569 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1570 { 1571 PetscErrorCode ierr; 1572 PetscBool iascii,isdraw,issocket,isbinary; 1573 1574 PetscFunctionBegin; 1575 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1576 ierr = 
PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1577 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1578 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1579 if (iascii || isdraw || isbinary || issocket) { 1580 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1581 } else { 1582 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1583 } 1584 PetscFunctionReturn(0); 1585 } 1586 1587 #undef __FUNCT__ 1588 #define __FUNCT__ "MatSOR_MPIAIJ" 1589 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1590 { 1591 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1592 PetscErrorCode ierr; 1593 Vec bb1 = 0; 1594 PetscBool hasop; 1595 1596 PetscFunctionBegin; 1597 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1598 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1599 } 1600 1601 if (flag == SOR_APPLY_UPPER) { 1602 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1603 PetscFunctionReturn(0); 1604 } 1605 1606 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1607 if (flag & SOR_ZERO_INITIAL_GUESS) { 1608 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1609 its--; 1610 } 1611 1612 while (its--) { 1613 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1614 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1615 1616 /* update rhs: bb1 = bb - B*x */ 1617 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1618 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1619 1620 /* local sweep */ 1621 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1622 } 1623 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1624 if (flag & SOR_ZERO_INITIAL_GUESS) { 1625 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1626 its--; 1627 } 1628 while (its--) { 1629 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1630 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1631 1632 /* update rhs: bb1 = bb - B*x */ 1633 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1634 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1635 1636 /* local sweep */ 1637 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1638 } 1639 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1640 if (flag & SOR_ZERO_INITIAL_GUESS) { 1641 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1642 its--; 1643 } 1644 while (its--) { 1645 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1646 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1647 1648 /* update rhs: bb1 = bb - B*x */ 1649 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1650 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1651 1652 /* local sweep */ 1653 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1654 } 1655 } else if (flag & SOR_EISENSTAT) { 1656 Vec xx1; 1657 1658 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1659 ierr = 
(*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1660 1661 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1662 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1663 if (!mat->diag) { 1664 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1665 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1666 } 1667 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1668 if (hasop) { 1669 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1670 } else { 1671 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1672 } 1673 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1674 1675 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1676 1677 /* local sweep */ 1678 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1679 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1680 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1681 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1682 1683 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1684 PetscFunctionReturn(0); 1685 } 1686 1687 #undef __FUNCT__ 1688 #define __FUNCT__ "MatPermute_MPIAIJ" 1689 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1690 { 1691 MPI_Comm comm; 1692 PetscInt first,local_rowsize,local_colsize; 1693 const PetscInt *rows; 1694 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1695 PetscErrorCode ierr; 1696 1697 PetscFunctionBegin; 1698 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1699 /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */ 1700 ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr); 1701 /* collect the global row permutation and invert it */ 1702 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1703 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1704 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 1705 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1706 ierr = ISDestroy(&growp);CHKERRQ(ierr); 1707 /* get the local target indices */ 1708 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1709 ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr); 1710 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1711 ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 1712 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1713 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 1714 /* the column permutation is so much easier; 1715 make a local version of 'colp' and invert it */ 1716 ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr); 1717 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1718 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1719 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 1720 /* now we just get the submatrix */ 1721 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1722 /* clean up */ 1723 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 1724 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 1725 PetscFunctionReturn(0); 1726 } 1727 1728 #undef __FUNCT__ 1729 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1730 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1731 { 1732 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1733 Mat A = mat->A,B = 
mat->B; 1734 PetscErrorCode ierr; 1735 PetscReal isend[5],irecv[5]; 1736 1737 PetscFunctionBegin; 1738 info->block_size = 1.0; 1739 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1740 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1741 isend[3] = info->memory; isend[4] = info->mallocs; 1742 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1743 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1744 isend[3] += info->memory; isend[4] += info->mallocs; 1745 if (flag == MAT_LOCAL) { 1746 info->nz_used = isend[0]; 1747 info->nz_allocated = isend[1]; 1748 info->nz_unneeded = isend[2]; 1749 info->memory = isend[3]; 1750 info->mallocs = isend[4]; 1751 } else if (flag == MAT_GLOBAL_MAX) { 1752 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1753 info->nz_used = irecv[0]; 1754 info->nz_allocated = irecv[1]; 1755 info->nz_unneeded = irecv[2]; 1756 info->memory = irecv[3]; 1757 info->mallocs = irecv[4]; 1758 } else if (flag == MAT_GLOBAL_SUM) { 1759 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1760 info->nz_used = irecv[0]; 1761 info->nz_allocated = irecv[1]; 1762 info->nz_unneeded = irecv[2]; 1763 info->memory = irecv[3]; 1764 info->mallocs = irecv[4]; 1765 } 1766 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1767 info->fill_ratio_needed = 0; 1768 info->factor_mallocs = 0; 1769 1770 PetscFunctionReturn(0); 1771 } 1772 1773 #undef __FUNCT__ 1774 #define __FUNCT__ "MatSetOption_MPIAIJ" 1775 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1776 { 1777 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1778 PetscErrorCode ierr; 1779 1780 PetscFunctionBegin; 1781 switch (op) { 1782 case MAT_NEW_NONZERO_LOCATIONS: 1783 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1784 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1785 case MAT_KEEP_NONZERO_PATTERN: 1786 case MAT_NEW_NONZERO_LOCATION_ERR: 1787 case MAT_USE_INODES: 1788 case MAT_IGNORE_ZERO_ENTRIES: 1789 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1790 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1791 break; 1792 case MAT_ROW_ORIENTED: 1793 a->roworiented = flg; 1794 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1795 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1796 break; 1797 case MAT_NEW_DIAGONALS: 1798 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1799 break; 1800 case MAT_IGNORE_OFF_PROC_ENTRIES: 1801 a->donotstash = flg; 1802 break; 1803 case MAT_SPD: 1804 A->spd_set = PETSC_TRUE; 1805 A->spd = flg; 1806 if (flg) { 1807 A->symmetric = PETSC_TRUE; 1808 A->structurally_symmetric = PETSC_TRUE; 1809 A->symmetric_set = PETSC_TRUE; 1810 A->structurally_symmetric_set = PETSC_TRUE; 1811 } 1812 break; 1813 case MAT_SYMMETRIC: 1814 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1815 break; 1816 case MAT_STRUCTURALLY_SYMMETRIC: 1817 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1818 break; 1819 case MAT_HERMITIAN: 1820 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1821 break; 1822 case MAT_SYMMETRY_ETERNAL: 1823 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1824 break; 1825 default: 1826 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1827 } 1828 PetscFunctionReturn(0); 1829 } 1830 1831 #undef __FUNCT__ 1832 #define __FUNCT__ "MatGetRow_MPIAIJ" 1833 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1834 { 1835 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1836 
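/* A row of the parallel matrix is the merge of a row of the diagonal block A and a row of
   the off-diagonal block B: B's compressed column indices are mapped back to global columns
   through garray, A's are shifted by cstart, and the two sorted lists are merged below so
   the caller sees a single row in increasing global column order. For example, with
   cstart = 4, an A row {0: 2.0, 1: 1.0} and a B row with global columns {1: 3.0, 7: 5.0}
   come back as columns {1,4,5,7} with values {3.0,2.0,1.0,5.0}. */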
PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1837 PetscErrorCode ierr; 1838 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1839 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1840 PetscInt *cmap,*idx_p; 1841 1842 PetscFunctionBegin; 1843 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1844 mat->getrowactive = PETSC_TRUE; 1845 1846 if (!mat->rowvalues && (idx || v)) { 1847 /* 1848 allocate enough space to hold information from the longest row. 1849 */ 1850 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1851 PetscInt max = 1,tmp; 1852 for (i=0; i<matin->rmap->n; i++) { 1853 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1854 if (max < tmp) { max = tmp; } 1855 } 1856 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1857 } 1858 1859 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1860 lrow = row - rstart; 1861 1862 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1863 if (!v) {pvA = 0; pvB = 0;} 1864 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1865 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1866 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1867 nztot = nzA + nzB; 1868 1869 cmap = mat->garray; 1870 if (v || idx) { 1871 if (nztot) { 1872 /* Sort by increasing column numbers, assuming A and B already sorted */ 1873 PetscInt imark = -1; 1874 if (v) { 1875 *v = v_p = mat->rowvalues; 1876 for (i=0; i<nzB; i++) { 1877 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1878 else break; 1879 } 1880 imark = i; 1881 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1882 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1883 } 1884 if (idx) { 1885 *idx = idx_p = mat->rowindices; 1886 if (imark > -1) { 1887 for (i=0; i<imark; i++) { 1888 idx_p[i] = cmap[cworkB[i]]; 1889 } 1890 } else { 1891 for (i=0; i<nzB; i++) { 1892 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1893 else break; 1894 } 1895 imark = i; 1896 } 1897 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1898 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1899 } 1900 } else { 1901 if (idx) *idx = 0; 1902 if (v) *v = 0; 1903 } 1904 } 1905 *nz = nztot; 1906 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1907 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1908 PetscFunctionReturn(0); 1909 } 1910 1911 #undef __FUNCT__ 1912 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1913 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1914 { 1915 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1916 1917 PetscFunctionBegin; 1918 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1919 aij->getrowactive = PETSC_FALSE; 1920 PetscFunctionReturn(0); 1921 } 1922 1923 #undef __FUNCT__ 1924 #define __FUNCT__ "MatNorm_MPIAIJ" 1925 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1926 { 1927 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1928 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1929 PetscErrorCode ierr; 1930 PetscInt i,j,cstart = mat->cmap->rstart; 1931 PetscReal sum = 0.0; 1932 MatScalar *v; 1933 1934 PetscFunctionBegin; 1935 if (aij->size == 1) { 1936 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1937 } else { 1938 if (type == 
NORM_FROBENIUS) { 1939 v = amat->a; 1940 for (i=0; i<amat->nz; i++) { 1941 #if defined(PETSC_USE_COMPLEX) 1942 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1943 #else 1944 sum += (*v)*(*v); v++; 1945 #endif 1946 } 1947 v = bmat->a; 1948 for (i=0; i<bmat->nz; i++) { 1949 #if defined(PETSC_USE_COMPLEX) 1950 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1951 #else 1952 sum += (*v)*(*v); v++; 1953 #endif 1954 } 1955 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1956 *norm = PetscSqrtReal(*norm); 1957 } else if (type == NORM_1) { /* max column norm */ 1958 PetscReal *tmp,*tmp2; 1959 PetscInt *jj,*garray = aij->garray; 1960 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1961 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1962 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1963 *norm = 0.0; 1964 v = amat->a; jj = amat->j; 1965 for (j=0; j<amat->nz; j++) { 1966 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1967 } 1968 v = bmat->a; jj = bmat->j; 1969 for (j=0; j<bmat->nz; j++) { 1970 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1971 } 1972 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1973 for (j=0; j<mat->cmap->N; j++) { 1974 if (tmp2[j] > *norm) *norm = tmp2[j]; 1975 } 1976 ierr = PetscFree(tmp);CHKERRQ(ierr); 1977 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1978 } else if (type == NORM_INFINITY) { /* max row norm */ 1979 PetscReal ntemp = 0.0; 1980 for (j=0; j<aij->A->rmap->n; j++) { 1981 v = amat->a + amat->i[j]; 1982 sum = 0.0; 1983 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1984 sum += PetscAbsScalar(*v); v++; 1985 } 1986 v = bmat->a + bmat->i[j]; 1987 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1988 sum += PetscAbsScalar(*v); v++; 1989 } 1990 if (sum > ntemp) ntemp = sum; 1991 } 1992 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1993 } else { 1994 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 1995 } 1996 } 1997 PetscFunctionReturn(0); 1998 } 1999 2000 #undef __FUNCT__ 2001 #define __FUNCT__ "MatTranspose_MPIAIJ" 2002 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2003 { 2004 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2005 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2006 PetscErrorCode ierr; 2007 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 2008 PetscInt cstart=A->cmap->rstart,ncol; 2009 Mat B; 2010 MatScalar *array; 2011 2012 PetscFunctionBegin; 2013 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2014 2015 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 2016 ai = Aloc->i; aj = Aloc->j; 2017 bi = Bloc->i; bj = Bloc->j; 2018 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2019 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 2020 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2021 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 2022 for (i=0; i<ai[ma]; i++){ 2023 d_nnz[aj[i]] ++; 2024 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2025 } 2026 2027 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2028 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2029 ierr = 
MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2030 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 2031 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2032 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2033 } else { 2034 B = *matout; 2035 } 2036 2037 /* copy over the A part */ 2038 array = Aloc->a; 2039 row = A->rmap->rstart; 2040 for (i=0; i<ma; i++) { 2041 ncol = ai[i+1]-ai[i]; 2042 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2043 row++; array += ncol; aj += ncol; 2044 } 2045 aj = Aloc->j; 2046 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2047 2048 /* copy over the B part */ 2049 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2050 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2051 array = Bloc->a; 2052 row = A->rmap->rstart; 2053 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 2054 cols_tmp = cols; 2055 for (i=0; i<mb; i++) { 2056 ncol = bi[i+1]-bi[i]; 2057 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2058 row++; array += ncol; cols_tmp += ncol; 2059 } 2060 ierr = PetscFree(cols);CHKERRQ(ierr); 2061 2062 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2063 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2064 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2065 *matout = B; 2066 } else { 2067 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2068 } 2069 PetscFunctionReturn(0); 2070 } 2071 2072 #undef __FUNCT__ 2073 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2074 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2075 { 2076 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2077 Mat a = aij->A,b = aij->B; 2078 PetscErrorCode ierr; 2079 PetscInt s1,s2,s3; 2080 2081 PetscFunctionBegin; 2082 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2083 if (rr) { 2084 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2085 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2086 /* Overlap communication with computation. 
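The forward scatter of rr into aij->lvec is only started here; while it is in flight we
apply the left scaling to both blocks and the right scaling to the diagonal block, and
only after VecScatterEnd() below is the off-diagonal block right-scaled with the
gathered values of rr.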
*/ 2087 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2088 } 2089 if (ll) { 2090 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2091 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2092 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2093 } 2094 /* scale the diagonal block */ 2095 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2096 2097 if (rr) { 2098 /* Do a scatter end and then right scale the off-diagonal block */ 2099 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2100 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2101 } 2102 2103 PetscFunctionReturn(0); 2104 } 2105 2106 #undef __FUNCT__ 2107 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 2108 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 2109 { 2110 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2111 PetscErrorCode ierr; 2112 2113 PetscFunctionBegin; 2114 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 2115 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 2116 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 2117 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 2118 PetscFunctionReturn(0); 2119 } 2120 #undef __FUNCT__ 2121 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2122 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2123 { 2124 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2125 PetscErrorCode ierr; 2126 2127 PetscFunctionBegin; 2128 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2129 PetscFunctionReturn(0); 2130 } 2131 2132 #undef __FUNCT__ 2133 #define __FUNCT__ "MatEqual_MPIAIJ" 2134 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2135 { 2136 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2137 Mat a,b,c,d; 2138 PetscBool flg; 2139 PetscErrorCode ierr; 2140 2141 PetscFunctionBegin; 2142 a = matA->A; b = matA->B; 2143 c = matB->A; d = matB->B; 2144 2145 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2146 if (flg) { 2147 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2148 } 2149 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2150 PetscFunctionReturn(0); 2151 } 2152 2153 #undef __FUNCT__ 2154 #define __FUNCT__ "MatCopy_MPIAIJ" 2155 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2156 { 2157 PetscErrorCode ierr; 2158 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2159 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2160 2161 PetscFunctionBegin; 2162 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2163 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2164 /* because of the column compression in the off-processor part of the matrix a->B, 2165 the number of columns in a->B and b->B may be different, hence we cannot call 2166 the MatCopy() directly on the two parts. 
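(Each process stores only the nonzero global columns of its own off-diagonal block, renumbered compactly through garray, so two matrices with the same global nonzero pattern can still compress to different local column spaces.)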
If need be, we can provide a more 2167 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2168 then copying the submatrices */ 2169 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2170 } else { 2171 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2172 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2173 } 2174 PetscFunctionReturn(0); 2175 } 2176 2177 #undef __FUNCT__ 2178 #define __FUNCT__ "MatSetUp_MPIAIJ" 2179 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2180 { 2181 PetscErrorCode ierr; 2182 2183 PetscFunctionBegin; 2184 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2185 PetscFunctionReturn(0); 2186 } 2187 2188 #undef __FUNCT__ 2189 #define __FUNCT__ "MatAXPY_MPIAIJ" 2190 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2191 { 2192 PetscErrorCode ierr; 2193 PetscInt i; 2194 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2195 PetscBLASInt bnz,one=1; 2196 Mat_SeqAIJ *x,*y; 2197 2198 PetscFunctionBegin; 2199 if (str == SAME_NONZERO_PATTERN) { 2200 PetscScalar alpha = a; 2201 x = (Mat_SeqAIJ *)xx->A->data; 2202 y = (Mat_SeqAIJ *)yy->A->data; 2203 bnz = PetscBLASIntCast(x->nz); 2204 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2205 x = (Mat_SeqAIJ *)xx->B->data; 2206 y = (Mat_SeqAIJ *)yy->B->data; 2207 bnz = PetscBLASIntCast(x->nz); 2208 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2209 } else if (str == SUBSET_NONZERO_PATTERN) { 2210 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2211 2212 x = (Mat_SeqAIJ *)xx->B->data; 2213 y = (Mat_SeqAIJ *)yy->B->data; 2214 if (y->xtoy && y->XtoY != xx->B) { 2215 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2216 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2217 } 2218 if (!y->xtoy) { /* get xtoy */ 2219 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2220 y->XtoY = xx->B; 2221 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2222 } 2223 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2224 } else { 2225 Mat B; 2226 PetscInt *nnz_d,*nnz_o; 2227 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2228 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2229 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2230 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2231 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2232 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2233 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2234 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr); 2235 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2236 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2237 ierr = MatHeaderReplace(Y,B); 2238 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2239 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2240 } 2241 PetscFunctionReturn(0); 2242 } 2243 2244 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2245 2246 #undef __FUNCT__ 2247 #define __FUNCT__ "MatConjugate_MPIAIJ" 2248 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2249 { 2250 #if defined(PETSC_USE_COMPLEX) 2251 PetscErrorCode ierr; 2252 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2253 2254 PetscFunctionBegin; 2255 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2256 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2257 #else 2258 PetscFunctionBegin; 2259 #endif 2260 PetscFunctionReturn(0); 2261 } 2262 2263 #undef __FUNCT__ 2264 
#define __FUNCT__ "MatRealPart_MPIAIJ"
2265 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2266 {
2267 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2268 PetscErrorCode ierr;
2269
2270 PetscFunctionBegin;
2271 ierr = MatRealPart(a->A);CHKERRQ(ierr);
2272 ierr = MatRealPart(a->B);CHKERRQ(ierr);
2273 PetscFunctionReturn(0);
2274 }
2275
2276 #undef __FUNCT__
2277 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2278 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2279 {
2280 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2281 PetscErrorCode ierr;
2282
2283 PetscFunctionBegin;
2284 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2285 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2286 PetscFunctionReturn(0);
2287 }
2288
2289 #ifdef PETSC_HAVE_PBGL
2290
2291 #include <boost/parallel/mpi/bsp_process_group.hpp>
2292 #include <boost/graph/distributed/ilu_default_graph.hpp>
2293 #include <boost/graph/distributed/ilu_0_block.hpp>
2294 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2295 #include <boost/graph/distributed/petsc/interface.hpp>
2296 #include <boost/multi_array.hpp>
2297 #include <boost/parallel/distributed_property_map.hpp>
2298
2299 #undef __FUNCT__
2300 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2301 /*
2302 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2303 */
2304 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2305 {
2306 namespace petsc = boost::distributed::petsc;
2307
2308 namespace graph_dist = boost::graph::distributed;
2309 using boost::graph::distributed::ilu_default::process_group_type;
2310 using boost::graph::ilu_permuted;
2311
2312 PetscBool row_identity, col_identity;
2313 PetscContainer c;
2314 PetscInt m, n, M, N;
2315 PetscErrorCode ierr;
2316
2317 PetscFunctionBegin;
2318 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2319 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2320 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2321 if (!row_identity || !col_identity) {
2322 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2323 }
2324
2325 process_group_type pg;
2326 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2327 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2328 lgraph_type& level_graph = *lgraph_p;
2329 graph_dist::ilu_default::graph_type& graph(level_graph.graph);
2330
2331 petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2332 ilu_permuted(level_graph);
2333
2334 /* put together the new matrix */
2335 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2336 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2337 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2338 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2339 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2340 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2341 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2342
2343 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2344 ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2345 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2346 ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2347 PetscFunctionReturn(0);
2348 }
2349
2350 #undef __FUNCT__
2351 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2352 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2353 {
2354 PetscFunctionBegin;
2355 PetscFunctionReturn(0);
2356 }
2357
2358 #undef __FUNCT__
2359 #define __FUNCT__ "MatSolve_MPIAIJ"
2360 /*
2361 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2362 */
2363 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2364 {
2365 namespace graph_dist = boost::graph::distributed;
2366
2367 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2368 lgraph_type* lgraph_p;
2369 PetscContainer c;
2370 PetscErrorCode ierr;
2371
2372 PetscFunctionBegin;
2373 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2374 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2375 ierr = VecCopy(b, x);CHKERRQ(ierr);
2376
2377 PetscScalar* array_x;
2378 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2379 PetscInt sx;
2380 ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2381
2382 PetscScalar* array_b;
2383 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2384 PetscInt sb;
2385 ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2386
2387 lgraph_type& level_graph = *lgraph_p;
2388 graph_dist::ilu_default::graph_type& graph(level_graph.graph);
2389
2390 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2391 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]),
2392 ref_x(array_x, boost::extents[num_vertices(graph)]);
2393
2394 typedef boost::iterator_property_map<array_ref_type::iterator,
2395 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type;
2396 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2397 vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2398
2399 ilu_set_solve(*lgraph_p, vector_b, vector_x);
2400
2401 PetscFunctionReturn(0);
2402 }
2403 #endif
2404
2405 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2406 PetscInt nzlocal,nsends,nrecvs;
2407 PetscMPIInt *send_rank,*recv_rank;
2408 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2409 PetscScalar *sbuf_a,**rbuf_a;
2410 PetscErrorCode (*Destroy)(Mat);
2411 } Mat_Redundant;
2412
2413 #undef __FUNCT__
2414 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2415 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2416 {
2417 PetscErrorCode ierr;
2418 Mat_Redundant *redund=(Mat_Redundant*)ptr;
2419 PetscInt i;
2420
2421 PetscFunctionBegin;
2422 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2423 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2424 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2425 for (i=0; i<redund->nrecvs; i++){
2426 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2427 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2428 }
2429 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2430 ierr = PetscFree(redund);CHKERRQ(ierr);
2431 PetscFunctionReturn(0);
2432 }
2433
2434 #undef __FUNCT__
2435 #define __FUNCT__ "MatDestroy_MatRedundant"
2436 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2437 {
2438 PetscErrorCode ierr;
2439 PetscContainer container;
2440 Mat_Redundant *redund=PETSC_NULL;
2441
2442 PetscFunctionBegin;
2443 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2444 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2445 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2446 A->ops->destroy = redund->Destroy;
2447 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
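/* redund->Destroy holds the destroy routine of the underlying matrix type, saved when the
   redundant matrix was created; with the container detached above, dispatch to it for the
   actual teardown */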
2448 if (A->ops->destroy) {
2449 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2450 }
2451 PetscFunctionReturn(0);
2452 }
2453
2454 #undef __FUNCT__
2455 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2456 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2457 {
2458 PetscMPIInt rank,size;
2459 MPI_Comm comm=((PetscObject)mat)->comm;
2460 PetscErrorCode ierr;
2461 PetscInt nsends=0,nrecvs=0,i,rownz_max=0;
2462 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2463 PetscInt *rowrange=mat->rmap->range;
2464 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2465 Mat A=aij->A,B=aij->B,C=*matredundant;
2466 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2467 PetscScalar *sbuf_a;
2468 PetscInt nzlocal=a->nz+b->nz;
2469 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2470 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2471 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2472 MatScalar *aworkA,*aworkB;
2473 PetscScalar *vals;
2474 PetscMPIInt tag1,tag2,tag3,imdex;
2475 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2476 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2477 MPI_Status recv_status,*send_status;
2478 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2479 PetscInt **rbuf_j=PETSC_NULL;
2480 PetscScalar **rbuf_a=PETSC_NULL;
2481 Mat_Redundant *redund=PETSC_NULL;
2482 PetscContainer container;
2483
2484 PetscFunctionBegin;
2485 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2486 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2487
2488 if (reuse == MAT_REUSE_MATRIX) {
2489 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2490 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2491 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2492 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2493 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2494 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2495 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2496 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2497
2498 nsends = redund->nsends;
2499 nrecvs = redund->nrecvs;
2500 send_rank = redund->send_rank;
2501 recv_rank = redund->recv_rank;
2502 sbuf_nz = redund->sbuf_nz;
2503 rbuf_nz = redund->rbuf_nz;
2504 sbuf_j = redund->sbuf_j;
2505 sbuf_a = redund->sbuf_a;
2506 rbuf_j = redund->rbuf_j;
2507 rbuf_a = redund->rbuf_a;
2508 }
2509
2510 if (reuse == MAT_INITIAL_MATRIX){
2511 PetscMPIInt subrank,subsize;
2512 PetscInt nleftover,np_subcomm;
2513 /* get the destination processors' id send_rank, nsends and nrecvs */
2514 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2515 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2516 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2517 np_subcomm = size/nsubcomm;
2518 nleftover = size - nsubcomm*np_subcomm;
2519 nsends = 0; nrecvs = 0;
2520 for (i=0; i<size; i++){ /* i=rank*/
2521 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2522 send_rank[nsends] = i; nsends++;
2523 recv_rank[nrecvs++] = i;
2524 }
2525 }
2526 if (rank >= size - nleftover){/* this proc is a leftover processor */
2527 i = size-nleftover-1;
2528 j = 0;
2529 while (j < nsubcomm - nleftover){
2530 send_rank[nsends++] = i;
2531 i--; j++;
2532 }
2533 }
2534
2535 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2536 for (i=0; i<nleftover; i++){
2537 recv_rank[nrecvs++] = size-nleftover+i;
2538 }
2539 }
2540
2541 /* allocate sbuf_j, sbuf_a */
2542 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2543 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2544 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2545 } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2546
2547 /* copy mat's local entries into the buffers */
2548 if (reuse == MAT_INITIAL_MATRIX){
2549 rownz_max = 0;
2550 rptr = sbuf_j;
2551 cols = sbuf_j + rend-rstart + 1;
2552 vals = sbuf_a;
2553 rptr[0] = 0;
2554 for (i=0; i<rend-rstart; i++){
2555 row = i + rstart;
2556 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2557 ncols = nzA + nzB;
2558 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2559 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2560 /* load the column indices for this row into cols */
2561 lwrite = 0;
2562 for (l=0; l<nzB; l++) {
2563 if ((ctmp = bmap[cworkB[l]]) < cstart){
2564 vals[lwrite] = aworkB[l];
2565 cols[lwrite++] = ctmp;
2566 }
2567 }
2568 for (l=0; l<nzA; l++){
2569 vals[lwrite] = aworkA[l];
2570 cols[lwrite++] = cstart + cworkA[l];
2571 }
2572 for (l=0; l<nzB; l++) {
2573 if ((ctmp = bmap[cworkB[l]]) >= cend){
2574 vals[lwrite] = aworkB[l];
2575 cols[lwrite++] = ctmp;
2576 }
2577 }
2578 vals += ncols;
2579 cols += ncols;
2580 rptr[i+1] = rptr[i] + ncols;
2581 if (rownz_max < ncols) rownz_max = ncols;
2582 }
2583 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2584 } else { /* only copy matrix values into sbuf_a */
2585 rptr = sbuf_j;
2586 vals = sbuf_a;
2587 rptr[0] = 0;
2588 for (i=0; i<rend-rstart; i++){
2589 row = i + rstart;
2590 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2591 ncols = nzA + nzB;
2592 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2593 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2594 lwrite = 0;
2595 for (l=0; l<nzB; l++) {
2596 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2597 }
2598 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2599 for
(l=0; l<nzB; l++) { 2600 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2601 } 2602 vals += ncols; 2603 rptr[i+1] = rptr[i] + ncols; 2604 } 2605 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2606 2607 /* send nzlocal to others, and recv other's nzlocal */ 2608 /*--------------------------------------------------*/ 2609 if (reuse == MAT_INITIAL_MATRIX){ 2610 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2611 s_waits2 = s_waits3 + nsends; 2612 s_waits1 = s_waits2 + nsends; 2613 r_waits1 = s_waits1 + nsends; 2614 r_waits2 = r_waits1 + nrecvs; 2615 r_waits3 = r_waits2 + nrecvs; 2616 } else { 2617 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2618 r_waits3 = s_waits3 + nsends; 2619 } 2620 2621 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2622 if (reuse == MAT_INITIAL_MATRIX){ 2623 /* get new tags to keep the communication clean */ 2624 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2625 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2626 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2627 2628 /* post receives of other's nzlocal */ 2629 for (i=0; i<nrecvs; i++){ 2630 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2631 } 2632 /* send nzlocal to others */ 2633 for (i=0; i<nsends; i++){ 2634 sbuf_nz[i] = nzlocal; 2635 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2636 } 2637 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2638 count = nrecvs; 2639 while (count) { 2640 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2641 recv_rank[imdex] = recv_status.MPI_SOURCE; 2642 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2643 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2644 2645 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2646 rbuf_nz[imdex] += i + 2; 2647 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2648 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2649 count--; 2650 } 2651 /* wait on sends of nzlocal */ 2652 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2653 /* send mat->i,j to others, and recv from other's */ 2654 /*------------------------------------------------*/ 2655 for (i=0; i<nsends; i++){ 2656 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2657 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2658 } 2659 /* wait on receives of mat->i,j */ 2660 /*------------------------------*/ 2661 count = nrecvs; 2662 while (count) { 2663 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2664 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2665 count--; 2666 } 2667 /* wait on sends of mat->i,j */ 2668 /*---------------------------*/ 2669 if (nsends) { 2670 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2671 } 2672 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2673 2674 /* post receives, send and receive mat->a */ 2675 
/*----------------------------------------*/ 2676 for (imdex=0; imdex<nrecvs; imdex++) { 2677 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2678 } 2679 for (i=0; i<nsends; i++){ 2680 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2681 } 2682 count = nrecvs; 2683 while (count) { 2684 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2685 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2686 count--; 2687 } 2688 if (nsends) { 2689 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2690 } 2691 2692 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2693 2694 /* create redundant matrix */ 2695 /*-------------------------*/ 2696 if (reuse == MAT_INITIAL_MATRIX){ 2697 /* compute rownz_max for preallocation */ 2698 for (imdex=0; imdex<nrecvs; imdex++){ 2699 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2700 rptr = rbuf_j[imdex]; 2701 for (i=0; i<j; i++){ 2702 ncols = rptr[i+1] - rptr[i]; 2703 if (rownz_max < ncols) rownz_max = ncols; 2704 } 2705 } 2706 2707 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2708 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2709 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2710 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2711 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2712 } else { 2713 C = *matredundant; 2714 } 2715 2716 /* insert local matrix entries */ 2717 rptr = sbuf_j; 2718 cols = sbuf_j + rend-rstart + 1; 2719 vals = sbuf_a; 2720 for (i=0; i<rend-rstart; i++){ 2721 row = i + rstart; 2722 ncols = rptr[i+1] - rptr[i]; 2723 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2724 vals += ncols; 2725 cols += ncols; 2726 } 2727 /* insert received matrix entries */ 2728 for (imdex=0; imdex<nrecvs; imdex++){ 2729 rstart = rowrange[recv_rank[imdex]]; 2730 rend = rowrange[recv_rank[imdex]+1]; 2731 rptr = rbuf_j[imdex]; 2732 cols = rbuf_j[imdex] + rend-rstart + 1; 2733 vals = rbuf_a[imdex]; 2734 for (i=0; i<rend-rstart; i++){ 2735 row = i + rstart; 2736 ncols = rptr[i+1] - rptr[i]; 2737 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2738 vals += ncols; 2739 cols += ncols; 2740 } 2741 } 2742 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2743 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2744 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2745 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2746 if (reuse == MAT_INITIAL_MATRIX) { 2747 PetscContainer container; 2748 *matredundant = C; 2749 /* create a supporting struct and attach it to C for reuse */ 2750 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2751 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2752 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2753 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2754 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2755 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2756 2757 redund->nzlocal = nzlocal; 2758 redund->nsends = nsends; 2759 redund->nrecvs = nrecvs; 2760 
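/* ownership of the communication buffers passes to the container; they are freed in
   PetscContainerDestroy_MatRedundant() */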
redund->send_rank = send_rank;
2761 redund->recv_rank = recv_rank;
2762 redund->sbuf_nz = sbuf_nz;
2763 redund->rbuf_nz = rbuf_nz;
2764 redund->sbuf_j = sbuf_j;
2765 redund->sbuf_a = sbuf_a;
2766 redund->rbuf_j = rbuf_j;
2767 redund->rbuf_a = rbuf_a;
2768
2769 redund->Destroy = C->ops->destroy;
2770 C->ops->destroy = MatDestroy_MatRedundant;
2771 }
2772 PetscFunctionReturn(0);
2773 }
2774
2775 #undef __FUNCT__
2776 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2777 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2778 {
2779 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2780 PetscErrorCode ierr;
2781 PetscInt i,*idxb = 0;
2782 PetscScalar *va,*vb;
2783 Vec vtmp;
2784
2785 PetscFunctionBegin;
2786 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2787 ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2788 if (idx) {
2789 for (i=0; i<A->rmap->n; i++) {
2790 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2791 }
2792 }
2793
2794 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2795 if (idx) {
2796 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2797 }
2798 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2799 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2800
2801 for (i=0; i<A->rmap->n; i++){
2802 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2803 va[i] = vb[i];
2804 if (idx) idx[i] = a->garray[idxb[i]];
2805 }
2806 }
2807
2808 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2809 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2810 ierr = PetscFree(idxb);CHKERRQ(ierr);
2811 ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2812 PetscFunctionReturn(0);
2813 }
2814
2815 #undef __FUNCT__
2816 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2817 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2818 {
2819 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2820 PetscErrorCode ierr;
2821 PetscInt i,*idxb = 0;
2822 PetscScalar *va,*vb;
2823 Vec vtmp;
2824
2825 PetscFunctionBegin;
2826 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2827 ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2828 if (idx) {
2829 for (i=0; i<A->rmap->n; i++) {
2830 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2831 }
2832 }
2833
2834 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2835 if (idx) {
2836 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2837 }
2838 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2839 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2840
2841 for (i=0; i<A->rmap->n; i++){
2842 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2843 va[i] = vb[i];
2844 if (idx) idx[i] = a->garray[idxb[i]];
2845 }
2846 }
2847
2848 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2849 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2850 ierr = PetscFree(idxb);CHKERRQ(ierr);
2851 ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2852 PetscFunctionReturn(0);
2853 }
2854
2855 #undef __FUNCT__
2856 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2857 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2858 {
2859 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data;
2860 PetscInt n = A->rmap->n;
2861 PetscInt cstart = A->cmap->rstart;
2862 PetscInt *cmap = mat->garray;
2863 PetscInt *diagIdx, *offdiagIdx;
2864 Vec diagV, offdiagV;
2865 PetscScalar *a, *diagA, *offdiagA;
2866 PetscInt r;
2867 PetscErrorCode ierr;
2868
2869 PetscFunctionBegin;
2870 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2871 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2872 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2873 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2874 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2875 ierr = VecGetArray(v, &a);CHKERRQ(ierr);
2876 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
2877 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2878 for (r = 0; r < n; ++r) {
2879 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2880 a[r] = diagA[r];
2881 idx[r] = cstart + diagIdx[r];
2882 } else {
2883 a[r] = offdiagA[r];
2884 idx[r] = cmap[offdiagIdx[r]];
2885 }
2886 }
2887 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
2888 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
2889 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2890 ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2891 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2892 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2893 PetscFunctionReturn(0);
2894 }
2895
2896 #undef __FUNCT__
2897 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2898 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2899 {
2900 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data;
2901 PetscInt n = A->rmap->n;
2902 PetscInt cstart = A->cmap->rstart;
2903 PetscInt *cmap = mat->garray;
2904 PetscInt *diagIdx, *offdiagIdx;
2905 Vec diagV, offdiagV;
2906 PetscScalar *a, *diagA, *offdiagA;
2907 PetscInt r;
2908 PetscErrorCode ierr;
2909
2910 PetscFunctionBegin;
2911 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2912 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2913 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2914 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2915 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2916 ierr = VecGetArray(v, &a);CHKERRQ(ierr);
2917 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
2918 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2919 for (r = 0; r < n; ++r) {
2920 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2921 a[r] = diagA[r];
2922 idx[r] = cstart + diagIdx[r];
2923 } else {
2924 a[r] = offdiagA[r];
2925 idx[r] = cmap[offdiagIdx[r]];
2926 }
2927 }
2928 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
2929 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
2930 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2931 ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2932 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2933 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2934 PetscFunctionReturn(0);
2935 }
2936
2937 #undef __FUNCT__
2938 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2939 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2940 {
2941 PetscErrorCode ierr;
2942 Mat *dummy;
2943
2944 PetscFunctionBegin;
2945 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2946 *newmat = *dummy;
2947 ierr = PetscFree(dummy);CHKERRQ(ierr);
2948 PetscFunctionReturn(0);
2949 }
2950
2951 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2952
2953 #undef __FUNCT__
2954 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2955 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,PetscScalar **values)
2956 {
2957 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;
2958 PetscErrorCode ierr;
2959
2960 PetscFunctionBegin;
2961 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2962 PetscFunctionReturn(0);
2963 }
2964
2965
2966 /* -------------------------------------------------------------------*/
2967 static
/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
/* 4*/ MatMultAdd_MPIAIJ,
       MatMultTranspose_MPIAIJ,
       MatMultTransposeAdd_MPIAIJ,
#ifdef PETSC_HAVE_PBGL
       MatSolve_MPIAIJ,
#else
       0,
#endif
       0,
       0,
/*10*/ 0,
       0,
       0,
       MatSOR_MPIAIJ,
       MatTranspose_MPIAIJ,
/*15*/ MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
/*20*/ MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
/*24*/ MatZeroRows_MPIAIJ,
       0,
#ifdef PETSC_HAVE_PBGL
       0,
#else
       0,
#endif
       0,
       0,
/*29*/ MatSetUp_MPIAIJ,
#ifdef PETSC_HAVE_PBGL
       0,
#else
       0,
#endif
       0,
       0,
       0,
/*34*/ MatDuplicate_MPIAIJ,
       0,
       0,
       0,
       0,
/*39*/ MatAXPY_MPIAIJ,
       MatGetSubMatrices_MPIAIJ,
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       MatCopy_MPIAIJ,
/*44*/ MatGetRowMax_MPIAIJ,
       MatScale_MPIAIJ,
       0,
       0,
       MatZeroRowsColumns_MPIAIJ,
/*49*/ MatSetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
/*54*/ MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       MatPermute_MPIAIJ,
       0,
/*59*/ MatGetSubMatrix_MPIAIJ,
       MatDestroy_MPIAIJ,
       MatView_MPIAIJ,
       0,
       0,
/*64*/ 0,
       0,
       0,
       0,
       0,
/*69*/ MatGetRowMaxAbs_MPIAIJ,
       MatGetRowMinAbs_MPIAIJ,
       0,
       MatSetColoring_MPIAIJ,
#if defined(PETSC_HAVE_ADIC)
       MatSetValuesAdic_MPIAIJ,
#else
       0,
#endif
       MatSetValuesAdifor_MPIAIJ,
/*75*/ MatFDColoringApply_AIJ,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
/*83*/ MatLoad_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
/*89*/ MatMatMult_MPIAIJ_MPIAIJ,
       MatMatMultSymbolic_MPIAIJ_MPIAIJ,
       MatMatMultNumeric_MPIAIJ_MPIAIJ,
       MatPtAP_Basic,
       MatPtAPSymbolic_MPIAIJ,
/*94*/ MatPtAPNumeric_MPIAIJ,
       0,
       0,
       0,
       0,
/*99*/ 0,
       MatPtAPSymbolic_MPIAIJ_MPIAIJ,
       MatPtAPNumeric_MPIAIJ_MPIAIJ,
       MatConjugate_MPIAIJ,
       0,
/*104*/MatSetValuesRow_MPIAIJ,
       MatRealPart_MPIAIJ,
       MatImaginaryPart_MPIAIJ,
       0,
       0,
/*109*/0,
       MatGetRedundantMatrix_MPIAIJ,
       MatGetRowMin_MPIAIJ,
       0,
       0,
/*114*/MatGetSeqNonzeroStructure_MPIAIJ,
       0,
       0,
       0,
       0,
/*119*/0,
       0,
       0,
       0,
       MatGetMultiProcBlock_MPIAIJ,
/*124*/MatFindNonZeroRows_MPIAIJ,
       MatGetColumnNorms_MPIAIJ,
       MatInvertBlockDiagonal_MPIAIJ,
       0,
       MatGetSubMatricesParallel_MPIAIJ,
/*129*/0,
       MatTransposeMatMult_MPIAIJ_MPIAIJ,
       MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
       0,
/*134*/0,
       0,
       0,
       0,
       0
};

/* ----------------------------------------------------------------------------------------*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIAIJ"
PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
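/*
   A hedged usage sketch (not part of the library source): MatStoreValues()
   and MatRetrieveValues() are meant for the case where a matrix with a fixed
   nonzero pattern is reassembled repeatedly; the stored copy of the numerical
   values can be brought back before each new assembly. A is an assumed,
   already assembled MPIAIJ matrix:

     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(A);CHKERRQ(ierr);        saves the values of the diagonal and off-diagonal blocks
     ....  later, before modifying only some entries  ....
     ierr = MatRetrieveValues(A);CHKERRQ(ierr);
*/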
__FUNCT__ "MatRetrieveValues_MPIAIJ" 3144 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3145 { 3146 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3147 PetscErrorCode ierr; 3148 3149 PetscFunctionBegin; 3150 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3151 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3152 PetscFunctionReturn(0); 3153 } 3154 EXTERN_C_END 3155 3156 EXTERN_C_BEGIN 3157 #undef __FUNCT__ 3158 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3159 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3160 { 3161 Mat_MPIAIJ *b; 3162 PetscErrorCode ierr; 3163 PetscInt i; 3164 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3165 3166 PetscFunctionBegin; 3167 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3168 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3169 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3170 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3171 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3172 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3173 3174 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3175 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3176 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3177 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3178 if (d_nnz) { 3179 for (i=0; i<B->rmap->n; i++) { 3180 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3181 } 3182 } 3183 if (o_nnz) { 3184 for (i=0; i<B->rmap->n; i++) { 3185 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3186 } 3187 } 3188 b = (Mat_MPIAIJ*)B->data; 3189 3190 if (!B->preallocated) { 3191 /* Explicitly create 2 MATSEQAIJ matrices. */ 3192 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3193 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3194 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3195 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3196 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3197 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3198 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3199 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3200 } 3201 3202 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3203 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3204 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). 
  if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
  if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
  B->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIAIJ"
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  a    = (Mat_MPIAIJ*)mat->data;

  mat->factortype   = matin->factortype;
  mat->rmap->bs     = matin->rmap->bs;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIAIJ"
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
  PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
  PetscInt       cend,cstart,n,*rowners,sizesset = 1;
  int            fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
  if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
  if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;

  /* If global sizes are set, check that they are consistent with those given in the file */
  if (sizesset) {
    ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
  }
  if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: matrix in file has (%d) and input matrix has (%d)",M,grows);
  if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: matrix in file has (%d) and input matrix has (%d)",N,gcols);

  /* determine ownership of all rows */
  if (newMat->rmap->n < 0) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */
  else m = newMat->rmap->n; /* Set by user */

  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* First process needs enough room for process with most rows */
  if (!rank) {
    mmax = rowners[1];
    for (i=2; i<=size; i++) {
      mmax = PetscMax(mmax,rowners[i]);
    }
  } else mmax = m;

  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += ourlens[j];
    }
    for (i=1; i<size; i++) {
      ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<rowners[i+1]-rowners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
    else n = newMat->cmap->n;
    ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  if (!sizesset) {
    ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
  }
  ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);

  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
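/*
   A minimal usage sketch (not part of the library source) of loading an
   MPIAIJ matrix through the routine above; assumes "matrix.dat" is a PETSc
   binary file previously written with MatView():

     Mat         A;
     PetscViewer fd;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&fd);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,fd);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&fd);CHKERRQ(ierr);
*/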
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscInt       csize;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  }
  ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
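/*
   A hedged usage sketch (not part of the library source) of extracting a
   parallel submatrix through the public interface; isrow and iscol are
   assumed index sets holding each process's share of the wanted rows and
   columns:

     Mat sub;
     ierr = MatGetSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
     ....
     ierr = MatGetSubMatrix(mat,isrow,iscol,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);    same pattern, fresh values
     ierr = MatDestroy(&sub);CHKERRQ(ierr);
*/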
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
/*
    Not great since it makes two copies of the submatrix: first each process
  extracts its piece as a SeqAIJ matrix, and then those local pieces are
  concatenated into the final result. Writing it directly would be much like
  MatGetSubMatrices_MPIAIJ().

    Note: This requires a sequential iscol with all indices.
*/
PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            *local,M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm = ((PetscObject)mat)->comm;
  Mat_SeqAIJ     *aij;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
      rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart,cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscScalar    *values;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);

  ierr   = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
  ierr   = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);

#if defined(PETSC_USE_DEBUG)
  for (i=0; i<m; i++) {
    nnz = Ii[i+1] - Ii[i];
    JJ  = J + Ii[i];
    if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
    if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
    if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
  }
#endif

  for (i=0; i<m; i++) {
    nnz     = Ii[i+1] - Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    d       = 0;
    for (j=0; j<nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  if (v) values = (PetscScalar*)v;
  else {
    ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    ii   = i + rstart;
    nnz  = Ii[i+1] - Ii[i];
    ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (!v) {
    ierr = PetscFree(values);CHKERRQ(ierr);
  }
  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
/*@
   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero)
-  v - optional values in the matrix

   Level: developer

   Notes:
       The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of v[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the i indices are indices into the local j array.

       The format used for the sparse matrix input is equivalent to a
     row-major ordering, i.e., for the following matrix, the input data expected is
     as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1 = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1 = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
          MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
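/*
   A minimal sketch (not part of the library source) of the calling sequence,
   using the 3x3 example from the manual page above as seen from process 0,
   which owns rows 0 and 1 (process 1 passes its own arrays analogously):

     Mat         B;
     PetscInt    i[] = {0,1,3},j[] = {0,0,2};
     PetscScalar v[] = {1.0,2.0,3.0};
     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,2,PETSC_DECIDE,3,3);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
*/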
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
           For matrices that will be factored, you must leave room for (and set)
           the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)) is fully compatible with standard Fortran 77
   storage. The stored row and column indices begin with zero.
   See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to. This is an m x n matrix. In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows.
   This division can be shown as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatGetInfo()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
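/*
   A minimal sketch (not part of the library source) of the recommended
   MatCreate()/MatSetType()/MatMPIAIJSetPreallocation() sequence described
   above; m,n,M,N and the d_nnz/o_nnz arrays are assumed to have been
   computed by the caller:

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/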
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithArrays"
/*@
   MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain, in standard
   CSR format, the local rows.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the i indices are indices into the local j array.

       The format used for the sparse matrix input is equivalent to a
     row-major ordering, i.e., for the following matrix, the input data expected is
     as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1 = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1 = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  }
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.
       (or PETSC_DECIDE to have calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   The m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
   values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition, etc., where n0,n1,n2... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor, i.e., the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.
   If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism:
     MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+ -mat_no_inode  - Do not use inodes
. -mat_inode_limit <limit> - Sets inode limit (max limit=5)
- -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0. Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!


   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
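/*
   A minimal sketch (not part of the library source) using the d_nnz/o_nnz
   values worked out for proc0 in the 8x8 example of the manual page above;
   the other two processes pass their own local sizes and arrays:

     Mat      A;
     PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
     ....  MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd()  ....
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/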
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetSeqAIJ"
PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_GLOBAL) {
    ISColoringValue *allcolors,*colors;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    ISColoringValue *colors;
    PetscInt        *larray;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      larray[i] = i + A->cmap->rstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);

  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ADIC)
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMergeSymbolic"
PetscErrorCode MatMergeSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,*dnz,*onz;
  PetscInt       *indx;

  PetscFunctionBegin;
  /* This routine will ONLY return an MPIAIJ type matrix */
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (n == PETSC_DECIDE) {
    ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
  }
  ierr = MPI_Scan(&m,&rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
  rstart -= m;

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
  }

  ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMergeNumeric"
PetscErrorCode MatMergeNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatDestroy(&inmat);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge"
/*@
      MatMerge - Creates a single large PETSc matrix by concatenating sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    inmat - the input sequential matrices
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    outmat - the parallel matrix generated

    Level: advanced

   Notes: The number of columns of the matrix in EACH processor MUST be the same.

@*/
PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatMergeSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
  }
  ierr = MatMergeNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
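/*
   A hedged usage sketch (not part of the library source): each process
   assembles its own strip of rows as a SeqAIJ matrix (all strips having the
   same global number of columns) and the strips are stacked into one parallel
   matrix. Note from the code above that MatMergeNumeric() destroys the input
   matrix, so seqmat must not be referenced afterwards:

     Mat seqmat,mpimat;
     ....  assemble seqmat as a MATSEQAIJ matrix on PETSC_COMM_SELF  ....
     ierr = MatMerge(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
*/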
#undef __FUNCT__
#define __FUNCT__ "MatFileSplit"
PetscErrorCode MatFileSplit(Mat A,char *outfile)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  PetscInt          m,N,i,rstart,nnz;
  size_t            len;
  const PetscInt    *indx;
  PetscViewer       out;
  char              *name;
  Mat               B;
  const PetscScalar *values;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
  ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
  /* Should this be the type of the diagonal block of A? */
  ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
    ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
  ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
  ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
  sprintf(name,"%s.%d",outfile,rank);
  ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
  ierr = PetscFree(name);CHKERRQ(ierr);
  ierr = MatView(B,out);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
{
  PetscErrorCode      ierr;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
    ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->bi);CHKERRQ(ierr);
    ierr = PetscFree(merge->bj);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
    ierr = PetscFree(merge->coi);CHKERRQ(ierr);
    ierr = PetscFree(merge->coj);CHKERRQ(ierr);
    ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
    ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
    ierr = PetscFree(merge);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
  }
  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include <../src/mat/utils/freespace.h>
#include <petscbt.h>

#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPINumeric"
/*@C
      MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrices
.    m - number of local rows (or PETSC_DECIDE)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    mpimat - the parallel matrix generated

    Level: advanced

   Notes:
     The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed.
extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
{
  PetscErrorCode      ierr;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
    ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->bi);CHKERRQ(ierr);
    ierr = PetscFree(merge->bj);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
    ierr = PetscFree(merge->coi);CHKERRQ(ierr);
    ierr = PetscFree(merge->coj);CHKERRQ(ierr);
    ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
    ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
    ierr = PetscFree(merge);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
  }
  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include <../src/mat/utils/freespace.h>
#include <petscbt.h>

#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPINumeric"
/*@C
      MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the sequential
      matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrices
.    m - number of local rows (or PETSC_DECIDE)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    mpimat - the parallel matrix generated

    Level: advanced

   Notes:
     The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
@*/
PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
{
  PetscErrorCode      ierr;
  MPI_Comm            comm=((PetscObject)mpimat)->comm;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  MatScalar           *aa=a->a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
  ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*-----------------------------*/
  ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k];                 /* beginning of k-th received i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;             /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a->a + ai[arow];
    nextaj = 0;
    for (j=0; nextaj<anzi; j++){
      if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++){
          if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  PetscErrorCode      ierr;
  Mat                 B_mpi;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt            len,proc,*dnz,*onz;
  PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  PetscBT             lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);

  /* make sure it is a PETSc comm */
  ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);

  /* determine row ownership */
  /*-------------------------*/
  ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
  ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);

  m      = merge->rowmap->n;
  M      = merge->rowmap->N;
  owners = merge->rowmap->range;

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s = merge->len_s;

  len = 0;  /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++){
    len_si[proc] = 0;
    if (proc == rank){
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      for (i=owners[proc]; i<owners[proc+1]; i++){
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len         += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*--------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*-------------------------------*/
  ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);

  for (proc=0, k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
  buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:           nrows to be sent
         [1:nrows]:           row index (local to the receiving process)
         [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++){
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc];         /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}

  ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++){
    ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr  = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = 0;
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k];                 /* beginning of k-th received i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0;i<m;i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* = max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->coi          = PETSC_NULL;
  merge->coj          = PETSC_NULL;
  merge->owners_co    = PETSC_NULL;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPI"
PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
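
/*
   A sketch of the symbolic/numeric split provided by MatMerge_SeqsToMPI()
   (illustrative only): the first call builds the parallel nonzero structure,
   and later calls with MAT_REUSE_MATRIX redo only the cheaper numeric merge.
   This assumes seqmat keeps the same nonzero pattern between calls.

      Mat C;
      ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,m,n,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
      ... update the numerical values of seqmat, same pattern ...
      ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,m,n,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
      ierr = MatDestroy(&C);CHKERRQ(ierr);
*/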
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMat"
/*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows
     and putting them into a sequential matrix with mlocal rows and n columns, where mlocal is the row
     count obtained with MatGetLocalSize() and n is the global column count obtained with MatGetSize()

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()

@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
  PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
  MatScalar      *aa=a->a,*ba=b->a,*cam;
  PetscScalar    *ca;
  PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    ierr  = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++){
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
    k = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A, global columns below the diagonal block */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A, global columns above the diagonal block */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX){
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    ci  = mat->i; cj = mat->j; cam = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
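
/*
   Usage sketch for MatMPIAIJGetLocalMat() (illustrative only): extract this
   process's rows of the parallel matrix A as one sequential matrix, and
   refresh it later when only the numerical values of A have changed.

      Mat A_loc;
      ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
      ... A gets new values, same nonzero pattern ...
      ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
      ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
*/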
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
/*@C
     MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    row, col - index sets of rows and columns to extract (or PETSC_NULL)

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()

@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row){
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col){
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX){
    ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr = PetscFree(aloc);CHKERRQ(ierr);
  if (!row){
    ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
  }
  if (!col){
    ierr = ISDestroy(&iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
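
/*
   Usage sketch for MatMPIAIJGetLocalMatCondensed() (illustrative only): with
   row and col both PETSC_NULL the routine extracts all locally owned rows and
   only the columns in which the local part of A has nonzeros.

      Mat A_loc;
      ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
      ... work with the condensed local matrix ...
      ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
*/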
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)

   Output Parameter:
+    rowb, colb - index sets of rows and columns of B to extract
-    B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq=PETSC_NULL;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb; iscolb = *colb;
    ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb){
    ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb){
    ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
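
/*
   Usage sketch for MatGetBrowsOfAcols() (illustrative only): gather the rows
   of B matching the nonzero columns of the local part of A, keeping the index
   sets so a later call can reuse them; as the code above requires, rowb and
   colb must be passed back in with MAT_REUSE_MATRIX.

      IS  rowb,colb;
      Mat B_seq;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
      ... B gets new values, same pattern ...
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
      ierr = ISDestroy(&rowb);CHKERRQ(ierr);
      ierr = ISDestroy(&colb);CHKERRQ(ierr);
      ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
*/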
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
/*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
    of the OFF-DIAGONAL portion of the local part of A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
+    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
.    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
.    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
-    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

    Level: developer

*/
PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
{
  VecScatter_MPI_General *gen_to,*gen_from;
  PetscErrorCode         ierr;
  Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ             *b_oth;
  VecScatter             ctx=a->Mvctx;
  MPI_Comm               comm=((PetscObject)ctx)->comm;
  PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
  PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
  PetscScalar            *rvalues,*svalues;
  MatScalar              *b_otha,*bufa,*bufA;
  PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
  MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
  MPI_Status             *sstatus,rstatus;
  PetscMPIInt            jj;
  PetscInt               *cols,sbs,rbs;
  PetscScalar            *vals;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  gen_to   = (VecScatter_MPI_General*)ctx->todata;
  gen_from = (VecScatter_MPI_General*)ctx->fromdata;
  rvalues  = gen_from->values; /* holds the length of receiving row */
  svalues  = gen_to->values;   /* holds the length of sending row */
  nrecvs   = gen_from->n;
  nsends   = gen_to->n;

  ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
  srow    = gen_to->indices;   /* local row index to be sent */
  sstarts = gen_to->starts;
  sprocs  = gen_to->procs;
  sstatus = gen_to->sstatus;
  sbs     = gen_to->bs;
  rstarts = gen_from->starts;
  rprocs  = gen_from->procs;
  rbs     = gen_from->bs;

  if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX){
    /* i-array */
    /*---------*/
    /* post receives */
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
      ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message */
    ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
    sstartsj[0] = 0;  rstartsj[0] = 0;
    len = 0; /* total length of j or a array to be sent */
    k   = 0;
    for (i=0; i<nsends; i++){
      rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
      nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
      for (j=0; j<nrows; j++) {
        row = srow[k] + B->rmap->range[rank]; /* global row idx */
        for (l=0; l<sbs; l++){
          ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
          rowlen[j*sbs+l] = ncols;
          len += ncols;
          ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
        }
        k++;
      }
      ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
      sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
    b_othi[0] = 0;
    len = 0; /* total length of j or a array to be received */
    k   = 0;
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        len += rowlen[j]; k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++){
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    k = 0;
    for (i=0; i<nsends; i++){
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++){
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++){
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX){
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Invalid MatReuse value");
  }

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++){
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  k = 0;
  for (i=0; i<nsends; i++){
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++){
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++){
          *bufA++ = vals[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
      }
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj_s || !bufa_ptr){
      ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
    } else {
      *startsj_s = sstartsj;
      *startsj_r = rstartsj;
      *bufa_ptr  = bufa;
    }
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
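
/*
   Calling-pattern sketch for MatGetBrowsOfAoCols_MPIAIJ() (illustrative
   only): the i/j communication buffers produced by the first call are handed
   back on reuse, so only the numerical values are communicated again.

      PetscInt  *startsj_s,*startsj_r;
      MatScalar *bufa;
      Mat       B_oth;
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
      ... B gets new values, same pattern ...
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
*/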
#undef __FUNCT__
#define __FUNCT__ "MatGetCommunicationStructs"
/*@C
    MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.

    Not Collective

   Input Parameters:
.    A - The matrix in mpiaij format

   Output Parameter:
+    lvec - The local vector holding off-process values from the argument to a matrix-vector product
.    colmap - A map from global column index to local index into lvec
-    multScatter - A scatter from the argument of a matrix-vector product to lvec

    Level: developer

@*/
#if defined (PETSC_USE_CTABLE)
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
  a = (Mat_MPIAIJ *) A->data;
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
/*
    Computes C = A*B as C = (B'*A')', i.e. via the identity (B^T A^T)^T = A B,
    since computing the dense-times-sparse product directly is untenable here

              n    p           p
            (   ) (   )       (   )
        m   ( A ) * n ( B ) = m ( C )
            (   ) (   )       (   )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  ierr = MatDestroy(&Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(&Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap->n,n=B->cmap->n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
  ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C = Cmat;
  (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
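
/*
   Usage sketch (illustrative only): the dense-times-sparse product above is
   reached through the generic MatMatMult() interface when A is MATMPIDENSE
   and B is MATMPIAIJ.

      Mat C;
      ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
      ierr = MatDestroy(&C);CHKERRQ(ierr);
*/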
EXTERN_C_BEGIN
#if defined(PETSC_HAVE_MUMPS)
extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_PASTIX)
extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SPOOLES)
extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
#endif
EXTERN_C_END

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJ()
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);

  ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->rmap->bs  = 1;
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
                                           "MatGetFactor_mpiaij_spooles",
                                           MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
                                           "MatGetFactor_aij_mumps",
                                           MatGetFactor_aij_mumps);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_PASTIX)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
                                           "MatGetFactor_mpiaij_pastix",
                                           MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
                                           "MatGetFactor_mpiaij_superlu_dist",
                                           MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                           "MatStoreValues_MPIAIJ",
                                           MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                           "MatRetrieveValues_MPIAIJ",
                                           MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIAIJ",
                                           MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
                                           "MatIsTranspose_MPIAIJ",
                                           MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                           "MatMPIAIJSetPreallocation_MPIAIJ",
                                           MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
                                           "MatMPIAIJSetPreallocationCSR_MPIAIJ",
                                           MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                           "MatDiagonalScaleLocal_MPIAIJ",
                                           MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
                                           "MatConvert_MPIAIJ_MPIAIJPERM",
                                           MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
                                           "MatConvert_MPIAIJ_MPIAIJCRL",
                                           MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
                                           "MatConvert_MPIAIJ_MPISBAIJ",
                                           MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
                                           "MatMatMult_MPIDense_MPIAIJ",
                                           MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
                                           "MatMatMultSymbolic_MPIDense_MPIAIJ",
                                           MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
                                           "MatMatMultNumeric_MPIDense_MPIAIJ",
                                           MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
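
/*
   A minimal creation sketch for a MATMPIAIJ matrix (illustrative only; d_nz
   and o_nz are placeholder preallocation estimates for the diagonal and
   off-diagonal blocks):

      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
      ... MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd() ...
*/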
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@
     MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
     and "off-diagonal" part of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       it calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices for "diagonal" portion of matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
       must free the arrays once the matrix has been destroyed and not before.

       The i and j indices are 0 based

       See MatCreateMPIAIJ() for the definition of the "diagonal" and "off-diagonal" portions of the matrix

       This sets local rows and cannot be used to set off-processor values.

       You cannot later use MatSetValues() to change values in this matrix.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
                                              PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;
  maij->donotstash     = PETSC_TRUE;
  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
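
/*
   Usage sketch for MatCreateMPIAIJWithSplitArrays() (illustrative only): the
   caller supplies the two CSR triples and keeps them alive for the lifetime
   of the matrix.

      PetscInt    *i,*j,*oi,*oj;   (CSR of the diagonal and off-diagonal blocks)
      PetscScalar *a,*oa;
      Mat         A;
      ... fill the six arrays; i[0] == 0 and oi[0] == 0 are required ...
      ierr = MatCreateMPIAIJWithSplitArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
      ... the arrays must remain valid until A is destroyed ...
*/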
/*
    Special version for direct calls from Fortran
*/
#include <private/fortranimpl.h>

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) {
    mat->insertmode = addv;
  }
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  }
#endif
  {
    PetscInt  i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat        A = aij->A;
    Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
    Mat        B = aij->B;
    Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba = b->a;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
          if (in[j] >= cstart && in[j] < cend){
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined (PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
                ba    = b->a;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          }
        }
      } else {
        if (!aij->donotstash) {
          if (roworiented) {
            ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
          } else {
            ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
          }
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}
EXTERN_C_END