1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petscblaslapack.h> 4 5 /*MC 6 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 7 8 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 9 and MATMPIAIJ otherwise. As a result, for single process communicators, 10 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 11 for communicators controlling multiple processes. It is recommended that you call both of 12 the above preallocation routines for simplicity. 13 14 Options Database Keys: 15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 16 17 Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when 18 enough exist. 19 20 Level: beginner 21 22 .seealso: MatCreateMPIAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ 23 M*/ 24 25 /*MC 26 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 27 28 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 29 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 30 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 31 for communicators controlling multiple processes. It is recommended that you call both of 32 the above preallocation routines for simplicity. 33 34 Options Database Keys: 35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 36 37 Level: beginner 38 39 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 40 M*/ 41 42 #undef __FUNCT__ 43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ" 44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows) 45 { 46 PetscErrorCode ierr; 47 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 48 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 49 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 50 const PetscInt *ia,*ib; 51 const MatScalar *aa,*bb; 52 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 53 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 54 55 PetscFunctionBegin; 56 *keptrows = 0; 57 ia = a->i; 58 ib = b->i; 59 for (i=0; i<m; i++) { 60 na = ia[i+1] - ia[i]; 61 nb = ib[i+1] - ib[i]; 62 if (!na && !nb) { 63 cnt++; 64 goto ok1; 65 } 66 aa = a->a + ia[i]; 67 for (j=0; j<na; j++) { 68 if (aa[j] != 0.0) goto ok1; 69 } 70 bb = b->a + ib[i]; 71 for (j=0; j <nb; j++) { 72 if (bb[j] != 0.0) goto ok1; 73 } 74 cnt++; 75 ok1:; 76 } 77 ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr); 78 if (!n0rows) PetscFunctionReturn(0); 79 ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 80 cnt = 0; 81 for (i=0; i<m; i++) { 82 na = ia[i+1] - ia[i]; 83 nb = ib[i+1] - ib[i]; 84 if (!na && !nb) continue; 85 aa = a->a + ia[i]; 86 for(j=0; j<na;j++) { 87 if (aa[j] != 0.0) { 88 rows[cnt++] = rstart + i; 89 goto ok2; 90 } 91 } 92 bb = b->a + ib[i]; 93 for (j=0; j<nb; j++) { 94 if (bb[j] != 0.0) { 95 rows[cnt++] = rstart + i; 96 goto ok2; 97 } 98 } 99 ok2:; 100 } 101 ierr = ISCreateGeneral(PETSC_COMM_WORLD,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 102 PetscFunctionReturn(0); 103 } 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 108 { 109 PetscErrorCode ierr; 110 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 111 PetscInt i,n,*garray = 
aij->garray; 112 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 113 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 114 PetscReal *work; 115 116 PetscFunctionBegin; 117 ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr); 118 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 119 ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr); 120 if (type == NORM_2) { 121 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 122 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 123 } 124 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 125 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 126 } 127 } else if (type == NORM_1) { 128 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 129 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 130 } 131 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 132 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 133 } 134 } else if (type == NORM_INFINITY) { 135 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 136 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 137 } 138 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 139 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 140 } 141 142 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType"); 143 if (type == NORM_INFINITY) { 144 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 145 } else { 146 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 147 } 148 ierr = PetscFree(work);CHKERRQ(ierr); 149 if (type == NORM_2) { 150 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 151 } 152 PetscFunctionReturn(0); 153 } 154 155 #undef __FUNCT__ 156 #define __FUNCT__ "MatDistribute_MPIAIJ" 157 /* 158 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 159 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 
160 161 Only for square matrices 162 */ 163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 164 { 165 PetscMPIInt rank,size; 166 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 167 PetscErrorCode ierr; 168 Mat mat; 169 Mat_SeqAIJ *gmata; 170 PetscMPIInt tag; 171 MPI_Status status; 172 PetscBool aij; 173 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 174 175 PetscFunctionBegin; 176 CHKMEMQ; 177 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 178 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 179 if (!rank) { 180 ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 181 if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 182 } 183 if (reuse == MAT_INITIAL_MATRIX) { 184 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 185 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 186 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 187 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 188 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 189 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 190 rowners[0] = 0; 191 for (i=2; i<=size; i++) { 192 rowners[i] += rowners[i-1]; 193 } 194 rstart = rowners[rank]; 195 rend = rowners[rank+1]; 196 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 197 if (!rank) { 198 gmata = (Mat_SeqAIJ*) gmat->data; 199 /* send row lengths to all processors */ 200 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 201 for (i=1; i<size; i++) { 202 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 203 } 204 /* determine number diagonal and off-diagonal counts */ 205 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 206 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 207 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 208 jj = 0; 209 for (i=0; i<m; i++) { 210 for (j=0; j<dlens[i]; j++) { 211 if (gmata->j[jj] < rstart) ld[i]++; 212 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 213 jj++; 214 } 215 } 216 /* send column indices to other processes */ 217 for (i=1; i<size; i++) { 218 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 219 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 220 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 221 } 222 223 /* send numerical values to other processes */ 224 for (i=1; i<size; i++) { 225 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 226 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 227 } 228 gmataa = gmata->a; 229 gmataj = gmata->j; 230 231 } else { 232 /* receive row lengths */ 233 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 234 /* receive column indices */ 235 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 236 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 237 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 238 /* determine number diagonal and off-diagonal counts */ 239 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 240 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 241 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 242 jj = 0; 243 for (i=0; i<m; i++) { 244 for (j=0; j<dlens[i]; j++) { 245 if (gmataj[jj] < rstart) ld[i]++; 246 if (gmataj[jj] < 
rstart || gmataj[jj] >= rend) olens[i]++; 247 jj++; 248 } 249 } 250 /* receive numerical values */ 251 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 252 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 253 } 254 /* set preallocation */ 255 for (i=0; i<m; i++) { 256 dlens[i] -= olens[i]; 257 } 258 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 259 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 260 261 for (i=0; i<m; i++) { 262 dlens[i] += olens[i]; 263 } 264 cnt = 0; 265 for (i=0; i<m; i++) { 266 row = rstart + i; 267 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 268 cnt += dlens[i]; 269 } 270 if (rank) { 271 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 272 } 273 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 274 ierr = PetscFree(rowners);CHKERRQ(ierr); 275 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 276 *inmat = mat; 277 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 278 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 279 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 280 mat = *inmat; 281 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 282 if (!rank) { 283 /* send numerical values to other processes */ 284 gmata = (Mat_SeqAIJ*) gmat->data; 285 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 286 gmataa = gmata->a; 287 for (i=1; i<size; i++) { 288 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 289 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 290 } 291 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 292 } else { 293 /* receive numerical values from process 0*/ 294 nz = Ad->nz + Ao->nz; 295 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 296 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 297 } 298 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 299 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 300 ad = Ad->a; 301 ao = Ao->a; 302 if (mat->rmap->n) { 303 i = 0; 304 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 305 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 306 } 307 for (i=1; i<mat->rmap->n; i++) { 308 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 309 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 310 } 311 i--; 312 if (mat->rmap->n) { 313 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 314 } 315 if (rank) { 316 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 317 } 318 } 319 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 320 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 321 CHKMEMQ; 322 PetscFunctionReturn(0); 323 } 324 325 /* 326 Local utility routine that creates a mapping from the global column 327 number to the local number in the off-diagonal part of the local 328 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 329 a slightly higher hash table cost; without it it is not scalable (each processor 330 has an order N integer array but is fast to acess. 
331 */ 332 #undef __FUNCT__ 333 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 334 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 335 { 336 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 337 PetscErrorCode ierr; 338 PetscInt n = aij->B->cmap->n,i; 339 340 PetscFunctionBegin; 341 #if defined (PETSC_USE_CTABLE) 342 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 343 for (i=0; i<n; i++){ 344 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 345 } 346 #else 347 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 348 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 349 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 350 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 351 #endif 352 PetscFunctionReturn(0); 353 } 354 355 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 356 { \ 357 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 358 lastcol1 = col;\ 359 while (high1-low1 > 5) { \ 360 t = (low1+high1)/2; \ 361 if (rp1[t] > col) high1 = t; \ 362 else low1 = t; \ 363 } \ 364 for (_i=low1; _i<high1; _i++) { \ 365 if (rp1[_i] > col) break; \ 366 if (rp1[_i] == col) { \ 367 if (addv == ADD_VALUES) ap1[_i] += value; \ 368 else ap1[_i] = value; \ 369 goto a_noinsert; \ 370 } \ 371 } \ 372 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 373 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 374 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 375 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 376 N = nrow1++ - 1; a->nz++; high1++; \ 377 /* shift up all the later entries in this row */ \ 378 for (ii=N; ii>=_i; ii--) { \ 379 rp1[ii+1] = rp1[ii]; \ 380 ap1[ii+1] = ap1[ii]; \ 381 } \ 382 rp1[_i] = col; \ 383 ap1[_i] = value; \ 384 a_noinsert: ; \ 385 ailen[row] = nrow1; \ 386 } 387 388 389 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 390 { \ 391 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 392 lastcol2 = col;\ 393 while (high2-low2 > 5) { \ 394 t = (low2+high2)/2; \ 395 if (rp2[t] > col) high2 = t; \ 396 else low2 = t; \ 397 } \ 398 for (_i=low2; _i<high2; _i++) { \ 399 if (rp2[_i] > col) break; \ 400 if (rp2[_i] == col) { \ 401 if (addv == ADD_VALUES) ap2[_i] += value; \ 402 else ap2[_i] = value; \ 403 goto b_noinsert; \ 404 } \ 405 } \ 406 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 407 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 408 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 409 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 410 N = nrow2++ - 1; b->nz++; high2++; \ 411 /* shift up all the later entries in this row */ \ 412 for (ii=N; ii>=_i; ii--) { \ 413 rp2[ii+1] = rp2[ii]; \ 414 ap2[ii+1] = ap2[ii]; \ 415 } \ 416 rp2[_i] = col; \ 417 ap2[_i] = value; \ 418 b_noinsert: ; \ 419 bilen[row] = nrow2; \ 420 } 421 422 #undef __FUNCT__ 423 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 424 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 425 { 426 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 427 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 428 PetscErrorCode ierr; 429 PetscInt l,*garray = mat->garray,diag; 430 431 
PetscFunctionBegin; 432 /* code only works for square matrices A */ 433 434 /* find size of row to the left of the diagonal part */ 435 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 436 row = row - diag; 437 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 438 if (garray[b->j[b->i[row]+l]] > diag) break; 439 } 440 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 441 442 /* diagonal part */ 443 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 444 445 /* right of diagonal part */ 446 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 447 PetscFunctionReturn(0); 448 } 449 450 #undef __FUNCT__ 451 #define __FUNCT__ "MatSetValues_MPIAIJ" 452 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 453 { 454 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 455 PetscScalar value; 456 PetscErrorCode ierr; 457 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 458 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 459 PetscBool roworiented = aij->roworiented; 460 461 /* Some Variables required in the macro */ 462 Mat A = aij->A; 463 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 464 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 465 MatScalar *aa = a->a; 466 PetscBool ignorezeroentries = a->ignorezeroentries; 467 Mat B = aij->B; 468 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 469 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 470 MatScalar *ba = b->a; 471 472 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 473 PetscInt nonew = a->nonew; 474 MatScalar *ap1,*ap2; 475 476 PetscFunctionBegin; 477 if (v) PetscValidScalarPointer(v,6); 478 for (i=0; i<m; i++) { 479 if (im[i] < 0) continue; 480 #if defined(PETSC_USE_DEBUG) 481 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 482 #endif 483 if (im[i] >= rstart && im[i] < rend) { 484 row = im[i] - rstart; 485 lastcol1 = -1; 486 rp1 = aj + ai[row]; 487 ap1 = aa + ai[row]; 488 rmax1 = aimax[row]; 489 nrow1 = ailen[row]; 490 low1 = 0; 491 high1 = nrow1; 492 lastcol2 = -1; 493 rp2 = bj + bi[row]; 494 ap2 = ba + bi[row]; 495 rmax2 = bimax[row]; 496 nrow2 = bilen[row]; 497 low2 = 0; 498 high2 = nrow2; 499 500 for (j=0; j<n; j++) { 501 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 502 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 503 if (in[j] >= cstart && in[j] < cend){ 504 col = in[j] - cstart; 505 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 506 } else if (in[j] < 0) continue; 507 #if defined(PETSC_USE_DEBUG) 508 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 509 #endif 510 else { 511 if (mat->was_assembled) { 512 if (!aij->colmap) { 513 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 514 } 515 #if defined (PETSC_USE_CTABLE) 516 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 517 col--; 518 #else 519 col = aij->colmap[in[j]] - 1; 520 #endif 521 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 522 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 523 col = in[j]; 524 /* Reinitialize the variables required by 
MatSetValues_SeqAIJ_B_Private() */ 525 B = aij->B; 526 b = (Mat_SeqAIJ*)B->data; 527 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 528 rp2 = bj + bi[row]; 529 ap2 = ba + bi[row]; 530 rmax2 = bimax[row]; 531 nrow2 = bilen[row]; 532 low2 = 0; 533 high2 = nrow2; 534 bm = aij->B->rmap->n; 535 ba = b->a; 536 } 537 } else col = in[j]; 538 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 539 } 540 } 541 } else { 542 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 543 if (!aij->donotstash) { 544 if (roworiented) { 545 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 546 } else { 547 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 548 } 549 } 550 } 551 } 552 PetscFunctionReturn(0); 553 } 554 555 #undef __FUNCT__ 556 #define __FUNCT__ "MatGetValues_MPIAIJ" 557 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 558 { 559 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 560 PetscErrorCode ierr; 561 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 562 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 563 564 PetscFunctionBegin; 565 for (i=0; i<m; i++) { 566 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 567 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 568 if (idxm[i] >= rstart && idxm[i] < rend) { 569 row = idxm[i] - rstart; 570 for (j=0; j<n; j++) { 571 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 572 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 573 if (idxn[j] >= cstart && idxn[j] < cend){ 574 col = idxn[j] - cstart; 575 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 576 } else { 577 if (!aij->colmap) { 578 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 579 } 580 #if defined (PETSC_USE_CTABLE) 581 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 582 col --; 583 #else 584 col = aij->colmap[idxn[j]] - 1; 585 #endif 586 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 587 else { 588 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 589 } 590 } 591 } 592 } else { 593 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 594 } 595 } 596 PetscFunctionReturn(0); 597 } 598 599 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 600 601 #undef __FUNCT__ 602 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 603 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 604 { 605 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 606 PetscErrorCode ierr; 607 PetscInt nstash,reallocs; 608 InsertMode addv; 609 610 PetscFunctionBegin; 611 if (aij->donotstash || mat->nooffprocentries) { 612 PetscFunctionReturn(0); 613 } 614 615 /* make sure all processors are either in INSERTMODE or ADDMODE */ 616 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 617 if (addv == (ADD_VALUES|INSERT_VALUES)) 
SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 618 mat->insertmode = addv; /* in case this processor had no cache */ 619 620 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 621 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 622 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 623 PetscFunctionReturn(0); 624 } 625 626 #undef __FUNCT__ 627 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 628 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 629 { 630 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 631 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 632 PetscErrorCode ierr; 633 PetscMPIInt n; 634 PetscInt i,j,rstart,ncols,flg; 635 PetscInt *row,*col; 636 PetscBool other_disassembled; 637 PetscScalar *val; 638 InsertMode addv = mat->insertmode; 639 640 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 641 PetscFunctionBegin; 642 if (!aij->donotstash && !mat->nooffprocentries) { 643 while (1) { 644 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 645 if (!flg) break; 646 647 for (i=0; i<n;) { 648 /* Now identify the consecutive vals belonging to the same row */ 649 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 650 if (j < n) ncols = j-i; 651 else ncols = n-i; 652 /* Now assemble all these values with a single function call */ 653 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 654 i = j; 655 } 656 } 657 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 658 } 659 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 660 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 661 662 /* determine if any processor has disassembled, if so we must 663 also disassemble ourselfs, in order that we may reassemble. 
*/ 664 /* 665 if nonzero structure of submatrix B cannot change then we know that 666 no processor disassembled thus we can skip this stuff 667 */ 668 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 669 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 670 if (mat->was_assembled && !other_disassembled) { 671 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 672 } 673 } 674 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 675 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 676 } 677 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 678 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 679 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 680 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 681 682 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 683 aij->rowvalues = 0; 684 685 /* used by MatAXPY() */ 686 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 687 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 688 689 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 690 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 691 PetscFunctionReturn(0); 692 } 693 694 #undef __FUNCT__ 695 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 696 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 697 { 698 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 699 PetscErrorCode ierr; 700 701 PetscFunctionBegin; 702 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 703 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 704 PetscFunctionReturn(0); 705 } 706 707 #undef __FUNCT__ 708 #define __FUNCT__ "MatZeroRows_MPIAIJ" 709 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 710 { 711 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 712 PetscErrorCode ierr; 713 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 714 PetscInt i,*owners = A->rmap->range; 715 PetscInt *nprocs,j,idx,nsends,row; 716 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 717 PetscInt *rvalues,count,base,slen,*source; 718 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 719 MPI_Comm comm = ((PetscObject)A)->comm; 720 MPI_Request *send_waits,*recv_waits; 721 MPI_Status recv_status,*send_status; 722 const PetscScalar *xx; 723 PetscScalar *bb; 724 #if defined(PETSC_DEBUG) 725 PetscBool found = PETSC_FALSE; 726 #endif 727 728 PetscFunctionBegin; 729 /* first count number of contributors to each processor */ 730 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 731 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 732 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 733 j = 0; 734 for (i=0; i<N; i++) { 735 if (lastidx > (idx = rows[i])) j = 0; 736 lastidx = idx; 737 for (; j<size; j++) { 738 if (idx >= owners[j] && idx < owners[j+1]) { 739 nprocs[2*j]++; 740 nprocs[2*j+1] = 1; 741 owner[i] = j; 742 #if defined(PETSC_DEBUG) 743 found = PETSC_TRUE; 744 #endif 745 break; 746 } 747 } 748 #if defined(PETSC_DEBUG) 749 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 750 found = PETSC_FALSE; 751 #endif 752 } 753 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 754 755 if (A->nooffproczerorows) { 756 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 757 nrecvs = nsends; 758 nmax = N; 759 } else 
{ 760 /* inform other processors of number of messages and max length*/ 761 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 762 } 763 764 /* post receives: */ 765 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 766 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 767 for (i=0; i<nrecvs; i++) { 768 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 769 } 770 771 /* do sends: 772 1) starts[i] gives the starting index in svalues for stuff going to 773 the ith processor 774 */ 775 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 776 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 777 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 778 starts[0] = 0; 779 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 780 for (i=0; i<N; i++) { 781 svalues[starts[owner[i]]++] = rows[i]; 782 } 783 784 starts[0] = 0; 785 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 786 count = 0; 787 for (i=0; i<size; i++) { 788 if (nprocs[2*i+1]) { 789 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 790 } 791 } 792 ierr = PetscFree(starts);CHKERRQ(ierr); 793 794 base = owners[rank]; 795 796 /* wait on receives */ 797 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 798 count = nrecvs; slen = 0; 799 while (count) { 800 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 801 /* unpack receives into our local space */ 802 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 803 source[imdex] = recv_status.MPI_SOURCE; 804 lens[imdex] = n; 805 slen += n; 806 count--; 807 } 808 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 809 810 /* move the data into the send scatter */ 811 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 812 count = 0; 813 for (i=0; i<nrecvs; i++) { 814 values = rvalues + i*nmax; 815 for (j=0; j<lens[i]; j++) { 816 lrows[count++] = values[j] - base; 817 } 818 } 819 ierr = PetscFree(rvalues);CHKERRQ(ierr); 820 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 821 ierr = PetscFree(owner);CHKERRQ(ierr); 822 ierr = PetscFree(nprocs);CHKERRQ(ierr); 823 824 /* fix right hand side if needed */ 825 if (x && b) { 826 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 827 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 828 for (i=0; i<slen; i++) { 829 bb[lrows[i]] = diag*xx[lrows[i]]; 830 } 831 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 832 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 833 } 834 /* 835 Zero the required rows. If the "diagonal block" of the matrix 836 is square and the user wishes to set the diagonal we use separate 837 code so that MatSetValues() is not called for each diagonal allocating 838 new memory, thus calling lots of mallocs and slowing things down. 
839 840 */ 841 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 842 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 843 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 844 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 845 } else if (diag != 0.0) { 846 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 847 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 848 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 849 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 850 } 851 for (i = 0; i < slen; i++) { 852 row = lrows[i] + rstart; 853 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 854 } 855 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 856 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 857 } else { 858 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 859 } 860 ierr = PetscFree(lrows);CHKERRQ(ierr); 861 862 /* wait on sends */ 863 if (nsends) { 864 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 865 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 866 ierr = PetscFree(send_status);CHKERRQ(ierr); 867 } 868 ierr = PetscFree(send_waits);CHKERRQ(ierr); 869 ierr = PetscFree(svalues);CHKERRQ(ierr); 870 PetscFunctionReturn(0); 871 } 872 873 #undef __FUNCT__ 874 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 875 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 876 { 877 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 878 PetscErrorCode ierr; 879 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 880 PetscInt i,*owners = A->rmap->range; 881 PetscInt *nprocs,j,idx,nsends; 882 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 883 PetscInt *rvalues,count,base,slen,*source; 884 PetscInt *lens,*lrows,*values,m; 885 MPI_Comm comm = ((PetscObject)A)->comm; 886 MPI_Request *send_waits,*recv_waits; 887 MPI_Status recv_status,*send_status; 888 const PetscScalar *xx; 889 PetscScalar *bb,*mask; 890 Vec xmask,lmask; 891 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 892 const PetscInt *aj, *ii,*ridx; 893 PetscScalar *aa; 894 #if defined(PETSC_DEBUG) 895 PetscBool found = PETSC_FALSE; 896 #endif 897 898 PetscFunctionBegin; 899 /* first count number of contributors to each processor */ 900 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 901 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 902 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 903 j = 0; 904 for (i=0; i<N; i++) { 905 if (lastidx > (idx = rows[i])) j = 0; 906 lastidx = idx; 907 for (; j<size; j++) { 908 if (idx >= owners[j] && idx < owners[j+1]) { 909 nprocs[2*j]++; 910 nprocs[2*j+1] = 1; 911 owner[i] = j; 912 #if defined(PETSC_DEBUG) 913 found = PETSC_TRUE; 914 #endif 915 break; 916 } 917 } 918 #if defined(PETSC_DEBUG) 919 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 920 found = PETSC_FALSE; 921 #endif 922 } 923 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 924 925 /* inform other processors of number of messages and max length*/ 926 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 927 928 /* post receives: */ 929 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 930 ierr = 
PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 931 for (i=0; i<nrecvs; i++) { 932 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 933 } 934 935 /* do sends: 936 1) starts[i] gives the starting index in svalues for stuff going to 937 the ith processor 938 */ 939 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 940 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 941 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 942 starts[0] = 0; 943 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 944 for (i=0; i<N; i++) { 945 svalues[starts[owner[i]]++] = rows[i]; 946 } 947 948 starts[0] = 0; 949 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 950 count = 0; 951 for (i=0; i<size; i++) { 952 if (nprocs[2*i+1]) { 953 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 954 } 955 } 956 ierr = PetscFree(starts);CHKERRQ(ierr); 957 958 base = owners[rank]; 959 960 /* wait on receives */ 961 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 962 count = nrecvs; slen = 0; 963 while (count) { 964 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 965 /* unpack receives into our local space */ 966 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 967 source[imdex] = recv_status.MPI_SOURCE; 968 lens[imdex] = n; 969 slen += n; 970 count--; 971 } 972 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 973 974 /* move the data into the send scatter */ 975 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 976 count = 0; 977 for (i=0; i<nrecvs; i++) { 978 values = rvalues + i*nmax; 979 for (j=0; j<lens[i]; j++) { 980 lrows[count++] = values[j] - base; 981 } 982 } 983 ierr = PetscFree(rvalues);CHKERRQ(ierr); 984 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 985 ierr = PetscFree(owner);CHKERRQ(ierr); 986 ierr = PetscFree(nprocs);CHKERRQ(ierr); 987 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 988 989 /* zero diagonal part of matrix */ 990 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 991 992 /* handle off diagonal part of matrix */ 993 ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr); 994 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 995 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 996 for (i=0; i<slen; i++) { 997 bb[lrows[i]] = 1; 998 } 999 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1000 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1001 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1002 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1003 if (x) { 1004 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1005 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1006 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1007 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1008 } 1009 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1010 1011 /* remove zeroed rows of off diagonal matrix */ 1012 ii = aij->i; 1013 for (i=0; i<slen; i++) { 1014 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1015 } 1016 1017 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1018 if (aij->compressedrow.use){ 1019 m = aij->compressedrow.nrows; 1020 ii = 
aij->compressedrow.i; 1021 ridx = aij->compressedrow.rindex; 1022 for (i=0; i<m; i++){ 1023 n = ii[i+1] - ii[i]; 1024 aj = aij->j + ii[i]; 1025 aa = aij->a + ii[i]; 1026 1027 for (j=0; j<n; j++) { 1028 if (PetscAbsScalar(mask[*aj])) { 1029 if (b) bb[*ridx] -= *aa*xx[*aj]; 1030 *aa = 0.0; 1031 } 1032 aa++; 1033 aj++; 1034 } 1035 ridx++; 1036 } 1037 } else { /* do not use compressed row format */ 1038 m = l->B->rmap->n; 1039 for (i=0; i<m; i++) { 1040 n = ii[i+1] - ii[i]; 1041 aj = aij->j + ii[i]; 1042 aa = aij->a + ii[i]; 1043 for (j=0; j<n; j++) { 1044 if (PetscAbsScalar(mask[*aj])) { 1045 if (b) bb[i] -= *aa*xx[*aj]; 1046 *aa = 0.0; 1047 } 1048 aa++; 1049 aj++; 1050 } 1051 } 1052 } 1053 if (x) { 1054 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1055 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1056 } 1057 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1058 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1059 ierr = PetscFree(lrows);CHKERRQ(ierr); 1060 1061 /* wait on sends */ 1062 if (nsends) { 1063 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1064 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1065 ierr = PetscFree(send_status);CHKERRQ(ierr); 1066 } 1067 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1068 ierr = PetscFree(svalues);CHKERRQ(ierr); 1069 1070 PetscFunctionReturn(0); 1071 } 1072 1073 #undef __FUNCT__ 1074 #define __FUNCT__ "MatMult_MPIAIJ" 1075 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1076 { 1077 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1078 PetscErrorCode ierr; 1079 PetscInt nt; 1080 1081 PetscFunctionBegin; 1082 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1083 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1084 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1085 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1086 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1087 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1088 PetscFunctionReturn(0); 1089 } 1090 1091 #undef __FUNCT__ 1092 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1093 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1094 { 1095 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1096 PetscErrorCode ierr; 1097 1098 PetscFunctionBegin; 1099 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1100 PetscFunctionReturn(0); 1101 } 1102 1103 #undef __FUNCT__ 1104 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1105 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1106 { 1107 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1108 PetscErrorCode ierr; 1109 1110 PetscFunctionBegin; 1111 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1112 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1113 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1114 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1115 PetscFunctionReturn(0); 1116 } 1117 1118 #undef __FUNCT__ 1119 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1120 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1121 { 1122 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1123 PetscErrorCode ierr; 1124 PetscBool merged; 1125 1126 PetscFunctionBegin; 1127 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1128 /* do nondiagonal part */ 1129 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1130 if (!merged) { 1131 /* 
send it on its way */ 1132 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1133 /* do local part */ 1134 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1135 /* receive remote parts: note this assumes the values are not actually */ 1136 /* added in yy until the next line, */ 1137 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1138 } else { 1139 /* do local part */ 1140 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1141 /* send it on its way */ 1142 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1143 /* values actually were received in the Begin() but we need to call this nop */ 1144 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1145 } 1146 PetscFunctionReturn(0); 1147 } 1148 1149 EXTERN_C_BEGIN 1150 #undef __FUNCT__ 1151 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1152 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1153 { 1154 MPI_Comm comm; 1155 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 1156 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1157 IS Me,Notme; 1158 PetscErrorCode ierr; 1159 PetscInt M,N,first,last,*notme,i; 1160 PetscMPIInt size; 1161 1162 PetscFunctionBegin; 1163 1164 /* Easy test: symmetric diagonal block */ 1165 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 1166 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1167 if (!*f) PetscFunctionReturn(0); 1168 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1169 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1170 if (size == 1) PetscFunctionReturn(0); 1171 1172 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1173 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1174 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1175 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1176 for (i=0; i<first; i++) notme[i] = i; 1177 for (i=last; i<M; i++) notme[i-last+first] = i; 1178 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1179 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1180 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1181 Aoff = Aoffs[0]; 1182 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1183 Boff = Boffs[0]; 1184 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1185 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1186 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1187 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1188 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1189 ierr = PetscFree(notme);CHKERRQ(ierr); 1190 PetscFunctionReturn(0); 1191 } 1192 EXTERN_C_END 1193 1194 #undef __FUNCT__ 1195 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1196 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1197 { 1198 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1199 PetscErrorCode ierr; 1200 1201 PetscFunctionBegin; 1202 /* do nondiagonal part */ 1203 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1204 /* send it on its way */ 1205 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1206 /* do local part */ 1207 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1208 /* receive remote parts */ 1209 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1210 PetscFunctionReturn(0); 1211 } 
1212 1213 /* 1214 This only works correctly for square matrices where the subblock A->A is the 1215 diagonal block 1216 */ 1217 #undef __FUNCT__ 1218 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1219 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1220 { 1221 PetscErrorCode ierr; 1222 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1223 1224 PetscFunctionBegin; 1225 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1226 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1227 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1228 PetscFunctionReturn(0); 1229 } 1230 1231 #undef __FUNCT__ 1232 #define __FUNCT__ "MatScale_MPIAIJ" 1233 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1234 { 1235 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1236 PetscErrorCode ierr; 1237 1238 PetscFunctionBegin; 1239 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1240 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1241 PetscFunctionReturn(0); 1242 } 1243 1244 #undef __FUNCT__ 1245 #define __FUNCT__ "MatDestroy_MPIAIJ" 1246 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1247 { 1248 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1249 PetscErrorCode ierr; 1250 1251 PetscFunctionBegin; 1252 #if defined(PETSC_USE_LOG) 1253 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1254 #endif 1255 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1256 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1257 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1258 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1259 #if defined (PETSC_USE_CTABLE) 1260 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1261 #else 1262 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1263 #endif 1264 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1265 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1266 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1267 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1268 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1269 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1270 1271 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1272 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1273 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1274 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1275 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1276 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1277 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1278 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1279 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1280 PetscFunctionReturn(0); 1281 } 1282 1283 #undef __FUNCT__ 1284 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1285 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1286 { 1287 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1288 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 1289 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 1290 PetscErrorCode ierr; 1291 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1292 int 
fd; 1293 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1294 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 1295 PetscScalar *column_values; 1296 PetscInt message_count,flowcontrolcount; 1297 1298 PetscFunctionBegin; 1299 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1300 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1301 nz = A->nz + B->nz; 1302 if (!rank) { 1303 header[0] = MAT_FILE_CLASSID; 1304 header[1] = mat->rmap->N; 1305 header[2] = mat->cmap->N; 1306 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1307 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1308 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1309 /* get largest number of rows any processor has */ 1310 rlen = mat->rmap->n; 1311 range = mat->rmap->range; 1312 for (i=1; i<size; i++) { 1313 rlen = PetscMax(rlen,range[i+1] - range[i]); 1314 } 1315 } else { 1316 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1317 rlen = mat->rmap->n; 1318 } 1319 1320 /* load up the local row counts */ 1321 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 1322 for (i=0; i<mat->rmap->n; i++) { 1323 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1324 } 1325 1326 /* store the row lengths to the file */ 1327 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1328 if (!rank) { 1329 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1330 for (i=1; i<size; i++) { 1331 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1332 rlen = range[i+1] - range[i]; 1333 ierr = MPILong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1334 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1335 } 1336 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1337 } else { 1338 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1339 ierr = MPILong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1340 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1341 } 1342 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1343 1344 /* load up the local column indices */ 1345 nzmax = nz; /* )th processor needs space a largest processor needs */ 1346 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1347 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1348 cnt = 0; 1349 for (i=0; i<mat->rmap->n; i++) { 1350 for (j=B->i[i]; j<B->i[i+1]; j++) { 1351 if ( (col = garray[B->j[j]]) > cstart) break; 1352 column_indices[cnt++] = col; 1353 } 1354 for (k=A->i[i]; k<A->i[i+1]; k++) { 1355 column_indices[cnt++] = A->j[k] + cstart; 1356 } 1357 for (; j<B->i[i+1]; j++) { 1358 column_indices[cnt++] = garray[B->j[j]]; 1359 } 1360 } 1361 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1362 1363 /* store the column indices to the file */ 1364 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1365 if (!rank) { 1366 MPI_Status status; 1367 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1368 for (i=1; i<size; i++) { 1369 ierr = 
PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1370 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1371 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1372 ierr = MPILong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1373 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1374 } 1375 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1376 } else { 1377 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1378 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1379 ierr = MPILong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1380 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1381 } 1382 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1383 1384 /* load up the local column values */ 1385 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1386 cnt = 0; 1387 for (i=0; i<mat->rmap->n; i++) { 1388 for (j=B->i[i]; j<B->i[i+1]; j++) { 1389 if ( garray[B->j[j]] > cstart) break; 1390 column_values[cnt++] = B->a[j]; 1391 } 1392 for (k=A->i[i]; k<A->i[i+1]; k++) { 1393 column_values[cnt++] = A->a[k]; 1394 } 1395 for (; j<B->i[i+1]; j++) { 1396 column_values[cnt++] = B->a[j]; 1397 } 1398 } 1399 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1400 1401 /* store the column values to the file */ 1402 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1403 if (!rank) { 1404 MPI_Status status; 1405 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1406 for (i=1; i<size; i++) { 1407 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1408 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1409 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1410 ierr = MPILong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1411 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1412 } 1413 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1414 } else { 1415 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1416 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1417 ierr = MPILong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1418 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1419 } 1420 ierr = PetscFree(column_values);CHKERRQ(ierr); 1421 PetscFunctionReturn(0); 1422 } 1423 1424 #undef __FUNCT__ 1425 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1426 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1427 { 1428 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1429 PetscErrorCode ierr; 1430 PetscMPIInt rank = aij->rank,size = aij->size; 1431 PetscBool isdraw,iascii,isbinary; 1432 PetscViewer sviewer; 1433 PetscViewerFormat format; 1434 1435 PetscFunctionBegin; 1436 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1437 ierr = 
PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1438 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1439 if (iascii) { 1440 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1441 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1442 MatInfo info; 1443 PetscBool inodes; 1444 1445 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1446 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1447 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1448 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1449 if (!inodes) { 1450 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1451 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1452 } else { 1453 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1454 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1455 } 1456 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1457 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1458 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1459 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1460 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1461 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1462 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1463 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1464 PetscFunctionReturn(0); 1465 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1466 PetscInt inodecount,inodelimit,*inodes; 1467 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1468 if (inodes) { 1469 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1470 } else { 1471 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1472 } 1473 PetscFunctionReturn(0); 1474 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1475 PetscFunctionReturn(0); 1476 } 1477 } else if (isbinary) { 1478 if (size == 1) { 1479 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1480 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1481 } else { 1482 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1483 } 1484 PetscFunctionReturn(0); 1485 } else if (isdraw) { 1486 PetscDraw draw; 1487 PetscBool isnull; 1488 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1489 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1490 } 1491 1492 if (size == 1) { 1493 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1494 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1495 } else { 1496 /* assemble the entire matrix onto first processor. 
*/ 1497 Mat A; 1498 Mat_SeqAIJ *Aloc; 1499 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1500 MatScalar *a; 1501 1502 if (mat->rmap->N > 1024) { 1503 PetscBool flg = PETSC_FALSE; 1504 1505 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1506 if (!flg) { 1507 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1508 } 1509 } 1510 1511 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1512 if (!rank) { 1513 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1514 } else { 1515 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1516 } 1517 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1518 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1519 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1520 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1521 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1522 1523 /* copy over the A part */ 1524 Aloc = (Mat_SeqAIJ*)aij->A->data; 1525 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1526 row = mat->rmap->rstart; 1527 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1528 for (i=0; i<m; i++) { 1529 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1530 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1531 } 1532 aj = Aloc->j; 1533 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1534 1535 /* copy over the B part */ 1536 Aloc = (Mat_SeqAIJ*)aij->B->data; 1537 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1538 row = mat->rmap->rstart; 1539 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1540 ct = cols; 1541 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1542 for (i=0; i<m; i++) { 1543 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1544 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1545 } 1546 ierr = PetscFree(ct);CHKERRQ(ierr); 1547 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1548 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1549 /* 1550 Everyone has to call to draw the matrix since the graphics waits are 1551 synchronized across all processors that share the PetscDraw object 1552 */ 1553 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1554 if (!rank) { 1555 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1556 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1557 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1558 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1559 } 1560 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1561 ierr = MatDestroy(&A);CHKERRQ(ierr); 1562 } 1563 PetscFunctionReturn(0); 1564 } 1565 1566 #undef __FUNCT__ 1567 #define __FUNCT__ "MatView_MPIAIJ" 1568 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1569 { 1570 PetscErrorCode ierr; 1571 PetscBool iascii,isdraw,issocket,isbinary; 1572 1573 PetscFunctionBegin; 1574 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1575 ierr = 
PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1576 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1577 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1578 if (iascii || isdraw || isbinary || issocket) { 1579 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1580 } else { 1581 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1582 } 1583 PetscFunctionReturn(0); 1584 } 1585 1586 #undef __FUNCT__ 1587 #define __FUNCT__ "MatSOR_MPIAIJ" 1588 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1589 { 1590 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1591 PetscErrorCode ierr; 1592 Vec bb1 = 0; 1593 PetscBool hasop; 1594 1595 PetscFunctionBegin; 1596 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1597 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1598 } 1599 1600 if (flag == SOR_APPLY_UPPER) { 1601 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1602 PetscFunctionReturn(0); 1603 } 1604 1605 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1606 if (flag & SOR_ZERO_INITIAL_GUESS) { 1607 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1608 its--; 1609 } 1610 1611 while (its--) { 1612 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1613 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1614 1615 /* update rhs: bb1 = bb - B*x */ 1616 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1617 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1618 1619 /* local sweep */ 1620 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1621 } 1622 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1623 if (flag & SOR_ZERO_INITIAL_GUESS) { 1624 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1625 its--; 1626 } 1627 while (its--) { 1628 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1629 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1630 1631 /* update rhs: bb1 = bb - B*x */ 1632 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1633 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1634 1635 /* local sweep */ 1636 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1637 } 1638 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1639 if (flag & SOR_ZERO_INITIAL_GUESS) { 1640 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1641 its--; 1642 } 1643 while (its--) { 1644 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1645 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1646 1647 /* update rhs: bb1 = bb - B*x */ 1648 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1649 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1650 1651 /* local sweep */ 1652 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1653 } 1654 } else if (flag & SOR_EISENSTAT) { 1655 Vec xx1; 1656 1657 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1658 ierr = 
(*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1659 1660 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1661 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1662 if (!mat->diag) { 1663 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1664 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1665 } 1666 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1667 if (hasop) { 1668 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1669 } else { 1670 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1671 } 1672 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1673 1674 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1675 1676 /* local sweep */ 1677 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1678 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1679 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1680 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1681 1682 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1683 PetscFunctionReturn(0); 1684 } 1685 1686 #undef __FUNCT__ 1687 #define __FUNCT__ "MatPermute_MPIAIJ" 1688 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1689 { 1690 MPI_Comm comm; 1691 PetscInt first,local_rowsize,local_colsize; 1692 const PetscInt *rows; 1693 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1694 PetscErrorCode ierr; 1695 1696 PetscFunctionBegin; 1697 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1698 /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */ 1699 ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr); 1700 /* collect the global row permutation and invert it */ 1701 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1702 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1703 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 1704 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1705 ierr = ISDestroy(&growp);CHKERRQ(ierr); 1706 /* get the local target indices */ 1707 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1708 ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr); 1709 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1710 ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 1711 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1712 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 1713 /* the column permutation is so much easier; 1714 make a local version of 'colp' and invert it */ 1715 ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr); 1716 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1717 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1718 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 1719 /* now we just get the submatrix */ 1720 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1721 /* clean up */ 1722 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 1723 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 1724 PetscFunctionReturn(0); 1725 } 1726 1727 #undef __FUNCT__ 1728 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1729 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1730 { 1731 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1732 Mat A = mat->A,B = 
mat->B; 1733 PetscErrorCode ierr; 1734 PetscReal isend[5],irecv[5]; 1735 1736 PetscFunctionBegin; 1737 info->block_size = 1.0; 1738 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1739 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1740 isend[3] = info->memory; isend[4] = info->mallocs; 1741 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1742 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1743 isend[3] += info->memory; isend[4] += info->mallocs; 1744 if (flag == MAT_LOCAL) { 1745 info->nz_used = isend[0]; 1746 info->nz_allocated = isend[1]; 1747 info->nz_unneeded = isend[2]; 1748 info->memory = isend[3]; 1749 info->mallocs = isend[4]; 1750 } else if (flag == MAT_GLOBAL_MAX) { 1751 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1752 info->nz_used = irecv[0]; 1753 info->nz_allocated = irecv[1]; 1754 info->nz_unneeded = irecv[2]; 1755 info->memory = irecv[3]; 1756 info->mallocs = irecv[4]; 1757 } else if (flag == MAT_GLOBAL_SUM) { 1758 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1759 info->nz_used = irecv[0]; 1760 info->nz_allocated = irecv[1]; 1761 info->nz_unneeded = irecv[2]; 1762 info->memory = irecv[3]; 1763 info->mallocs = irecv[4]; 1764 } 1765 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1766 info->fill_ratio_needed = 0; 1767 info->factor_mallocs = 0; 1768 1769 PetscFunctionReturn(0); 1770 } 1771 1772 #undef __FUNCT__ 1773 #define __FUNCT__ "MatSetOption_MPIAIJ" 1774 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1775 { 1776 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1777 PetscErrorCode ierr; 1778 1779 PetscFunctionBegin; 1780 switch (op) { 1781 case MAT_NEW_NONZERO_LOCATIONS: 1782 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1783 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1784 case MAT_KEEP_NONZERO_PATTERN: 1785 case MAT_NEW_NONZERO_LOCATION_ERR: 1786 case MAT_USE_INODES: 1787 case MAT_IGNORE_ZERO_ENTRIES: 1788 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1789 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1790 break; 1791 case MAT_ROW_ORIENTED: 1792 a->roworiented = flg; 1793 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1794 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1795 break; 1796 case MAT_NEW_DIAGONALS: 1797 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1798 break; 1799 case MAT_IGNORE_OFF_PROC_ENTRIES: 1800 a->donotstash = flg; 1801 break; 1802 case MAT_SPD: 1803 A->spd_set = PETSC_TRUE; 1804 A->spd = flg; 1805 if (flg) { 1806 A->symmetric = PETSC_TRUE; 1807 A->structurally_symmetric = PETSC_TRUE; 1808 A->symmetric_set = PETSC_TRUE; 1809 A->structurally_symmetric_set = PETSC_TRUE; 1810 } 1811 break; 1812 case MAT_SYMMETRIC: 1813 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1814 break; 1815 case MAT_STRUCTURALLY_SYMMETRIC: 1816 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1817 break; 1818 case MAT_HERMITIAN: 1819 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1820 break; 1821 case MAT_SYMMETRY_ETERNAL: 1822 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1823 break; 1824 default: 1825 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1826 } 1827 PetscFunctionReturn(0); 1828 } 1829 1830 #undef __FUNCT__ 1831 #define __FUNCT__ "MatGetRow_MPIAIJ" 1832 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1833 { 1834 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1835 
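/* MatGetRow for MPIAIJ builds the requested local row by merging the row of the diagonal block A with the matching row of the off-diagonal block B; B's compressed column indices are translated back to global indices through garray, and the pieces are interleaved so the caller receives a single row sorted by increasing global column number. */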
PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1836 PetscErrorCode ierr; 1837 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1838 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1839 PetscInt *cmap,*idx_p; 1840 1841 PetscFunctionBegin; 1842 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1843 mat->getrowactive = PETSC_TRUE; 1844 1845 if (!mat->rowvalues && (idx || v)) { 1846 /* 1847 allocate enough space to hold information from the longest row. 1848 */ 1849 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1850 PetscInt max = 1,tmp; 1851 for (i=0; i<matin->rmap->n; i++) { 1852 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1853 if (max < tmp) { max = tmp; } 1854 } 1855 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1856 } 1857 1858 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1859 lrow = row - rstart; 1860 1861 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1862 if (!v) {pvA = 0; pvB = 0;} 1863 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1864 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1865 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1866 nztot = nzA + nzB; 1867 1868 cmap = mat->garray; 1869 if (v || idx) { 1870 if (nztot) { 1871 /* Sort by increasing column numbers, assuming A and B already sorted */ 1872 PetscInt imark = -1; 1873 if (v) { 1874 *v = v_p = mat->rowvalues; 1875 for (i=0; i<nzB; i++) { 1876 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1877 else break; 1878 } 1879 imark = i; 1880 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1881 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1882 } 1883 if (idx) { 1884 *idx = idx_p = mat->rowindices; 1885 if (imark > -1) { 1886 for (i=0; i<imark; i++) { 1887 idx_p[i] = cmap[cworkB[i]]; 1888 } 1889 } else { 1890 for (i=0; i<nzB; i++) { 1891 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1892 else break; 1893 } 1894 imark = i; 1895 } 1896 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1897 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1898 } 1899 } else { 1900 if (idx) *idx = 0; 1901 if (v) *v = 0; 1902 } 1903 } 1904 *nz = nztot; 1905 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1906 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1907 PetscFunctionReturn(0); 1908 } 1909 1910 #undef __FUNCT__ 1911 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1912 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1913 { 1914 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1915 1916 PetscFunctionBegin; 1917 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1918 aij->getrowactive = PETSC_FALSE; 1919 PetscFunctionReturn(0); 1920 } 1921 1922 #undef __FUNCT__ 1923 #define __FUNCT__ "MatNorm_MPIAIJ" 1924 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1925 { 1926 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1927 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1928 PetscErrorCode ierr; 1929 PetscInt i,j,cstart = mat->cmap->rstart; 1930 PetscReal sum = 0.0; 1931 MatScalar *v; 1932 1933 PetscFunctionBegin; 1934 if (aij->size == 1) { 1935 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1936 } else { 1937 if (type == 
NORM_FROBENIUS) { 1938 v = amat->a; 1939 for (i=0; i<amat->nz; i++) { 1940 #if defined(PETSC_USE_COMPLEX) 1941 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1942 #else 1943 sum += (*v)*(*v); v++; 1944 #endif 1945 } 1946 v = bmat->a; 1947 for (i=0; i<bmat->nz; i++) { 1948 #if defined(PETSC_USE_COMPLEX) 1949 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1950 #else 1951 sum += (*v)*(*v); v++; 1952 #endif 1953 } 1954 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1955 *norm = PetscSqrtReal(*norm); 1956 } else if (type == NORM_1) { /* max column norm */ 1957 PetscReal *tmp,*tmp2; 1958 PetscInt *jj,*garray = aij->garray; 1959 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1960 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1961 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1962 *norm = 0.0; 1963 v = amat->a; jj = amat->j; 1964 for (j=0; j<amat->nz; j++) { 1965 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1966 } 1967 v = bmat->a; jj = bmat->j; 1968 for (j=0; j<bmat->nz; j++) { 1969 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1970 } 1971 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1972 for (j=0; j<mat->cmap->N; j++) { 1973 if (tmp2[j] > *norm) *norm = tmp2[j]; 1974 } 1975 ierr = PetscFree(tmp);CHKERRQ(ierr); 1976 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1977 } else if (type == NORM_INFINITY) { /* max row norm */ 1978 PetscReal ntemp = 0.0; 1979 for (j=0; j<aij->A->rmap->n; j++) { 1980 v = amat->a + amat->i[j]; 1981 sum = 0.0; 1982 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1983 sum += PetscAbsScalar(*v); v++; 1984 } 1985 v = bmat->a + bmat->i[j]; 1986 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1987 sum += PetscAbsScalar(*v); v++; 1988 } 1989 if (sum > ntemp) ntemp = sum; 1990 } 1991 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1992 } else { 1993 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 1994 } 1995 } 1996 PetscFunctionReturn(0); 1997 } 1998 1999 #undef __FUNCT__ 2000 #define __FUNCT__ "MatTranspose_MPIAIJ" 2001 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2002 { 2003 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2004 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2005 PetscErrorCode ierr; 2006 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 2007 PetscInt cstart=A->cmap->rstart,ncol; 2008 Mat B; 2009 MatScalar *array; 2010 2011 PetscFunctionBegin; 2012 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2013 2014 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 2015 ai = Aloc->i; aj = Aloc->j; 2016 bi = Bloc->i; bj = Bloc->j; 2017 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2018 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 2019 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2020 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 2021 for (i=0; i<ai[ma]; i++){ 2022 d_nnz[aj[i]] ++; 2023 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2024 } 2025 2026 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2027 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2028 ierr = 
MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2029 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 2030 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2031 } else { 2032 B = *matout; 2033 } 2034 2035 /* copy over the A part */ 2036 array = Aloc->a; 2037 row = A->rmap->rstart; 2038 for (i=0; i<ma; i++) { 2039 ncol = ai[i+1]-ai[i]; 2040 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2041 row++; array += ncol; aj += ncol; 2042 } 2043 aj = Aloc->j; 2044 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2045 2046 /* copy over the B part */ 2047 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2048 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2049 array = Bloc->a; 2050 row = A->rmap->rstart; 2051 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 2052 cols_tmp = cols; 2053 for (i=0; i<mb; i++) { 2054 ncol = bi[i+1]-bi[i]; 2055 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2056 row++; array += ncol; cols_tmp += ncol; 2057 } 2058 ierr = PetscFree(cols);CHKERRQ(ierr); 2059 2060 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2061 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2062 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2063 *matout = B; 2064 } else { 2065 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2066 } 2067 PetscFunctionReturn(0); 2068 } 2069 2070 #undef __FUNCT__ 2071 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2072 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2073 { 2074 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2075 Mat a = aij->A,b = aij->B; 2076 PetscErrorCode ierr; 2077 PetscInt s1,s2,s3; 2078 2079 PetscFunctionBegin; 2080 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2081 if (rr) { 2082 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2083 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2084 /* Overlap communication with computation. 
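The forward scatter of rr into aij->lvec is started here so the messages are in flight while the diagonal block is scaled; VecScatterEnd() and the right-scaling of the off-diagonal block are done after the local work, once the scattered values are available.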
*/ 2085 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2086 } 2087 if (ll) { 2088 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2089 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2090 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2091 } 2092 /* scale the diagonal block */ 2093 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2094 2095 if (rr) { 2096 /* Do a scatter end and then right scale the off-diagonal block */ 2097 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2098 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2099 } 2100 2101 PetscFunctionReturn(0); 2102 } 2103 2104 #undef __FUNCT__ 2105 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 2106 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 2107 { 2108 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2109 PetscErrorCode ierr; 2110 2111 PetscFunctionBegin; 2112 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 2113 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 2114 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 2115 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 2116 PetscFunctionReturn(0); 2117 } 2118 #undef __FUNCT__ 2119 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2120 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2121 { 2122 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2123 PetscErrorCode ierr; 2124 2125 PetscFunctionBegin; 2126 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2127 PetscFunctionReturn(0); 2128 } 2129 2130 #undef __FUNCT__ 2131 #define __FUNCT__ "MatEqual_MPIAIJ" 2132 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2133 { 2134 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2135 Mat a,b,c,d; 2136 PetscBool flg; 2137 PetscErrorCode ierr; 2138 2139 PetscFunctionBegin; 2140 a = matA->A; b = matA->B; 2141 c = matB->A; d = matB->B; 2142 2143 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2144 if (flg) { 2145 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2146 } 2147 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2148 PetscFunctionReturn(0); 2149 } 2150 2151 #undef __FUNCT__ 2152 #define __FUNCT__ "MatCopy_MPIAIJ" 2153 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2154 { 2155 PetscErrorCode ierr; 2156 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2157 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2158 2159 PetscFunctionBegin; 2160 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2161 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2162 /* because of the column compression in the off-processor part of the matrix a->B, 2163 the number of columns in a->B and b->B may be different, hence we cannot call 2164 the MatCopy() directly on the two parts. 
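Each process keeps in a->B only those global columns that actually hold an off-diagonal nonzero on that process, with garray mapping the compressed local column indices back to global ones, so the two B blocks need not have the same local column dimension.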
If need be, we can provide a more 2165 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2166 then copying the submatrices */ 2167 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2168 } else { 2169 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2170 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2171 } 2172 PetscFunctionReturn(0); 2173 } 2174 2175 #undef __FUNCT__ 2176 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 2177 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 2178 { 2179 PetscErrorCode ierr; 2180 2181 PetscFunctionBegin; 2182 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2183 PetscFunctionReturn(0); 2184 } 2185 2186 #undef __FUNCT__ 2187 #define __FUNCT__ "MatAXPY_MPIAIJ" 2188 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2189 { 2190 PetscErrorCode ierr; 2191 PetscInt i; 2192 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2193 PetscBLASInt bnz,one=1; 2194 Mat_SeqAIJ *x,*y; 2195 2196 PetscFunctionBegin; 2197 if (str == SAME_NONZERO_PATTERN) { 2198 PetscScalar alpha = a; 2199 x = (Mat_SeqAIJ *)xx->A->data; 2200 y = (Mat_SeqAIJ *)yy->A->data; 2201 bnz = PetscBLASIntCast(x->nz); 2202 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2203 x = (Mat_SeqAIJ *)xx->B->data; 2204 y = (Mat_SeqAIJ *)yy->B->data; 2205 bnz = PetscBLASIntCast(x->nz); 2206 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2207 } else if (str == SUBSET_NONZERO_PATTERN) { 2208 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2209 2210 x = (Mat_SeqAIJ *)xx->B->data; 2211 y = (Mat_SeqAIJ *)yy->B->data; 2212 if (y->xtoy && y->XtoY != xx->B) { 2213 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2214 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2215 } 2216 if (!y->xtoy) { /* get xtoy */ 2217 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2218 y->XtoY = xx->B; 2219 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2220 } 2221 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2222 } else { 2223 Mat B; 2224 PetscInt *nnz_d,*nnz_o; 2225 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2226 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2227 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2228 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2229 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2230 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2231 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2232 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr); 2233 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2234 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2235 ierr = MatHeaderReplace(Y,B); 2236 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2237 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2238 } 2239 PetscFunctionReturn(0); 2240 } 2241 2242 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2243 2244 #undef __FUNCT__ 2245 #define __FUNCT__ "MatConjugate_MPIAIJ" 2246 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2247 { 2248 #if defined(PETSC_USE_COMPLEX) 2249 PetscErrorCode ierr; 2250 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2251 2252 PetscFunctionBegin; 2253 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2254 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2255 #else 2256 PetscFunctionBegin; 2257 #endif 2258 PetscFunctionReturn(0); 2259 } 2260 2261 
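/* MatRealPart and MatImaginaryPart for MPIAIJ simply forward the operation to the sequential diagonal (a->A) and off-diagonal (a->B) blocks. */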
#undef __FUNCT__ 2262 #define __FUNCT__ "MatRealPart_MPIAIJ" 2263 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2264 { 2265 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2266 PetscErrorCode ierr; 2267 2268 PetscFunctionBegin; 2269 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2270 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2271 PetscFunctionReturn(0); 2272 } 2273 2274 #undef __FUNCT__ 2275 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2276 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2277 { 2278 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2279 PetscErrorCode ierr; 2280 2281 PetscFunctionBegin; 2282 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2283 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2284 PetscFunctionReturn(0); 2285 } 2286 2287 #ifdef PETSC_HAVE_PBGL 2288 2289 #include <boost/parallel/mpi/bsp_process_group.hpp> 2290 #include <boost/graph/distributed/ilu_default_graph.hpp> 2291 #include <boost/graph/distributed/ilu_0_block.hpp> 2292 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2293 #include <boost/graph/distributed/petsc/interface.hpp> 2294 #include <boost/multi_array.hpp> 2295 #include <boost/parallel/distributed_property_map->hpp> 2296 2297 #undef __FUNCT__ 2298 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2299 /* 2300 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2301 */ 2302 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2303 { 2304 namespace petsc = boost::distributed::petsc; 2305 2306 namespace graph_dist = boost::graph::distributed; 2307 using boost::graph::distributed::ilu_default::process_group_type; 2308 using boost::graph::ilu_permuted; 2309 2310 PetscBool row_identity, col_identity; 2311 PetscContainer c; 2312 PetscInt m, n, M, N; 2313 PetscErrorCode ierr; 2314 2315 PetscFunctionBegin; 2316 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2317 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2318 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2319 if (!row_identity || !col_identity) { 2320 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2321 } 2322 2323 process_group_type pg; 2324 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2325 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2326 lgraph_type& level_graph = *lgraph_p; 2327 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2328 2329 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2330 ilu_permuted(level_graph); 2331 2332 /* put together the new matrix */ 2333 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2334 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2335 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2336 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2337 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2338 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2339 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2340 2341 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 2342 ierr = PetscContainerSetPointer(c, lgraph_p); 2343 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2344 ierr = PetscContainerDestroy(&c); 2345 PetscFunctionReturn(0); 2346 } 2347 2348 #undef __FUNCT__ 2349 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2350 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const 
MatFactorInfo *info) 2351 { 2352 PetscFunctionBegin; 2353 PetscFunctionReturn(0); 2354 } 2355 2356 #undef __FUNCT__ 2357 #define __FUNCT__ "MatSolve_MPIAIJ" 2358 /* 2359 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2360 */ 2361 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2362 { 2363 namespace graph_dist = boost::graph::distributed; 2364 2365 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2366 lgraph_type* lgraph_p; 2367 PetscContainer c; 2368 PetscErrorCode ierr; 2369 2370 PetscFunctionBegin; 2371 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2372 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2373 ierr = VecCopy(b, x);CHKERRQ(ierr); 2374 2375 PetscScalar* array_x; 2376 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2377 PetscInt sx; 2378 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2379 2380 PetscScalar* array_b; 2381 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2382 PetscInt sb; 2383 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2384 2385 lgraph_type& level_graph = *lgraph_p; 2386 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2387 2388 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2389 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2390 ref_x(array_x, boost::extents[num_vertices(graph)]); 2391 2392 typedef boost::iterator_property_map<array_ref_type::iterator, 2393 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2394 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2395 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2396 2397 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2398 2399 PetscFunctionReturn(0); 2400 } 2401 #endif 2402 2403 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2404 PetscInt nzlocal,nsends,nrecvs; 2405 PetscMPIInt *send_rank,*recv_rank; 2406 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2407 PetscScalar *sbuf_a,**rbuf_a; 2408 PetscErrorCode (*Destroy)(Mat); 2409 } Mat_Redundant; 2410 2411 #undef __FUNCT__ 2412 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2413 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2414 { 2415 PetscErrorCode ierr; 2416 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2417 PetscInt i; 2418 2419 PetscFunctionBegin; 2420 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2421 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2422 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2423 for (i=0; i<redund->nrecvs; i++){ 2424 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2425 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2426 } 2427 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2428 ierr = PetscFree(redund);CHKERRQ(ierr); 2429 PetscFunctionReturn(0); 2430 } 2431 2432 #undef __FUNCT__ 2433 #define __FUNCT__ "MatDestroy_MatRedundant" 2434 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2435 { 2436 PetscErrorCode ierr; 2437 PetscContainer container; 2438 Mat_Redundant *redund=PETSC_NULL; 2439 2440 PetscFunctionBegin; 2441 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2442 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2443 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2444 A->ops->destroy = redund->Destroy; 2445 ierr = 
PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2446 if (A->ops->destroy) { 2447 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2448 } 2449 PetscFunctionReturn(0); 2450 } 2451 2452 #undef __FUNCT__ 2453 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2454 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2455 { 2456 PetscMPIInt rank,size; 2457 MPI_Comm comm=((PetscObject)mat)->comm; 2458 PetscErrorCode ierr; 2459 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2460 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2461 PetscInt *rowrange=mat->rmap->range; 2462 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2463 Mat A=aij->A,B=aij->B,C=*matredundant; 2464 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2465 PetscScalar *sbuf_a; 2466 PetscInt nzlocal=a->nz+b->nz; 2467 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2468 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2469 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2470 MatScalar *aworkA,*aworkB; 2471 PetscScalar *vals; 2472 PetscMPIInt tag1,tag2,tag3,imdex; 2473 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2474 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2475 MPI_Status recv_status,*send_status; 2476 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2477 PetscInt **rbuf_j=PETSC_NULL; 2478 PetscScalar **rbuf_a=PETSC_NULL; 2479 Mat_Redundant *redund=PETSC_NULL; 2480 PetscContainer container; 2481 2482 PetscFunctionBegin; 2483 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2484 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2485 2486 if (reuse == MAT_REUSE_MATRIX) { 2487 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2488 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2489 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2490 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2491 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2492 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist"); 2493 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2494 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2495 2496 nsends = redund->nsends; 2497 nrecvs = redund->nrecvs; 2498 send_rank = redund->send_rank; 2499 recv_rank = redund->recv_rank; 2500 sbuf_nz = redund->sbuf_nz; 2501 rbuf_nz = redund->rbuf_nz; 2502 sbuf_j = redund->sbuf_j; 2503 sbuf_a = redund->sbuf_a; 2504 rbuf_j = redund->rbuf_j; 2505 rbuf_a = redund->rbuf_a; 2506 } 2507 2508 if (reuse == MAT_INITIAL_MATRIX){ 2509 PetscMPIInt subrank,subsize; 2510 PetscInt nleftover,np_subcomm; 2511 /* get the destination processors' id send_rank, nsends and nrecvs */ 2512 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2513 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2514 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2515 np_subcomm = size/nsubcomm; 2516 nleftover = size - nsubcomm*np_subcomm; 2517 nsends = 0; nrecvs = 0; 2518 for (i=0; i<size; i++){ /* i=rank*/ 2519 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2520 send_rank[nsends] = i; nsends++; 2521 recv_rank[nrecvs++] = i; 2522 } 2523 } 2524 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2525 i = size-nleftover-1; 2526 j = 0; 2527 while (j < nsubcomm - nleftover){ 2528 send_rank[nsends++] = i; 2529 i--; j++; 2530 } 2531 } 2532 2533 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2534 for (i=0; i<nleftover; i++){ 2535 recv_rank[nrecvs++] = size-nleftover+i; 2536 } 2537 } 2538 2539 /* allocate sbuf_j, sbuf_a */ 2540 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2541 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2542 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2543 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2544 2545 /* copy mat's local entries into the buffers */ 2546 if (reuse == MAT_INITIAL_MATRIX){ 2547 rownz_max = 0; 2548 rptr = sbuf_j; 2549 cols = sbuf_j + rend-rstart + 1; 2550 vals = sbuf_a; 2551 rptr[0] = 0; 2552 for (i=0; i<rend-rstart; i++){ 2553 row = i + rstart; 2554 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2555 ncols = nzA + nzB; 2556 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2557 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2558 /* load the column indices for this row into cols */ 2559 lwrite = 0; 2560 for (l=0; l<nzB; l++) { 2561 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2562 vals[lwrite] = aworkB[l]; 2563 cols[lwrite++] = ctmp; 2564 } 2565 } 2566 for (l=0; l<nzA; l++){ 2567 vals[lwrite] = aworkA[l]; 2568 cols[lwrite++] = cstart + cworkA[l]; 2569 } 2570 for (l=0; l<nzB; l++) { 2571 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2572 vals[lwrite] = aworkB[l]; 2573 cols[lwrite++] = ctmp; 2574 } 2575 } 2576 vals += ncols; 2577 cols += ncols; 2578 rptr[i+1] = rptr[i] + ncols; 2579 if (rownz_max < ncols) rownz_max = ncols; 2580 } 2581 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2582 } else { /* only copy matrix values into sbuf_a */ 2583 rptr = sbuf_j; 2584 vals = sbuf_a; 2585 rptr[0] = 0; 2586 for (i=0; i<rend-rstart; i++){ 2587 row = i + rstart; 2588 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2589 ncols = nzA + nzB; 2590 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2591 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2592 lwrite = 0; 2593 for (l=0; l<nzB; l++) { 2594 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2595 } 2596 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2597 for 
(l=0; l<nzB; l++) { 2598 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2599 } 2600 vals += ncols; 2601 rptr[i+1] = rptr[i] + ncols; 2602 } 2603 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2604 2605 /* send nzlocal to others, and recv other's nzlocal */ 2606 /*--------------------------------------------------*/ 2607 if (reuse == MAT_INITIAL_MATRIX){ 2608 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2609 s_waits2 = s_waits3 + nsends; 2610 s_waits1 = s_waits2 + nsends; 2611 r_waits1 = s_waits1 + nsends; 2612 r_waits2 = r_waits1 + nrecvs; 2613 r_waits3 = r_waits2 + nrecvs; 2614 } else { 2615 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2616 r_waits3 = s_waits3 + nsends; 2617 } 2618 2619 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2620 if (reuse == MAT_INITIAL_MATRIX){ 2621 /* get new tags to keep the communication clean */ 2622 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2623 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2624 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2625 2626 /* post receives of other's nzlocal */ 2627 for (i=0; i<nrecvs; i++){ 2628 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2629 } 2630 /* send nzlocal to others */ 2631 for (i=0; i<nsends; i++){ 2632 sbuf_nz[i] = nzlocal; 2633 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2634 } 2635 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2636 count = nrecvs; 2637 while (count) { 2638 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2639 recv_rank[imdex] = recv_status.MPI_SOURCE; 2640 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2641 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2642 2643 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2644 rbuf_nz[imdex] += i + 2; 2645 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2646 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2647 count--; 2648 } 2649 /* wait on sends of nzlocal */ 2650 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2651 /* send mat->i,j to others, and recv from other's */ 2652 /*------------------------------------------------*/ 2653 for (i=0; i<nsends; i++){ 2654 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2655 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2656 } 2657 /* wait on receives of mat->i,j */ 2658 /*------------------------------*/ 2659 count = nrecvs; 2660 while (count) { 2661 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2662 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2663 count--; 2664 } 2665 /* wait on sends of mat->i,j */ 2666 /*---------------------------*/ 2667 if (nsends) { 2668 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2669 } 2670 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2671 2672 /* post receives, send and receive mat->a */ 2673 
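/* only the numerical values (sbuf_a into rbuf_a) move in this phase, using tag3; the row pointers and column indices were exchanged when the matrix structure was first gathered */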
/*----------------------------------------*/ 2674 for (imdex=0; imdex<nrecvs; imdex++) { 2675 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2676 } 2677 for (i=0; i<nsends; i++){ 2678 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2679 } 2680 count = nrecvs; 2681 while (count) { 2682 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2683 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2684 count--; 2685 } 2686 if (nsends) { 2687 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2688 } 2689 2690 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2691 2692 /* create redundant matrix */ 2693 /*-------------------------*/ 2694 if (reuse == MAT_INITIAL_MATRIX){ 2695 /* compute rownz_max for preallocation */ 2696 for (imdex=0; imdex<nrecvs; imdex++){ 2697 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2698 rptr = rbuf_j[imdex]; 2699 for (i=0; i<j; i++){ 2700 ncols = rptr[i+1] - rptr[i]; 2701 if (rownz_max < ncols) rownz_max = ncols; 2702 } 2703 } 2704 2705 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2706 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2707 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2708 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2709 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2710 } else { 2711 C = *matredundant; 2712 } 2713 2714 /* insert local matrix entries */ 2715 rptr = sbuf_j; 2716 cols = sbuf_j + rend-rstart + 1; 2717 vals = sbuf_a; 2718 for (i=0; i<rend-rstart; i++){ 2719 row = i + rstart; 2720 ncols = rptr[i+1] - rptr[i]; 2721 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2722 vals += ncols; 2723 cols += ncols; 2724 } 2725 /* insert received matrix entries */ 2726 for (imdex=0; imdex<nrecvs; imdex++){ 2727 rstart = rowrange[recv_rank[imdex]]; 2728 rend = rowrange[recv_rank[imdex]+1]; 2729 rptr = rbuf_j[imdex]; 2730 cols = rbuf_j[imdex] + rend-rstart + 1; 2731 vals = rbuf_a[imdex]; 2732 for (i=0; i<rend-rstart; i++){ 2733 row = i + rstart; 2734 ncols = rptr[i+1] - rptr[i]; 2735 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2736 vals += ncols; 2737 cols += ncols; 2738 } 2739 } 2740 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2741 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2742 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2743 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2744 if (reuse == MAT_INITIAL_MATRIX) { 2745 PetscContainer container; 2746 *matredundant = C; 2747 /* create a supporting struct and attach it to C for reuse */ 2748 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2749 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2750 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2751 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2752 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2753 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2754 2755 redund->nzlocal = nzlocal; 2756 redund->nsends = nsends; 2757 redund->nrecvs = nrecvs; 2758 
redund->send_rank = send_rank; 2759 redund->recv_rank = recv_rank; 2760 redund->sbuf_nz = sbuf_nz; 2761 redund->rbuf_nz = rbuf_nz; 2762 redund->sbuf_j = sbuf_j; 2763 redund->sbuf_a = sbuf_a; 2764 redund->rbuf_j = rbuf_j; 2765 redund->rbuf_a = rbuf_a; 2766 2767 redund->Destroy = C->ops->destroy; 2768 C->ops->destroy = MatDestroy_MatRedundant; 2769 } 2770 PetscFunctionReturn(0); 2771 } 2772 2773 #undef __FUNCT__ 2774 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2775 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2776 { 2777 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2778 PetscErrorCode ierr; 2779 PetscInt i,*idxb = 0; 2780 PetscScalar *va,*vb; 2781 Vec vtmp; 2782 2783 PetscFunctionBegin; 2784 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2785 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2786 if (idx) { 2787 for (i=0; i<A->rmap->n; i++) { 2788 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2789 } 2790 } 2791 2792 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2793 if (idx) { 2794 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2795 } 2796 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2797 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2798 2799 for (i=0; i<A->rmap->n; i++){ 2800 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2801 va[i] = vb[i]; 2802 if (idx) idx[i] = a->garray[idxb[i]]; 2803 } 2804 } 2805 2806 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2807 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2808 ierr = PetscFree(idxb);CHKERRQ(ierr); 2809 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2810 PetscFunctionReturn(0); 2811 } 2812 2813 #undef __FUNCT__ 2814 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2815 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2816 { 2817 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2818 PetscErrorCode ierr; 2819 PetscInt i,*idxb = 0; 2820 PetscScalar *va,*vb; 2821 Vec vtmp; 2822 2823 PetscFunctionBegin; 2824 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2825 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2826 if (idx) { 2827 for (i=0; i<A->cmap->n; i++) { 2828 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2829 } 2830 } 2831 2832 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2833 if (idx) { 2834 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2835 } 2836 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2837 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2838 2839 for (i=0; i<A->rmap->n; i++){ 2840 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2841 va[i] = vb[i]; 2842 if (idx) idx[i] = a->garray[idxb[i]]; 2843 } 2844 } 2845 2846 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2847 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2848 ierr = PetscFree(idxb);CHKERRQ(ierr); 2849 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2850 PetscFunctionReturn(0); 2851 } 2852 2853 #undef __FUNCT__ 2854 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2855 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2856 { 2857 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2858 PetscInt n = A->rmap->n; 2859 PetscInt cstart = A->cmap->rstart; 2860 PetscInt *cmap = mat->garray; 2861 PetscInt *diagIdx, *offdiagIdx; 2862 Vec diagV, offdiagV; 2863 PetscScalar *a, *diagA, *offdiagA; 2864 PetscInt r; 2865 PetscErrorCode ierr; 2866 2867 PetscFunctionBegin; 2868 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2869 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2870 ierr = VecCreateSeq(((PetscObject)A)->comm, n, 
&offdiagV);CHKERRQ(ierr); 2871 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2872 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2873 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2874 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2875 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2876 for(r = 0; r < n; ++r) { 2877 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2878 a[r] = diagA[r]; 2879 idx[r] = cstart + diagIdx[r]; 2880 } else { 2881 a[r] = offdiagA[r]; 2882 idx[r] = cmap[offdiagIdx[r]]; 2883 } 2884 } 2885 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2886 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2887 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2888 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2889 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2890 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2891 PetscFunctionReturn(0); 2892 } 2893 2894 #undef __FUNCT__ 2895 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2896 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2897 { 2898 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2899 PetscInt n = A->rmap->n; 2900 PetscInt cstart = A->cmap->rstart; 2901 PetscInt *cmap = mat->garray; 2902 PetscInt *diagIdx, *offdiagIdx; 2903 Vec diagV, offdiagV; 2904 PetscScalar *a, *diagA, *offdiagA; 2905 PetscInt r; 2906 PetscErrorCode ierr; 2907 2908 PetscFunctionBegin; 2909 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2910 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2911 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2912 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2913 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2914 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2915 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2916 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2917 for(r = 0; r < n; ++r) { 2918 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2919 a[r] = diagA[r]; 2920 idx[r] = cstart + diagIdx[r]; 2921 } else { 2922 a[r] = offdiagA[r]; 2923 idx[r] = cmap[offdiagIdx[r]]; 2924 } 2925 } 2926 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2927 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2928 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2929 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2930 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2931 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2932 PetscFunctionReturn(0); 2933 } 2934 2935 #undef __FUNCT__ 2936 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2937 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2938 { 2939 PetscErrorCode ierr; 2940 Mat *dummy; 2941 2942 PetscFunctionBegin; 2943 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2944 *newmat = *dummy; 2945 ierr = PetscFree(dummy);CHKERRQ(ierr); 2946 PetscFunctionReturn(0); 2947 } 2948 2949 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2950 2951 #undef __FUNCT__ 2952 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 2953 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,PetscScalar **values) 2954 { 2955 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2956 PetscErrorCode ierr; 2957 2958 PetscFunctionBegin; 2959 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2960 PetscFunctionReturn(0); 2961 } 2962 2963 2964 /* -------------------------------------------------------------------*/ 2965 static 
struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2966 MatGetRow_MPIAIJ, 2967 MatRestoreRow_MPIAIJ, 2968 MatMult_MPIAIJ, 2969 /* 4*/ MatMultAdd_MPIAIJ, 2970 MatMultTranspose_MPIAIJ, 2971 MatMultTransposeAdd_MPIAIJ, 2972 #ifdef PETSC_HAVE_PBGL 2973 MatSolve_MPIAIJ, 2974 #else 2975 0, 2976 #endif 2977 0, 2978 0, 2979 /*10*/ 0, 2980 0, 2981 0, 2982 MatSOR_MPIAIJ, 2983 MatTranspose_MPIAIJ, 2984 /*15*/ MatGetInfo_MPIAIJ, 2985 MatEqual_MPIAIJ, 2986 MatGetDiagonal_MPIAIJ, 2987 MatDiagonalScale_MPIAIJ, 2988 MatNorm_MPIAIJ, 2989 /*20*/ MatAssemblyBegin_MPIAIJ, 2990 MatAssemblyEnd_MPIAIJ, 2991 MatSetOption_MPIAIJ, 2992 MatZeroEntries_MPIAIJ, 2993 /*24*/ MatZeroRows_MPIAIJ, 2994 0, 2995 #ifdef PETSC_HAVE_PBGL 2996 0, 2997 #else 2998 0, 2999 #endif 3000 0, 3001 0, 3002 /*29*/ MatSetUpPreallocation_MPIAIJ, 3003 #ifdef PETSC_HAVE_PBGL 3004 0, 3005 #else 3006 0, 3007 #endif 3008 0, 3009 0, 3010 0, 3011 /*34*/ MatDuplicate_MPIAIJ, 3012 0, 3013 0, 3014 0, 3015 0, 3016 /*39*/ MatAXPY_MPIAIJ, 3017 MatGetSubMatrices_MPIAIJ, 3018 MatIncreaseOverlap_MPIAIJ, 3019 MatGetValues_MPIAIJ, 3020 MatCopy_MPIAIJ, 3021 /*44*/ MatGetRowMax_MPIAIJ, 3022 MatScale_MPIAIJ, 3023 0, 3024 0, 3025 MatZeroRowsColumns_MPIAIJ, 3026 /*49*/ MatSetBlockSize_MPIAIJ, 3027 0, 3028 0, 3029 0, 3030 0, 3031 /*54*/ MatFDColoringCreate_MPIAIJ, 3032 0, 3033 MatSetUnfactored_MPIAIJ, 3034 MatPermute_MPIAIJ, 3035 0, 3036 /*59*/ MatGetSubMatrix_MPIAIJ, 3037 MatDestroy_MPIAIJ, 3038 MatView_MPIAIJ, 3039 0, 3040 0, 3041 /*64*/ 0, 3042 0, 3043 0, 3044 0, 3045 0, 3046 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3047 MatGetRowMinAbs_MPIAIJ, 3048 0, 3049 MatSetColoring_MPIAIJ, 3050 #if defined(PETSC_HAVE_ADIC) 3051 MatSetValuesAdic_MPIAIJ, 3052 #else 3053 0, 3054 #endif 3055 MatSetValuesAdifor_MPIAIJ, 3056 /*75*/ MatFDColoringApply_AIJ, 3057 0, 3058 0, 3059 0, 3060 0, 3061 /*80*/ 0, 3062 0, 3063 0, 3064 /*83*/ MatLoad_MPIAIJ, 3065 0, 3066 0, 3067 0, 3068 0, 3069 0, 3070 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3071 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3072 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3073 MatPtAP_Basic, 3074 MatPtAPSymbolic_MPIAIJ, 3075 /*94*/ MatPtAPNumeric_MPIAIJ, 3076 0, 3077 0, 3078 0, 3079 0, 3080 /*99*/ 0, 3081 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3082 MatPtAPNumeric_MPIAIJ_MPIAIJ, 3083 MatConjugate_MPIAIJ, 3084 0, 3085 /*104*/MatSetValuesRow_MPIAIJ, 3086 MatRealPart_MPIAIJ, 3087 MatImaginaryPart_MPIAIJ, 3088 0, 3089 0, 3090 /*109*/0, 3091 MatGetRedundantMatrix_MPIAIJ, 3092 MatGetRowMin_MPIAIJ, 3093 0, 3094 0, 3095 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3096 0, 3097 0, 3098 0, 3099 0, 3100 /*119*/0, 3101 0, 3102 0, 3103 0, 3104 MatGetMultiProcBlock_MPIAIJ, 3105 /*124*/MatFindNonZeroRows_MPIAIJ, 3106 MatGetColumnNorms_MPIAIJ, 3107 MatInvertBlockDiagonal_MPIAIJ, 3108 0, 3109 MatGetSubMatricesParallel_MPIAIJ, 3110 /*129*/0, 3111 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3112 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3113 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3114 0, 3115 /*134*/0, 3116 0, 3117 0, 3118 0, 3119 0 3120 }; 3121 3122 /* ----------------------------------------------------------------------------------------*/ 3123 3124 EXTERN_C_BEGIN 3125 #undef __FUNCT__ 3126 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3127 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3128 { 3129 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3130 PetscErrorCode ierr; 3131 3132 PetscFunctionBegin; 3133 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3134 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3135 PetscFunctionReturn(0); 3136 } 3137 EXTERN_C_END 3138 3139 EXTERN_C_BEGIN 3140 #undef __FUNCT__ 3141 
#define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3142 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3143 { 3144 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3145 PetscErrorCode ierr; 3146 3147 PetscFunctionBegin; 3148 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3149 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3150 PetscFunctionReturn(0); 3151 } 3152 EXTERN_C_END 3153 3154 EXTERN_C_BEGIN 3155 #undef __FUNCT__ 3156 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3157 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3158 { 3159 Mat_MPIAIJ *b; 3160 PetscErrorCode ierr; 3161 PetscInt i; 3162 3163 PetscFunctionBegin; 3164 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3165 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3166 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3167 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3168 3169 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3170 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3171 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3172 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3173 if (d_nnz) { 3174 for (i=0; i<B->rmap->n; i++) { 3175 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3176 } 3177 } 3178 if (o_nnz) { 3179 for (i=0; i<B->rmap->n; i++) { 3180 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3181 } 3182 } 3183 b = (Mat_MPIAIJ*)B->data; 3184 3185 if (!B->preallocated) { 3186 /* Explicitly create 2 MATSEQAIJ matrices. 
*/ 3187 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3188 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3189 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3190 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3191 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3192 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3193 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3194 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3195 } 3196 3197 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3198 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3199 B->preallocated = PETSC_TRUE; 3200 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3201 PetscFunctionReturn(0); 3202 } 3203 EXTERN_C_END 3204 3205 #undef __FUNCT__ 3206 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3207 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3208 { 3209 Mat mat; 3210 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3211 PetscErrorCode ierr; 3212 3213 PetscFunctionBegin; 3214 *newmat = 0; 3215 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3216 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3217 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3218 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3219 a = (Mat_MPIAIJ*)mat->data; 3220 3221 mat->factortype = matin->factortype; 3222 mat->rmap->bs = matin->rmap->bs; 3223 mat->assembled = PETSC_TRUE; 3224 mat->insertmode = NOT_SET_VALUES; 3225 mat->preallocated = PETSC_TRUE; 3226 3227 a->size = oldmat->size; 3228 a->rank = oldmat->rank; 3229 a->donotstash = oldmat->donotstash; 3230 a->roworiented = oldmat->roworiented; 3231 a->rowindices = 0; 3232 a->rowvalues = 0; 3233 a->getrowactive = PETSC_FALSE; 3234 3235 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3236 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3237 3238 if (oldmat->colmap) { 3239 #if defined (PETSC_USE_CTABLE) 3240 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3241 #else 3242 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3243 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3244 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3245 #endif 3246 } else a->colmap = 0; 3247 if (oldmat->garray) { 3248 PetscInt len; 3249 len = oldmat->B->cmap->n; 3250 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3251 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3252 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3253 } else a->garray = 0; 3254 3255 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3256 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3257 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3258 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3259 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3260 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3261 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3262 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3263 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3264 *newmat = mat; 3265 
PetscFunctionReturn(0); 3266 } 3267 3268 3269 3270 #undef __FUNCT__ 3271 #define __FUNCT__ "MatLoad_MPIAIJ" 3272 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3273 { 3274 PetscScalar *vals,*svals; 3275 MPI_Comm comm = ((PetscObject)viewer)->comm; 3276 PetscErrorCode ierr; 3277 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3278 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3279 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3280 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 3281 PetscInt cend,cstart,n,*rowners,sizesset=1; 3282 int fd; 3283 3284 PetscFunctionBegin; 3285 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3286 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3287 if (!rank) { 3288 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3289 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3290 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3291 } 3292 3293 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3294 3295 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3296 M = header[1]; N = header[2]; 3297 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3298 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3299 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3300 3301 /* If global sizes are set, check if they are consistent with that given in the file */ 3302 if (sizesset) { 3303 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3304 } 3305 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3306 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3307 3308 /* determine ownership of all rows */ 3309 if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */ 3310 else m = newMat->rmap->n; /* Set by user */ 3311 3312 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3313 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3314 3315 /* First process needs enough room for process with most rows */ 3316 if (!rank) { 3317 mmax = rowners[1]; 3318 for (i=2; i<size; i++) { 3319 mmax = PetscMax(mmax,rowners[i]); 3320 } 3321 } else mmax = m; 3322 3323 rowners[0] = 0; 3324 for (i=2; i<=size; i++) { 3325 rowners[i] += rowners[i-1]; 3326 } 3327 rstart = rowners[rank]; 3328 rend = rowners[rank+1]; 3329 3330 /* distribute row lengths to all processors */ 3331 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3332 if (!rank) { 3333 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3334 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3335 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3336 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3337 for (j=0; j<m; j++) { 3338 procsnz[0] += ourlens[j]; 3339 } 3340 for (i=1; i<size; i++) { 3341 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3342 /* calculate the number of nonzeros on each processor */ 3343 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3344 procsnz[i] += rowlengths[j]; 3345 } 3346 ierr = 
MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3347 } 3348 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3349 } else { 3350 ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3351 } 3352 3353 if (!rank) { 3354 /* determine max buffer needed and allocate it */ 3355 maxnz = 0; 3356 for (i=0; i<size; i++) { 3357 maxnz = PetscMax(maxnz,procsnz[i]); 3358 } 3359 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3360 3361 /* read in my part of the matrix column indices */ 3362 nz = procsnz[0]; 3363 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3364 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3365 3366 /* read in every one elses and ship off */ 3367 for (i=1; i<size; i++) { 3368 nz = procsnz[i]; 3369 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3370 ierr = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3371 } 3372 ierr = PetscFree(cols);CHKERRQ(ierr); 3373 } else { 3374 /* determine buffer space needed for message */ 3375 nz = 0; 3376 for (i=0; i<m; i++) { 3377 nz += ourlens[i]; 3378 } 3379 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3380 3381 /* receive message of column indices*/ 3382 ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3383 } 3384 3385 /* determine column ownership if matrix is not square */ 3386 if (N != M) { 3387 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3388 else n = newMat->cmap->n; 3389 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3390 cstart = cend - n; 3391 } else { 3392 cstart = rstart; 3393 cend = rend; 3394 n = cend - cstart; 3395 } 3396 3397 /* loop over local rows, determining number of off diagonal entries */ 3398 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3399 jj = 0; 3400 for (i=0; i<m; i++) { 3401 for (j=0; j<ourlens[i]; j++) { 3402 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3403 jj++; 3404 } 3405 } 3406 3407 for (i=0; i<m; i++) { 3408 ourlens[i] -= offlens[i]; 3409 } 3410 if (!sizesset) { 3411 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3412 } 3413 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3414 3415 for (i=0; i<m; i++) { 3416 ourlens[i] += offlens[i]; 3417 } 3418 3419 if (!rank) { 3420 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3421 3422 /* read in my part of the matrix numerical values */ 3423 nz = procsnz[0]; 3424 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3425 3426 /* insert into matrix */ 3427 jj = rstart; 3428 smycols = mycols; 3429 svals = vals; 3430 for (i=0; i<m; i++) { 3431 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3432 smycols += ourlens[i]; 3433 svals += ourlens[i]; 3434 jj++; 3435 } 3436 3437 /* read in other processors and ship out */ 3438 for (i=1; i<size; i++) { 3439 nz = procsnz[i]; 3440 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3441 ierr = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3442 } 3443 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3444 } else { 3445 /* receive numeric values */ 3446 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3447 3448 /* receive message of values*/ 3449 ierr = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3450 3451 /* insert into matrix */ 3452 jj = rstart; 3453 smycols = mycols; 3454 svals = vals; 3455 for (i=0; i<m; i++) { 3456 ierr = 
MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3457 smycols += ourlens[i]; 3458 svals += ourlens[i]; 3459 jj++; 3460 } 3461 } 3462 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3463 ierr = PetscFree(vals);CHKERRQ(ierr); 3464 ierr = PetscFree(mycols);CHKERRQ(ierr); 3465 ierr = PetscFree(rowners);CHKERRQ(ierr); 3466 3467 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3468 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3469 PetscFunctionReturn(0); 3470 } 3471 3472 #undef __FUNCT__ 3473 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3474 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3475 { 3476 PetscErrorCode ierr; 3477 IS iscol_local; 3478 PetscInt csize; 3479 3480 PetscFunctionBegin; 3481 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3482 if (call == MAT_REUSE_MATRIX) { 3483 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3484 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3485 } else { 3486 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3487 } 3488 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3489 if (call == MAT_INITIAL_MATRIX) { 3490 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3491 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3492 } 3493 PetscFunctionReturn(0); 3494 } 3495 3496 #undef __FUNCT__ 3497 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3498 /* 3499 Not great since it makes two copies of the submatrix, first an SeqAIJ 3500 in local and then by concatenating the local matrices the end result. 3501 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3502 3503 Note: This requires a sequential iscol with all indices. 
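   For reference, the wrapper MatGetSubMatrix_MPIAIJ() above satisfies this requirement by gathering the parallel column IS before calling in here; the relevant calls (taken verbatim from that wrapper) are roughly:
       ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
       ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);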
3504 */ 3505 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3506 { 3507 PetscErrorCode ierr; 3508 PetscMPIInt rank,size; 3509 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3510 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3511 Mat *local,M,Mreuse; 3512 MatScalar *vwork,*aa; 3513 MPI_Comm comm = ((PetscObject)mat)->comm; 3514 Mat_SeqAIJ *aij; 3515 3516 3517 PetscFunctionBegin; 3518 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3519 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3520 3521 if (call == MAT_REUSE_MATRIX) { 3522 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3523 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3524 local = &Mreuse; 3525 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3526 } else { 3527 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3528 Mreuse = *local; 3529 ierr = PetscFree(local);CHKERRQ(ierr); 3530 } 3531 3532 /* 3533 m - number of local rows 3534 n - number of columns (same on all processors) 3535 rstart - first row in new global matrix generated 3536 */ 3537 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3538 if (call == MAT_INITIAL_MATRIX) { 3539 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3540 ii = aij->i; 3541 jj = aij->j; 3542 3543 /* 3544 Determine the number of non-zeros in the diagonal and off-diagonal 3545 portions of the matrix in order to do correct preallocation 3546 */ 3547 3548 /* first get start and end of "diagonal" columns */ 3549 if (csize == PETSC_DECIDE) { 3550 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3551 if (mglobal == n) { /* square matrix */ 3552 nlocal = m; 3553 } else { 3554 nlocal = n/size + ((n % size) > rank); 3555 } 3556 } else { 3557 nlocal = csize; 3558 } 3559 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3560 rstart = rend - nlocal; 3561 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3562 3563 /* next, compute all the lengths */ 3564 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3565 olens = dlens + m; 3566 for (i=0; i<m; i++) { 3567 jend = ii[i+1] - ii[i]; 3568 olen = 0; 3569 dlen = 0; 3570 for (j=0; j<jend; j++) { 3571 if (*jj < rstart || *jj >= rend) olen++; 3572 else dlen++; 3573 jj++; 3574 } 3575 olens[i] = olen; 3576 dlens[i] = dlen; 3577 } 3578 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3579 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3580 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3581 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3582 ierr = PetscFree(dlens);CHKERRQ(ierr); 3583 } else { 3584 PetscInt ml,nl; 3585 3586 M = *newmat; 3587 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3588 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3589 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3590 /* 3591 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3592 rather than the slower MatSetValues(). 
3593 */ 3594 M->was_assembled = PETSC_TRUE; 3595 M->assembled = PETSC_FALSE; 3596 } 3597 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3598 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3599 ii = aij->i; 3600 jj = aij->j; 3601 aa = aij->a; 3602 for (i=0; i<m; i++) { 3603 row = rstart + i; 3604 nz = ii[i+1] - ii[i]; 3605 cwork = jj; jj += nz; 3606 vwork = aa; aa += nz; 3607 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3608 } 3609 3610 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3611 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3612 *newmat = M; 3613 3614 /* save submatrix used in processor for next request */ 3615 if (call == MAT_INITIAL_MATRIX) { 3616 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3617 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3618 } 3619 3620 PetscFunctionReturn(0); 3621 } 3622 3623 EXTERN_C_BEGIN 3624 #undef __FUNCT__ 3625 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3626 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3627 { 3628 PetscInt m,cstart, cend,j,nnz,i,d; 3629 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3630 const PetscInt *JJ; 3631 PetscScalar *values; 3632 PetscErrorCode ierr; 3633 3634 PetscFunctionBegin; 3635 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3636 3637 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3638 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3639 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3640 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3641 m = B->rmap->n; 3642 cstart = B->cmap->rstart; 3643 cend = B->cmap->rend; 3644 rstart = B->rmap->rstart; 3645 3646 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3647 3648 #if defined(PETSC_USE_DEBUG) 3649 for (i=0; i<m; i++) { 3650 nnz = Ii[i+1]- Ii[i]; 3651 JJ = J + Ii[i]; 3652 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3653 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with a negative column index",i); 3654 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3655 } 3656 #endif 3657 3658 for (i=0; i<m; i++) { 3659 nnz = Ii[i+1]- Ii[i]; 3660 JJ = J + Ii[i]; 3661 nnz_max = PetscMax(nnz_max,nnz); 3662 d = 0; 3663 for (j=0; j<nnz; j++) { 3664 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3665 } 3666 d_nnz[i] = d; 3667 o_nnz[i] = nnz - d; 3668 } 3669 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3670 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3671 3672 if (v) values = (PetscScalar*)v; 3673 else { 3674 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3675 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3676 } 3677 3678 for (i=0; i<m; i++) { 3679 ii = i + rstart; 3680 nnz = Ii[i+1]- Ii[i]; 3681 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3682 } 3683 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3684 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3685 3686 if (!v) { 3687 ierr = PetscFree(values);CHKERRQ(ierr); 3688 } 3689 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3690 PetscFunctionReturn(0); 3691 } 3692 EXTERN_C_END 3693 3694 #undef __FUNCT__ 3695 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3696 /*@ 3697 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3698 (the default parallel PETSc format). 3699 3700 Collective on MPI_Comm 3701 3702 Input Parameters: 3703 + B - the matrix 3704 . i - the indices into j for the start of each local row (starts with zero) 3705 . j - the column indices for each local row (starts with zero) 3706 - v - optional values in the matrix 3707 3708 Level: developer 3709 3710 Notes: 3711 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3712 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3713 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3714 3715 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3716 3717 The format which is used for the sparse matrix input, is equivalent to a 3718 row-major ordering.. i.e for the following matrix, the input data expected is 3719 as shown: 3720 3721 1 0 0 3722 2 0 3 P0 3723 ------- 3724 4 5 6 P1 3725 3726 Process0 [P0]: rows_owned=[0,1] 3727 i = {0,1,3} [size = nrow+1 = 2+1] 3728 j = {0,0,2} [size = nz = 6] 3729 v = {1,2,3} [size = nz = 6] 3730 3731 Process1 [P1]: rows_owned=[2] 3732 i = {0,3} [size = nrow+1 = 1+1] 3733 j = {0,1,2} [size = nz = 6] 3734 v = {4,5,6} [size = nz = 6] 3735 3736 .keywords: matrix, aij, compressed row, sparse, parallel 3737 3738 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3739 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3740 @*/ 3741 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3742 { 3743 PetscErrorCode ierr; 3744 3745 PetscFunctionBegin; 3746 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3747 PetscFunctionReturn(0); 3748 } 3749 3750 #undef __FUNCT__ 3751 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3752 /*@C 3753 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3754 (the default parallel PETSc format). For good matrix assembly performance 3755 the user should preallocate the matrix storage by setting the parameters 3756 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3757 performance can be increased by more than a factor of 50. 3758 3759 Collective on MPI_Comm 3760 3761 Input Parameters: 3762 + A - the matrix 3763 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3764 (same value is used for all local rows) 3765 . d_nnz - array containing the number of nonzeros in the various rows of the 3766 DIAGONAL portion of the local submatrix (possibly different for each row) 3767 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3768 The size of this array is equal to the number of local rows, i.e 'm'. 
3769 For matrices that will be factored, you must leave room for (and set) 3770 the diagonal entry even if it is zero. 3771 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3772 submatrix (same value is used for all local rows). 3773 - o_nnz - array containing the number of nonzeros in the various rows of the 3774 OFF-DIAGONAL portion of the local submatrix (possibly different for 3775 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3776 structure. The size of this array is equal to the number 3777 of local rows, i.e 'm'. 3778 3779 If the *_nnz parameter is given then the *_nz parameter is ignored 3780 3781 The AIJ format (also called the Yale sparse matrix format or 3782 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3783 storage. The stored row and column indices begin with zero. 3784 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3785 3786 The parallel matrix is partitioned such that the first m0 rows belong to 3787 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3788 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3789 3790 The DIAGONAL portion of the local submatrix of a processor can be defined 3791 as the submatrix which is obtained by extraction the part corresponding to 3792 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3793 first row that belongs to the processor, r2 is the last row belonging to 3794 the this processor, and c1-c2 is range of indices of the local part of a 3795 vector suitable for applying the matrix to. This is an mxn matrix. In the 3796 common case of a square matrix, the row and column ranges are the same and 3797 the DIAGONAL part is also square. The remaining portion of the local 3798 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3799 3800 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3801 3802 You can call MatGetInfo() to get information on how effective the preallocation was; 3803 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3804 You can also run with the option -info and look for messages with the string 3805 malloc in them to see if additional memory allocation was needed. 3806 3807 Example usage: 3808 3809 Consider the following 8x8 matrix with 34 non-zero values, that is 3810 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3811 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3812 as follows: 3813 3814 .vb 3815 1 2 0 | 0 3 0 | 0 4 3816 Proc0 0 5 6 | 7 0 0 | 8 0 3817 9 0 10 | 11 0 0 | 12 0 3818 ------------------------------------- 3819 13 0 14 | 15 16 17 | 0 0 3820 Proc1 0 18 0 | 19 20 21 | 0 0 3821 0 0 0 | 22 23 0 | 24 0 3822 ------------------------------------- 3823 Proc2 25 26 27 | 0 0 28 | 29 0 3824 30 0 0 | 31 32 33 | 0 34 3825 .ve 3826 3827 This can be represented as a collection of submatrices as: 3828 3829 .vb 3830 A B C 3831 D E F 3832 G H I 3833 .ve 3834 3835 Where the submatrices A,B,C are owned by proc0, D,E,F are 3836 owned by proc1, G,H,I are owned by proc2. 3837 3838 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3839 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3840 The 'M','N' parameters are 8,8, and have the same values on all procs. 3841 3842 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3843 submatrices [A], [E], [I] respectively. 
The OFF-DIAGONAL submatrices 3844 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3845 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3846 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 3847 matrix, and [DF] as another SeqAIJ matrix. 3848 3849 When the d_nz, o_nz parameters are specified, d_nz storage elements are 3850 allocated for every row of the local diagonal submatrix, and o_nz 3851 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3852 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 3853 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3854 In this case, the values of d_nz,o_nz are: 3855 .vb 3856 proc0 : d_nz = 2, o_nz = 2 3857 proc1 : d_nz = 3, o_nz = 2 3858 proc2 : d_nz = 1, o_nz = 4 3859 .ve 3860 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3861 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3862 for proc2, i.e. we are using 12+15+10=37 storage locations to store 3863 34 values. 3864 3865 When the d_nnz, o_nnz parameters are specified, the storage is specified 3866 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 3867 In the above case the values for d_nnz,o_nnz are: 3868 .vb 3869 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3870 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3871 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3872 .ve 3873 Here the space allocated is the sum of all the above values, i.e. 34, and 3874 hence pre-allocation is perfect. 3875 3876 Level: intermediate 3877 3878 .keywords: matrix, aij, compressed row, sparse, parallel 3879 3880 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3881 MPIAIJ, MatGetInfo() 3882 @*/ 3883 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3884 { 3885 PetscErrorCode ierr; 3886 3887 PetscFunctionBegin; 3888 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3889 PetscValidType(B,1); 3890 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3891 PetscFunctionReturn(0); 3892 } 3893 3894 #undef __FUNCT__ 3895 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3896 /*@ 3897 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard 3898 CSR format. 3899 3900 Collective on MPI_Comm 3901 3902 Input Parameters: 3903 + comm - MPI communicator 3904 . m - number of local rows (Cannot be PETSC_DECIDE) 3905 . n - This value should be the same as the local size used in creating the 3906 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have it 3907 calculated if N is given) For square matrices n is almost always m. 3908 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 3909 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 3910 . i - row indices 3911 . j - column indices 3912 - a - matrix values 3913 3914 Output Parameter: 3915 . mat - the matrix 3916 3917 Level: intermediate 3918 3919 Notes: 3920 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3921 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3922 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
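   A minimal call sketch (m, n, i, j, and a stand for the local CSR data illustrated by the example below; error handling abbreviated):
.vb
   Mat A;
   ierr = MatCreateMPIAIJWithArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);CHKERRQ(ierr);
   /* i, j, and a were copied above, so they may now be freed or reused */
.ve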
3923 3924 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3925 3926 The format which is used for the sparse matrix input, is equivalent to a 3927 row-major ordering.. i.e for the following matrix, the input data expected is 3928 as shown: 3929 3930 1 0 0 3931 2 0 3 P0 3932 ------- 3933 4 5 6 P1 3934 3935 Process0 [P0]: rows_owned=[0,1] 3936 i = {0,1,3} [size = nrow+1 = 2+1] 3937 j = {0,0,2} [size = nz = 6] 3938 v = {1,2,3} [size = nz = 6] 3939 3940 Process1 [P1]: rows_owned=[2] 3941 i = {0,3} [size = nrow+1 = 1+1] 3942 j = {0,1,2} [size = nz = 6] 3943 v = {4,5,6} [size = nz = 6] 3944 3945 .keywords: matrix, aij, compressed row, sparse, parallel 3946 3947 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3948 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3949 @*/ 3950 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3951 { 3952 PetscErrorCode ierr; 3953 3954 PetscFunctionBegin; 3955 if (i[0]) { 3956 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3957 } 3958 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3959 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3960 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3961 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3962 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3963 PetscFunctionReturn(0); 3964 } 3965 3966 #undef __FUNCT__ 3967 #define __FUNCT__ "MatCreateMPIAIJ" 3968 /*@C 3969 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3970 (the default parallel PETSc format). For good matrix assembly performance 3971 the user should preallocate the matrix storage by setting the parameters 3972 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3973 performance can be increased by more than a factor of 50. 3974 3975 Collective on MPI_Comm 3976 3977 Input Parameters: 3978 + comm - MPI communicator 3979 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3980 This value should be the same as the local size used in creating the 3981 y vector for the matrix-vector product y = Ax. 3982 . n - This value should be the same as the local size used in creating the 3983 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3984 calculated if N is given) For square matrices n is almost always m. 3985 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3986 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3987 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3988 (same value is used for all local rows) 3989 . d_nnz - array containing the number of nonzeros in the various rows of the 3990 DIAGONAL portion of the local submatrix (possibly different for each row) 3991 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3992 The size of this array is equal to the number of local rows, i.e 'm'. 3993 You must leave room for the diagonal entry even if it is zero. 3994 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3995 submatrix (same value is used for all local rows). 
3996 - o_nnz - array containing the number of nonzeros in the various rows of the 3997 OFF-DIAGONAL portion of the local submatrix (possibly different for 3998 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3999 structure. The size of this array is equal to the number 4000 of local rows, i.e 'm'. 4001 4002 Output Parameter: 4003 . A - the matrix 4004 4005 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4006 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4007 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4008 4009 Notes: 4010 If the *_nnz parameter is given then the *_nz parameter is ignored 4011 4012 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4013 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4014 storage requirements for this matrix. 4015 4016 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4017 processor than it must be used on all processors that share the object for 4018 that argument. 4019 4020 The user MUST specify either the local or global matrix dimensions 4021 (possibly both). 4022 4023 The parallel matrix is partitioned across processors such that the 4024 first m0 rows belong to process 0, the next m1 rows belong to 4025 process 1, the next m2 rows belong to process 2 etc.. where 4026 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4027 values corresponding to [m x N] submatrix. 4028 4029 The columns are logically partitioned with the n0 columns belonging 4030 to 0th partition, the next n1 columns belonging to the next 4031 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4032 4033 The DIAGONAL portion of the local submatrix on any given processor 4034 is the submatrix corresponding to the rows and columns m,n 4035 corresponding to the given processor. i.e diagonal matrix on 4036 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4037 etc. The remaining portion of the local submatrix [m x (N-n)] 4038 constitute the OFF-DIAGONAL portion. The example below better 4039 illustrates this concept. 4040 4041 For a square global matrix we define each processor's diagonal portion 4042 to be its local rows and the corresponding columns (a square submatrix); 4043 each processor's off-diagonal portion encompasses the remainder of the 4044 local matrix (a rectangular submatrix). 4045 4046 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4047 4048 When calling this routine with a single process communicator, a matrix of 4049 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4050 type of communicator, use the construction mechanism: 4051 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4052 4053 By default, this format uses inodes (identical nodes) when possible. 4054 We search for consecutive rows with the same nonzero structure, thereby 4055 reusing matrix information to achieve increased efficiency. 4056 4057 Options Database Keys: 4058 + -mat_no_inode - Do not use inodes 4059 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4060 - -mat_aij_oneindex - Internally use indexing starting at 1 4061 rather than 0. Note that when calling MatSetValues(), 4062 the user still MUST index entries starting at 0! 4063 4064 4065 Example usage: 4066 4067 Consider the following 8x8 matrix with 34 non-zero values, that is 4068 assembled across 3 processors. 
Let's assume that proc0 owns 3 rows, 4069 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown 4070 as follows: 4071 4072 .vb 4073 1 2 0 | 0 3 0 | 0 4 4074 Proc0 0 5 6 | 7 0 0 | 8 0 4075 9 0 10 | 11 0 0 | 12 0 4076 ------------------------------------- 4077 13 0 14 | 15 16 17 | 0 0 4078 Proc1 0 18 0 | 19 20 21 | 0 0 4079 0 0 0 | 22 23 0 | 24 0 4080 ------------------------------------- 4081 Proc2 25 26 27 | 0 0 28 | 29 0 4082 30 0 0 | 31 32 33 | 0 34 4083 .ve 4084 4085 This can be represented as a collection of submatrices as: 4086 4087 .vb 4088 A B C 4089 D E F 4090 G H I 4091 .ve 4092 4093 Where the submatrices A,B,C are owned by proc0, D,E,F are 4094 owned by proc1, and G,H,I are owned by proc2. 4095 4096 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4097 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4098 The 'M','N' parameters are 8,8, and have the same values on all procs. 4099 4100 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4101 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4102 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4103 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4104 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 4105 matrix, and [DF] as another SeqAIJ matrix. 4106 4107 When the d_nz, o_nz parameters are specified, d_nz storage elements are 4108 allocated for every row of the local diagonal submatrix, and o_nz 4109 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4110 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 4111 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4112 In this case, the values of d_nz,o_nz are: 4113 .vb 4114 proc0 : d_nz = 2, o_nz = 2 4115 proc1 : d_nz = 3, o_nz = 2 4116 proc2 : d_nz = 1, o_nz = 4 4117 .ve 4118 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4119 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4120 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4121 34 values. 4122 4123 When the d_nnz, o_nnz parameters are specified, the storage is specified 4124 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 4125 In the above case the values for d_nnz,o_nnz are: 4126 .vb 4127 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4128 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4129 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4130 .ve 4131 Here the space allocated is the sum of all the above values, i.e. 34, and 4132 hence pre-allocation is perfect.
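   As a sketch only, the recommended MatCreate()/MatSetType()/MatMPIAIJSetPreallocation() sequence mentioned above would look roughly as follows for this example (d_nnz and o_nnz are the per-process arrays listed above; error handling omitted):
.vb
   Mat A;
   MatCreate(comm,&A);
   MatSetType(A,MATMPIAIJ);
   MatSetSizes(A,m,n,M,N);                        /* m=n=3 on proc0 and proc1, m=n=2 on proc2, M=N=8 */
   MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve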
4133 4134 Level: intermediate 4135 4136 .keywords: matrix, aij, compressed row, sparse, parallel 4137 4138 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4139 MPIAIJ, MatCreateMPIAIJWithArrays() 4140 @*/ 4141 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4142 { 4143 PetscErrorCode ierr; 4144 PetscMPIInt size; 4145 4146 PetscFunctionBegin; 4147 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4148 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4149 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4150 if (size > 1) { 4151 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4152 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4153 } else { 4154 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4155 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4156 } 4157 PetscFunctionReturn(0); 4158 } 4159 4160 #undef __FUNCT__ 4161 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4162 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 4163 { 4164 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4165 4166 PetscFunctionBegin; 4167 *Ad = a->A; 4168 *Ao = a->B; 4169 *colmap = a->garray; 4170 PetscFunctionReturn(0); 4171 } 4172 4173 #undef __FUNCT__ 4174 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4175 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4176 { 4177 PetscErrorCode ierr; 4178 PetscInt i; 4179 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4180 4181 PetscFunctionBegin; 4182 if (coloring->ctype == IS_COLORING_GLOBAL) { 4183 ISColoringValue *allcolors,*colors; 4184 ISColoring ocoloring; 4185 4186 /* set coloring for diagonal portion */ 4187 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4188 4189 /* set coloring for off-diagonal portion */ 4190 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4191 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4192 for (i=0; i<a->B->cmap->n; i++) { 4193 colors[i] = allcolors[a->garray[i]]; 4194 } 4195 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4196 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4197 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4198 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4199 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4200 ISColoringValue *colors; 4201 PetscInt *larray; 4202 ISColoring ocoloring; 4203 4204 /* set coloring for diagonal portion */ 4205 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4206 for (i=0; i<a->A->cmap->n; i++) { 4207 larray[i] = i + A->cmap->rstart; 4208 } 4209 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4210 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4211 for (i=0; i<a->A->cmap->n; i++) { 4212 colors[i] = coloring->colors[larray[i]]; 4213 } 4214 ierr = PetscFree(larray);CHKERRQ(ierr); 4215 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4216 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4217 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4218 4219 /* set coloring for off-diagonal portion */ 4220 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4221 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4222 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4223 for (i=0; i<a->B->cmap->n; i++) { 4224 colors[i] = coloring->colors[larray[i]]; 4225 } 4226 ierr = PetscFree(larray);CHKERRQ(ierr); 4227 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4228 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4229 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4230 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4231 4232 PetscFunctionReturn(0); 4233 } 4234 4235 #if defined(PETSC_HAVE_ADIC) 4236 #undef __FUNCT__ 4237 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 4238 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 4239 { 4240 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4241 PetscErrorCode ierr; 4242 4243 PetscFunctionBegin; 4244 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 4245 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 4246 PetscFunctionReturn(0); 4247 } 4248 #endif 4249 4250 #undef __FUNCT__ 4251 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4252 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4253 { 4254 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4255 PetscErrorCode ierr; 4256 4257 PetscFunctionBegin; 4258 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4259 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4260 PetscFunctionReturn(0); 4261 } 4262 4263 #undef __FUNCT__ 4264 #define __FUNCT__ "MatMergeSymbolic" 4265 PetscErrorCode MatMergeSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4266 { 4267 PetscErrorCode ierr; 4268 PetscInt m,N,i,rstart,nnz,*dnz,*onz; 4269 PetscInt *indx; 4270 4271 PetscFunctionBegin; 4272 /* This routine will ONLY return MPIAIJ type matrix */ 4273 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4274 if (n == PETSC_DECIDE){ 4275 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4276 } 4277 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4278 rstart -= m; 4279 4280 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4281 for (i=0;i<m;i++) { 4282 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4283 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4284 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4285 } 4286 4287 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4288 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4289 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4290 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4291 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4292 PetscFunctionReturn(0); 4293 } 4294 4295 #undef __FUNCT__ 4296 #define __FUNCT__ "MatMergeNumeric" 4297 PetscErrorCode MatMergeNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4298 { 4299 PetscErrorCode ierr; 4300 PetscInt m,N,i,rstart,nnz,Ii; 4301 PetscInt *indx; 4302 PetscScalar *values; 4303 4304 PetscFunctionBegin; 4305 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4306 ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4307 for (i=0;i<m;i++) { 4308 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4309 Ii = i + rstart; 4310 ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4311 ierr = 
MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4312 } 4313 ierr = MatDestroy(&inmat);CHKERRQ(ierr); 4314 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4315 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4316 PetscFunctionReturn(0); 4317 } 4318 4319 #undef __FUNCT__ 4320 #define __FUNCT__ "MatMerge" 4321 /*@ 4322 MatMerge - Creates a single large PETSc matrix by concatinating sequential 4323 matrices from each processor 4324 4325 Collective on MPI_Comm 4326 4327 Input Parameters: 4328 + comm - the communicators the parallel matrix will live on 4329 . inmat - the input sequential matrices 4330 . n - number of local columns (or PETSC_DECIDE) 4331 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4332 4333 Output Parameter: 4334 . outmat - the parallel matrix generated 4335 4336 Level: advanced 4337 4338 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4339 4340 @*/ 4341 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4342 { 4343 PetscErrorCode ierr; 4344 4345 PetscFunctionBegin; 4346 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4347 if (scall == MAT_INITIAL_MATRIX){ 4348 ierr = MatMergeSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4349 } 4350 ierr = MatMergeNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4351 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4352 PetscFunctionReturn(0); 4353 } 4354 4355 #undef __FUNCT__ 4356 #define __FUNCT__ "MatFileSplit" 4357 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4358 { 4359 PetscErrorCode ierr; 4360 PetscMPIInt rank; 4361 PetscInt m,N,i,rstart,nnz; 4362 size_t len; 4363 const PetscInt *indx; 4364 PetscViewer out; 4365 char *name; 4366 Mat B; 4367 const PetscScalar *values; 4368 4369 PetscFunctionBegin; 4370 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4371 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4372 /* Should this be the type of the diagonal block of A? 
*/ 4373 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4374 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4375 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4376 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4377 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4378 for (i=0;i<m;i++) { 4379 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4380 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4381 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4382 } 4383 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4384 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4385 4386 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4387 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4388 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4389 sprintf(name,"%s.%d",outfile,rank); 4390 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4391 ierr = PetscFree(name); 4392 ierr = MatView(B,out);CHKERRQ(ierr); 4393 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4394 ierr = MatDestroy(&B);CHKERRQ(ierr); 4395 PetscFunctionReturn(0); 4396 } 4397 4398 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4399 #undef __FUNCT__ 4400 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4401 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4402 { 4403 PetscErrorCode ierr; 4404 Mat_Merge_SeqsToMPI *merge; 4405 PetscContainer container; 4406 4407 PetscFunctionBegin; 4408 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4409 if (container) { 4410 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4411 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4412 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4413 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4414 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4415 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4416 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4417 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4418 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4419 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4420 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4421 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4422 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4423 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4424 ierr = PetscFree(merge);CHKERRQ(ierr); 4425 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4426 } 4427 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4428 PetscFunctionReturn(0); 4429 } 4430 4431 #include <../src/mat/utils/freespace.h> 4432 #include <petscbt.h> 4433 4434 #undef __FUNCT__ 4435 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4436 /*@C 4437 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 4438 matrices from each processor 4439 4440 Collective on MPI_Comm 4441 4442 Input Parameters: 4443 + comm - the communicators the parallel matrix will live on 4444 . seqmat - the input sequential matrices 4445 . m - number of local rows (or PETSC_DECIDE) 4446 . n - number of local columns (or PETSC_DECIDE) 4447 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4448 4449 Output Parameter: 4450 . mpimat - the parallel matrix generated 4451 4452 Level: advanced 4453 4454 Notes: 4455 The dimensions of the sequential matrix in each processor MUST be the same. 4456 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4457 destroyed when mpimat is destroyed. 
Call PetscObjectQuery() to access seqmat. 4458 @*/ 4459 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4460 { 4461 PetscErrorCode ierr; 4462 MPI_Comm comm=((PetscObject)mpimat)->comm; 4463 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4464 PetscMPIInt size,rank,taga,*len_s; 4465 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4466 PetscInt proc,m; 4467 PetscInt **buf_ri,**buf_rj; 4468 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4469 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4470 MPI_Request *s_waits,*r_waits; 4471 MPI_Status *status; 4472 MatScalar *aa=a->a; 4473 MatScalar **abuf_r,*ba_i; 4474 Mat_Merge_SeqsToMPI *merge; 4475 PetscContainer container; 4476 4477 PetscFunctionBegin; 4478 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4479 4480 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4481 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4482 4483 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4484 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4485 4486 bi = merge->bi; 4487 bj = merge->bj; 4488 buf_ri = merge->buf_ri; 4489 buf_rj = merge->buf_rj; 4490 4491 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4492 owners = merge->rowmap->range; 4493 len_s = merge->len_s; 4494 4495 /* send and recv matrix values */ 4496 /*-----------------------------*/ 4497 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4498 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4499 4500 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4501 for (proc=0,k=0; proc<size; proc++){ 4502 if (!len_s[proc]) continue; 4503 i = owners[proc]; 4504 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4505 k++; 4506 } 4507 4508 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4509 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4510 ierr = PetscFree(status);CHKERRQ(ierr); 4511 4512 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4513 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4514 4515 /* insert mat values of mpimat */ 4516 /*----------------------------*/ 4517 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4518 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4519 4520 for (k=0; k<merge->nrecv; k++){ 4521 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4522 nrows = *(buf_ri_k[k]); 4523 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4524 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4525 } 4526 4527 /* set values of ba */ 4528 m = merge->rowmap->n; 4529 for (i=0; i<m; i++) { 4530 arow = owners[rank] + i; 4531 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4532 bnzi = bi[i+1] - bi[i]; 4533 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4534 4535 /* add local non-zero vals of this proc's seqmat into ba */ 4536 anzi = ai[arow+1] - ai[arow]; 4537 aj = a->j + ai[arow]; 4538 aa = a->a + ai[arow]; 4539 nextaj = 0; 4540 for (j=0; nextaj<anzi; j++){ 4541 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4542 ba_i[j] += aa[nextaj++]; 4543 } 4544 } 4545 4546 /* add received vals into ba */ 4547 for (k=0; k<merge->nrecv; k++){ /* k-th 
received message */ 4548 /* i-th row */ 4549 if (i == *nextrow[k]) { 4550 anzi = *(nextai[k]+1) - *nextai[k]; 4551 aj = buf_rj[k] + *(nextai[k]); 4552 aa = abuf_r[k] + *(nextai[k]); 4553 nextaj = 0; 4554 for (j=0; nextaj<anzi; j++){ 4555 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4556 ba_i[j] += aa[nextaj++]; 4557 } 4558 } 4559 nextrow[k]++; nextai[k]++; 4560 } 4561 } 4562 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4563 } 4564 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4565 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4566 4567 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4568 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4569 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4570 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4571 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4572 PetscFunctionReturn(0); 4573 } 4574 4575 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4576 4577 #undef __FUNCT__ 4578 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4579 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4580 { 4581 PetscErrorCode ierr; 4582 Mat B_mpi; 4583 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4584 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4585 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4586 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4587 PetscInt len,proc,*dnz,*onz; 4588 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4589 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4590 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4591 MPI_Status *status; 4592 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4593 PetscBT lnkbt; 4594 Mat_Merge_SeqsToMPI *merge; 4595 PetscContainer container; 4596 4597 PetscFunctionBegin; 4598 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4599 4600 /* make sure it is a PETSc comm */ 4601 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4602 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4603 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4604 4605 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4606 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4607 4608 /* determine row ownership */ 4609 /*---------------------------------------------------------*/ 4610 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4611 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4612 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4613 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4614 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4615 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4616 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4617 4618 m = merge->rowmap->n; 4619 M = merge->rowmap->N; 4620 owners = merge->rowmap->range; 4621 4622 /* determine the number of messages to send, their lengths */ 4623 /*---------------------------------------------------------*/ 4624 len_s = merge->len_s; 4625 4626 len = 0; /* length of buf_si[] */ 4627 merge->nsend = 0; 4628 for (proc=0; proc<size; proc++){ 4629 len_si[proc] = 0; 4630 if (proc == rank){ 4631 len_s[proc] = 0; 4632 } else { 4633 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4634 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4635 } 4636 if (len_s[proc]) { 4637 
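      /* this process will send rows to [proc]: besides the aj/aa data of length len_s[proc],
         an i-structure message of 2*(nrows+1) integers (nrows, the nonempty row indices and
         their row offsets) is sent; its layout is documented where buf_si is packed below */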
merge->nsend++; 4638 nrows = 0; 4639 for (i=owners[proc]; i<owners[proc+1]; i++){ 4640 if (ai[i+1] > ai[i]) nrows++; 4641 } 4642 len_si[proc] = 2*(nrows+1); 4643 len += len_si[proc]; 4644 } 4645 } 4646 4647 /* determine the number and length of messages to receive for ij-structure */ 4648 /*-------------------------------------------------------------------------*/ 4649 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4650 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4651 4652 /* post the Irecv of j-structure */ 4653 /*-------------------------------*/ 4654 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4655 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4656 4657 /* post the Isend of j-structure */ 4658 /*--------------------------------*/ 4659 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4660 4661 for (proc=0, k=0; proc<size; proc++){ 4662 if (!len_s[proc]) continue; 4663 i = owners[proc]; 4664 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4665 k++; 4666 } 4667 4668 /* receives and sends of j-structure are complete */ 4669 /*------------------------------------------------*/ 4670 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4671 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4672 4673 /* send and recv i-structure */ 4674 /*---------------------------*/ 4675 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4676 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4677 4678 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4679 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4680 for (proc=0,k=0; proc<size; proc++){ 4681 if (!len_s[proc]) continue; 4682 /* form outgoing message for i-structure: 4683 buf_si[0]: nrows to be sent 4684 [1:nrows]: row index (global) 4685 [nrows+1:2*nrows+1]: i-structure index 4686 */ 4687 /*-------------------------------------------*/ 4688 nrows = len_si[proc]/2 - 1; 4689 buf_si_i = buf_si + nrows+1; 4690 buf_si[0] = nrows; 4691 buf_si_i[0] = 0; 4692 nrows = 0; 4693 for (i=owners[proc]; i<owners[proc+1]; i++){ 4694 anzi = ai[i+1] - ai[i]; 4695 if (anzi) { 4696 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4697 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4698 nrows++; 4699 } 4700 } 4701 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4702 k++; 4703 buf_si += len_si[proc]; 4704 } 4705 4706 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4707 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4708 4709 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4710 for (i=0; i<merge->nrecv; i++){ 4711 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4712 } 4713 4714 ierr = PetscFree(len_si);CHKERRQ(ierr); 4715 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4716 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4717 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4718 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4719 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4720 ierr = 
PetscFree(status);CHKERRQ(ierr); 4721 4722 /* compute a local seq matrix in each processor */ 4723 /*----------------------------------------------*/ 4724 /* allocate bi array and free space for accumulating nonzero column info */ 4725 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4726 bi[0] = 0; 4727 4728 /* create and initialize a linked list */ 4729 nlnk = N+1; 4730 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4731 4732 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4733 len = 0; 4734 len = ai[owners[rank+1]] - ai[owners[rank]]; 4735 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4736 current_space = free_space; 4737 4738 /* determine symbolic info for each local row */ 4739 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4740 4741 for (k=0; k<merge->nrecv; k++){ 4742 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4743 nrows = *buf_ri_k[k]; 4744 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4745 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4746 } 4747 4748 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4749 len = 0; 4750 for (i=0;i<m;i++) { 4751 bnzi = 0; 4752 /* add local non-zero cols of this proc's seqmat into lnk */ 4753 arow = owners[rank] + i; 4754 anzi = ai[arow+1] - ai[arow]; 4755 aj = a->j + ai[arow]; 4756 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4757 bnzi += nlnk; 4758 /* add received col data into lnk */ 4759 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4760 if (i == *nextrow[k]) { /* i-th row */ 4761 anzi = *(nextai[k]+1) - *nextai[k]; 4762 aj = buf_rj[k] + *nextai[k]; 4763 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4764 bnzi += nlnk; 4765 nextrow[k]++; nextai[k]++; 4766 } 4767 } 4768 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4769 4770 /* if free space is not available, make more free space */ 4771 if (current_space->local_remaining<bnzi) { 4772 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4773 nspacedouble++; 4774 } 4775 /* copy data into free space, then initialize lnk */ 4776 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4777 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4778 4779 current_space->array += bnzi; 4780 current_space->local_used += bnzi; 4781 current_space->local_remaining -= bnzi; 4782 4783 bi[i+1] = bi[i] + bnzi; 4784 } 4785 4786 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4787 4788 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4789 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4790 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4791 4792 /* create symbolic parallel matrix B_mpi */ 4793 /*---------------------------------------*/ 4794 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4795 if (n==PETSC_DECIDE) { 4796 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4797 } else { 4798 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4799 } 4800 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4801 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4802 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4803 4804 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4805 
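  /* the merge context built below is stashed in a PetscContainer composed with B_mpi, so that
     MatMerge_SeqsToMPINumeric() and later MAT_REUSE_MATRIX calls can retrieve it;
     MatDestroy_MPIAIJ_SeqsToMPI() frees it when B_mpi is destroyed */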
B_mpi->assembled = PETSC_FALSE;
4806   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4807   merge->bi = bi;
4808   merge->bj = bj;
4809   merge->buf_ri = buf_ri;
4810   merge->buf_rj = buf_rj;
4811   merge->coi = PETSC_NULL;
4812   merge->coj = PETSC_NULL;
4813   merge->owners_co = PETSC_NULL;
4814
4815   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4816
4817   /* attach the supporting struct to B_mpi for reuse */
4818   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4819   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4820   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4821   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4822   *mpimat = B_mpi;
4823
4824   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4825   PetscFunctionReturn(0);
4826 }
4827
4828 #undef __FUNCT__
4829 #define __FUNCT__ "MatMerge_SeqsToMPI"
4830 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4831 {
4832   PetscErrorCode ierr;
4833
4834   PetscFunctionBegin;
4835   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4836   if (scall == MAT_INITIAL_MATRIX){
4837     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4838   }
4839   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4840   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4841   PetscFunctionReturn(0);
4842 }
4843
4844 #undef __FUNCT__
4845 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4846 /*@
4847    MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all of its local rows and putting them into a sequential matrix with
4848    mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4849    with MatGetSize()
4850
4851    Not Collective
4852
4853    Input Parameters:
4854 + A - the matrix
4855 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4856
4857    Output Parameter:
4858 .
A_loc - the local sequential matrix generated 4859 4860 Level: developer 4861 4862 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4863 4864 @*/ 4865 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4866 { 4867 PetscErrorCode ierr; 4868 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4869 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4870 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4871 MatScalar *aa=a->a,*ba=b->a,*cam; 4872 PetscScalar *ca; 4873 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4874 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4875 PetscBool match; 4876 4877 PetscFunctionBegin; 4878 ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4879 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4880 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4881 if (scall == MAT_INITIAL_MATRIX){ 4882 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4883 ci[0] = 0; 4884 for (i=0; i<am; i++){ 4885 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4886 } 4887 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4888 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4889 k = 0; 4890 for (i=0; i<am; i++) { 4891 ncols_o = bi[i+1] - bi[i]; 4892 ncols_d = ai[i+1] - ai[i]; 4893 /* off-diagonal portion of A */ 4894 for (jo=0; jo<ncols_o; jo++) { 4895 col = cmap[*bj]; 4896 if (col >= cstart) break; 4897 cj[k] = col; bj++; 4898 ca[k++] = *ba++; 4899 } 4900 /* diagonal portion of A */ 4901 for (j=0; j<ncols_d; j++) { 4902 cj[k] = cstart + *aj++; 4903 ca[k++] = *aa++; 4904 } 4905 /* off-diagonal portion of A */ 4906 for (j=jo; j<ncols_o; j++) { 4907 cj[k] = cmap[*bj++]; 4908 ca[k++] = *ba++; 4909 } 4910 } 4911 /* put together the new matrix */ 4912 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4913 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4914 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4915 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4916 mat->free_a = PETSC_TRUE; 4917 mat->free_ij = PETSC_TRUE; 4918 mat->nonew = 0; 4919 } else if (scall == MAT_REUSE_MATRIX){ 4920 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4921 ci = mat->i; cj = mat->j; cam = mat->a; 4922 for (i=0; i<am; i++) { 4923 /* off-diagonal portion of A */ 4924 ncols_o = bi[i+1] - bi[i]; 4925 for (jo=0; jo<ncols_o; jo++) { 4926 col = cmap[*bj]; 4927 if (col >= cstart) break; 4928 *cam++ = *ba++; bj++; 4929 } 4930 /* diagonal portion of A */ 4931 ncols_d = ai[i+1] - ai[i]; 4932 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4933 /* off-diagonal portion of A */ 4934 for (j=jo; j<ncols_o; j++) { 4935 *cam++ = *ba++; bj++; 4936 } 4937 } 4938 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4939 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4940 PetscFunctionReturn(0); 4941 } 4942 4943 #undef __FUNCT__ 4944 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 4945 /*@C 4946 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 4947 4948 Not Collective 4949 4950 Input Parameters: 4951 + A - the matrix 4952 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4953 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4954 4955 Output Parameter: 4956 . 
A_loc - the local sequential matrix generated 4957 4958 Level: developer 4959 4960 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 4961 4962 @*/ 4963 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4964 { 4965 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4966 PetscErrorCode ierr; 4967 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4968 IS isrowa,iscola; 4969 Mat *aloc; 4970 PetscBool match; 4971 4972 PetscFunctionBegin; 4973 ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4974 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4975 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4976 if (!row){ 4977 start = A->rmap->rstart; end = A->rmap->rend; 4978 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4979 } else { 4980 isrowa = *row; 4981 } 4982 if (!col){ 4983 start = A->cmap->rstart; 4984 cmap = a->garray; 4985 nzA = a->A->cmap->n; 4986 nzB = a->B->cmap->n; 4987 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4988 ncols = 0; 4989 for (i=0; i<nzB; i++) { 4990 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4991 else break; 4992 } 4993 imark = i; 4994 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4995 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4996 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 4997 } else { 4998 iscola = *col; 4999 } 5000 if (scall != MAT_INITIAL_MATRIX){ 5001 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5002 aloc[0] = *A_loc; 5003 } 5004 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5005 *A_loc = aloc[0]; 5006 ierr = PetscFree(aloc);CHKERRQ(ierr); 5007 if (!row){ 5008 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5009 } 5010 if (!col){ 5011 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5012 } 5013 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5014 PetscFunctionReturn(0); 5015 } 5016 5017 #undef __FUNCT__ 5018 #define __FUNCT__ "MatGetBrowsOfAcols" 5019 /*@C 5020 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5021 5022 Collective on Mat 5023 5024 Input Parameters: 5025 + A,B - the matrices in mpiaij format 5026 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5027 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 5028 5029 Output Parameter: 5030 + rowb, colb - index sets of rows and columns of B to extract 5031 - B_seq - the sequential matrix generated 5032 5033 Level: developer 5034 5035 @*/ 5036 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5037 { 5038 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5039 PetscErrorCode ierr; 5040 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5041 IS isrowb,iscolb; 5042 Mat *bseq=PETSC_NULL; 5043 5044 PetscFunctionBegin; 5045 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5046 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5047 } 5048 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5049 5050 if (scall == MAT_INITIAL_MATRIX){ 5051 start = A->cmap->rstart; 5052 cmap = a->garray; 5053 nzA = a->A->cmap->n; 5054 nzB = a->B->cmap->n; 5055 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5056 ncols = 0; 5057 for (i=0; i<nzB; i++) { /* row < local row index */ 5058 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5059 else break; 5060 } 5061 imark = i; 5062 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5063 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5064 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5065 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5066 } else { 5067 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5068 isrowb = *rowb; iscolb = *colb; 5069 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5070 bseq[0] = *B_seq; 5071 } 5072 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5073 *B_seq = bseq[0]; 5074 ierr = PetscFree(bseq);CHKERRQ(ierr); 5075 if (!rowb){ 5076 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5077 } else { 5078 *rowb = isrowb; 5079 } 5080 if (!colb){ 5081 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5082 } else { 5083 *colb = iscolb; 5084 } 5085 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5086 PetscFunctionReturn(0); 5087 } 5088 5089 #undef __FUNCT__ 5090 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5091 /* 5092 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5093 of the OFF-DIAGONAL portion of local A 5094 5095 Collective on Mat 5096 5097 Input Parameters: 5098 + A,B - the matrices in mpiaij format 5099 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5100 5101 Output Parameter: 5102 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5103 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5104 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 5105 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5106 5107 Level: developer 5108 5109 */ 5110 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5111 { 5112 VecScatter_MPI_General *gen_to,*gen_from; 5113 PetscErrorCode ierr; 5114 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5115 Mat_SeqAIJ *b_oth; 5116 VecScatter ctx=a->Mvctx; 5117 MPI_Comm comm=((PetscObject)ctx)->comm; 5118 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5119 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5120 PetscScalar *rvalues,*svalues; 5121 MatScalar *b_otha,*bufa,*bufA; 5122 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5123 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 5124 MPI_Status *sstatus,rstatus; 5125 PetscMPIInt jj; 5126 PetscInt *cols,sbs,rbs; 5127 PetscScalar *vals; 5128 5129 PetscFunctionBegin; 5130 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5131 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5132 } 5133 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5134 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5135 5136 gen_to = (VecScatter_MPI_General*)ctx->todata; 5137 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5138 rvalues = gen_from->values; /* holds the length of receiving row */ 5139 svalues = gen_to->values; /* holds the length of sending row */ 5140 nrecvs = gen_from->n; 5141 nsends = gen_to->n; 5142 5143 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5144 srow = gen_to->indices; /* local row index to be sent */ 5145 sstarts = gen_to->starts; 5146 sprocs = gen_to->procs; 5147 sstatus = gen_to->sstatus; 5148 sbs = gen_to->bs; 5149 rstarts = gen_from->starts; 5150 rprocs = gen_from->procs; 5151 rbs = gen_from->bs; 5152 5153 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5154 if (scall == MAT_INITIAL_MATRIX){ 5155 /* i-array */ 5156 /*---------*/ 5157 /* post receives */ 5158 for (i=0; i<nrecvs; i++){ 5159 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5160 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5161 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5162 } 5163 5164 /* pack the outgoing message */ 5165 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5166 sstartsj[0] = 0; rstartsj[0] = 0; 5167 len = 0; /* total length of j or a array to be sent */ 5168 k = 0; 5169 for (i=0; i<nsends; i++){ 5170 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5171 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5172 for (j=0; j<nrows; j++) { 5173 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5174 for (l=0; l<sbs; l++){ 5175 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5176 rowlen[j*sbs+l] = ncols; 5177 len += ncols; 5178 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5179 } 5180 k++; 5181 } 5182 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5183 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5184 } 5185 /* 
recvs and sends of i-array are completed */ 5186 i = nrecvs; 5187 while (i--) { 5188 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5189 } 5190 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5191 5192 /* allocate buffers for sending j and a arrays */ 5193 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5194 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5195 5196 /* create i-array of B_oth */ 5197 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5198 b_othi[0] = 0; 5199 len = 0; /* total length of j or a array to be received */ 5200 k = 0; 5201 for (i=0; i<nrecvs; i++){ 5202 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5203 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5204 for (j=0; j<nrows; j++) { 5205 b_othi[k+1] = b_othi[k] + rowlen[j]; 5206 len += rowlen[j]; k++; 5207 } 5208 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5209 } 5210 5211 /* allocate space for j and a arrrays of B_oth */ 5212 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5213 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5214 5215 /* j-array */ 5216 /*---------*/ 5217 /* post receives of j-array */ 5218 for (i=0; i<nrecvs; i++){ 5219 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5220 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5221 } 5222 5223 /* pack the outgoing message j-array */ 5224 k = 0; 5225 for (i=0; i<nsends; i++){ 5226 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5227 bufJ = bufj+sstartsj[i]; 5228 for (j=0; j<nrows; j++) { 5229 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5230 for (ll=0; ll<sbs; ll++){ 5231 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5232 for (l=0; l<ncols; l++){ 5233 *bufJ++ = cols[l]; 5234 } 5235 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5236 } 5237 } 5238 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5239 } 5240 5241 /* recvs and sends of j-array are completed */ 5242 i = nrecvs; 5243 while (i--) { 5244 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5245 } 5246 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5247 } else if (scall == MAT_REUSE_MATRIX){ 5248 sstartsj = *startsj_s; 5249 rstartsj = *startsj_r; 5250 bufa = *bufa_ptr; 5251 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5252 b_otha = b_oth->a; 5253 } else { 5254 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5255 } 5256 5257 /* a-array */ 5258 /*---------*/ 5259 /* post receives of a-array */ 5260 for (i=0; i<nrecvs; i++){ 5261 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5262 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5263 } 5264 5265 /* pack the outgoing message a-array */ 5266 k = 0; 5267 for (i=0; i<nsends; i++){ 5268 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5269 bufA = bufa+sstartsj[i]; 5270 for (j=0; j<nrows; j++) { 5271 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5272 for (ll=0; ll<sbs; ll++){ 5273 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5274 for (l=0; l<ncols; l++){ 5275 *bufA++ = vals[l]; 5276 } 5277 ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5278 } 5279 } 5280 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5281 } 5282 /* recvs and sends of a-array are completed */ 5283 i = nrecvs; 5284 while (i--) { 5285 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5286 } 5287 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5288 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5289 5290 if (scall == MAT_INITIAL_MATRIX){ 5291 /* put together the new matrix */ 5292 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5293 5294 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5295 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5296 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5297 b_oth->free_a = PETSC_TRUE; 5298 b_oth->free_ij = PETSC_TRUE; 5299 b_oth->nonew = 0; 5300 5301 ierr = PetscFree(bufj);CHKERRQ(ierr); 5302 if (!startsj_s || !bufa_ptr){ 5303 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5304 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5305 } else { 5306 *startsj_s = sstartsj; 5307 *startsj_r = rstartsj; 5308 *bufa_ptr = bufa; 5309 } 5310 } 5311 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5312 PetscFunctionReturn(0); 5313 } 5314 5315 #undef __FUNCT__ 5316 #define __FUNCT__ "MatGetCommunicationStructs" 5317 /*@C 5318 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5319 5320 Not Collective 5321 5322 Input Parameters: 5323 . A - The matrix in mpiaij format 5324 5325 Output Parameter: 5326 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5327 . 
colmap - A map from global column index to local index into lvec 5328 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5329 5330 Level: developer 5331 5332 @*/ 5333 #if defined (PETSC_USE_CTABLE) 5334 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5335 #else 5336 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5337 #endif 5338 { 5339 Mat_MPIAIJ *a; 5340 5341 PetscFunctionBegin; 5342 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5343 PetscValidPointer(lvec, 2); 5344 PetscValidPointer(colmap, 3); 5345 PetscValidPointer(multScatter, 4); 5346 a = (Mat_MPIAIJ *) A->data; 5347 if (lvec) *lvec = a->lvec; 5348 if (colmap) *colmap = a->colmap; 5349 if (multScatter) *multScatter = a->Mvctx; 5350 PetscFunctionReturn(0); 5351 } 5352 5353 EXTERN_C_BEGIN 5354 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*); 5355 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*); 5356 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 5357 EXTERN_C_END 5358 5359 #undef __FUNCT__ 5360 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5361 /* 5362 Computes (B'*A')' since computing B*A directly is untenable 5363 5364 n p p 5365 ( ) ( ) ( ) 5366 m ( A ) * n ( B ) = m ( C ) 5367 ( ) ( ) ( ) 5368 5369 */ 5370 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5371 { 5372 PetscErrorCode ierr; 5373 Mat At,Bt,Ct; 5374 5375 PetscFunctionBegin; 5376 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5377 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5378 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5379 ierr = MatDestroy(&At);CHKERRQ(ierr); 5380 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5381 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5382 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5383 PetscFunctionReturn(0); 5384 } 5385 5386 #undef __FUNCT__ 5387 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5388 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5389 { 5390 PetscErrorCode ierr; 5391 PetscInt m=A->rmap->n,n=B->cmap->n; 5392 Mat Cmat; 5393 5394 PetscFunctionBegin; 5395 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5396 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5397 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5398 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5399 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5400 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5401 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5402 *C = Cmat; 5403 (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ; 5404 PetscFunctionReturn(0); 5405 } 5406 5407 /* ----------------------------------------------------------------*/ 5408 #undef __FUNCT__ 5409 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5410 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5411 { 5412 PetscErrorCode ierr; 5413 5414 PetscFunctionBegin; 5415 if (scall == MAT_INITIAL_MATRIX){ 5416 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5417 } 5418 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5419 PetscFunctionReturn(0); 5420 } 5421 5422 EXTERN_C_BEGIN 5423 #if 
defined(PETSC_HAVE_MUMPS) 5424 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5425 #endif 5426 #if defined(PETSC_HAVE_PASTIX) 5427 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5428 #endif 5429 #if defined(PETSC_HAVE_SUPERLU_DIST) 5430 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5431 #endif 5432 #if defined(PETSC_HAVE_SPOOLES) 5433 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5434 #endif 5435 EXTERN_C_END 5436 5437 /*MC 5438 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5439 5440 Options Database Keys: 5441 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5442 5443 Level: beginner 5444 5445 .seealso: MatCreateMPIAIJ() 5446 M*/ 5447 5448 EXTERN_C_BEGIN 5449 #undef __FUNCT__ 5450 #define __FUNCT__ "MatCreate_MPIAIJ" 5451 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5452 { 5453 Mat_MPIAIJ *b; 5454 PetscErrorCode ierr; 5455 PetscMPIInt size; 5456 5457 PetscFunctionBegin; 5458 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5459 5460 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5461 B->data = (void*)b; 5462 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5463 B->rmap->bs = 1; 5464 B->assembled = PETSC_FALSE; 5465 5466 B->insertmode = NOT_SET_VALUES; 5467 b->size = size; 5468 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5469 5470 /* build cache for off array entries formed */ 5471 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5472 b->donotstash = PETSC_FALSE; 5473 b->colmap = 0; 5474 b->garray = 0; 5475 b->roworiented = PETSC_TRUE; 5476 5477 /* stuff used for matrix vector multiply */ 5478 b->lvec = PETSC_NULL; 5479 b->Mvctx = PETSC_NULL; 5480 5481 /* stuff for MatGetRow() */ 5482 b->rowindices = 0; 5483 b->rowvalues = 0; 5484 b->getrowactive = PETSC_FALSE; 5485 5486 #if defined(PETSC_HAVE_SPOOLES) 5487 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5488 "MatGetFactor_mpiaij_spooles", 5489 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5490 #endif 5491 #if defined(PETSC_HAVE_MUMPS) 5492 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5493 "MatGetFactor_aij_mumps", 5494 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5495 #endif 5496 #if defined(PETSC_HAVE_PASTIX) 5497 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5498 "MatGetFactor_mpiaij_pastix", 5499 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5500 #endif 5501 #if defined(PETSC_HAVE_SUPERLU_DIST) 5502 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5503 "MatGetFactor_mpiaij_superlu_dist", 5504 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5505 #endif 5506 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5507 "MatStoreValues_MPIAIJ", 5508 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5509 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5510 "MatRetrieveValues_MPIAIJ", 5511 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5512 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5513 "MatGetDiagonalBlock_MPIAIJ", 5514 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5515 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5516 "MatIsTranspose_MPIAIJ", 5517 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5518 ierr = 
PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5519 "MatMPIAIJSetPreallocation_MPIAIJ", 5520 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5521 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5522 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5523 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5524 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5525 "MatDiagonalScaleLocal_MPIAIJ", 5526 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5527 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5528 "MatConvert_MPIAIJ_MPIAIJPERM", 5529 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5530 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5531 "MatConvert_MPIAIJ_MPIAIJCRL", 5532 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5533 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5534 "MatConvert_MPIAIJ_MPISBAIJ", 5535 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5536 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5537 "MatMatMult_MPIDense_MPIAIJ", 5538 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5539 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5540 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5541 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5542 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5543 "MatMatMultNumeric_MPIDense_MPIAIJ", 5544 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5545 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5546 PetscFunctionReturn(0); 5547 } 5548 EXTERN_C_END 5549 5550 #undef __FUNCT__ 5551 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5552 /*@ 5553 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5554 and "off-diagonal" part of the matrix in CSR format. 5555 5556 Collective on MPI_Comm 5557 5558 Input Parameters: 5559 + comm - MPI communicator 5560 . m - number of local rows (Cannot be PETSC_DECIDE) 5561 . n - This value should be the same as the local size used in creating the 5562 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5563 calculated if N is given) For square matrices n is almost always m. 5564 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5565 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5566 . i - row indices for "diagonal" portion of matrix 5567 . j - column indices 5568 . a - matrix values 5569 . oi - row indices for "off-diagonal" portion of matrix 5570 . oj - column indices 5571 - oa - matrix values 5572 5573 Output Parameter: 5574 . mat - the matrix 5575 5576 Level: advanced 5577 5578 Notes: 5579 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5580 must free the arrays once the matrix has been destroyed and not before. 5581 5582 The i and j indices are 0 based 5583 5584 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5585 5586 This sets local rows and cannot be used to set off-processor values. 5587 5588 You cannot later use MatSetValues() to change values in this matrix. 
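
   Example usage (an illustrative sketch only; the sizes and values below are made up):
   each process contributes one local row and one local column, with a single entry in the
   "diagonal" block and no entries in the "off-diagonal" block.
.vb
      PetscErrorCode ierr;
      PetscInt       i[2]  = {0,1};  PetscInt    j[1]  = {0};  PetscScalar a[1]  = {2.0};
      PetscInt       oi[2] = {0,0};  PetscInt    oj[1] = {0};  PetscScalar oa[1] = {0.0};
      Mat            A;
      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,
                                            i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
      ierr = MatDestroy(&A);CHKERRQ(ierr);
.ve
   The arrays must remain valid until the matrix has been destroyed, as noted above.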
5589 5590 .keywords: matrix, aij, compressed row, sparse, parallel 5591 5592 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5593 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5594 @*/ 5595 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5596 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5597 { 5598 PetscErrorCode ierr; 5599 Mat_MPIAIJ *maij; 5600 5601 PetscFunctionBegin; 5602 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5603 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5604 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5605 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5606 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5607 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5608 maij = (Mat_MPIAIJ*) (*mat)->data; 5609 maij->donotstash = PETSC_TRUE; 5610 (*mat)->preallocated = PETSC_TRUE; 5611 5612 ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5613 ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5614 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5615 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5616 5617 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5618 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5619 5620 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5621 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5622 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5623 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5624 5625 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5626 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5627 PetscFunctionReturn(0); 5628 } 5629 5630 /* 5631 Special version for direct calls from Fortran 5632 */ 5633 #include <private/fortranimpl.h> 5634 5635 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5636 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5637 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5638 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5639 #endif 5640 5641 /* Change these macros so can be used in void function */ 5642 #undef CHKERRQ 5643 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5644 #undef SETERRQ2 5645 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5646 #undef SETERRQ 5647 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5648 5649 EXTERN_C_BEGIN 5650 #undef __FUNCT__ 5651 #define __FUNCT__ "matsetvaluesmpiaij_" 5652 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5653 { 5654 Mat mat = *mmat; 5655 PetscInt m = *mm, n = *mn; 5656 InsertMode addv = *maddv; 5657 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5658 PetscScalar value; 5659 PetscErrorCode ierr; 5660 5661 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5662 if (mat->insertmode == NOT_SET_VALUES) { 5663 mat->insertmode = addv; 5664 } 5665 #if defined(PETSC_USE_DEBUG) 5666 else if (mat->insertmode != addv) { 5667 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5668 } 5669 #endif 5670 { 5671 PetscInt i,j,rstart = 
mat->rmap->rstart,rend = mat->rmap->rend; 5672 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5673 PetscBool roworiented = aij->roworiented; 5674 5675 /* Some Variables required in the macro */ 5676 Mat A = aij->A; 5677 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5678 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5679 MatScalar *aa = a->a; 5680 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5681 Mat B = aij->B; 5682 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5683 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5684 MatScalar *ba = b->a; 5685 5686 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5687 PetscInt nonew = a->nonew; 5688 MatScalar *ap1,*ap2; 5689 5690 PetscFunctionBegin; 5691 for (i=0; i<m; i++) { 5692 if (im[i] < 0) continue; 5693 #if defined(PETSC_USE_DEBUG) 5694 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5695 #endif 5696 if (im[i] >= rstart && im[i] < rend) { 5697 row = im[i] - rstart; 5698 lastcol1 = -1; 5699 rp1 = aj + ai[row]; 5700 ap1 = aa + ai[row]; 5701 rmax1 = aimax[row]; 5702 nrow1 = ailen[row]; 5703 low1 = 0; 5704 high1 = nrow1; 5705 lastcol2 = -1; 5706 rp2 = bj + bi[row]; 5707 ap2 = ba + bi[row]; 5708 rmax2 = bimax[row]; 5709 nrow2 = bilen[row]; 5710 low2 = 0; 5711 high2 = nrow2; 5712 5713 for (j=0; j<n; j++) { 5714 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5715 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5716 if (in[j] >= cstart && in[j] < cend){ 5717 col = in[j] - cstart; 5718 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5719 } else if (in[j] < 0) continue; 5720 #if defined(PETSC_USE_DEBUG) 5721 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5722 #endif 5723 else { 5724 if (mat->was_assembled) { 5725 if (!aij->colmap) { 5726 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5727 } 5728 #if defined (PETSC_USE_CTABLE) 5729 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5730 col--; 5731 #else 5732 col = aij->colmap[in[j]] - 1; 5733 #endif 5734 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5735 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5736 col = in[j]; 5737 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5738 B = aij->B; 5739 b = (Mat_SeqAIJ*)B->data; 5740 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5741 rp2 = bj + bi[row]; 5742 ap2 = ba + bi[row]; 5743 rmax2 = bimax[row]; 5744 nrow2 = bilen[row]; 5745 low2 = 0; 5746 high2 = nrow2; 5747 bm = aij->B->rmap->n; 5748 ba = b->a; 5749 } 5750 } else col = in[j]; 5751 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5752 } 5753 } 5754 } else { 5755 if (!aij->donotstash) { 5756 if (roworiented) { 5757 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5758 } else { 5759 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5760 } 5761 } 5762 } 5763 }} 5764 PetscFunctionReturnVoid(); 5765 } 5766 EXTERN_C_END 5767 5768