#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petscblaslapack.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, and MATAIJCRL; the type also
   automatically switches over to using inodes when enough exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/
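
/*
   A minimal usage sketch for MATAIJ (illustrative only; the sizes and
   preallocation counts below are made-up values, not taken from this file).
   Calling both preallocation routines, as recommended above, keeps the code
   correct for any communicator size:

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);              (used on one process)
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr); (used on several processes)
     ... MatSetValues() ...
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/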
/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

#undef __FUNCT__
#define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
    ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    ok2:;
  }
  ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*)aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]),work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
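
/*
   Note on MatGetColumnNorms_MPIAIJ() above: each process accumulates a partial
   result for every global column in the work array, and a single MPI_Allreduce
   combines them. For NORM_2 the reduction acts on sums of squares, so for
   column j

       norms[j] = sqrt( sum_i |a_ij|^2 )

   with the square root applied only after the MPI_Allreduce. For NORM_1 the
   sums of |a_ij| add directly, and for NORM_INFINITY MPIU_MAX replaces the sum.
*/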
#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}
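
/*
   The dlens/olens split computed in MatDistribute_MPIAIJ() above mirrors the
   d_nz/o_nz arguments of MatMPIAIJSetPreallocation(): an entry in row i counts
   as "diagonal" when its global column lies in [rstart,rend), the block of
   columns owned by this process, and "off-diagonal" otherwise. A made-up
   example: with rows 0-1 on process 0 and rows 2-3 on process 1 of a 4x4
   matrix, a nonzero at (0,1) contributes to dlens[0] on process 0 while a
   nonzero at (0,3) contributes to olens[0].
*/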
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it, it is not scalable (each
  processor has an order N integer array, but access is fast).
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}
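
/*
   A small illustration of the colmap convention established by
   MatCreateColmap_MPIAIJ_Private() above (made-up numbers): if
   garray = {3,7,9}, i.e. the off-diagonal block B has local columns for
   global columns 3, 7 and 9, then without PETSC_USE_CTABLE
   colmap[3] = 1, colmap[7] = 2, colmap[9] = 3 and every other entry is 0,
   so "colmap[col] - 1" recovers the local column and 0 flags an absent
   column. The PetscTable branch stores the same shifted pairs
   (global+1 -> local+1) so that 0 can still mean "not found".
*/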
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
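
/*
   The stash machinery filled by MatSetValues_MPIAIJ() above and drained in
   MatAssemblyBegin/End_MPIAIJ() below implements the usual two-phase
   assembly: values destined for rows owned by another process are cached
   locally and only communicated during assembly. An illustrative call
   sequence (row/column indices and the value are made up):

     ierr = MatSetValues(mat,1,&offProcRow,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);   (scatters the stash)
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);     (inserts received values)
*/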
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are in the same mode, either INSERT_VALUES or ADD_VALUES */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends,row;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb;
#if defined(PETSC_DEBUG)
  PetscBool found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i]      = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  if (A->nooffproczerorows) {
    if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
    nrecvs = nsends;
    nmax   = N;
  } else {
    /* inform other processors of number of messages and max length */
    ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
  }

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }
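
  /*
     A sketch of how the x/b arguments interact with this routine (a
     hypothetical caller-side line, matching the "fix right hand side" block
     above): when both vectors are supplied, each locally zeroed row i gets
     b_i = diag * x_i, so the zeroed equations stay consistent with a
     prescribed solution value:

       ierr = MatZeroRows(A,nrows,rows,diag,x,b);CHKERRQ(ierr);
  */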
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory, calling lots of mallocs and slowing
     things down.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,m;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj,*ii,*ridx;
  PetscScalar       *aa;
#if defined(PETSC_DEBUG)
  PetscBool found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i]      = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);
  /* lrows are the local rows to be zeroed, slen is the number of local rows */

  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);

  /* handle off-diagonal part of matrix */
  ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<slen; i++) {
    bb[lrows[i]] = 1;
  }
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);

  /* remove zeroed rows of off-diagonal matrix */
  ii = aij->i;
  for (i=0; i<slen; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m  = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
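
/*
   MatMult_MPIAIJ() above computes y = A_d x + B_o x_ghost in overlapped form:
   the scatter that fills the ghost vector lvec is begun, the local product
   with the diagonal block is done while messages are in flight, and only then
   is the scatter completed and the off-diagonal product added. Here A_d and
   B_o denote the local diagonal and off-diagonal blocks a->A and a->B.
*/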
#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added into yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)Amat->data,*Bij;
  Mat            Adia = Aij->A,Bdia,Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*)Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
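
/*
   For the transpose products above the communication runs in reverse:
   lvec = B^T x is computed locally and then scattered back with ADD_VALUES
   and SCATTER_REVERSE, so each process accumulates the contributions of its
   ghost columns into the owners' entries of yy (respectively zz). The merged
   branch in MatMultTranspose_MPIAIJ() only reorders the local product around
   the scatter, for the case where the VecScatter combines both message phases
   into the Begin() call.
*/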
/*
   This only works correctly for square matrices where the subblock A->A is the
   diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
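
/*
   Layout written by MatView_MPIAIJ_Binary() below, as implemented in this
   file: a four-entry header {MAT_FILE_CLASSID, M, N, total nz}, followed by
   all row lengths, then all global column indices, then all numerical
   values; each section is streamed row-block by row-block from the other
   processes to process 0 under the PetscViewerFlowControl protocol.
*/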
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt**)&inodes,PETSC_NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
*/ 1501 Mat A; 1502 Mat_SeqAIJ *Aloc; 1503 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1504 MatScalar *a; 1505 1506 if (mat->rmap->N > 1024) { 1507 PetscBool flg = PETSC_FALSE; 1508 1509 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1510 if (!flg) { 1511 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1512 } 1513 } 1514 1515 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1516 if (!rank) { 1517 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1518 } else { 1519 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1520 } 1521 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1522 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1523 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1524 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1525 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1526 1527 /* copy over the A part */ 1528 Aloc = (Mat_SeqAIJ*)aij->A->data; 1529 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1530 row = mat->rmap->rstart; 1531 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1532 for (i=0; i<m; i++) { 1533 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1534 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1535 } 1536 aj = Aloc->j; 1537 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1538 1539 /* copy over the B part */ 1540 Aloc = (Mat_SeqAIJ*)aij->B->data; 1541 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1542 row = mat->rmap->rstart; 1543 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1544 ct = cols; 1545 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1546 for (i=0; i<m; i++) { 1547 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1548 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1549 } 1550 ierr = PetscFree(ct);CHKERRQ(ierr); 1551 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1552 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1553 /* 1554 Everyone has to call to draw the matrix since the graphics waits are 1555 synchronized across all processors that share the PetscDraw object 1556 */ 1557 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1558 if (!rank) { 1559 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1560 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1561 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1562 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1563 } 1564 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1565 ierr = MatDestroy(&A);CHKERRQ(ierr); 1566 } 1567 PetscFunctionReturn(0); 1568 } 1569 1570 #undef __FUNCT__ 1571 #define __FUNCT__ "MatView_MPIAIJ" 1572 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1573 { 1574 PetscErrorCode ierr; 1575 PetscBool iascii,isdraw,issocket,isbinary; 1576 1577 PetscFunctionBegin; 1578 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1579 ierr = 
PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1580 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1581 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1582 if (iascii || isdraw || isbinary || issocket) { 1583 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1584 } else { 1585 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1586 } 1587 PetscFunctionReturn(0); 1588 } 1589 1590 #undef __FUNCT__ 1591 #define __FUNCT__ "MatSOR_MPIAIJ" 1592 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1593 { 1594 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1595 PetscErrorCode ierr; 1596 Vec bb1 = 0; 1597 PetscBool hasop; 1598 1599 PetscFunctionBegin; 1600 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1601 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1602 } 1603 1604 if (flag == SOR_APPLY_UPPER) { 1605 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1606 PetscFunctionReturn(0); 1607 } 1608 1609 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1610 if (flag & SOR_ZERO_INITIAL_GUESS) { 1611 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1612 its--; 1613 } 1614 1615 while (its--) { 1616 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1617 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1618 1619 /* update rhs: bb1 = bb - B*x */ 1620 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1621 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1622 1623 /* local sweep */ 1624 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1625 } 1626 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1627 if (flag & SOR_ZERO_INITIAL_GUESS) { 1628 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1629 its--; 1630 } 1631 while (its--) { 1632 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1633 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1634 1635 /* update rhs: bb1 = bb - B*x */ 1636 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1637 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1638 1639 /* local sweep */ 1640 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1641 } 1642 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1643 if (flag & SOR_ZERO_INITIAL_GUESS) { 1644 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1645 its--; 1646 } 1647 while (its--) { 1648 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1649 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1650 1651 /* update rhs: bb1 = bb - B*x */ 1652 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1653 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1654 1655 /* local sweep */ 1656 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1657 } 1658 } else if (flag & SOR_EISENSTAT) { 1659 Vec xx1; 1660 1661 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1662 ierr = 
(*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1663 1664 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1665 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1666 if (!mat->diag) { 1667 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1668 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1669 } 1670 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1671 if (hasop) { 1672 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1673 } else { 1674 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1675 } 1676 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1677 1678 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1679 1680 /* local sweep */ 1681 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1682 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1683 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1684 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1685 1686 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1687 PetscFunctionReturn(0); 1688 } 1689 1690 #undef __FUNCT__ 1691 #define __FUNCT__ "MatPermute_MPIAIJ" 1692 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1693 { 1694 MPI_Comm comm; 1695 PetscInt first,local_rowsize,local_colsize; 1696 const PetscInt *rows; 1697 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1698 PetscErrorCode ierr; 1699 1700 PetscFunctionBegin; 1701 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1702 /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */ 1703 ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr); 1704 /* collect the global row permutation and invert it */ 1705 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1706 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1707 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 1708 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1709 ierr = ISDestroy(&growp);CHKERRQ(ierr); 1710 /* get the local target indices */ 1711 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1712 ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr); 1713 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1714 ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 1715 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1716 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 1717 /* the column permutation is so much easier; 1718 make a local version of 'colp' and invert it */ 1719 ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr); 1720 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1721 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1722 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 1723 /* now we just get the submatrix */ 1724 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1725 /* clean up */ 1726 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 1727 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 1728 PetscFunctionReturn(0); 1729 } 1730 1731 #undef __FUNCT__ 1732 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1733 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1734 { 1735 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1736 Mat A = mat->A,B = 
mat->B; 1737 PetscErrorCode ierr; 1738 PetscReal isend[5],irecv[5]; 1739 1740 PetscFunctionBegin; 1741 info->block_size = 1.0; 1742 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1743 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1744 isend[3] = info->memory; isend[4] = info->mallocs; 1745 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1746 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1747 isend[3] += info->memory; isend[4] += info->mallocs; 1748 if (flag == MAT_LOCAL) { 1749 info->nz_used = isend[0]; 1750 info->nz_allocated = isend[1]; 1751 info->nz_unneeded = isend[2]; 1752 info->memory = isend[3]; 1753 info->mallocs = isend[4]; 1754 } else if (flag == MAT_GLOBAL_MAX) { 1755 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1756 info->nz_used = irecv[0]; 1757 info->nz_allocated = irecv[1]; 1758 info->nz_unneeded = irecv[2]; 1759 info->memory = irecv[3]; 1760 info->mallocs = irecv[4]; 1761 } else if (flag == MAT_GLOBAL_SUM) { 1762 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1763 info->nz_used = irecv[0]; 1764 info->nz_allocated = irecv[1]; 1765 info->nz_unneeded = irecv[2]; 1766 info->memory = irecv[3]; 1767 info->mallocs = irecv[4]; 1768 } 1769 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1770 info->fill_ratio_needed = 0; 1771 info->factor_mallocs = 0; 1772 1773 PetscFunctionReturn(0); 1774 } 1775 1776 #undef __FUNCT__ 1777 #define __FUNCT__ "MatSetOption_MPIAIJ" 1778 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1779 { 1780 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1781 PetscErrorCode ierr; 1782 1783 PetscFunctionBegin; 1784 switch (op) { 1785 case MAT_NEW_NONZERO_LOCATIONS: 1786 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1787 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1788 case MAT_KEEP_NONZERO_PATTERN: 1789 case MAT_NEW_NONZERO_LOCATION_ERR: 1790 case MAT_USE_INODES: 1791 case MAT_IGNORE_ZERO_ENTRIES: 1792 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1793 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1794 break; 1795 case MAT_ROW_ORIENTED: 1796 a->roworiented = flg; 1797 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1798 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1799 break; 1800 case MAT_NEW_DIAGONALS: 1801 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1802 break; 1803 case MAT_IGNORE_OFF_PROC_ENTRIES: 1804 a->donotstash = flg; 1805 break; 1806 case MAT_SPD: 1807 A->spd_set = PETSC_TRUE; 1808 A->spd = flg; 1809 if (flg) { 1810 A->symmetric = PETSC_TRUE; 1811 A->structurally_symmetric = PETSC_TRUE; 1812 A->symmetric_set = PETSC_TRUE; 1813 A->structurally_symmetric_set = PETSC_TRUE; 1814 } 1815 break; 1816 case MAT_SYMMETRIC: 1817 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1818 break; 1819 case MAT_STRUCTURALLY_SYMMETRIC: 1820 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1821 break; 1822 case MAT_HERMITIAN: 1823 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1824 break; 1825 case MAT_SYMMETRY_ETERNAL: 1826 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1827 break; 1828 default: 1829 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1830 } 1831 PetscFunctionReturn(0); 1832 } 1833 1834 #undef __FUNCT__ 1835 #define __FUNCT__ "MatGetRow_MPIAIJ" 1836 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1837 { 1838 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1839 
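  /*
     A typical caller-side use of the MatGetRow()/MatRestoreRow() pair implemented
     here; a hedged sketch only: 'mat' and 'ierr' are assumed declared by the
     caller, only locally owned rows may be queried, and only one row may be held
     at a time (enforced through the getrowactive flag below):

       PetscInt          rstart,rend,row,ncols;
       const PetscInt    *cols;
       const PetscScalar *vals;
       ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
       for (row=rstart; row<rend; row++) {
         ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
         (inspect ncols, cols[], vals[] here; the arrays belong to the library)
         ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       }
  */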
PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1840 PetscErrorCode ierr; 1841 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1842 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1843 PetscInt *cmap,*idx_p; 1844 1845 PetscFunctionBegin; 1846 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1847 mat->getrowactive = PETSC_TRUE; 1848 1849 if (!mat->rowvalues && (idx || v)) { 1850 /* 1851 allocate enough space to hold information from the longest row. 1852 */ 1853 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1854 PetscInt max = 1,tmp; 1855 for (i=0; i<matin->rmap->n; i++) { 1856 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1857 if (max < tmp) { max = tmp; } 1858 } 1859 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1860 } 1861 1862 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1863 lrow = row - rstart; 1864 1865 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1866 if (!v) {pvA = 0; pvB = 0;} 1867 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1868 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1869 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1870 nztot = nzA + nzB; 1871 1872 cmap = mat->garray; 1873 if (v || idx) { 1874 if (nztot) { 1875 /* Sort by increasing column numbers, assuming A and B already sorted */ 1876 PetscInt imark = -1; 1877 if (v) { 1878 *v = v_p = mat->rowvalues; 1879 for (i=0; i<nzB; i++) { 1880 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1881 else break; 1882 } 1883 imark = i; 1884 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1885 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1886 } 1887 if (idx) { 1888 *idx = idx_p = mat->rowindices; 1889 if (imark > -1) { 1890 for (i=0; i<imark; i++) { 1891 idx_p[i] = cmap[cworkB[i]]; 1892 } 1893 } else { 1894 for (i=0; i<nzB; i++) { 1895 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1896 else break; 1897 } 1898 imark = i; 1899 } 1900 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1901 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1902 } 1903 } else { 1904 if (idx) *idx = 0; 1905 if (v) *v = 0; 1906 } 1907 } 1908 *nz = nztot; 1909 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1910 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1911 PetscFunctionReturn(0); 1912 } 1913 1914 #undef __FUNCT__ 1915 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1916 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1917 { 1918 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1919 1920 PetscFunctionBegin; 1921 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1922 aij->getrowactive = PETSC_FALSE; 1923 PetscFunctionReturn(0); 1924 } 1925 1926 #undef __FUNCT__ 1927 #define __FUNCT__ "MatNorm_MPIAIJ" 1928 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1929 { 1930 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1931 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1932 PetscErrorCode ierr; 1933 PetscInt i,j,cstart = mat->cmap->rstart; 1934 PetscReal sum = 0.0; 1935 MatScalar *v; 1936 1937 PetscFunctionBegin; 1938 if (aij->size == 1) { 1939 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1940 } else { 1941 if (type == 
NORM_FROBENIUS) { 1942 v = amat->a; 1943 for (i=0; i<amat->nz; i++) { 1944 #if defined(PETSC_USE_COMPLEX) 1945 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1946 #else 1947 sum += (*v)*(*v); v++; 1948 #endif 1949 } 1950 v = bmat->a; 1951 for (i=0; i<bmat->nz; i++) { 1952 #if defined(PETSC_USE_COMPLEX) 1953 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1954 #else 1955 sum += (*v)*(*v); v++; 1956 #endif 1957 } 1958 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1959 *norm = PetscSqrtReal(*norm); 1960 } else if (type == NORM_1) { /* max column norm */ 1961 PetscReal *tmp,*tmp2; 1962 PetscInt *jj,*garray = aij->garray; 1963 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1964 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1965 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1966 *norm = 0.0; 1967 v = amat->a; jj = amat->j; 1968 for (j=0; j<amat->nz; j++) { 1969 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1970 } 1971 v = bmat->a; jj = bmat->j; 1972 for (j=0; j<bmat->nz; j++) { 1973 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1974 } 1975 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1976 for (j=0; j<mat->cmap->N; j++) { 1977 if (tmp2[j] > *norm) *norm = tmp2[j]; 1978 } 1979 ierr = PetscFree(tmp);CHKERRQ(ierr); 1980 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1981 } else if (type == NORM_INFINITY) { /* max row norm */ 1982 PetscReal ntemp = 0.0; 1983 for (j=0; j<aij->A->rmap->n; j++) { 1984 v = amat->a + amat->i[j]; 1985 sum = 0.0; 1986 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1987 sum += PetscAbsScalar(*v); v++; 1988 } 1989 v = bmat->a + bmat->i[j]; 1990 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1991 sum += PetscAbsScalar(*v); v++; 1992 } 1993 if (sum > ntemp) ntemp = sum; 1994 } 1995 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1996 } else { 1997 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 1998 } 1999 } 2000 PetscFunctionReturn(0); 2001 } 2002 2003 #undef __FUNCT__ 2004 #define __FUNCT__ "MatTranspose_MPIAIJ" 2005 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2006 { 2007 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2008 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2009 PetscErrorCode ierr; 2010 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 2011 PetscInt cstart=A->cmap->rstart,ncol; 2012 Mat B; 2013 MatScalar *array; 2014 2015 PetscFunctionBegin; 2016 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2017 2018 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 2019 ai = Aloc->i; aj = Aloc->j; 2020 bi = Bloc->i; bj = Bloc->j; 2021 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2022 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 2023 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2024 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 2025 for (i=0; i<ai[ma]; i++){ 2026 d_nnz[aj[i]] ++; 2027 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2028 } 2029 2030 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2031 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2032 ierr = 
MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2033 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 2034 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2035 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2036 } else { 2037 B = *matout; 2038 } 2039 2040 /* copy over the A part */ 2041 array = Aloc->a; 2042 row = A->rmap->rstart; 2043 for (i=0; i<ma; i++) { 2044 ncol = ai[i+1]-ai[i]; 2045 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2046 row++; array += ncol; aj += ncol; 2047 } 2048 aj = Aloc->j; 2049 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2050 2051 /* copy over the B part */ 2052 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2053 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2054 array = Bloc->a; 2055 row = A->rmap->rstart; 2056 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 2057 cols_tmp = cols; 2058 for (i=0; i<mb; i++) { 2059 ncol = bi[i+1]-bi[i]; 2060 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2061 row++; array += ncol; cols_tmp += ncol; 2062 } 2063 ierr = PetscFree(cols);CHKERRQ(ierr); 2064 2065 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2066 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2067 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2068 *matout = B; 2069 } else { 2070 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2071 } 2072 PetscFunctionReturn(0); 2073 } 2074 2075 #undef __FUNCT__ 2076 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2077 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2078 { 2079 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2080 Mat a = aij->A,b = aij->B; 2081 PetscErrorCode ierr; 2082 PetscInt s1,s2,s3; 2083 2084 PetscFunctionBegin; 2085 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2086 if (rr) { 2087 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2088 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2089 /* Overlap communication with computation. 
*/ 2090 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2091 } 2092 if (ll) { 2093 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2094 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2095 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2096 } 2097 /* scale the diagonal block */ 2098 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2099 2100 if (rr) { 2101 /* Do a scatter end and then right scale the off-diagonal block */ 2102 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2103 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2104 } 2105 2106 PetscFunctionReturn(0); 2107 } 2108 2109 #undef __FUNCT__ 2110 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2111 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2112 { 2113 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2114 PetscErrorCode ierr; 2115 2116 PetscFunctionBegin; 2117 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2118 PetscFunctionReturn(0); 2119 } 2120 2121 #undef __FUNCT__ 2122 #define __FUNCT__ "MatEqual_MPIAIJ" 2123 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2124 { 2125 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2126 Mat a,b,c,d; 2127 PetscBool flg; 2128 PetscErrorCode ierr; 2129 2130 PetscFunctionBegin; 2131 a = matA->A; b = matA->B; 2132 c = matB->A; d = matB->B; 2133 2134 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2135 if (flg) { 2136 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2137 } 2138 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2139 PetscFunctionReturn(0); 2140 } 2141 2142 #undef __FUNCT__ 2143 #define __FUNCT__ "MatCopy_MPIAIJ" 2144 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2145 { 2146 PetscErrorCode ierr; 2147 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2148 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2149 2150 PetscFunctionBegin; 2151 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2152 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2153 /* because of the column compression in the off-processor part of the matrix a->B, 2154 the number of columns in a->B and b->B may be different, hence we cannot call 2155 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2156 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2157 then copying the submatrices */ 2158 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2159 } else { 2160 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2161 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2162 } 2163 PetscFunctionReturn(0); 2164 } 2165 2166 #undef __FUNCT__ 2167 #define __FUNCT__ "MatSetUp_MPIAIJ" 2168 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2169 { 2170 PetscErrorCode ierr; 2171 2172 PetscFunctionBegin; 2173 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2174 PetscFunctionReturn(0); 2175 } 2176 2177 #undef __FUNCT__ 2178 #define __FUNCT__ "MatAXPY_MPIAIJ" 2179 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2180 { 2181 PetscErrorCode ierr; 2182 PetscInt i; 2183 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2184 PetscBLASInt bnz,one=1; 2185 Mat_SeqAIJ *x,*y; 2186 2187 PetscFunctionBegin; 2188 if (str == SAME_NONZERO_PATTERN) { 2189 PetscScalar alpha = a; 2190 x = (Mat_SeqAIJ *)xx->A->data; 2191 y = (Mat_SeqAIJ *)yy->A->data; 2192 bnz = PetscBLASIntCast(x->nz); 2193 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2194 x = (Mat_SeqAIJ *)xx->B->data; 2195 y = (Mat_SeqAIJ *)yy->B->data; 2196 bnz = PetscBLASIntCast(x->nz); 2197 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2198 } else if (str == SUBSET_NONZERO_PATTERN) { 2199 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2200 2201 x = (Mat_SeqAIJ *)xx->B->data; 2202 y = (Mat_SeqAIJ *)yy->B->data; 2203 if (y->xtoy && y->XtoY != xx->B) { 2204 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2205 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2206 } 2207 if (!y->xtoy) { /* get xtoy */ 2208 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2209 y->XtoY = xx->B; 2210 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2211 } 2212 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2213 } else { 2214 Mat B; 2215 PetscInt *nnz_d,*nnz_o; 2216 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2217 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2218 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2219 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2220 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2221 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2222 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2223 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr); 2224 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2225 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2226 ierr = MatHeaderReplace(Y,B); 2227 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2228 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2229 } 2230 PetscFunctionReturn(0); 2231 } 2232 2233 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2234 2235 #undef __FUNCT__ 2236 #define __FUNCT__ "MatConjugate_MPIAIJ" 2237 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2238 { 2239 #if defined(PETSC_USE_COMPLEX) 2240 PetscErrorCode ierr; 2241 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2242 2243 PetscFunctionBegin; 2244 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2245 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2246 #else 2247 PetscFunctionBegin; 2248 #endif 2249 PetscFunctionReturn(0); 2250 } 2251 2252 #undef __FUNCT__ 2253 
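/*
   Note: MatRealPart_MPIAIJ() and MatImaginaryPart_MPIAIJ() below simply forward to
   the diagonal (A) and off-diagonal (B) sequential blocks, so every locally stored
   entry is overwritten in place. A minimal caller-side sketch, assuming a complex
   PETSc build and an assembled MPIAIJ matrix C created elsewhere (Cim is a
   hypothetical name introduced only for this example):

     ierr = MatDuplicate(C,MAT_COPY_VALUES,&Cim);CHKERRQ(ierr);
     ierr = MatRealPart(C);CHKERRQ(ierr);         (C now holds Re(C))
     ierr = MatImaginaryPart(Cim);CHKERRQ(ierr);  (Cim now holds Im(C))

   The duplicate is taken first because each call destroys the other part.
*/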
#define __FUNCT__ "MatRealPart_MPIAIJ" 2254 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2255 { 2256 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2257 PetscErrorCode ierr; 2258 2259 PetscFunctionBegin; 2260 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2261 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2262 PetscFunctionReturn(0); 2263 } 2264 2265 #undef __FUNCT__ 2266 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2267 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2268 { 2269 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2270 PetscErrorCode ierr; 2271 2272 PetscFunctionBegin; 2273 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2274 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2275 PetscFunctionReturn(0); 2276 } 2277 2278 #ifdef PETSC_HAVE_PBGL 2279 2280 #include <boost/parallel/mpi/bsp_process_group.hpp> 2281 #include <boost/graph/distributed/ilu_default_graph.hpp> 2282 #include <boost/graph/distributed/ilu_0_block.hpp> 2283 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2284 #include <boost/graph/distributed/petsc/interface.hpp> 2285 #include <boost/multi_array.hpp> 2286 #include <boost/parallel/distributed_property_map->hpp> 2287 2288 #undef __FUNCT__ 2289 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2290 /* 2291 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2292 */ 2293 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2294 { 2295 namespace petsc = boost::distributed::petsc; 2296 2297 namespace graph_dist = boost::graph::distributed; 2298 using boost::graph::distributed::ilu_default::process_group_type; 2299 using boost::graph::ilu_permuted; 2300 2301 PetscBool row_identity, col_identity; 2302 PetscContainer c; 2303 PetscInt m, n, M, N; 2304 PetscErrorCode ierr; 2305 2306 PetscFunctionBegin; 2307 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2308 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2309 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2310 if (!row_identity || !col_identity) { 2311 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2312 } 2313 2314 process_group_type pg; 2315 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2316 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2317 lgraph_type& level_graph = *lgraph_p; 2318 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2319 2320 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2321 ilu_permuted(level_graph); 2322 2323 /* put together the new matrix */ 2324 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2325 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2326 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2327 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2328 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2329 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2330 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2331 2332 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 2333 ierr = PetscContainerSetPointer(c, lgraph_p); 2334 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2335 ierr = PetscContainerDestroy(&c); 2336 PetscFunctionReturn(0); 2337 } 2338 2339 #undef __FUNCT__ 2340 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2341 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2342 
{ 2343 PetscFunctionBegin; 2344 PetscFunctionReturn(0); 2345 } 2346 2347 #undef __FUNCT__ 2348 #define __FUNCT__ "MatSolve_MPIAIJ" 2349 /* 2350 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2351 */ 2352 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2353 { 2354 namespace graph_dist = boost::graph::distributed; 2355 2356 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2357 lgraph_type* lgraph_p; 2358 PetscContainer c; 2359 PetscErrorCode ierr; 2360 2361 PetscFunctionBegin; 2362 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2363 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2364 ierr = VecCopy(b, x);CHKERRQ(ierr); 2365 2366 PetscScalar* array_x; 2367 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2368 PetscInt sx; 2369 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2370 2371 PetscScalar* array_b; 2372 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2373 PetscInt sb; 2374 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2375 2376 lgraph_type& level_graph = *lgraph_p; 2377 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2378 2379 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2380 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2381 ref_x(array_x, boost::extents[num_vertices(graph)]); 2382 2383 typedef boost::iterator_property_map<array_ref_type::iterator, 2384 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2385 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2386 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2387 2388 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2389 2390 PetscFunctionReturn(0); 2391 } 2392 #endif 2393 2394 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2395 PetscInt nzlocal,nsends,nrecvs; 2396 PetscMPIInt *send_rank,*recv_rank; 2397 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2398 PetscScalar *sbuf_a,**rbuf_a; 2399 PetscErrorCode (*Destroy)(Mat); 2400 } Mat_Redundant; 2401 2402 #undef __FUNCT__ 2403 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2404 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2405 { 2406 PetscErrorCode ierr; 2407 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2408 PetscInt i; 2409 2410 PetscFunctionBegin; 2411 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2412 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2413 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2414 for (i=0; i<redund->nrecvs; i++){ 2415 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2416 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2417 } 2418 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2419 ierr = PetscFree(redund);CHKERRQ(ierr); 2420 PetscFunctionReturn(0); 2421 } 2422 2423 #undef __FUNCT__ 2424 #define __FUNCT__ "MatDestroy_MatRedundant" 2425 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2426 { 2427 PetscErrorCode ierr; 2428 PetscContainer container; 2429 Mat_Redundant *redund=PETSC_NULL; 2430 2431 PetscFunctionBegin; 2432 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2433 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist"); 2434 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2435 A->ops->destroy = redund->Destroy; 2436 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 
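  /* At this point the original destroy routine has been restored and the composed
     container dropped; dropping the container releases the communication buffers
     through PetscContainerDestroy_MatRedundant(). Dispatch to the original destroy
     (if any) so the matrix storage itself is freed exactly once. */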
2437 if (A->ops->destroy) { 2438 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2439 } 2440 PetscFunctionReturn(0); 2441 } 2442 2443 #undef __FUNCT__ 2444 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2445 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2446 { 2447 PetscMPIInt rank,size; 2448 MPI_Comm comm=((PetscObject)mat)->comm; 2449 PetscErrorCode ierr; 2450 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2451 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2452 PetscInt *rowrange=mat->rmap->range; 2453 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2454 Mat A=aij->A,B=aij->B,C=*matredundant; 2455 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2456 PetscScalar *sbuf_a; 2457 PetscInt nzlocal=a->nz+b->nz; 2458 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2459 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2460 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2461 MatScalar *aworkA,*aworkB; 2462 PetscScalar *vals; 2463 PetscMPIInt tag1,tag2,tag3,imdex; 2464 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2465 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2466 MPI_Status recv_status,*send_status; 2467 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2468 PetscInt **rbuf_j=PETSC_NULL; 2469 PetscScalar **rbuf_a=PETSC_NULL; 2470 Mat_Redundant *redund=PETSC_NULL; 2471 PetscContainer container; 2472 2473 PetscFunctionBegin; 2474 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2475 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2476 2477 if (reuse == MAT_REUSE_MATRIX) { 2478 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2479 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2480 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2481 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2482 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2483 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist"); 2484 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2485 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2486 2487 nsends = redund->nsends; 2488 nrecvs = redund->nrecvs; 2489 send_rank = redund->send_rank; 2490 recv_rank = redund->recv_rank; 2491 sbuf_nz = redund->sbuf_nz; 2492 rbuf_nz = redund->rbuf_nz; 2493 sbuf_j = redund->sbuf_j; 2494 sbuf_a = redund->sbuf_a; 2495 rbuf_j = redund->rbuf_j; 2496 rbuf_a = redund->rbuf_a; 2497 } 2498 2499 if (reuse == MAT_INITIAL_MATRIX){ 2500 PetscMPIInt subrank,subsize; 2501 PetscInt nleftover,np_subcomm; 2502 /* get the destination processors' id send_rank, nsends and nrecvs */ 2503 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2504 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2505 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2506 np_subcomm = size/nsubcomm; 2507 nleftover = size - nsubcomm*np_subcomm; 2508 nsends = 0; nrecvs = 0; 2509 for (i=0; i<size; i++){ /* i=rank*/ 2510 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2511 send_rank[nsends] = i; nsends++; 2512 recv_rank[nrecvs++] = i; 2513 } 2514 } 2515 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2516 i = size-nleftover-1; 2517 j = 0; 2518 while (j < nsubcomm - nleftover){ 2519 send_rank[nsends++] = i; 2520 i--; j++; 2521 } 2522 } 2523 2524 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2525 for (i=0; i<nleftover; i++){ 2526 recv_rank[nrecvs++] = size-nleftover+i; 2527 } 2528 } 2529 2530 /* allocate sbuf_j, sbuf_a */ 2531 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2532 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2533 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2534 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2535 2536 /* copy mat's local entries into the buffers */ 2537 if (reuse == MAT_INITIAL_MATRIX){ 2538 rownz_max = 0; 2539 rptr = sbuf_j; 2540 cols = sbuf_j + rend-rstart + 1; 2541 vals = sbuf_a; 2542 rptr[0] = 0; 2543 for (i=0; i<rend-rstart; i++){ 2544 row = i + rstart; 2545 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2546 ncols = nzA + nzB; 2547 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2548 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2549 /* load the column indices for this row into cols */ 2550 lwrite = 0; 2551 for (l=0; l<nzB; l++) { 2552 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2553 vals[lwrite] = aworkB[l]; 2554 cols[lwrite++] = ctmp; 2555 } 2556 } 2557 for (l=0; l<nzA; l++){ 2558 vals[lwrite] = aworkA[l]; 2559 cols[lwrite++] = cstart + cworkA[l]; 2560 } 2561 for (l=0; l<nzB; l++) { 2562 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2563 vals[lwrite] = aworkB[l]; 2564 cols[lwrite++] = ctmp; 2565 } 2566 } 2567 vals += ncols; 2568 cols += ncols; 2569 rptr[i+1] = rptr[i] + ncols; 2570 if (rownz_max < ncols) rownz_max = ncols; 2571 } 2572 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2573 } else { /* only copy matrix values into sbuf_a */ 2574 rptr = sbuf_j; 2575 vals = sbuf_a; 2576 rptr[0] = 0; 2577 for (i=0; i<rend-rstart; i++){ 2578 row = i + rstart; 2579 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2580 ncols = nzA + nzB; 2581 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2582 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2583 lwrite = 0; 2584 for (l=0; l<nzB; l++) { 2585 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2586 } 2587 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2588 for 
(l=0; l<nzB; l++) { 2589 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2590 } 2591 vals += ncols; 2592 rptr[i+1] = rptr[i] + ncols; 2593 } 2594 } /* end of if (reuse == MAT_INITIAL_MATRIX) */ 2595 2596 /* send nzlocal to the others, and receive the others' nzlocal */ 2597 /*--------------------------------------------------*/ 2598 if (reuse == MAT_INITIAL_MATRIX){ 2599 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2600 s_waits2 = s_waits3 + nsends; 2601 s_waits1 = s_waits2 + nsends; 2602 r_waits1 = s_waits1 + nsends; 2603 r_waits2 = r_waits1 + nrecvs; 2604 r_waits3 = r_waits2 + nrecvs; 2605 } else { 2606 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2607 r_waits3 = s_waits3 + nsends; 2608 } 2609 2610 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2611 if (reuse == MAT_INITIAL_MATRIX){ 2612 /* get new tags to keep the communication clean */ 2613 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2614 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2615 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2616 2617 /* post receives of the others' nzlocal */ 2618 for (i=0; i<nrecvs; i++){ 2619 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2620 } 2621 /* send nzlocal to others */ 2622 for (i=0; i<nsends; i++){ 2623 sbuf_nz[i] = nzlocal; 2624 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2625 } 2626 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2627 count = nrecvs; 2628 while (count) { 2629 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2630 recv_rank[imdex] = recv_status.MPI_SOURCE; 2631 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2632 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2633 2634 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of mat->i entries expected from this process */ 2635 rbuf_nz[imdex] += i + 2; 2636 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2637 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2638 count--; 2639 } 2640 /* wait on sends of nzlocal */ 2641 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2642 /* send mat->i,j to the others, and receive theirs */ 2643 /*------------------------------------------------*/ 2644 for (i=0; i<nsends; i++){ 2645 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2646 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2647 } 2648 /* wait on receives of mat->i,j */ 2649 /*------------------------------*/ 2650 count = nrecvs; 2651 while (count) { 2652 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2653 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2654 count--; 2655 } 2656 /* wait on sends of mat->i,j */ 2657 /*---------------------------*/ 2658 if (nsends) { 2659 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2660 } 2661 } /* end of if (reuse == MAT_INITIAL_MATRIX) */ 2662 2663 /* post receives, send and receive mat->a */ 2664 
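  /* As with the index exchange above, the receives are posted before the matching
     sends so no message can arrive unexpected, and MPI_Waitany() drains them in
     whatever order they complete; each completed receive is checked against
     recv_rank[] to confirm it came from the expected process. */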
/*----------------------------------------*/ 2665 for (imdex=0; imdex<nrecvs; imdex++) { 2666 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2667 } 2668 for (i=0; i<nsends; i++){ 2669 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2670 } 2671 count = nrecvs; 2672 while (count) { 2673 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2674 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2675 count--; 2676 } 2677 if (nsends) { 2678 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2679 } 2680 2681 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2682 2683 /* create redundant matrix */ 2684 /*-------------------------*/ 2685 if (reuse == MAT_INITIAL_MATRIX){ 2686 /* compute rownz_max for preallocation */ 2687 for (imdex=0; imdex<nrecvs; imdex++){ 2688 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2689 rptr = rbuf_j[imdex]; 2690 for (i=0; i<j; i++){ 2691 ncols = rptr[i+1] - rptr[i]; 2692 if (rownz_max < ncols) rownz_max = ncols; 2693 } 2694 } 2695 2696 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2697 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2698 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2699 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2700 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2701 } else { 2702 C = *matredundant; 2703 } 2704 2705 /* insert local matrix entries */ 2706 rptr = sbuf_j; 2707 cols = sbuf_j + rend-rstart + 1; 2708 vals = sbuf_a; 2709 for (i=0; i<rend-rstart; i++){ 2710 row = i + rstart; 2711 ncols = rptr[i+1] - rptr[i]; 2712 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2713 vals += ncols; 2714 cols += ncols; 2715 } 2716 /* insert received matrix entries */ 2717 for (imdex=0; imdex<nrecvs; imdex++){ 2718 rstart = rowrange[recv_rank[imdex]]; 2719 rend = rowrange[recv_rank[imdex]+1]; 2720 rptr = rbuf_j[imdex]; 2721 cols = rbuf_j[imdex] + rend-rstart + 1; 2722 vals = rbuf_a[imdex]; 2723 for (i=0; i<rend-rstart; i++){ 2724 row = i + rstart; 2725 ncols = rptr[i+1] - rptr[i]; 2726 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2727 vals += ncols; 2728 cols += ncols; 2729 } 2730 } 2731 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2732 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2733 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2734 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2735 if (reuse == MAT_INITIAL_MATRIX) { 2736 PetscContainer container; 2737 *matredundant = C; 2738 /* create a supporting struct and attach it to C for reuse */ 2739 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2740 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2741 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2742 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2743 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2744 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2745 2746 redund->nzlocal = nzlocal; 2747 redund->nsends = nsends; 2748 redund->nrecvs = nrecvs; 2749 
redund->send_rank = send_rank; 2750 redund->recv_rank = recv_rank; 2751 redund->sbuf_nz = sbuf_nz; 2752 redund->rbuf_nz = rbuf_nz; 2753 redund->sbuf_j = sbuf_j; 2754 redund->sbuf_a = sbuf_a; 2755 redund->rbuf_j = rbuf_j; 2756 redund->rbuf_a = rbuf_a; 2757 2758 redund->Destroy = C->ops->destroy; 2759 C->ops->destroy = MatDestroy_MatRedundant; 2760 } 2761 PetscFunctionReturn(0); 2762 } 2763 2764 #undef __FUNCT__ 2765 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2766 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2767 { 2768 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2769 PetscErrorCode ierr; 2770 PetscInt i,*idxb = 0; 2771 PetscScalar *va,*vb; 2772 Vec vtmp; 2773 2774 PetscFunctionBegin; 2775 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2776 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2777 if (idx) { 2778 for (i=0; i<A->rmap->n; i++) { 2779 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2780 } 2781 } 2782 2783 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2784 if (idx) { 2785 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2786 } 2787 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2788 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2789 2790 for (i=0; i<A->rmap->n; i++){ 2791 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2792 va[i] = vb[i]; 2793 if (idx) idx[i] = a->garray[idxb[i]]; 2794 } 2795 } 2796 2797 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2798 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2799 ierr = PetscFree(idxb);CHKERRQ(ierr); 2800 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2801 PetscFunctionReturn(0); 2802 } 2803 2804 #undef __FUNCT__ 2805 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2806 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2807 { 2808 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2809 PetscErrorCode ierr; 2810 PetscInt i,*idxb = 0; 2811 PetscScalar *va,*vb; 2812 Vec vtmp; 2813 2814 PetscFunctionBegin; 2815 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2816 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2817 if (idx) { 2818 for (i=0; i<A->rmap->n; i++) { 2819 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2820 } 2821 } 2822 2823 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2824 if (idx) { 2825 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2826 } 2827 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2828 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2829 2830 for (i=0; i<A->rmap->n; i++){ 2831 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2832 va[i] = vb[i]; 2833 if (idx) idx[i] = a->garray[idxb[i]]; 2834 } 2835 } 2836 2837 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2838 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2839 ierr = PetscFree(idxb);CHKERRQ(ierr); 2840 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2841 PetscFunctionReturn(0); 2842 } 2843 2844 #undef __FUNCT__ 2845 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2846 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2847 { 2848 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2849 PetscInt n = A->rmap->n; 2850 PetscInt cstart = A->cmap->rstart; 2851 PetscInt *cmap = mat->garray; 2852 PetscInt *diagIdx, *offdiagIdx; 2853 Vec diagV, offdiagV; 2854 PetscScalar *a, *diagA, *offdiagA; 2855 PetscInt r; 2856 PetscErrorCode ierr; 2857 2858 PetscFunctionBegin; 2859 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2860 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2861 ierr = VecCreateSeq(((PetscObject)A)->comm, n, 
&offdiagV);CHKERRQ(ierr); 2862 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2863 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2864 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2865 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2866 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2867 for(r = 0; r < n; ++r) { 2868 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2869 a[r] = diagA[r]; 2870 idx[r] = cstart + diagIdx[r]; 2871 } else { 2872 a[r] = offdiagA[r]; 2873 idx[r] = cmap[offdiagIdx[r]]; 2874 } 2875 } 2876 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2877 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2878 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2879 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2880 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2881 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2882 PetscFunctionReturn(0); 2883 } 2884 2885 #undef __FUNCT__ 2886 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2887 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2888 { 2889 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2890 PetscInt n = A->rmap->n; 2891 PetscInt cstart = A->cmap->rstart; 2892 PetscInt *cmap = mat->garray; 2893 PetscInt *diagIdx, *offdiagIdx; 2894 Vec diagV, offdiagV; 2895 PetscScalar *a, *diagA, *offdiagA; 2896 PetscInt r; 2897 PetscErrorCode ierr; 2898 2899 PetscFunctionBegin; 2900 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2901 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2902 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2903 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2904 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2905 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2906 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2907 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2908 for(r = 0; r < n; ++r) { 2909 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2910 a[r] = diagA[r]; 2911 idx[r] = cstart + diagIdx[r]; 2912 } else { 2913 a[r] = offdiagA[r]; 2914 idx[r] = cmap[offdiagIdx[r]]; 2915 } 2916 } 2917 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2918 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2919 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2920 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2921 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2922 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2923 PetscFunctionReturn(0); 2924 } 2925 2926 #undef __FUNCT__ 2927 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2928 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2929 { 2930 PetscErrorCode ierr; 2931 Mat *dummy; 2932 2933 PetscFunctionBegin; 2934 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2935 *newmat = *dummy; 2936 ierr = PetscFree(dummy);CHKERRQ(ierr); 2937 PetscFunctionReturn(0); 2938 } 2939 2940 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2941 2942 #undef __FUNCT__ 2943 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 2944 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,PetscScalar **values) 2945 { 2946 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2947 PetscErrorCode ierr; 2948 2949 PetscFunctionBegin; 2950 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2951 PetscFunctionReturn(0); 2952 } 2953 2954 2955 /* -------------------------------------------------------------------*/ 2956 static 
struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2957 MatGetRow_MPIAIJ, 2958 MatRestoreRow_MPIAIJ, 2959 MatMult_MPIAIJ, 2960 /* 4*/ MatMultAdd_MPIAIJ, 2961 MatMultTranspose_MPIAIJ, 2962 MatMultTransposeAdd_MPIAIJ, 2963 #ifdef PETSC_HAVE_PBGL 2964 MatSolve_MPIAIJ, 2965 #else 2966 0, 2967 #endif 2968 0, 2969 0, 2970 /*10*/ 0, 2971 0, 2972 0, 2973 MatSOR_MPIAIJ, 2974 MatTranspose_MPIAIJ, 2975 /*15*/ MatGetInfo_MPIAIJ, 2976 MatEqual_MPIAIJ, 2977 MatGetDiagonal_MPIAIJ, 2978 MatDiagonalScale_MPIAIJ, 2979 MatNorm_MPIAIJ, 2980 /*20*/ MatAssemblyBegin_MPIAIJ, 2981 MatAssemblyEnd_MPIAIJ, 2982 MatSetOption_MPIAIJ, 2983 MatZeroEntries_MPIAIJ, 2984 /*24*/ MatZeroRows_MPIAIJ, 2985 0, 2986 #ifdef PETSC_HAVE_PBGL 2987 0, 2988 #else 2989 0, 2990 #endif 2991 0, 2992 0, 2993 /*29*/ MatSetUp_MPIAIJ, 2994 #ifdef PETSC_HAVE_PBGL 2995 0, 2996 #else 2997 0, 2998 #endif 2999 0, 3000 0, 3001 0, 3002 /*34*/ MatDuplicate_MPIAIJ, 3003 0, 3004 0, 3005 0, 3006 0, 3007 /*39*/ MatAXPY_MPIAIJ, 3008 MatGetSubMatrices_MPIAIJ, 3009 MatIncreaseOverlap_MPIAIJ, 3010 MatGetValues_MPIAIJ, 3011 MatCopy_MPIAIJ, 3012 /*44*/ MatGetRowMax_MPIAIJ, 3013 MatScale_MPIAIJ, 3014 0, 3015 0, 3016 MatZeroRowsColumns_MPIAIJ, 3017 /*49*/ 0, 3018 0, 3019 0, 3020 0, 3021 0, 3022 /*54*/ MatFDColoringCreate_MPIAIJ, 3023 0, 3024 MatSetUnfactored_MPIAIJ, 3025 MatPermute_MPIAIJ, 3026 0, 3027 /*59*/ MatGetSubMatrix_MPIAIJ, 3028 MatDestroy_MPIAIJ, 3029 MatView_MPIAIJ, 3030 0, 3031 0, 3032 /*64*/ 0, 3033 0, 3034 0, 3035 0, 3036 0, 3037 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3038 MatGetRowMinAbs_MPIAIJ, 3039 0, 3040 MatSetColoring_MPIAIJ, 3041 #if defined(PETSC_HAVE_ADIC) 3042 MatSetValuesAdic_MPIAIJ, 3043 #else 3044 0, 3045 #endif 3046 MatSetValuesAdifor_MPIAIJ, 3047 /*75*/ MatFDColoringApply_AIJ, 3048 0, 3049 0, 3050 0, 3051 0, 3052 /*80*/ 0, 3053 0, 3054 0, 3055 /*83*/ MatLoad_MPIAIJ, 3056 0, 3057 0, 3058 0, 3059 0, 3060 0, 3061 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3062 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3063 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3064 MatPtAP_Basic, 3065 MatPtAPSymbolic_MPIAIJ, 3066 /*94*/ MatPtAPNumeric_MPIAIJ, 3067 0, 3068 0, 3069 0, 3070 0, 3071 /*99*/ 0, 3072 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3073 MatPtAPNumeric_MPIAIJ_MPIAIJ, 3074 MatConjugate_MPIAIJ, 3075 0, 3076 /*104*/MatSetValuesRow_MPIAIJ, 3077 MatRealPart_MPIAIJ, 3078 MatImaginaryPart_MPIAIJ, 3079 0, 3080 0, 3081 /*109*/0, 3082 MatGetRedundantMatrix_MPIAIJ, 3083 MatGetRowMin_MPIAIJ, 3084 0, 3085 0, 3086 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3087 0, 3088 0, 3089 0, 3090 0, 3091 /*119*/0, 3092 0, 3093 0, 3094 0, 3095 MatGetMultiProcBlock_MPIAIJ, 3096 /*124*/MatFindNonZeroRows_MPIAIJ, 3097 MatGetColumnNorms_MPIAIJ, 3098 MatInvertBlockDiagonal_MPIAIJ, 3099 0, 3100 MatGetSubMatricesParallel_MPIAIJ, 3101 /*129*/0, 3102 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3103 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3104 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3105 0, 3106 /*134*/0, 3107 0, 3108 0, 3109 0, 3110 0 3111 }; 3112 3113 /* ----------------------------------------------------------------------------------------*/ 3114 3115 EXTERN_C_BEGIN 3116 #undef __FUNCT__ 3117 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3118 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3119 { 3120 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3121 PetscErrorCode ierr; 3122 3123 PetscFunctionBegin; 3124 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3125 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3126 PetscFunctionReturn(0); 3127 } 3128 EXTERN_C_END 3129 3130 EXTERN_C_BEGIN 3131 #undef __FUNCT__ 3132 #define __FUNCT__ 
"MatRetrieveValues_MPIAIJ" 3133 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3134 { 3135 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3136 PetscErrorCode ierr; 3137 3138 PetscFunctionBegin; 3139 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3140 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3141 PetscFunctionReturn(0); 3142 } 3143 EXTERN_C_END 3144 3145 EXTERN_C_BEGIN 3146 #undef __FUNCT__ 3147 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3148 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3149 { 3150 Mat_MPIAIJ *b; 3151 PetscErrorCode ierr; 3152 PetscInt i; 3153 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3154 3155 PetscFunctionBegin; 3156 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3157 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3158 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3159 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3160 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3161 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3162 3163 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3164 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3165 if (d_nnz) { 3166 for (i=0; i<B->rmap->n; i++) { 3167 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3168 } 3169 } 3170 if (o_nnz) { 3171 for (i=0; i<B->rmap->n; i++) { 3172 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3173 } 3174 } 3175 b = (Mat_MPIAIJ*)B->data; 3176 3177 if (!B->preallocated) { 3178 /* Explicitly create 2 MATSEQAIJ matrices. */ 3179 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3180 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3181 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3182 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3183 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3184 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3185 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3186 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3187 } 3188 3189 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3190 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3191 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). 
*/ 3192 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3193 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3194 B->preallocated = PETSC_TRUE; 3195 PetscFunctionReturn(0); 3196 } 3197 EXTERN_C_END 3198 3199 #undef __FUNCT__ 3200 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3201 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3202 { 3203 Mat mat; 3204 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3205 PetscErrorCode ierr; 3206 3207 PetscFunctionBegin; 3208 *newmat = 0; 3209 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3210 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3211 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3212 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3213 a = (Mat_MPIAIJ*)mat->data; 3214 3215 mat->factortype = matin->factortype; 3216 mat->rmap->bs = matin->rmap->bs; 3217 mat->assembled = PETSC_TRUE; 3218 mat->insertmode = NOT_SET_VALUES; 3219 mat->preallocated = PETSC_TRUE; 3220 3221 a->size = oldmat->size; 3222 a->rank = oldmat->rank; 3223 a->donotstash = oldmat->donotstash; 3224 a->roworiented = oldmat->roworiented; 3225 a->rowindices = 0; 3226 a->rowvalues = 0; 3227 a->getrowactive = PETSC_FALSE; 3228 3229 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3230 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3231 3232 if (oldmat->colmap) { 3233 #if defined (PETSC_USE_CTABLE) 3234 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3235 #else 3236 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3237 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3238 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3239 #endif 3240 } else a->colmap = 0; 3241 if (oldmat->garray) { 3242 PetscInt len; 3243 len = oldmat->B->cmap->n; 3244 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3245 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3246 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3247 } else a->garray = 0; 3248 3249 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3250 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3251 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3252 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3253 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3254 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3255 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3256 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3257 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3258 *newmat = mat; 3259 PetscFunctionReturn(0); 3260 } 3261 3262 3263 3264 #undef __FUNCT__ 3265 #define __FUNCT__ "MatLoad_MPIAIJ" 3266 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3267 { 3268 PetscScalar *vals,*svals; 3269 MPI_Comm comm = ((PetscObject)viewer)->comm; 3270 PetscErrorCode ierr; 3271 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3272 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3273 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3274 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = 
PETSC_NULL,jj,*mycols,*smycols;
  PetscInt       cend,cstart,n,*rowners,sizesset = 1;
  int            fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
  if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
  if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;

  /* If global sizes are set, check that they are consistent with those given in the file */
  if (sizesset) {
    ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
  }
  if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: matrix in file has (%D) and input matrix has (%D)",M,grows);
  if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: matrix in file has (%D) and input matrix has (%D)",N,gcols);

  /* determine ownership of all rows */
  if (newMat->rmap->n < 0) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */
  else m = newMat->rmap->n;                                  /* set by user */

  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* First process needs enough room for the process with the most rows */
  if (!rank) {
    mmax = rowners[1];
    for (i=2; i<=size; i++) {
      mmax = PetscMax(mmax,rowners[i]);
    }
  } else mmax = m;

  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += ourlens[j];
    }
    for (i=1; i<size; i++) {
      ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<rowners[i+1]-rowners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
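
    /* Note on the file layout being parsed here: a PETSc binary matrix file
       stores a header (classid, M, N, total number of nonzeros), then the M
       row lengths, then all column indices row by row, and finally all
       numerical values row by row; each process's indices and values therefore
       form one contiguous block that process 0 reads and forwards below. */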

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
    else n = newMat->cmap->n;
    ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  if (!sizesset) {
    ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
  }
  ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);

  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' values and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
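
/*
    Example usage of the loader above through the public interface (a minimal
  sketch; the file name "matrix.dat" is illustrative and must contain a matrix
  previously written with MatView() on a binary viewer):

      Mat         A;
      PetscViewer fd;

      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&fd);CHKERRQ(ierr);
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatLoad(A,fd);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(&fd);CHKERRQ(ierr);
*/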

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscInt       csize;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  }
  ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
/*
    Not great since it makes two copies of the submatrix: first a SeqAIJ
  submatrix on each process, and then the final result obtained by
  concatenating the local matrices.  Writing it directly would be much like
  MatGetSubMatrices_MPIAIJ().

    Note: This requires a sequential iscol with all indices.
*/
PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            *local,M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm = ((PetscObject)mat)->comm;
  Mat_SeqAIJ     *aij;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
      rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart,cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscScalar    *values;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);

  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);

#if defined(PETSC_USE_DEBUG)
  for (i=0; i<m; i++) {
    nnz = Ii[i+1] - Ii[i];
    JJ  = J + Ii[i];
    if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
    if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with a negative column index",i);
    if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
  }
#endif

  for (i=0; i<m; i++) {
    nnz     = Ii[i+1] - Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    d       = 0;
    for (j=0; j<nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  if (v) values = (PetscScalar*)v;
  else {
    ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    ii   = i + rstart;
    nnz  = Ii[i+1] - Ii[i];
    ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (!v) {
    ierr = PetscFree(values);CHKERRQ(ierr);
  }
  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
/*@
   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero)
-  v - optional values in the matrix

   Level: developer

   Notes:
       The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of v[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the i indices are indices into the local j array.

       The format used for the sparse matrix input is equivalent to a
     row-major ordering, i.e., for the following matrix, the input data expected is
     as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1 = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1 = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
          MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
           For matrices that will be factored, you must leave room for (and set)
           the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)) is fully compatible with standard Fortran 77
   storage.  The stored row and column indices begin with zero.
   See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square.  The remaining portion of the local
   submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors.  Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows.
 This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively.  The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc.  This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatGetInfo()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithArrays"
/*@
   MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the
   local rows in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
       calculated if N is given).  For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the i indices are indices into the local j array.

       The format used for the sparse matrix input is equivalent to a
     row-major ordering, i.e., for the following matrix, the input data expected is
     as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1 = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1 = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateAIJ"
/*@C
   MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
       calculated if N is given).  For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
   values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition, etc., where n0,n1,n2... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor, i.e., the diagonal submatrix on
   process 0 is [m0 x n0], on process 1 it is [m1 x n1],
   etc.  The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion.  The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.
 If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism:
     MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
.  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!


   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors.  Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows.  This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively.  The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc.  This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence preallocation is perfect.
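
   As an illustration (an example sketch only; each process passes its own
   m, n, d_nnz, and o_nnz taken from the tables above), the 8x8 matrix could be
   created with exact preallocation via
.vb
     ierr = MatCreateAIJ(PETSC_COMM_WORLD,m,n,8,8,0,d_nnz,0,o_nnz,&A);
.ve
   e.g., proc0 passes m=3, n=3, d_nnz={2,2,2}, o_nnz={2,2,2}; since the
   *_nnz arrays are given, the *_nz arguments (here 0) are ignored.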
4122 4123 Level: intermediate 4124 4125 .keywords: matrix, aij, compressed row, sparse, parallel 4126 4127 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4128 MPIAIJ, MatCreateMPIAIJWithArrays() 4129 @*/ 4130 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4131 { 4132 PetscErrorCode ierr; 4133 PetscMPIInt size; 4134 4135 PetscFunctionBegin; 4136 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4137 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4138 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4139 if (size > 1) { 4140 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4141 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4142 } else { 4143 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4144 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4145 } 4146 PetscFunctionReturn(0); 4147 } 4148 4149 #undef __FUNCT__ 4150 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4151 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 4152 { 4153 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4154 4155 PetscFunctionBegin; 4156 *Ad = a->A; 4157 *Ao = a->B; 4158 *colmap = a->garray; 4159 PetscFunctionReturn(0); 4160 } 4161 4162 #undef __FUNCT__ 4163 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4164 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4165 { 4166 PetscErrorCode ierr; 4167 PetscInt i; 4168 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4169 4170 PetscFunctionBegin; 4171 if (coloring->ctype == IS_COLORING_GLOBAL) { 4172 ISColoringValue *allcolors,*colors; 4173 ISColoring ocoloring; 4174 4175 /* set coloring for diagonal portion */ 4176 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4177 4178 /* set coloring for off-diagonal portion */ 4179 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4180 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4181 for (i=0; i<a->B->cmap->n; i++) { 4182 colors[i] = allcolors[a->garray[i]]; 4183 } 4184 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4185 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4186 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4187 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4188 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4189 ISColoringValue *colors; 4190 PetscInt *larray; 4191 ISColoring ocoloring; 4192 4193 /* set coloring for diagonal portion */ 4194 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4195 for (i=0; i<a->A->cmap->n; i++) { 4196 larray[i] = i + A->cmap->rstart; 4197 } 4198 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4199 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4200 for (i=0; i<a->A->cmap->n; i++) { 4201 colors[i] = coloring->colors[larray[i]]; 4202 } 4203 ierr = PetscFree(larray);CHKERRQ(ierr); 4204 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4205 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4206 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4207 4208 /* set coloring for off-diagonal portion */ 4209 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4210 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4211 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4212 for (i=0; i<a->B->cmap->n; i++) { 4213 colors[i] = coloring->colors[larray[i]]; 4214 } 4215 ierr = PetscFree(larray);CHKERRQ(ierr); 4216 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4217 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4218 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4219 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4220 4221 PetscFunctionReturn(0); 4222 } 4223 4224 #if defined(PETSC_HAVE_ADIC) 4225 #undef __FUNCT__ 4226 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 4227 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 4228 { 4229 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4230 PetscErrorCode ierr; 4231 4232 PetscFunctionBegin; 4233 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 4234 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 4235 PetscFunctionReturn(0); 4236 } 4237 #endif 4238 4239 #undef __FUNCT__ 4240 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4241 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4242 { 4243 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4244 PetscErrorCode ierr; 4245 4246 PetscFunctionBegin; 4247 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4248 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4249 PetscFunctionReturn(0); 4250 } 4251 4252 #undef __FUNCT__ 4253 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4254 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4255 { 4256 PetscErrorCode ierr; 4257 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum; 4258 PetscInt *indx; 4259 4260 PetscFunctionBegin; 4261 /* This routine will ONLY return MPIAIJ type matrix */ 4262 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4263 if (n == PETSC_DECIDE){ 4264 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4265 } 4266 /* Check sum(n) = N */ 4267 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4268 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4269 4270 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4271 rstart -= m; 4272 4273 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4274 for (i=0;i<m;i++) { 4275 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4276 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4277 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4278 } 4279 4280 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4281 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4282 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4283 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4284 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4285 PetscFunctionReturn(0); 4286 } 4287 4288 #undef __FUNCT__ 4289 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4290 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4291 { 4292 PetscErrorCode ierr; 4293 PetscInt m,N,i,rstart,nnz,Ii; 4294 PetscInt *indx; 4295 PetscScalar *values; 4296 4297 PetscFunctionBegin; 4298 ierr = 
MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4299 ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4300 for (i=0;i<m;i++) { 4301 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4302 Ii = i + rstart; 4303 ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4304 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4305 } 4306 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4307 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4308 PetscFunctionReturn(0); 4309 } 4310 4311 #undef __FUNCT__ 4312 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4313 /*@ 4314 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4315 matrices from each processor 4316 4317 Collective on MPI_Comm 4318 4319 Input Parameters: 4320 + comm - the communicators the parallel matrix will live on 4321 . inmat - the input sequential matrices 4322 . n - number of local columns (or PETSC_DECIDE) 4323 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4324 4325 Output Parameter: 4326 . outmat - the parallel matrix generated 4327 4328 Level: advanced 4329 4330 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4331 4332 @*/ 4333 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4334 { 4335 PetscErrorCode ierr; 4336 4337 PetscFunctionBegin; 4338 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4339 if (scall == MAT_INITIAL_MATRIX){ 4340 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4341 } 4342 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4343 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4344 PetscFunctionReturn(0); 4345 } 4346 4347 #undef __FUNCT__ 4348 #define __FUNCT__ "MatFileSplit" 4349 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4350 { 4351 PetscErrorCode ierr; 4352 PetscMPIInt rank; 4353 PetscInt m,N,i,rstart,nnz; 4354 size_t len; 4355 const PetscInt *indx; 4356 PetscViewer out; 4357 char *name; 4358 Mat B; 4359 const PetscScalar *values; 4360 4361 PetscFunctionBegin; 4362 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4363 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4364 /* Should this be the type of the diagonal block of A? 
*/ 4365 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4366 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4367 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4368 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4369 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4370 for (i=0;i<m;i++) { 4371 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4372 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4373 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4374 } 4375 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4376 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4377 4378 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4379 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4380 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4381 sprintf(name,"%s.%d",outfile,rank); 4382 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4383 ierr = PetscFree(name); 4384 ierr = MatView(B,out);CHKERRQ(ierr); 4385 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4386 ierr = MatDestroy(&B);CHKERRQ(ierr); 4387 PetscFunctionReturn(0); 4388 } 4389 4390 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4391 #undef __FUNCT__ 4392 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4393 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4394 { 4395 PetscErrorCode ierr; 4396 Mat_Merge_SeqsToMPI *merge; 4397 PetscContainer container; 4398 4399 PetscFunctionBegin; 4400 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4401 if (container) { 4402 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4403 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4404 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4405 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4406 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4407 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4408 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4409 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4410 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4411 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4412 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4413 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4414 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4415 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4416 ierr = PetscFree(merge);CHKERRQ(ierr); 4417 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4418 } 4419 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4420 PetscFunctionReturn(0); 4421 } 4422 4423 #include <../src/mat/utils/freespace.h> 4424 #include <petscbt.h> 4425 4426 #undef __FUNCT__ 4427 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4428 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4429 { 4430 PetscErrorCode ierr; 4431 MPI_Comm comm=((PetscObject)mpimat)->comm; 4432 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4433 PetscMPIInt size,rank,taga,*len_s; 4434 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4435 PetscInt proc,m; 4436 PetscInt **buf_ri,**buf_rj; 4437 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4438 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4439 MPI_Request *s_waits,*r_waits; 4440 MPI_Status *status; 4441 MatScalar *aa=a->a; 4442 MatScalar **abuf_r,*ba_i; 4443 Mat_Merge_SeqsToMPI *merge; 4444 PetscContainer container; 4445 4446 PetscFunctionBegin; 4447 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4448 4449 ierr = 
MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4450 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4451 4452 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4453 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4454 4455 bi = merge->bi; 4456 bj = merge->bj; 4457 buf_ri = merge->buf_ri; 4458 buf_rj = merge->buf_rj; 4459 4460 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4461 owners = merge->rowmap->range; 4462 len_s = merge->len_s; 4463 4464 /* send and recv matrix values */ 4465 /*-----------------------------*/ 4466 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4467 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4468 4469 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4470 for (proc=0,k=0; proc<size; proc++){ 4471 if (!len_s[proc]) continue; 4472 i = owners[proc]; 4473 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4474 k++; 4475 } 4476 4477 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4478 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4479 ierr = PetscFree(status);CHKERRQ(ierr); 4480 4481 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4482 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4483 4484 /* insert mat values of mpimat */ 4485 /*----------------------------*/ 4486 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4487 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4488 4489 for (k=0; k<merge->nrecv; k++){ 4490 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4491 nrows = *(buf_ri_k[k]); 4492 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4493 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4494 } 4495 4496 /* set values of ba */ 4497 m = merge->rowmap->n; 4498 for (i=0; i<m; i++) { 4499 arow = owners[rank] + i; 4500 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4501 bnzi = bi[i+1] - bi[i]; 4502 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4503 4504 /* add local non-zero vals of this proc's seqmat into ba */ 4505 anzi = ai[arow+1] - ai[arow]; 4506 aj = a->j + ai[arow]; 4507 aa = a->a + ai[arow]; 4508 nextaj = 0; 4509 for (j=0; nextaj<anzi; j++){ 4510 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4511 ba_i[j] += aa[nextaj++]; 4512 } 4513 } 4514 4515 /* add received vals into ba */ 4516 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4517 /* i-th row */ 4518 if (i == *nextrow[k]) { 4519 anzi = *(nextai[k]+1) - *nextai[k]; 4520 aj = buf_rj[k] + *(nextai[k]); 4521 aa = abuf_r[k] + *(nextai[k]); 4522 nextaj = 0; 4523 for (j=0; nextaj<anzi; j++){ 4524 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4525 ba_i[j] += aa[nextaj++]; 4526 } 4527 } 4528 nextrow[k]++; nextai[k]++; 4529 } 4530 } 4531 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4532 } 4533 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4534 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4535 4536 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4537 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4538 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4539 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 
extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  PetscErrorCode      ierr;
  Mat                 B_mpi;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt            len,proc,*dnz,*onz;
  PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  PetscBT             lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);

  /* make sure it is a PETSc comm */
  ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);

  /* determine row ownership */
  /*---------------------------------------------------------*/
  ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
  ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);

  m      = merge->rowmap->n;
  M      = merge->rowmap->N;
  owners = merge->rowmap->range;

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s = merge->len_s;

  len = 0;  /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++){
    len_si[proc] = 0;
    if (proc == rank){
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros in rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      for (i=owners[proc]; i<owners[proc+1]; i++){
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len         += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*--------------------------------*/
  ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);

  for (proc=0, k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
  buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++){
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc];         /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}

  ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++){
    ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr  = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;
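  /*
     Each local row of the merged matrix is the union of the (sorted) column
     lists of the corresponding seqmat row and of every received row with the
     same row index. PetscLLAddSorted() accumulates that union in the linked
     list lnk below, and a new FreeSpace chunk is chained on (roughly doubling
     the total) whenever the remaining space cannot hold the next row.
  */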
  /* determine symbolic info for each local row */
  ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0;i<m;i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow  = owners[rank] + i;
    anzi  = ai[arow+1] - ai[arow];
    aj    = a->j + ai[arow];
    ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi  = *(nextai[k]+1) - *nextai[k];
        aj    = buf_rj[k] + *nextai[k];
        ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->coi          = PETSC_NULL;
  merge->coj          = PETSC_NULL;
  merge->owners_co    = PETSC_NULL;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
/*@C
      MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding the sequential
                 matrices from each processor

   Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix on each processor
.    m - number of local rows (or PETSC_DECIDE)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    mpimat - the parallel matrix generated

    Level: advanced

   Notes:
     The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
@*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1){
    ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    if (scall == MAT_INITIAL_MATRIX){
      ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
    } else {
      ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    }
    ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
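/*
   A minimal usage sketch for MatCreateMPIAIJSumSeqAIJ() (error handling and
   the construction of the local matrix seqmat are assumed):

     Mat C;
     ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     ...refill seqmat with new values, same nonzero pattern...
     ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
*/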
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMat"
/*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where mlocal is the local row count obtained with MatGetLocalSize() and n is the global column count obtained
          with MatGetSize()

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()

@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
  PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
  MatScalar      *aa=a->a,*ba=b->a,*cam;
  PetscScalar    *ca;
  PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    ierr  = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++){
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
    k = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX){
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    ci  = mat->i; cj = mat->j; cam = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
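/*
   A minimal usage sketch for MatMPIAIJGetLocalMat() (A is assumed to be an
   assembled MPIAIJ matrix):

     Mat Aloc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
     ...values of A change, same nonzero pattern...
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);CHKERRQ(ierr);
     ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
*/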
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
/*@C
     MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    row, col - index sets of rows and columns to extract (or PETSC_NULL)

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()

@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row){
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col){
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX){
    ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr   = PetscFree(aloc);CHKERRQ(ierr);
  if (!row){
    ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
  }
  if (!col){
    ierr = ISDestroy(&iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
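/*
   A minimal usage sketch for MatMPIAIJGetLocalMatCondensed(); passing
   PETSC_NULL for row and col extracts all local rows and all nonzero columns:

     Mat Aloc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&Aloc);CHKERRQ(ierr);
     ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
*/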
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)

   Output Parameters:
+    rowb, colb - index sets of rows and columns of B to extract
-    B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq=PETSC_NULL;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb  = *rowb; iscolb = *colb;
    ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr   = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb){
    ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb){
    ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
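/*
   Note: MatGetBrowsOfAcols() is a communication kernel for forming products
   such as C = A*B in parallel: each process gathers precisely the rows of B
   that are matched by the nonzero columns of its local part of A, after which
   the multiply can proceed locally on B_seq. On first use pass rowb and colb
   as PETSC_NULL to have the index sets created; keep them and pass
   MAT_REUSE_MATRIX to refresh B_seq when the values of B change.
*/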
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
/*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
    of the OFF-DIAGONAL portion of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameters:
+    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
.    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
.    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
-    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

    Level: developer

*/
PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
{
  VecScatter_MPI_General *gen_to,*gen_from;
  PetscErrorCode         ierr;
  Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ             *b_oth;
  VecScatter             ctx=a->Mvctx;
  MPI_Comm               comm=((PetscObject)ctx)->comm;
  PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
  PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
  PetscScalar            *rvalues,*svalues;
  MatScalar              *b_otha,*bufa,*bufA;
  PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
  MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
  MPI_Status             *sstatus,rstatus;
  PetscMPIInt            jj;
  PetscInt               *cols,sbs,rbs;
  PetscScalar            *vals;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  gen_to   = (VecScatter_MPI_General*)ctx->todata;
  gen_from = (VecScatter_MPI_General*)ctx->fromdata;
  rvalues  = gen_from->values; /* holds the length of receiving row */
  svalues  = gen_to->values;   /* holds the length of sending row */
  nrecvs   = gen_from->n;
  nsends   = gen_to->n;

  ierr    = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
  srow    = gen_to->indices;   /* local row index to be sent */
  sstarts = gen_to->starts;
  sprocs  = gen_to->procs;
  sstatus = gen_to->sstatus;
  sbs     = gen_to->bs;
  rstarts = gen_from->starts;
  rprocs  = gen_from->procs;
  rbs     = gen_from->bs;

  if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
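  /*
     The data exchange below runs in up to three phases: the row lengths
     (i-structure), the column indices (j-structure), and the numerical values
     (a-structure). With MAT_REUSE_MATRIX only the a-structure is exchanged
     again, reusing the message offsets saved in startsj_s/startsj_r and the
     send buffer saved in bufa_ptr.
  */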
  if (scall == MAT_INITIAL_MATRIX){
    /* i-array */
    /*---------*/
    /* post receives */
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
      ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message */
    ierr        = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
    sstartsj[0] = 0; rstartsj[0] = 0;
    len         = 0; /* total length of j or a array to be sent */
    k           = 0;
    for (i=0; i<nsends; i++){
      rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
      nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
      for (j=0; j<nrows; j++) {
        row = srow[k] + B->rmap->range[rank]; /* global row idx */
        for (l=0; l<sbs; l++){
          ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
          rowlen[j*sbs+l] = ncols;
          len += ncols;
          ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
        }
        k++;
      }
      ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
      sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr      = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
    b_othi[0] = 0;
    len       = 0; /* total length of j or a array to be received */
    k         = 0;
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        len += rowlen[j]; k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++){
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    k = 0;
    for (i=0; i<nsends; i++){
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++){
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++){
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX){
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
  }
  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++){
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  k = 0;
  for (i=0; i<nsends; i++){
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++){
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++){
          *bufA++ = vals[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
      }
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj_s || !bufa_ptr){
      ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
    } else {
      *startsj_s = sstartsj;
      *startsj_r = rstartsj;
      *bufa_ptr  = bufa;
    }
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatGetCommunicationStructs"
/*@C
  MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.

  Not Collective

  Input Parameter:
. A - The matrix in mpiaij format

  Output Parameters:
+ lvec - The local vector holding off-process values from the argument to a matrix-vector product
. colmap - A map from global column index to local index into lvec
- multScatter - A scatter from the argument of a matrix-vector product to lvec

  Level: developer

@*/
#if defined (PETSC_USE_CTABLE)
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
  a = (Mat_MPIAIJ *) A->data;
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}
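/*
   A minimal usage sketch for MatGetCommunicationStructs(); note that the type
   of colmap depends on whether PETSc was configured with PETSC_USE_CTABLE:

     Vec        lvec;
     VecScatter ctx;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&ctx);CHKERRQ(ierr);
*/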
EXTERN_C_BEGIN
extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
/*
    Computes (B'*A')' since computing B*A directly is untenable

               n          p           p
             (   )      (   )       (   )
          m  ( A )  * n ( B )  =  m ( C )
             (   )      (   )       (   )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  ierr = MatDestroy(&Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(&Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap->n,n=B->cmap->n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
  ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C = Cmat;
  (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#if defined(PETSC_HAVE_MUMPS)
extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_PASTIX)
extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SPOOLES)
extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
#endif
EXTERN_C_END

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateAIJ()
M*/
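/*
   A minimal creation sketch for a MATMPIAIJ matrix (the global sizes M,N and
   the preallocation counts are placeholders):

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/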
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);

  ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data      = (void*)b;
  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C","MatGetFactor_mpiaij_spooles",MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C","MatGetFactor_aij_mumps",MatGetFactor_aij_mumps);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_PASTIX)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C","MatGetFactor_mpiaij_pastix",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C","MatGetFactor_mpiaij_superlu_dist",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C","MatStoreValues_MPIAIJ",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C","MatRetrieveValues_MPIAIJ",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C","MatGetDiagonalBlock_MPIAIJ",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C","MatIsTranspose_MPIAIJ",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C","MatMPIAIJSetPreallocation_MPIAIJ",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C","MatMPIAIJSetPreallocationCSR_MPIAIJ",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C","MatDiagonalScaleLocal_MPIAIJ",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C","MatConvert_MPIAIJ_MPIAIJPERM",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C","MatConvert_MPIAIJ_MPIAIJCRL",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C","MatConvert_MPIAIJ_MPISBAIJ",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C","MatMatMult_MPIDense_MPIAIJ",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C","MatMatMultSymbolic_MPIDense_MPIAIJ",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C","MatMatMultNumeric_MPIDense_MPIAIJ",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@
     MatCreateMPIAIJWithSplitArrays - creates an MPIAIJ matrix using arrays that contain the "diagonal"
         and "off-diagonal" part of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
       it calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices for "diagonal" portion of matrix
.  j - column indices for "diagonal" portion of matrix
.  a - matrix values for "diagonal" portion of matrix
.  oi - row indices for "off-diagonal" portion of matrix
.  oj - column indices for "off-diagonal" portion of matrix
-  oa - matrix values for "off-diagonal" portion of matrix

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
       must free the arrays once the matrix has been destroyed and not before.

       The i and j indices are 0 based

       See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

       This sets local rows and cannot be used to set off-processor values.

       You cannot later use MatSetValues() to change values in this matrix.
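   Example usage (a minimal sketch; the arrays i,j,a and oi,oj,oa are assumed
   to have been filled as the implementation below suggests, with j holding
   local column indices of the diagonal block and oj holding global column
   indices):
.vb
     Mat mat;
     ierr = MatCreateMPIAIJWithSplitArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);
     ...
     ierr = MatDestroy(&mat);CHKERRQ(ierr);  /* the user still owns i,j,a,oi,oj,oa */
.ve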
.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
                                              PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;
  maij->donotstash     = PETSC_TRUE;
  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran
*/
#include <petsc-private/fortranimpl.h>

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Change these macros so can be used in void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat = *mmat;
  PetscInt       m = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) {
    mat->insertmode = addv;
  }
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  }
#endif
  {
    PetscInt   i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool  roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat        A = aij->A;
    Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
    Mat        B = aij->B;
    Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba = b->a;

    PetscInt   *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt   nonew = a->nonew;
    MatScalar  *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
          if (in[j] >= cstart && in[j] < cend){
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined (PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
                ba    = b->a;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          }
        }
      } else {
        if (!aij->donotstash) {
          if (roworiented) {
            ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
          } else {
            ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
          }
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}
EXTERN_C_END