#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petscblaslapack.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
.  -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, and MATAIJCRL; the type also
   automatically switches over to using inodes when enough of them exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
.  -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

#undef __FUNCT__
#define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
/* Creates an index set of the locally owned rows that contain at least one stored nonzero value;
   returns a NULL index set when no process has a completely zero row. */
PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
    ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    ok2:;
  }
  ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]),work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
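
    A minimal calling sketch (illustrative only; gmat is a fully assembled MATSEQAIJ that is
    only significant on rank 0, m is the number of rows this process should own, and dmat
    receives the distributed matrix):

       Mat dmat;
       ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
       ... change numerical values of gmat on rank 0 ...
       ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);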
160 161 Only for square matrices 162 */ 163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 164 { 165 PetscMPIInt rank,size; 166 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 167 PetscErrorCode ierr; 168 Mat mat; 169 Mat_SeqAIJ *gmata; 170 PetscMPIInt tag; 171 MPI_Status status; 172 PetscBool aij; 173 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 174 175 PetscFunctionBegin; 176 CHKMEMQ; 177 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 178 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 179 if (!rank) { 180 ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 181 if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 182 } 183 if (reuse == MAT_INITIAL_MATRIX) { 184 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 185 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 186 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 187 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 188 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 189 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 190 rowners[0] = 0; 191 for (i=2; i<=size; i++) { 192 rowners[i] += rowners[i-1]; 193 } 194 rstart = rowners[rank]; 195 rend = rowners[rank+1]; 196 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 197 if (!rank) { 198 gmata = (Mat_SeqAIJ*) gmat->data; 199 /* send row lengths to all processors */ 200 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 201 for (i=1; i<size; i++) { 202 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 203 } 204 /* determine number diagonal and off-diagonal counts */ 205 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 206 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 207 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 208 jj = 0; 209 for (i=0; i<m; i++) { 210 for (j=0; j<dlens[i]; j++) { 211 if (gmata->j[jj] < rstart) ld[i]++; 212 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 213 jj++; 214 } 215 } 216 /* send column indices to other processes */ 217 for (i=1; i<size; i++) { 218 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 219 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 220 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 221 } 222 223 /* send numerical values to other processes */ 224 for (i=1; i<size; i++) { 225 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 226 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 227 } 228 gmataa = gmata->a; 229 gmataj = gmata->j; 230 231 } else { 232 /* receive row lengths */ 233 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 234 /* receive column indices */ 235 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 236 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 237 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 238 /* determine number diagonal and off-diagonal counts */ 239 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 240 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 241 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 242 jj = 0; 243 for (i=0; i<m; i++) { 244 for (j=0; j<dlens[i]; j++) { 245 if (gmataj[jj] < rstart) ld[i]++; 246 if (gmataj[jj] < 
rstart || gmataj[jj] >= rend) olens[i]++; 247 jj++; 248 } 249 } 250 /* receive numerical values */ 251 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 252 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 253 } 254 /* set preallocation */ 255 for (i=0; i<m; i++) { 256 dlens[i] -= olens[i]; 257 } 258 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 259 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 260 261 for (i=0; i<m; i++) { 262 dlens[i] += olens[i]; 263 } 264 cnt = 0; 265 for (i=0; i<m; i++) { 266 row = rstart + i; 267 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 268 cnt += dlens[i]; 269 } 270 if (rank) { 271 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 272 } 273 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 274 ierr = PetscFree(rowners);CHKERRQ(ierr); 275 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 276 *inmat = mat; 277 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 278 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 279 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 280 mat = *inmat; 281 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 282 if (!rank) { 283 /* send numerical values to other processes */ 284 gmata = (Mat_SeqAIJ*) gmat->data; 285 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 286 gmataa = gmata->a; 287 for (i=1; i<size; i++) { 288 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 289 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 290 } 291 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 292 } else { 293 /* receive numerical values from process 0*/ 294 nz = Ad->nz + Ao->nz; 295 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 296 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 297 } 298 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 299 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 300 ad = Ad->a; 301 ao = Ao->a; 302 if (mat->rmap->n) { 303 i = 0; 304 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 305 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 306 } 307 for (i=1; i<mat->rmap->n; i++) { 308 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 309 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 310 } 311 i--; 312 if (mat->rmap->n) { 313 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 314 } 315 if (rank) { 316 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 317 } 318 } 319 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 320 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 321 CHKMEMQ; 322 PetscFunctionReturn(0); 323 } 324 325 /* 326 Local utility routine that creates a mapping from the global column 327 number to the local number in the off-diagonal part of the local 328 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 329 a slightly higher hash table cost; without it it is not scalable (each processor 330 has an order N integer array but is fast to acess. 
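
    For example, if garray = {3,7,12} (the global numbers of the three columns that appear in
    the off-diagonal part on this process), then global column 7 maps to local column 1.  Both
    variants store the local index shifted by one (i+1, and in the PetscTable case the key
    garray[i]+1 as well) so that a lookup result of zero can safely mean "this global column
    does not occur in the off-diagonal part".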
331 */ 332 #undef __FUNCT__ 333 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 334 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 335 { 336 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 337 PetscErrorCode ierr; 338 PetscInt n = aij->B->cmap->n,i; 339 340 PetscFunctionBegin; 341 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 342 #if defined (PETSC_USE_CTABLE) 343 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 344 for (i=0; i<n; i++){ 345 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 346 } 347 #else 348 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 349 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 350 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 351 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 352 #endif 353 PetscFunctionReturn(0); 354 } 355 356 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 357 { \ 358 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 359 lastcol1 = col;\ 360 while (high1-low1 > 5) { \ 361 t = (low1+high1)/2; \ 362 if (rp1[t] > col) high1 = t; \ 363 else low1 = t; \ 364 } \ 365 for (_i=low1; _i<high1; _i++) { \ 366 if (rp1[_i] > col) break; \ 367 if (rp1[_i] == col) { \ 368 if (addv == ADD_VALUES) ap1[_i] += value; \ 369 else ap1[_i] = value; \ 370 goto a_noinsert; \ 371 } \ 372 } \ 373 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 374 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 375 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 376 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 377 N = nrow1++ - 1; a->nz++; high1++; \ 378 /* shift up all the later entries in this row */ \ 379 for (ii=N; ii>=_i; ii--) { \ 380 rp1[ii+1] = rp1[ii]; \ 381 ap1[ii+1] = ap1[ii]; \ 382 } \ 383 rp1[_i] = col; \ 384 ap1[_i] = value; \ 385 a_noinsert: ; \ 386 ailen[row] = nrow1; \ 387 } 388 389 390 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 391 { \ 392 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 393 lastcol2 = col;\ 394 while (high2-low2 > 5) { \ 395 t = (low2+high2)/2; \ 396 if (rp2[t] > col) high2 = t; \ 397 else low2 = t; \ 398 } \ 399 for (_i=low2; _i<high2; _i++) { \ 400 if (rp2[_i] > col) break; \ 401 if (rp2[_i] == col) { \ 402 if (addv == ADD_VALUES) ap2[_i] += value; \ 403 else ap2[_i] = value; \ 404 goto b_noinsert; \ 405 } \ 406 } \ 407 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 408 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 409 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 410 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 411 N = nrow2++ - 1; b->nz++; high2++; \ 412 /* shift up all the later entries in this row */ \ 413 for (ii=N; ii>=_i; ii--) { \ 414 rp2[ii+1] = rp2[ii]; \ 415 ap2[ii+1] = ap2[ii]; \ 416 } \ 417 rp2[_i] = col; \ 418 ap2[_i] = value; \ 419 b_noinsert: ; \ 420 bilen[row] = nrow2; \ 421 } 422 423 #undef __FUNCT__ 424 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 425 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 426 { 427 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 428 Mat_SeqAIJ *a = 
(Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 429 PetscErrorCode ierr; 430 PetscInt l,*garray = mat->garray,diag; 431 432 PetscFunctionBegin; 433 /* code only works for square matrices A */ 434 435 /* find size of row to the left of the diagonal part */ 436 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 437 row = row - diag; 438 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 439 if (garray[b->j[b->i[row]+l]] > diag) break; 440 } 441 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 442 443 /* diagonal part */ 444 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 445 446 /* right of diagonal part */ 447 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 448 PetscFunctionReturn(0); 449 } 450 451 #undef __FUNCT__ 452 #define __FUNCT__ "MatSetValues_MPIAIJ" 453 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 454 { 455 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 456 PetscScalar value; 457 PetscErrorCode ierr; 458 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 459 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 460 PetscBool roworiented = aij->roworiented; 461 462 /* Some Variables required in the macro */ 463 Mat A = aij->A; 464 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 465 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 466 MatScalar *aa = a->a; 467 PetscBool ignorezeroentries = a->ignorezeroentries; 468 Mat B = aij->B; 469 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 470 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 471 MatScalar *ba = b->a; 472 473 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 474 PetscInt nonew; 475 MatScalar *ap1,*ap2; 476 477 PetscFunctionBegin; 478 if (v) PetscValidScalarPointer(v,6); 479 for (i=0; i<m; i++) { 480 if (im[i] < 0) continue; 481 #if defined(PETSC_USE_DEBUG) 482 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 483 #endif 484 if (im[i] >= rstart && im[i] < rend) { 485 row = im[i] - rstart; 486 lastcol1 = -1; 487 rp1 = aj + ai[row]; 488 ap1 = aa + ai[row]; 489 rmax1 = aimax[row]; 490 nrow1 = ailen[row]; 491 low1 = 0; 492 high1 = nrow1; 493 lastcol2 = -1; 494 rp2 = bj + bi[row]; 495 ap2 = ba + bi[row]; 496 rmax2 = bimax[row]; 497 nrow2 = bilen[row]; 498 low2 = 0; 499 high2 = nrow2; 500 501 for (j=0; j<n; j++) { 502 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 503 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 504 if (in[j] >= cstart && in[j] < cend){ 505 col = in[j] - cstart; 506 nonew = a->nonew; 507 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 508 } else if (in[j] < 0) continue; 509 #if defined(PETSC_USE_DEBUG) 510 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 511 #endif 512 else { 513 if (mat->was_assembled) { 514 if (!aij->colmap) { 515 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 516 } 517 #if defined (PETSC_USE_CTABLE) 518 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 519 col--; 520 #else 521 col = aij->colmap[in[j]] - 1; 522 #endif 523 if (col < 0 && 
!((Mat_SeqAIJ*)(aij->A->data))->nonew) { 524 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 525 col = in[j]; 526 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 527 B = aij->B; 528 b = (Mat_SeqAIJ*)B->data; 529 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 530 rp2 = bj + bi[row]; 531 ap2 = ba + bi[row]; 532 rmax2 = bimax[row]; 533 nrow2 = bilen[row]; 534 low2 = 0; 535 high2 = nrow2; 536 bm = aij->B->rmap->n; 537 ba = b->a; 538 } 539 } else col = in[j]; 540 nonew = b->nonew; 541 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 542 } 543 } 544 } else { 545 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 546 if (!aij->donotstash) { 547 mat->assembled = PETSC_FALSE; 548 if (roworiented) { 549 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 550 } else { 551 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 552 } 553 } 554 } 555 } 556 PetscFunctionReturn(0); 557 } 558 559 #undef __FUNCT__ 560 #define __FUNCT__ "MatGetValues_MPIAIJ" 561 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 562 { 563 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 564 PetscErrorCode ierr; 565 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 566 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 567 568 PetscFunctionBegin; 569 for (i=0; i<m; i++) { 570 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 571 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 572 if (idxm[i] >= rstart && idxm[i] < rend) { 573 row = idxm[i] - rstart; 574 for (j=0; j<n; j++) { 575 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 576 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 577 if (idxn[j] >= cstart && idxn[j] < cend){ 578 col = idxn[j] - cstart; 579 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 580 } else { 581 if (!aij->colmap) { 582 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 583 } 584 #if defined (PETSC_USE_CTABLE) 585 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 586 col --; 587 #else 588 col = aij->colmap[idxn[j]] - 1; 589 #endif 590 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 591 else { 592 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 593 } 594 } 595 } 596 } else { 597 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 598 } 599 } 600 PetscFunctionReturn(0); 601 } 602 603 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 604 605 #undef __FUNCT__ 606 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 607 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 608 { 609 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 610 PetscErrorCode ierr; 611 PetscInt nstash,reallocs; 612 InsertMode addv; 613 614 PetscFunctionBegin; 615 if (aij->donotstash || mat->nooffprocentries) { 616 PetscFunctionReturn(0); 617 } 618 619 /* make sure all processors are either in 
INSERTMODE or ADDMODE */ 620 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 621 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 622 mat->insertmode = addv; /* in case this processor had no cache */ 623 624 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 625 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 626 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 627 PetscFunctionReturn(0); 628 } 629 630 #undef __FUNCT__ 631 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 632 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 633 { 634 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 635 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 636 PetscErrorCode ierr; 637 PetscMPIInt n; 638 PetscInt i,j,rstart,ncols,flg; 639 PetscInt *row,*col; 640 PetscBool other_disassembled; 641 PetscScalar *val; 642 InsertMode addv = mat->insertmode; 643 644 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 645 PetscFunctionBegin; 646 if (!aij->donotstash && !mat->nooffprocentries) { 647 while (1) { 648 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 649 if (!flg) break; 650 651 for (i=0; i<n;) { 652 /* Now identify the consecutive vals belonging to the same row */ 653 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 654 if (j < n) ncols = j-i; 655 else ncols = n-i; 656 /* Now assemble all these values with a single function call */ 657 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 658 i = j; 659 } 660 } 661 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 662 } 663 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 664 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 665 666 /* determine if any processor has disassembled, if so we must 667 also disassemble ourselfs, in order that we may reassemble. 
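
     (The MPI_Allreduce() with MPI_PROD below acts as a logical AND of was_assembled over all
     processes: other_disassembled ends up nonzero only if every process was previously
     assembled, so a process that was assembled but sees a zero result knows that some other
     process has disassembled and must disassemble as well.)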
*/ 668 /* 669 if nonzero structure of submatrix B cannot change then we know that 670 no processor disassembled thus we can skip this stuff 671 */ 672 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 673 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 674 if (mat->was_assembled && !other_disassembled) { 675 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 676 } 677 } 678 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 679 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 680 } 681 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 682 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 683 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 684 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 685 686 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 687 aij->rowvalues = 0; 688 689 /* used by MatAXPY() */ 690 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 691 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 692 693 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 694 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 695 PetscFunctionReturn(0); 696 } 697 698 #undef __FUNCT__ 699 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 700 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 701 { 702 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 703 PetscErrorCode ierr; 704 705 PetscFunctionBegin; 706 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 707 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 708 PetscFunctionReturn(0); 709 } 710 711 #undef __FUNCT__ 712 #define __FUNCT__ "MatZeroRows_MPIAIJ" 713 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 714 { 715 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 716 PetscErrorCode ierr; 717 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 718 PetscInt i,*owners = A->rmap->range; 719 PetscInt *nprocs,j,idx,nsends,row; 720 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 721 PetscInt *rvalues,count,base,slen,*source; 722 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 723 MPI_Comm comm = ((PetscObject)A)->comm; 724 MPI_Request *send_waits,*recv_waits; 725 MPI_Status recv_status,*send_status; 726 const PetscScalar *xx; 727 PetscScalar *bb; 728 #if defined(PETSC_DEBUG) 729 PetscBool found = PETSC_FALSE; 730 #endif 731 732 PetscFunctionBegin; 733 /* first count number of contributors to each processor */ 734 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 735 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 736 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 737 j = 0; 738 for (i=0; i<N; i++) { 739 if (lastidx > (idx = rows[i])) j = 0; 740 lastidx = idx; 741 for (; j<size; j++) { 742 if (idx >= owners[j] && idx < owners[j+1]) { 743 nprocs[2*j]++; 744 nprocs[2*j+1] = 1; 745 owner[i] = j; 746 #if defined(PETSC_DEBUG) 747 found = PETSC_TRUE; 748 #endif 749 break; 750 } 751 } 752 #if defined(PETSC_DEBUG) 753 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 754 found = PETSC_FALSE; 755 #endif 756 } 757 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 758 759 if (A->nooffproczerorows) { 760 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 761 nrecvs = nsends; 762 nmax = N; 763 } 
else { 764 /* inform other processors of number of messages and max length*/ 765 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 766 } 767 768 /* post receives: */ 769 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 770 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 771 for (i=0; i<nrecvs; i++) { 772 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 773 } 774 775 /* do sends: 776 1) starts[i] gives the starting index in svalues for stuff going to 777 the ith processor 778 */ 779 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 780 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 781 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 782 starts[0] = 0; 783 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 784 for (i=0; i<N; i++) { 785 svalues[starts[owner[i]]++] = rows[i]; 786 } 787 788 starts[0] = 0; 789 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 790 count = 0; 791 for (i=0; i<size; i++) { 792 if (nprocs[2*i+1]) { 793 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 794 } 795 } 796 ierr = PetscFree(starts);CHKERRQ(ierr); 797 798 base = owners[rank]; 799 800 /* wait on receives */ 801 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 802 count = nrecvs; slen = 0; 803 while (count) { 804 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 805 /* unpack receives into our local space */ 806 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 807 source[imdex] = recv_status.MPI_SOURCE; 808 lens[imdex] = n; 809 slen += n; 810 count--; 811 } 812 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 813 814 /* move the data into the send scatter */ 815 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 816 count = 0; 817 for (i=0; i<nrecvs; i++) { 818 values = rvalues + i*nmax; 819 for (j=0; j<lens[i]; j++) { 820 lrows[count++] = values[j] - base; 821 } 822 } 823 ierr = PetscFree(rvalues);CHKERRQ(ierr); 824 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 825 ierr = PetscFree(owner);CHKERRQ(ierr); 826 ierr = PetscFree(nprocs);CHKERRQ(ierr); 827 828 /* fix right hand side if needed */ 829 if (x && b) { 830 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 831 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 832 for (i=0; i<slen; i++) { 833 bb[lrows[i]] = diag*xx[lrows[i]]; 834 } 835 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 836 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 837 } 838 /* 839 Zero the required rows. If the "diagonal block" of the matrix 840 is square and the user wishes to set the diagonal we use separate 841 code so that MatSetValues() is not called for each diagonal allocating 842 new memory, thus calling lots of mallocs and slowing things down. 
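
     (Three cases are handled below: diag nonzero with a square diagonal block, where
     MatZeroRows() on l->A can place the diagonal entries itself; diag nonzero with a
     rectangular diagonal block, where the diagonal entries are inserted with MatSetValues()
     followed by an extra assembly; and diag equal to zero, where both blocks are simply
     zeroed.)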
843 844 */ 845 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 846 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 847 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 848 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 849 } else if (diag != 0.0) { 850 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 851 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 852 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 853 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 854 } 855 for (i = 0; i < slen; i++) { 856 row = lrows[i] + rstart; 857 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 858 } 859 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 860 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 861 } else { 862 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 863 } 864 ierr = PetscFree(lrows);CHKERRQ(ierr); 865 866 /* wait on sends */ 867 if (nsends) { 868 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 869 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 870 ierr = PetscFree(send_status);CHKERRQ(ierr); 871 } 872 ierr = PetscFree(send_waits);CHKERRQ(ierr); 873 ierr = PetscFree(svalues);CHKERRQ(ierr); 874 PetscFunctionReturn(0); 875 } 876 877 #undef __FUNCT__ 878 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 879 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 880 { 881 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 882 PetscErrorCode ierr; 883 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 884 PetscInt i,*owners = A->rmap->range; 885 PetscInt *nprocs,j,idx,nsends; 886 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 887 PetscInt *rvalues,count,base,slen,*source; 888 PetscInt *lens,*lrows,*values,m; 889 MPI_Comm comm = ((PetscObject)A)->comm; 890 MPI_Request *send_waits,*recv_waits; 891 MPI_Status recv_status,*send_status; 892 const PetscScalar *xx; 893 PetscScalar *bb,*mask; 894 Vec xmask,lmask; 895 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 896 const PetscInt *aj, *ii,*ridx; 897 PetscScalar *aa; 898 #if defined(PETSC_DEBUG) 899 PetscBool found = PETSC_FALSE; 900 #endif 901 902 PetscFunctionBegin; 903 /* first count number of contributors to each processor */ 904 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 905 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 906 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 907 j = 0; 908 for (i=0; i<N; i++) { 909 if (lastidx > (idx = rows[i])) j = 0; 910 lastidx = idx; 911 for (; j<size; j++) { 912 if (idx >= owners[j] && idx < owners[j+1]) { 913 nprocs[2*j]++; 914 nprocs[2*j+1] = 1; 915 owner[i] = j; 916 #if defined(PETSC_DEBUG) 917 found = PETSC_TRUE; 918 #endif 919 break; 920 } 921 } 922 #if defined(PETSC_DEBUG) 923 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 924 found = PETSC_FALSE; 925 #endif 926 } 927 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 928 929 /* inform other processors of number of messages and max length*/ 930 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 931 932 /* post receives: */ 933 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 934 ierr = 
PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 935 for (i=0; i<nrecvs; i++) { 936 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 937 } 938 939 /* do sends: 940 1) starts[i] gives the starting index in svalues for stuff going to 941 the ith processor 942 */ 943 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 944 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 945 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 946 starts[0] = 0; 947 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 948 for (i=0; i<N; i++) { 949 svalues[starts[owner[i]]++] = rows[i]; 950 } 951 952 starts[0] = 0; 953 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 954 count = 0; 955 for (i=0; i<size; i++) { 956 if (nprocs[2*i+1]) { 957 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 958 } 959 } 960 ierr = PetscFree(starts);CHKERRQ(ierr); 961 962 base = owners[rank]; 963 964 /* wait on receives */ 965 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 966 count = nrecvs; slen = 0; 967 while (count) { 968 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 969 /* unpack receives into our local space */ 970 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 971 source[imdex] = recv_status.MPI_SOURCE; 972 lens[imdex] = n; 973 slen += n; 974 count--; 975 } 976 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 977 978 /* move the data into the send scatter */ 979 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 980 count = 0; 981 for (i=0; i<nrecvs; i++) { 982 values = rvalues + i*nmax; 983 for (j=0; j<lens[i]; j++) { 984 lrows[count++] = values[j] - base; 985 } 986 } 987 ierr = PetscFree(rvalues);CHKERRQ(ierr); 988 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 989 ierr = PetscFree(owner);CHKERRQ(ierr); 990 ierr = PetscFree(nprocs);CHKERRQ(ierr); 991 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 992 993 /* zero diagonal part of matrix */ 994 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 995 996 /* handle off diagonal part of matrix */ 997 ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr); 998 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 999 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1000 for (i=0; i<slen; i++) { 1001 bb[lrows[i]] = 1; 1002 } 1003 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1004 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1005 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1006 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1007 if (x) { 1008 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1009 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1010 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1011 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1012 } 1013 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1014 1015 /* remove zeroed rows of off diagonal matrix */ 1016 ii = aij->i; 1017 for (i=0; i<slen; i++) { 1018 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1019 } 1020 1021 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1022 if (aij->compressedrow.use){ 1023 m = aij->compressedrow.nrows; 1024 ii = 
aij->compressedrow.i; 1025 ridx = aij->compressedrow.rindex; 1026 for (i=0; i<m; i++){ 1027 n = ii[i+1] - ii[i]; 1028 aj = aij->j + ii[i]; 1029 aa = aij->a + ii[i]; 1030 1031 for (j=0; j<n; j++) { 1032 if (PetscAbsScalar(mask[*aj])) { 1033 if (b) bb[*ridx] -= *aa*xx[*aj]; 1034 *aa = 0.0; 1035 } 1036 aa++; 1037 aj++; 1038 } 1039 ridx++; 1040 } 1041 } else { /* do not use compressed row format */ 1042 m = l->B->rmap->n; 1043 for (i=0; i<m; i++) { 1044 n = ii[i+1] - ii[i]; 1045 aj = aij->j + ii[i]; 1046 aa = aij->a + ii[i]; 1047 for (j=0; j<n; j++) { 1048 if (PetscAbsScalar(mask[*aj])) { 1049 if (b) bb[i] -= *aa*xx[*aj]; 1050 *aa = 0.0; 1051 } 1052 aa++; 1053 aj++; 1054 } 1055 } 1056 } 1057 if (x) { 1058 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1059 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1060 } 1061 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1062 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1063 ierr = PetscFree(lrows);CHKERRQ(ierr); 1064 1065 /* wait on sends */ 1066 if (nsends) { 1067 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1068 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1069 ierr = PetscFree(send_status);CHKERRQ(ierr); 1070 } 1071 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1072 ierr = PetscFree(svalues);CHKERRQ(ierr); 1073 1074 PetscFunctionReturn(0); 1075 } 1076 1077 #undef __FUNCT__ 1078 #define __FUNCT__ "MatMult_MPIAIJ" 1079 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1080 { 1081 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1082 PetscErrorCode ierr; 1083 PetscInt nt; 1084 1085 PetscFunctionBegin; 1086 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1087 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1088 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1089 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1090 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1091 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1092 PetscFunctionReturn(0); 1093 } 1094 1095 #undef __FUNCT__ 1096 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1097 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1098 { 1099 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1100 PetscErrorCode ierr; 1101 1102 PetscFunctionBegin; 1103 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1104 PetscFunctionReturn(0); 1105 } 1106 1107 #undef __FUNCT__ 1108 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1109 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1110 { 1111 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1112 PetscErrorCode ierr; 1113 1114 PetscFunctionBegin; 1115 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1116 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1117 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1118 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1119 PetscFunctionReturn(0); 1120 } 1121 1122 #undef __FUNCT__ 1123 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1124 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1125 { 1126 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1127 PetscErrorCode ierr; 1128 PetscBool merged; 1129 1130 PetscFunctionBegin; 1131 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1132 /* do nondiagonal part */ 1133 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1134 if (!merged) { 1135 /* 
send it on its way */ 1136 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1137 /* do local part */ 1138 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1139 /* receive remote parts: note this assumes the values are not actually */ 1140 /* added in yy until the next line, */ 1141 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1142 } else { 1143 /* do local part */ 1144 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1145 /* send it on its way */ 1146 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1147 /* values actually were received in the Begin() but we need to call this nop */ 1148 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1149 } 1150 PetscFunctionReturn(0); 1151 } 1152 1153 EXTERN_C_BEGIN 1154 #undef __FUNCT__ 1155 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1156 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1157 { 1158 MPI_Comm comm; 1159 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 1160 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1161 IS Me,Notme; 1162 PetscErrorCode ierr; 1163 PetscInt M,N,first,last,*notme,i; 1164 PetscMPIInt size; 1165 1166 PetscFunctionBegin; 1167 1168 /* Easy test: symmetric diagonal block */ 1169 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 1170 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1171 if (!*f) PetscFunctionReturn(0); 1172 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1173 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1174 if (size == 1) PetscFunctionReturn(0); 1175 1176 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1177 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1178 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1179 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1180 for (i=0; i<first; i++) notme[i] = i; 1181 for (i=last; i<M; i++) notme[i-last+first] = i; 1182 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1183 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1184 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1185 Aoff = Aoffs[0]; 1186 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1187 Boff = Boffs[0]; 1188 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1189 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1190 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1191 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1192 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1193 ierr = PetscFree(notme);CHKERRQ(ierr); 1194 PetscFunctionReturn(0); 1195 } 1196 EXTERN_C_END 1197 1198 #undef __FUNCT__ 1199 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1200 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1201 { 1202 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1203 PetscErrorCode ierr; 1204 1205 PetscFunctionBegin; 1206 /* do nondiagonal part */ 1207 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1208 /* send it on its way */ 1209 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1210 /* do local part */ 1211 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1212 /* receive remote parts */ 1213 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1214 PetscFunctionReturn(0); 1215 } 
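
/*
   MatMult_MPIAIJ() and MatMultAdd_MPIAIJ() above overlap communication with computation: they
   start scattering the needed off-process entries of the input vector into lvec, apply the
   local (diagonal) block A while those messages are in flight, finish the scatter, and then
   apply the off-diagonal block B against lvec.  The transpose kernels reverse the pattern,
   applying B^T locally first and scattering the contributions back with ADD_VALUES and
   SCATTER_REVERSE.  A condensed sketch of the forward case (illustrative only; a is the
   Mat_MPIAIJ data and lvec its ghosted work vector):

      ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);               [yy  = A_diag * x_owned]
      ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);    [yy += A_off  * x_ghost]
*/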
1216 1217 /* 1218 This only works correctly for square matrices where the subblock A->A is the 1219 diagonal block 1220 */ 1221 #undef __FUNCT__ 1222 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1223 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1224 { 1225 PetscErrorCode ierr; 1226 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1227 1228 PetscFunctionBegin; 1229 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1230 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1231 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1232 PetscFunctionReturn(0); 1233 } 1234 1235 #undef __FUNCT__ 1236 #define __FUNCT__ "MatScale_MPIAIJ" 1237 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1238 { 1239 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1240 PetscErrorCode ierr; 1241 1242 PetscFunctionBegin; 1243 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1244 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1245 PetscFunctionReturn(0); 1246 } 1247 1248 #undef __FUNCT__ 1249 #define __FUNCT__ "MatDestroy_MPIAIJ" 1250 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1251 { 1252 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1253 PetscErrorCode ierr; 1254 1255 PetscFunctionBegin; 1256 #if defined(PETSC_USE_LOG) 1257 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1258 #endif 1259 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1260 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1261 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1262 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1263 #if defined (PETSC_USE_CTABLE) 1264 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1265 #else 1266 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1267 #endif 1268 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1269 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1270 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1271 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1272 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1273 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1274 1275 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1276 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1277 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1278 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1279 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1280 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1281 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1282 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1283 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1284 PetscFunctionReturn(0); 1285 } 1286 1287 #undef __FUNCT__ 1288 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1289 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1290 { 1291 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1292 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 1293 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 1294 PetscErrorCode ierr; 1295 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1296 int 
fd; 1297 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1298 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 1299 PetscScalar *column_values; 1300 PetscInt message_count,flowcontrolcount; 1301 1302 PetscFunctionBegin; 1303 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1304 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1305 nz = A->nz + B->nz; 1306 if (!rank) { 1307 header[0] = MAT_FILE_CLASSID; 1308 header[1] = mat->rmap->N; 1309 header[2] = mat->cmap->N; 1310 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1311 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1312 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1313 /* get largest number of rows any processor has */ 1314 rlen = mat->rmap->n; 1315 range = mat->rmap->range; 1316 for (i=1; i<size; i++) { 1317 rlen = PetscMax(rlen,range[i+1] - range[i]); 1318 } 1319 } else { 1320 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1321 rlen = mat->rmap->n; 1322 } 1323 1324 /* load up the local row counts */ 1325 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 1326 for (i=0; i<mat->rmap->n; i++) { 1327 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1328 } 1329 1330 /* store the row lengths to the file */ 1331 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1332 if (!rank) { 1333 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1334 for (i=1; i<size; i++) { 1335 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1336 rlen = range[i+1] - range[i]; 1337 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1338 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1339 } 1340 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1341 } else { 1342 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1343 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1344 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1345 } 1346 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1347 1348 /* load up the local column indices */ 1349 nzmax = nz; /* )th processor needs space a largest processor needs */ 1350 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1351 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1352 cnt = 0; 1353 for (i=0; i<mat->rmap->n; i++) { 1354 for (j=B->i[i]; j<B->i[i+1]; j++) { 1355 if ( (col = garray[B->j[j]]) > cstart) break; 1356 column_indices[cnt++] = col; 1357 } 1358 for (k=A->i[i]; k<A->i[i+1]; k++) { 1359 column_indices[cnt++] = A->j[k] + cstart; 1360 } 1361 for (; j<B->i[i+1]; j++) { 1362 column_indices[cnt++] = garray[B->j[j]]; 1363 } 1364 } 1365 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1366 1367 /* store the column indices to the file */ 1368 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1369 if (!rank) { 1370 MPI_Status status; 1371 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1372 for (i=1; i<size; i++) { 1373 ierr = 
PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1374 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1375 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1376 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1377 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1378 } 1379 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1380 } else { 1381 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1382 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1383 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1384 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1385 } 1386 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1387 1388 /* load up the local column values */ 1389 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1390 cnt = 0; 1391 for (i=0; i<mat->rmap->n; i++) { 1392 for (j=B->i[i]; j<B->i[i+1]; j++) { 1393 if ( garray[B->j[j]] > cstart) break; 1394 column_values[cnt++] = B->a[j]; 1395 } 1396 for (k=A->i[i]; k<A->i[i+1]; k++) { 1397 column_values[cnt++] = A->a[k]; 1398 } 1399 for (; j<B->i[i+1]; j++) { 1400 column_values[cnt++] = B->a[j]; 1401 } 1402 } 1403 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1404 1405 /* store the column values to the file */ 1406 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1407 if (!rank) { 1408 MPI_Status status; 1409 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1410 for (i=1; i<size; i++) { 1411 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1412 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1413 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1414 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1415 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1416 } 1417 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1418 } else { 1419 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1420 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1421 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1422 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1423 } 1424 ierr = PetscFree(column_values);CHKERRQ(ierr); 1425 PetscFunctionReturn(0); 1426 } 1427 1428 #undef __FUNCT__ 1429 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1430 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1431 { 1432 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1433 PetscErrorCode ierr; 1434 PetscMPIInt rank = aij->rank,size = aij->size; 1435 PetscBool isdraw,iascii,isbinary; 1436 PetscViewer sviewer; 1437 PetscViewerFormat format; 1438 1439 PetscFunctionBegin; 1440 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1441 ierr = 
PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1442 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1443 if (iascii) { 1444 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1445 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1446 MatInfo info; 1447 PetscBool inodes; 1448 1449 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1450 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1451 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1452 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1453 if (!inodes) { 1454 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1455 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1456 } else { 1457 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1458 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1459 } 1460 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1461 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1462 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1463 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1464 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1465 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1466 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1467 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1468 PetscFunctionReturn(0); 1469 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1470 PetscInt inodecount,inodelimit,*inodes; 1471 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1472 if (inodes) { 1473 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1474 } else { 1475 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1476 } 1477 PetscFunctionReturn(0); 1478 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1479 PetscFunctionReturn(0); 1480 } 1481 } else if (isbinary) { 1482 if (size == 1) { 1483 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1484 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1485 } else { 1486 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1487 } 1488 PetscFunctionReturn(0); 1489 } else if (isdraw) { 1490 PetscDraw draw; 1491 PetscBool isnull; 1492 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1493 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1494 } 1495 1496 if (size == 1) { 1497 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1498 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1499 } else { 1500 /* assemble the entire matrix onto first processor. 
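
       Everything is copied into a temporary parallel matrix A whose rows all live on rank 0;
       rank 0 then views its local diagonal block through the singleton viewer.  This is only
       intended for small matrices: below, matrices with more than 1024 rows are rejected
       unless -mat_ascii_output_large is given.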
*/ 1501 Mat A; 1502 Mat_SeqAIJ *Aloc; 1503 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1504 MatScalar *a; 1505 1506 if (mat->rmap->N > 1024) { 1507 PetscBool flg = PETSC_FALSE; 1508 1509 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1510 if (!flg) { 1511 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1512 } 1513 } 1514 1515 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1516 if (!rank) { 1517 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1518 } else { 1519 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1520 } 1521 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1522 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1523 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1524 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1525 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1526 1527 /* copy over the A part */ 1528 Aloc = (Mat_SeqAIJ*)aij->A->data; 1529 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1530 row = mat->rmap->rstart; 1531 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1532 for (i=0; i<m; i++) { 1533 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1534 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1535 } 1536 aj = Aloc->j; 1537 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1538 1539 /* copy over the B part */ 1540 Aloc = (Mat_SeqAIJ*)aij->B->data; 1541 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1542 row = mat->rmap->rstart; 1543 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1544 ct = cols; 1545 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1546 for (i=0; i<m; i++) { 1547 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1548 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1549 } 1550 ierr = PetscFree(ct);CHKERRQ(ierr); 1551 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1552 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1553 /* 1554 Everyone has to call to draw the matrix since the graphics waits are 1555 synchronized across all processors that share the PetscDraw object 1556 */ 1557 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1558 if (!rank) { 1559 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1560 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1561 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1562 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1563 } 1564 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1565 ierr = MatDestroy(&A);CHKERRQ(ierr); 1566 } 1567 PetscFunctionReturn(0); 1568 } 1569 1570 #undef __FUNCT__ 1571 #define __FUNCT__ "MatView_MPIAIJ" 1572 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1573 { 1574 PetscErrorCode ierr; 1575 PetscBool iascii,isdraw,issocket,isbinary; 1576 1577 PetscFunctionBegin; 1578 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1579 ierr = 
PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1580 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1581 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1582 if (iascii || isdraw || isbinary || issocket) { 1583 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1584 } else { 1585 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1586 } 1587 PetscFunctionReturn(0); 1588 } 1589 1590 #undef __FUNCT__ 1591 #define __FUNCT__ "MatSOR_MPIAIJ" 1592 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1593 { 1594 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1595 PetscErrorCode ierr; 1596 Vec bb1 = 0; 1597 PetscBool hasop; 1598 1599 PetscFunctionBegin; 1600 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1601 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1602 } 1603 1604 if (flag == SOR_APPLY_UPPER) { 1605 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1606 PetscFunctionReturn(0); 1607 } 1608 1609 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1610 if (flag & SOR_ZERO_INITIAL_GUESS) { 1611 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1612 its--; 1613 } 1614 1615 while (its--) { 1616 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1617 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1618 1619 /* update rhs: bb1 = bb - B*x */ 1620 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1621 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1622 1623 /* local sweep */ 1624 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1625 } 1626 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1627 if (flag & SOR_ZERO_INITIAL_GUESS) { 1628 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1629 its--; 1630 } 1631 while (its--) { 1632 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1633 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1634 1635 /* update rhs: bb1 = bb - B*x */ 1636 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1637 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1638 1639 /* local sweep */ 1640 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1641 } 1642 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1643 if (flag & SOR_ZERO_INITIAL_GUESS) { 1644 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1645 its--; 1646 } 1647 while (its--) { 1648 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1649 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1650 1651 /* update rhs: bb1 = bb - B*x */ 1652 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1653 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1654 1655 /* local sweep */ 1656 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1657 } 1658 } else if (flag & SOR_EISENSTAT) { 1659 Vec xx1; 1660 1661 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1662 ierr = 
(*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1663 1664 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1665 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1666 if (!mat->diag) { 1667 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1668 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1669 } 1670 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1671 if (hasop) { 1672 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1673 } else { 1674 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1675 } 1676 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1677 1678 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1679 1680 /* local sweep */ 1681 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1682 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1683 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1684 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1685 1686 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1687 PetscFunctionReturn(0); 1688 } 1689 1690 #undef __FUNCT__ 1691 #define __FUNCT__ "MatPermute_MPIAIJ" 1692 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1693 { 1694 MPI_Comm comm; 1695 PetscInt first,local_rowsize,local_colsize; 1696 const PetscInt *rows; 1697 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1698 PetscErrorCode ierr; 1699 1700 PetscFunctionBegin; 1701 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1702 /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */ 1703 ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr); 1704 /* collect the global row permutation and invert it */ 1705 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1706 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1707 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 1708 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1709 ierr = ISDestroy(&growp);CHKERRQ(ierr); 1710 /* get the local target indices */ 1711 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1712 ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr); 1713 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1714 ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 1715 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1716 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 1717 /* the column permutation is so much easier; 1718 make a local version of 'colp' and invert it */ 1719 ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr); 1720 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1721 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1722 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 1723 /* now we just get the submatrix */ 1724 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1725 /* clean up */ 1726 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 1727 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 1728 PetscFunctionReturn(0); 1729 } 1730 1731 #undef __FUNCT__ 1732 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1733 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1734 { 1735 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1736 Mat A = mat->A,B = 
mat->B; 1737 PetscErrorCode ierr; 1738 PetscReal isend[5],irecv[5]; 1739 1740 PetscFunctionBegin; 1741 info->block_size = 1.0; 1742 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1743 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1744 isend[3] = info->memory; isend[4] = info->mallocs; 1745 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1746 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1747 isend[3] += info->memory; isend[4] += info->mallocs; 1748 if (flag == MAT_LOCAL) { 1749 info->nz_used = isend[0]; 1750 info->nz_allocated = isend[1]; 1751 info->nz_unneeded = isend[2]; 1752 info->memory = isend[3]; 1753 info->mallocs = isend[4]; 1754 } else if (flag == MAT_GLOBAL_MAX) { 1755 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1756 info->nz_used = irecv[0]; 1757 info->nz_allocated = irecv[1]; 1758 info->nz_unneeded = irecv[2]; 1759 info->memory = irecv[3]; 1760 info->mallocs = irecv[4]; 1761 } else if (flag == MAT_GLOBAL_SUM) { 1762 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1763 info->nz_used = irecv[0]; 1764 info->nz_allocated = irecv[1]; 1765 info->nz_unneeded = irecv[2]; 1766 info->memory = irecv[3]; 1767 info->mallocs = irecv[4]; 1768 } 1769 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1770 info->fill_ratio_needed = 0; 1771 info->factor_mallocs = 0; 1772 1773 PetscFunctionReturn(0); 1774 } 1775 1776 #undef __FUNCT__ 1777 #define __FUNCT__ "MatSetOption_MPIAIJ" 1778 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1779 { 1780 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1781 PetscErrorCode ierr; 1782 1783 PetscFunctionBegin; 1784 switch (op) { 1785 case MAT_NEW_NONZERO_LOCATIONS: 1786 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1787 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1788 case MAT_KEEP_NONZERO_PATTERN: 1789 case MAT_NEW_NONZERO_LOCATION_ERR: 1790 case MAT_USE_INODES: 1791 case MAT_IGNORE_ZERO_ENTRIES: 1792 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1793 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1794 break; 1795 case MAT_ROW_ORIENTED: 1796 a->roworiented = flg; 1797 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1798 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1799 break; 1800 case MAT_NEW_DIAGONALS: 1801 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1802 break; 1803 case MAT_IGNORE_OFF_PROC_ENTRIES: 1804 a->donotstash = flg; 1805 break; 1806 case MAT_SPD: 1807 A->spd_set = PETSC_TRUE; 1808 A->spd = flg; 1809 if (flg) { 1810 A->symmetric = PETSC_TRUE; 1811 A->structurally_symmetric = PETSC_TRUE; 1812 A->symmetric_set = PETSC_TRUE; 1813 A->structurally_symmetric_set = PETSC_TRUE; 1814 } 1815 break; 1816 case MAT_SYMMETRIC: 1817 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1818 break; 1819 case MAT_STRUCTURALLY_SYMMETRIC: 1820 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1821 break; 1822 case MAT_HERMITIAN: 1823 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1824 break; 1825 case MAT_SYMMETRY_ETERNAL: 1826 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1827 break; 1828 default: 1829 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1830 } 1831 PetscFunctionReturn(0); 1832 } 1833 1834 #undef __FUNCT__ 1835 #define __FUNCT__ "MatGetRow_MPIAIJ" 1836 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1837 { 1838 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1839 
PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1840 PetscErrorCode ierr; 1841 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1842 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1843 PetscInt *cmap,*idx_p; 1844 1845 PetscFunctionBegin; 1846 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1847 mat->getrowactive = PETSC_TRUE; 1848 1849 if (!mat->rowvalues && (idx || v)) { 1850 /* 1851 allocate enough space to hold information from the longest row. 1852 */ 1853 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1854 PetscInt max = 1,tmp; 1855 for (i=0; i<matin->rmap->n; i++) { 1856 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1857 if (max < tmp) { max = tmp; } 1858 } 1859 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1860 } 1861 1862 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1863 lrow = row - rstart; 1864 1865 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1866 if (!v) {pvA = 0; pvB = 0;} 1867 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1868 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1869 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1870 nztot = nzA + nzB; 1871 1872 cmap = mat->garray; 1873 if (v || idx) { 1874 if (nztot) { 1875 /* Sort by increasing column numbers, assuming A and B already sorted */ 1876 PetscInt imark = -1; 1877 if (v) { 1878 *v = v_p = mat->rowvalues; 1879 for (i=0; i<nzB; i++) { 1880 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1881 else break; 1882 } 1883 imark = i; 1884 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1885 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1886 } 1887 if (idx) { 1888 *idx = idx_p = mat->rowindices; 1889 if (imark > -1) { 1890 for (i=0; i<imark; i++) { 1891 idx_p[i] = cmap[cworkB[i]]; 1892 } 1893 } else { 1894 for (i=0; i<nzB; i++) { 1895 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1896 else break; 1897 } 1898 imark = i; 1899 } 1900 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1901 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1902 } 1903 } else { 1904 if (idx) *idx = 0; 1905 if (v) *v = 0; 1906 } 1907 } 1908 *nz = nztot; 1909 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1910 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1911 PetscFunctionReturn(0); 1912 } 1913 1914 #undef __FUNCT__ 1915 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1916 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1917 { 1918 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1919 1920 PetscFunctionBegin; 1921 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1922 aij->getrowactive = PETSC_FALSE; 1923 PetscFunctionReturn(0); 1924 } 1925 1926 #undef __FUNCT__ 1927 #define __FUNCT__ "MatNorm_MPIAIJ" 1928 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1929 { 1930 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1931 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1932 PetscErrorCode ierr; 1933 PetscInt i,j,cstart = mat->cmap->rstart; 1934 PetscReal sum = 0.0; 1935 MatScalar *v; 1936 1937 PetscFunctionBegin; 1938 if (aij->size == 1) { 1939 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1940 } else { 1941 if (type == 
NORM_FROBENIUS) { 1942 v = amat->a; 1943 for (i=0; i<amat->nz; i++) { 1944 #if defined(PETSC_USE_COMPLEX) 1945 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1946 #else 1947 sum += (*v)*(*v); v++; 1948 #endif 1949 } 1950 v = bmat->a; 1951 for (i=0; i<bmat->nz; i++) { 1952 #if defined(PETSC_USE_COMPLEX) 1953 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1954 #else 1955 sum += (*v)*(*v); v++; 1956 #endif 1957 } 1958 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1959 *norm = PetscSqrtReal(*norm); 1960 } else if (type == NORM_1) { /* max column norm */ 1961 PetscReal *tmp,*tmp2; 1962 PetscInt *jj,*garray = aij->garray; 1963 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1964 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1965 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1966 *norm = 0.0; 1967 v = amat->a; jj = amat->j; 1968 for (j=0; j<amat->nz; j++) { 1969 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1970 } 1971 v = bmat->a; jj = bmat->j; 1972 for (j=0; j<bmat->nz; j++) { 1973 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1974 } 1975 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1976 for (j=0; j<mat->cmap->N; j++) { 1977 if (tmp2[j] > *norm) *norm = tmp2[j]; 1978 } 1979 ierr = PetscFree(tmp);CHKERRQ(ierr); 1980 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1981 } else if (type == NORM_INFINITY) { /* max row norm */ 1982 PetscReal ntemp = 0.0; 1983 for (j=0; j<aij->A->rmap->n; j++) { 1984 v = amat->a + amat->i[j]; 1985 sum = 0.0; 1986 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1987 sum += PetscAbsScalar(*v); v++; 1988 } 1989 v = bmat->a + bmat->i[j]; 1990 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1991 sum += PetscAbsScalar(*v); v++; 1992 } 1993 if (sum > ntemp) ntemp = sum; 1994 } 1995 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1996 } else { 1997 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 1998 } 1999 } 2000 PetscFunctionReturn(0); 2001 } 2002 2003 #undef __FUNCT__ 2004 #define __FUNCT__ "MatTranspose_MPIAIJ" 2005 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2006 { 2007 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2008 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2009 PetscErrorCode ierr; 2010 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 2011 PetscInt cstart=A->cmap->rstart,ncol; 2012 Mat B; 2013 MatScalar *array; 2014 2015 PetscFunctionBegin; 2016 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2017 2018 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 2019 ai = Aloc->i; aj = Aloc->j; 2020 bi = Bloc->i; bj = Bloc->j; 2021 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2022 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 2023 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2024 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 2025 for (i=0; i<ai[ma]; i++){ 2026 d_nnz[aj[i]] ++; 2027 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2028 } 2029 2030 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2031 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2032 ierr = 
MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2033 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 2034 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2035 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2036 } else { 2037 B = *matout; 2038 } 2039 2040 /* copy over the A part */ 2041 array = Aloc->a; 2042 row = A->rmap->rstart; 2043 for (i=0; i<ma; i++) { 2044 ncol = ai[i+1]-ai[i]; 2045 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2046 row++; array += ncol; aj += ncol; 2047 } 2048 aj = Aloc->j; 2049 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2050 2051 /* copy over the B part */ 2052 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2053 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2054 array = Bloc->a; 2055 row = A->rmap->rstart; 2056 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 2057 cols_tmp = cols; 2058 for (i=0; i<mb; i++) { 2059 ncol = bi[i+1]-bi[i]; 2060 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2061 row++; array += ncol; cols_tmp += ncol; 2062 } 2063 ierr = PetscFree(cols);CHKERRQ(ierr); 2064 2065 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2066 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2067 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2068 *matout = B; 2069 } else { 2070 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2071 } 2072 PetscFunctionReturn(0); 2073 } 2074 2075 #undef __FUNCT__ 2076 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2077 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2078 { 2079 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2080 Mat a = aij->A,b = aij->B; 2081 PetscErrorCode ierr; 2082 PetscInt s1,s2,s3; 2083 2084 PetscFunctionBegin; 2085 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2086 if (rr) { 2087 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2088 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2089 /* Overlap communication with computation. 
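The forward scatter of rr into aij->lvec is started here and completed further below, after the diagonal block has been scaled, so that the communication overlaps the local scaling work.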
*/ 2090 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2091 } 2092 if (ll) { 2093 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2094 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2095 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2096 } 2097 /* scale the diagonal block */ 2098 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2099 2100 if (rr) { 2101 /* Do a scatter end and then right scale the off-diagonal block */ 2102 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2103 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2104 } 2105 2106 PetscFunctionReturn(0); 2107 } 2108 2109 #undef __FUNCT__ 2110 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2111 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2112 { 2113 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2114 PetscErrorCode ierr; 2115 2116 PetscFunctionBegin; 2117 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2118 PetscFunctionReturn(0); 2119 } 2120 2121 #undef __FUNCT__ 2122 #define __FUNCT__ "MatEqual_MPIAIJ" 2123 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2124 { 2125 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2126 Mat a,b,c,d; 2127 PetscBool flg; 2128 PetscErrorCode ierr; 2129 2130 PetscFunctionBegin; 2131 a = matA->A; b = matA->B; 2132 c = matB->A; d = matB->B; 2133 2134 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2135 if (flg) { 2136 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2137 } 2138 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2139 PetscFunctionReturn(0); 2140 } 2141 2142 #undef __FUNCT__ 2143 #define __FUNCT__ "MatCopy_MPIAIJ" 2144 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2145 { 2146 PetscErrorCode ierr; 2147 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2148 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2149 2150 PetscFunctionBegin; 2151 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2152 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2153 /* because of the column compression in the off-processor part of the matrix a->B, 2154 the number of columns in a->B and b->B may be different, hence we cannot call 2155 the MatCopy() directly on the two parts. 
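MatCopy_Basic() used below falls back to a generic row-by-row copy (MatGetRow()/MatSetValues()), so it does not depend on the two off-diagonal blocks sharing the same compressed column layout.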
If need be, we can provide a more 2156 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2157 then copying the submatrices */ 2158 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2159 } else { 2160 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2161 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2162 } 2163 PetscFunctionReturn(0); 2164 } 2165 2166 #undef __FUNCT__ 2167 #define __FUNCT__ "MatSetUp_MPIAIJ" 2168 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2169 { 2170 PetscErrorCode ierr; 2171 2172 PetscFunctionBegin; 2173 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2174 PetscFunctionReturn(0); 2175 } 2176 2177 #undef __FUNCT__ 2178 #define __FUNCT__ "MatAXPY_MPIAIJ" 2179 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2180 { 2181 PetscErrorCode ierr; 2182 PetscInt i; 2183 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2184 PetscBLASInt bnz,one=1; 2185 Mat_SeqAIJ *x,*y; 2186 2187 PetscFunctionBegin; 2188 if (str == SAME_NONZERO_PATTERN) { 2189 PetscScalar alpha = a; 2190 x = (Mat_SeqAIJ *)xx->A->data; 2191 y = (Mat_SeqAIJ *)yy->A->data; 2192 bnz = PetscBLASIntCast(x->nz); 2193 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2194 x = (Mat_SeqAIJ *)xx->B->data; 2195 y = (Mat_SeqAIJ *)yy->B->data; 2196 bnz = PetscBLASIntCast(x->nz); 2197 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2198 } else if (str == SUBSET_NONZERO_PATTERN) { 2199 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2200 2201 x = (Mat_SeqAIJ *)xx->B->data; 2202 y = (Mat_SeqAIJ *)yy->B->data; 2203 if (y->xtoy && y->XtoY != xx->B) { 2204 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2205 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2206 } 2207 if (!y->xtoy) { /* get xtoy */ 2208 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2209 y->XtoY = xx->B; 2210 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2211 } 2212 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2213 } else { 2214 Mat B; 2215 PetscInt *nnz_d,*nnz_o; 2216 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2217 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2218 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2219 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2220 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2221 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2222 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2223 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr); 2224 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2225 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2226 ierr = MatHeaderReplace(Y,B); 2227 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2228 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2229 } 2230 PetscFunctionReturn(0); 2231 } 2232 2233 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2234 2235 #undef __FUNCT__ 2236 #define __FUNCT__ "MatConjugate_MPIAIJ" 2237 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2238 { 2239 #if defined(PETSC_USE_COMPLEX) 2240 PetscErrorCode ierr; 2241 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2242 2243 PetscFunctionBegin; 2244 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2245 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2246 #else 2247 PetscFunctionBegin; 2248 #endif 2249 PetscFunctionReturn(0); 2250 } 2251 2252 #undef __FUNCT__ 2253 
#define __FUNCT__ "MatRealPart_MPIAIJ" 2254 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2255 { 2256 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2257 PetscErrorCode ierr; 2258 2259 PetscFunctionBegin; 2260 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2261 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2262 PetscFunctionReturn(0); 2263 } 2264 2265 #undef __FUNCT__ 2266 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2267 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2268 { 2269 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2270 PetscErrorCode ierr; 2271 2272 PetscFunctionBegin; 2273 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2274 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2275 PetscFunctionReturn(0); 2276 } 2277 2278 #ifdef PETSC_HAVE_PBGL 2279 2280 #include <boost/parallel/mpi/bsp_process_group.hpp> 2281 #include <boost/graph/distributed/ilu_default_graph.hpp> 2282 #include <boost/graph/distributed/ilu_0_block.hpp> 2283 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2284 #include <boost/graph/distributed/petsc/interface.hpp> 2285 #include <boost/multi_array.hpp> 2286 #include <boost/parallel/distributed_property_map->hpp> 2287 2288 #undef __FUNCT__ 2289 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2290 /* 2291 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2292 */ 2293 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2294 { 2295 namespace petsc = boost::distributed::petsc; 2296 2297 namespace graph_dist = boost::graph::distributed; 2298 using boost::graph::distributed::ilu_default::process_group_type; 2299 using boost::graph::ilu_permuted; 2300 2301 PetscBool row_identity, col_identity; 2302 PetscContainer c; 2303 PetscInt m, n, M, N; 2304 PetscErrorCode ierr; 2305 2306 PetscFunctionBegin; 2307 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2308 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2309 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2310 if (!row_identity || !col_identity) { 2311 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2312 } 2313 2314 process_group_type pg; 2315 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2316 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2317 lgraph_type& level_graph = *lgraph_p; 2318 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2319 2320 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2321 ilu_permuted(level_graph); 2322 2323 /* put together the new matrix */ 2324 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2325 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2326 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2327 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2328 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2329 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2330 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2331 2332 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 2333 ierr = PetscContainerSetPointer(c, lgraph_p); 2334 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2335 ierr = PetscContainerDestroy(&c); 2336 PetscFunctionReturn(0); 2337 } 2338 2339 #undef __FUNCT__ 2340 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2341 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2342 
2343 PetscFunctionBegin;
2344 PetscFunctionReturn(0);
2345 }
2346
2347 #undef __FUNCT__
2348 #define __FUNCT__ "MatSolve_MPIAIJ"
2349 /*
2350 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2351 */
2352 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2353 {
2354 namespace graph_dist = boost::graph::distributed;
2355
2356 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2357 lgraph_type* lgraph_p;
2358 PetscContainer c;
2359 PetscErrorCode ierr;
2360
2361 PetscFunctionBegin;
2362 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2363 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2364 ierr = VecCopy(b, x);CHKERRQ(ierr);
2365
2366 PetscScalar* array_x;
2367 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2368 PetscInt sx;
2369 ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2370
2371 PetscScalar* array_b;
2372 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2373 PetscInt sb;
2374 ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2375
2376 lgraph_type& level_graph = *lgraph_p;
2377 graph_dist::ilu_default::graph_type& graph(level_graph.graph);
2378
2379 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2380 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]),
2381 ref_x(array_x, boost::extents[num_vertices(graph)]);
2382
2383 typedef boost::iterator_property_map<array_ref_type::iterator,
2384 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type;
2385 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2386 vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2387
2388 ilu_set_solve(*lgraph_p, vector_b, vector_x);
2389
2390 PetscFunctionReturn(0);
2391 }
2392 #endif
2393
2394 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2395 PetscInt nzlocal,nsends,nrecvs;
2396 PetscMPIInt *send_rank,*recv_rank;
2397 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2398 PetscScalar *sbuf_a,**rbuf_a;
2399 PetscErrorCode (*Destroy)(Mat);
2400 } Mat_Redundant;
2401
2402 #undef __FUNCT__
2403 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2404 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2405 {
2406 PetscErrorCode ierr;
2407 Mat_Redundant *redund=(Mat_Redundant*)ptr;
2408 PetscInt i;
2409
2410 PetscFunctionBegin;
2411 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2412 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2413 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2414 for (i=0; i<redund->nrecvs; i++){
2415 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2416 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2417 }
2418 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2419 ierr = PetscFree(redund);CHKERRQ(ierr);
2420 PetscFunctionReturn(0);
2421 }
2422
2423 #undef __FUNCT__
2424 #define __FUNCT__ "MatDestroy_MatRedundant"
2425 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2426 {
2427 PetscErrorCode ierr;
2428 PetscContainer container;
2429 Mat_Redundant *redund=PETSC_NULL;
2430
2431 PetscFunctionBegin;
2432 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2433 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2434 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2435 A->ops->destroy = redund->Destroy;
2436 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
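/* Composing a null object drops the last reference to the "Mat_Redundant" container, which frees the communication buffers through PetscContainerDestroy_MatRedundant(); the matrix is then destroyed with the original destroy routine restored above (comment added for clarity) */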
2437 if (A->ops->destroy) {
2438 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2439 }
2440 PetscFunctionReturn(0);
2441 }
2442
2443 #undef __FUNCT__
2444 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2445 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2446 {
2447 PetscMPIInt rank,size;
2448 MPI_Comm comm=((PetscObject)mat)->comm;
2449 PetscErrorCode ierr;
2450 PetscInt nsends=0,nrecvs=0,i,rownz_max=0;
2451 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2452 PetscInt *rowrange=mat->rmap->range;
2453 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2454 Mat A=aij->A,B=aij->B,C=*matredundant;
2455 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2456 PetscScalar *sbuf_a;
2457 PetscInt nzlocal=a->nz+b->nz;
2458 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2459 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2460 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2461 MatScalar *aworkA,*aworkB;
2462 PetscScalar *vals;
2463 PetscMPIInt tag1,tag2,tag3,imdex;
2464 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2465 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2466 MPI_Status recv_status,*send_status;
2467 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2468 PetscInt **rbuf_j=PETSC_NULL;
2469 PetscScalar **rbuf_a=PETSC_NULL;
2470 Mat_Redundant *redund=PETSC_NULL;
2471 PetscContainer container;
2472
2473 PetscFunctionBegin;
2474 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2475 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2476
2477 if (reuse == MAT_REUSE_MATRIX) {
2478 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2479 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2480 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2481 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2482 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2483 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2484 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2485 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix.
Wrong nzlocal"); 2486 2487 nsends = redund->nsends; 2488 nrecvs = redund->nrecvs; 2489 send_rank = redund->send_rank; 2490 recv_rank = redund->recv_rank; 2491 sbuf_nz = redund->sbuf_nz; 2492 rbuf_nz = redund->rbuf_nz; 2493 sbuf_j = redund->sbuf_j; 2494 sbuf_a = redund->sbuf_a; 2495 rbuf_j = redund->rbuf_j; 2496 rbuf_a = redund->rbuf_a; 2497 } 2498 2499 if (reuse == MAT_INITIAL_MATRIX){ 2500 PetscMPIInt subrank,subsize; 2501 PetscInt nleftover,np_subcomm; 2502 /* get the destination processors' id send_rank, nsends and nrecvs */ 2503 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2504 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2505 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2506 np_subcomm = size/nsubcomm; 2507 nleftover = size - nsubcomm*np_subcomm; 2508 nsends = 0; nrecvs = 0; 2509 for (i=0; i<size; i++){ /* i=rank*/ 2510 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2511 send_rank[nsends] = i; nsends++; 2512 recv_rank[nrecvs++] = i; 2513 } 2514 } 2515 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2516 i = size-nleftover-1; 2517 j = 0; 2518 while (j < nsubcomm - nleftover){ 2519 send_rank[nsends++] = i; 2520 i--; j++; 2521 } 2522 } 2523 2524 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2525 for (i=0; i<nleftover; i++){ 2526 recv_rank[nrecvs++] = size-nleftover+i; 2527 } 2528 } 2529 2530 /* allocate sbuf_j, sbuf_a */ 2531 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2532 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2533 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2534 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2535 2536 /* copy mat's local entries into the buffers */ 2537 if (reuse == MAT_INITIAL_MATRIX){ 2538 rownz_max = 0; 2539 rptr = sbuf_j; 2540 cols = sbuf_j + rend-rstart + 1; 2541 vals = sbuf_a; 2542 rptr[0] = 0; 2543 for (i=0; i<rend-rstart; i++){ 2544 row = i + rstart; 2545 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2546 ncols = nzA + nzB; 2547 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2548 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2549 /* load the column indices for this row into cols */ 2550 lwrite = 0; 2551 for (l=0; l<nzB; l++) { 2552 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2553 vals[lwrite] = aworkB[l]; 2554 cols[lwrite++] = ctmp; 2555 } 2556 } 2557 for (l=0; l<nzA; l++){ 2558 vals[lwrite] = aworkA[l]; 2559 cols[lwrite++] = cstart + cworkA[l]; 2560 } 2561 for (l=0; l<nzB; l++) { 2562 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2563 vals[lwrite] = aworkB[l]; 2564 cols[lwrite++] = ctmp; 2565 } 2566 } 2567 vals += ncols; 2568 cols += ncols; 2569 rptr[i+1] = rptr[i] + ncols; 2570 if (rownz_max < ncols) rownz_max = ncols; 2571 } 2572 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2573 } else { /* only copy matrix values into sbuf_a */ 2574 rptr = sbuf_j; 2575 vals = sbuf_a; 2576 rptr[0] = 0; 2577 for (i=0; i<rend-rstart; i++){ 2578 row = i + rstart; 2579 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2580 ncols = nzA + nzB; 2581 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2582 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2583 lwrite = 0; 2584 for (l=0; l<nzB; l++) { 2585 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2586 } 2587 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2588 for 
(l=0; l<nzB; l++) { 2589 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2590 } 2591 vals += ncols; 2592 rptr[i+1] = rptr[i] + ncols; 2593 } 2594 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2595 2596 /* send nzlocal to others, and recv other's nzlocal */ 2597 /*--------------------------------------------------*/ 2598 if (reuse == MAT_INITIAL_MATRIX){ 2599 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2600 s_waits2 = s_waits3 + nsends; 2601 s_waits1 = s_waits2 + nsends; 2602 r_waits1 = s_waits1 + nsends; 2603 r_waits2 = r_waits1 + nrecvs; 2604 r_waits3 = r_waits2 + nrecvs; 2605 } else { 2606 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2607 r_waits3 = s_waits3 + nsends; 2608 } 2609 2610 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2611 if (reuse == MAT_INITIAL_MATRIX){ 2612 /* get new tags to keep the communication clean */ 2613 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2614 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2615 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2616 2617 /* post receives of other's nzlocal */ 2618 for (i=0; i<nrecvs; i++){ 2619 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2620 } 2621 /* send nzlocal to others */ 2622 for (i=0; i<nsends; i++){ 2623 sbuf_nz[i] = nzlocal; 2624 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2625 } 2626 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2627 count = nrecvs; 2628 while (count) { 2629 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2630 recv_rank[imdex] = recv_status.MPI_SOURCE; 2631 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2632 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2633 2634 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2635 rbuf_nz[imdex] += i + 2; 2636 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2637 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2638 count--; 2639 } 2640 /* wait on sends of nzlocal */ 2641 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2642 /* send mat->i,j to others, and recv from other's */ 2643 /*------------------------------------------------*/ 2644 for (i=0; i<nsends; i++){ 2645 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2646 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2647 } 2648 /* wait on receives of mat->i,j */ 2649 /*------------------------------*/ 2650 count = nrecvs; 2651 while (count) { 2652 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2653 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2654 count--; 2655 } 2656 /* wait on sends of mat->i,j */ 2657 /*---------------------------*/ 2658 if (nsends) { 2659 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2660 } 2661 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2662 2663 /* post receives, send and receive mat->a */ 2664 
/*----------------------------------------*/ 2665 for (imdex=0; imdex<nrecvs; imdex++) { 2666 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2667 } 2668 for (i=0; i<nsends; i++){ 2669 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2670 } 2671 count = nrecvs; 2672 while (count) { 2673 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2674 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2675 count--; 2676 } 2677 if (nsends) { 2678 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2679 } 2680 2681 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2682 2683 /* create redundant matrix */ 2684 /*-------------------------*/ 2685 if (reuse == MAT_INITIAL_MATRIX){ 2686 /* compute rownz_max for preallocation */ 2687 for (imdex=0; imdex<nrecvs; imdex++){ 2688 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2689 rptr = rbuf_j[imdex]; 2690 for (i=0; i<j; i++){ 2691 ncols = rptr[i+1] - rptr[i]; 2692 if (rownz_max < ncols) rownz_max = ncols; 2693 } 2694 } 2695 2696 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2697 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2698 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2699 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2700 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2701 } else { 2702 C = *matredundant; 2703 } 2704 2705 /* insert local matrix entries */ 2706 rptr = sbuf_j; 2707 cols = sbuf_j + rend-rstart + 1; 2708 vals = sbuf_a; 2709 for (i=0; i<rend-rstart; i++){ 2710 row = i + rstart; 2711 ncols = rptr[i+1] - rptr[i]; 2712 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2713 vals += ncols; 2714 cols += ncols; 2715 } 2716 /* insert received matrix entries */ 2717 for (imdex=0; imdex<nrecvs; imdex++){ 2718 rstart = rowrange[recv_rank[imdex]]; 2719 rend = rowrange[recv_rank[imdex]+1]; 2720 rptr = rbuf_j[imdex]; 2721 cols = rbuf_j[imdex] + rend-rstart + 1; 2722 vals = rbuf_a[imdex]; 2723 for (i=0; i<rend-rstart; i++){ 2724 row = i + rstart; 2725 ncols = rptr[i+1] - rptr[i]; 2726 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2727 vals += ncols; 2728 cols += ncols; 2729 } 2730 } 2731 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2732 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2733 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2734 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2735 if (reuse == MAT_INITIAL_MATRIX) { 2736 PetscContainer container; 2737 *matredundant = C; 2738 /* create a supporting struct and attach it to C for reuse */ 2739 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2740 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2741 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2742 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2743 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2744 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2745 2746 redund->nzlocal = nzlocal; 2747 redund->nsends = nsends; 2748 redund->nrecvs = nrecvs; 2749 
redund->send_rank = send_rank; 2750 redund->recv_rank = recv_rank; 2751 redund->sbuf_nz = sbuf_nz; 2752 redund->rbuf_nz = rbuf_nz; 2753 redund->sbuf_j = sbuf_j; 2754 redund->sbuf_a = sbuf_a; 2755 redund->rbuf_j = rbuf_j; 2756 redund->rbuf_a = rbuf_a; 2757 2758 redund->Destroy = C->ops->destroy; 2759 C->ops->destroy = MatDestroy_MatRedundant; 2760 } 2761 PetscFunctionReturn(0); 2762 } 2763 2764 #undef __FUNCT__ 2765 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2766 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2767 { 2768 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2769 PetscErrorCode ierr; 2770 PetscInt i,*idxb = 0; 2771 PetscScalar *va,*vb; 2772 Vec vtmp; 2773 2774 PetscFunctionBegin; 2775 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2776 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2777 if (idx) { 2778 for (i=0; i<A->rmap->n; i++) { 2779 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2780 } 2781 } 2782 2783 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2784 if (idx) { 2785 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2786 } 2787 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2788 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2789 2790 for (i=0; i<A->rmap->n; i++){ 2791 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2792 va[i] = vb[i]; 2793 if (idx) idx[i] = a->garray[idxb[i]]; 2794 } 2795 } 2796 2797 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2798 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2799 ierr = PetscFree(idxb);CHKERRQ(ierr); 2800 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2801 PetscFunctionReturn(0); 2802 } 2803 2804 #undef __FUNCT__ 2805 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2806 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2807 { 2808 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2809 PetscErrorCode ierr; 2810 PetscInt i,*idxb = 0; 2811 PetscScalar *va,*vb; 2812 Vec vtmp; 2813 2814 PetscFunctionBegin; 2815 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2816 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2817 if (idx) { 2818 for (i=0; i<A->cmap->n; i++) { 2819 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2820 } 2821 } 2822 2823 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2824 if (idx) { 2825 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2826 } 2827 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2828 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2829 2830 for (i=0; i<A->rmap->n; i++){ 2831 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2832 va[i] = vb[i]; 2833 if (idx) idx[i] = a->garray[idxb[i]]; 2834 } 2835 } 2836 2837 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2838 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2839 ierr = PetscFree(idxb);CHKERRQ(ierr); 2840 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2841 PetscFunctionReturn(0); 2842 } 2843 2844 #undef __FUNCT__ 2845 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2846 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2847 { 2848 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2849 PetscInt n = A->rmap->n; 2850 PetscInt cstart = A->cmap->rstart; 2851 PetscInt *cmap = mat->garray; 2852 PetscInt *diagIdx, *offdiagIdx; 2853 Vec diagV, offdiagV; 2854 PetscScalar *a, *diagA, *offdiagA; 2855 PetscInt r; 2856 PetscErrorCode ierr; 2857 2858 PetscFunctionBegin; 2859 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2860 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2861 ierr = VecCreateSeq(((PetscObject)A)->comm, n, 
&offdiagV);CHKERRQ(ierr); 2862 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2863 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2864 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2865 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2866 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2867 for(r = 0; r < n; ++r) { 2868 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2869 a[r] = diagA[r]; 2870 idx[r] = cstart + diagIdx[r]; 2871 } else { 2872 a[r] = offdiagA[r]; 2873 idx[r] = cmap[offdiagIdx[r]]; 2874 } 2875 } 2876 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2877 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2878 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2879 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2880 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2881 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2882 PetscFunctionReturn(0); 2883 } 2884 2885 #undef __FUNCT__ 2886 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2887 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2888 { 2889 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2890 PetscInt n = A->rmap->n; 2891 PetscInt cstart = A->cmap->rstart; 2892 PetscInt *cmap = mat->garray; 2893 PetscInt *diagIdx, *offdiagIdx; 2894 Vec diagV, offdiagV; 2895 PetscScalar *a, *diagA, *offdiagA; 2896 PetscInt r; 2897 PetscErrorCode ierr; 2898 2899 PetscFunctionBegin; 2900 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2901 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2902 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2903 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2904 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2905 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2906 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2907 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2908 for(r = 0; r < n; ++r) { 2909 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2910 a[r] = diagA[r]; 2911 idx[r] = cstart + diagIdx[r]; 2912 } else { 2913 a[r] = offdiagA[r]; 2914 idx[r] = cmap[offdiagIdx[r]]; 2915 } 2916 } 2917 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2918 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2919 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2920 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2921 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2922 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2923 PetscFunctionReturn(0); 2924 } 2925 2926 #undef __FUNCT__ 2927 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2928 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2929 { 2930 PetscErrorCode ierr; 2931 Mat *dummy; 2932 2933 PetscFunctionBegin; 2934 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2935 *newmat = *dummy; 2936 ierr = PetscFree(dummy);CHKERRQ(ierr); 2937 PetscFunctionReturn(0); 2938 } 2939 2940 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2941 2942 #undef __FUNCT__ 2943 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 2944 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,PetscScalar **values) 2945 { 2946 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2947 PetscErrorCode ierr; 2948 2949 PetscFunctionBegin; 2950 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2951 PetscFunctionReturn(0); 2952 } 2953 2954 2955 /* -------------------------------------------------------------------*/ 2956 static 
struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2957 MatGetRow_MPIAIJ, 2958 MatRestoreRow_MPIAIJ, 2959 MatMult_MPIAIJ, 2960 /* 4*/ MatMultAdd_MPIAIJ, 2961 MatMultTranspose_MPIAIJ, 2962 MatMultTransposeAdd_MPIAIJ, 2963 #ifdef PETSC_HAVE_PBGL 2964 MatSolve_MPIAIJ, 2965 #else 2966 0, 2967 #endif 2968 0, 2969 0, 2970 /*10*/ 0, 2971 0, 2972 0, 2973 MatSOR_MPIAIJ, 2974 MatTranspose_MPIAIJ, 2975 /*15*/ MatGetInfo_MPIAIJ, 2976 MatEqual_MPIAIJ, 2977 MatGetDiagonal_MPIAIJ, 2978 MatDiagonalScale_MPIAIJ, 2979 MatNorm_MPIAIJ, 2980 /*20*/ MatAssemblyBegin_MPIAIJ, 2981 MatAssemblyEnd_MPIAIJ, 2982 MatSetOption_MPIAIJ, 2983 MatZeroEntries_MPIAIJ, 2984 /*24*/ MatZeroRows_MPIAIJ, 2985 0, 2986 #ifdef PETSC_HAVE_PBGL 2987 0, 2988 #else 2989 0, 2990 #endif 2991 0, 2992 0, 2993 /*29*/ MatSetUp_MPIAIJ, 2994 #ifdef PETSC_HAVE_PBGL 2995 0, 2996 #else 2997 0, 2998 #endif 2999 0, 3000 0, 3001 0, 3002 /*34*/ MatDuplicate_MPIAIJ, 3003 0, 3004 0, 3005 0, 3006 0, 3007 /*39*/ MatAXPY_MPIAIJ, 3008 MatGetSubMatrices_MPIAIJ, 3009 MatIncreaseOverlap_MPIAIJ, 3010 MatGetValues_MPIAIJ, 3011 MatCopy_MPIAIJ, 3012 /*44*/ MatGetRowMax_MPIAIJ, 3013 MatScale_MPIAIJ, 3014 0, 3015 0, 3016 MatZeroRowsColumns_MPIAIJ, 3017 /*49*/ 0, 3018 0, 3019 0, 3020 0, 3021 0, 3022 /*54*/ MatFDColoringCreate_MPIAIJ, 3023 0, 3024 MatSetUnfactored_MPIAIJ, 3025 MatPermute_MPIAIJ, 3026 0, 3027 /*59*/ MatGetSubMatrix_MPIAIJ, 3028 MatDestroy_MPIAIJ, 3029 MatView_MPIAIJ, 3030 0, 3031 0, 3032 /*64*/ 0, 3033 0, 3034 0, 3035 0, 3036 0, 3037 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3038 MatGetRowMinAbs_MPIAIJ, 3039 0, 3040 MatSetColoring_MPIAIJ, 3041 #if defined(PETSC_HAVE_ADIC) 3042 MatSetValuesAdic_MPIAIJ, 3043 #else 3044 0, 3045 #endif 3046 MatSetValuesAdifor_MPIAIJ, 3047 /*75*/ MatFDColoringApply_AIJ, 3048 0, 3049 0, 3050 0, 3051 0, 3052 /*80*/ 0, 3053 0, 3054 0, 3055 /*83*/ MatLoad_MPIAIJ, 3056 0, 3057 0, 3058 0, 3059 0, 3060 0, 3061 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3062 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3063 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3064 MatPtAP_Basic, 3065 MatPtAPSymbolic_MPIAIJ, 3066 /*94*/ MatPtAPNumeric_MPIAIJ, 3067 0, 3068 0, 3069 0, 3070 0, 3071 /*99*/ 0, 3072 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3073 MatPtAPNumeric_MPIAIJ_MPIAIJ, 3074 MatConjugate_MPIAIJ, 3075 0, 3076 /*104*/MatSetValuesRow_MPIAIJ, 3077 MatRealPart_MPIAIJ, 3078 MatImaginaryPart_MPIAIJ, 3079 0, 3080 0, 3081 /*109*/0, 3082 MatGetRedundantMatrix_MPIAIJ, 3083 MatGetRowMin_MPIAIJ, 3084 0, 3085 0, 3086 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3087 0, 3088 0, 3089 0, 3090 0, 3091 /*119*/0, 3092 0, 3093 0, 3094 0, 3095 MatGetMultiProcBlock_MPIAIJ, 3096 /*124*/MatFindNonZeroRows_MPIAIJ, 3097 MatGetColumnNorms_MPIAIJ, 3098 MatInvertBlockDiagonal_MPIAIJ, 3099 0, 3100 MatGetSubMatricesParallel_MPIAIJ, 3101 /*129*/0, 3102 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3103 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3104 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3105 0, 3106 /*134*/0, 3107 0, 3108 0, 3109 0, 3110 0 3111 }; 3112 3113 /* ----------------------------------------------------------------------------------------*/ 3114 3115 EXTERN_C_BEGIN 3116 #undef __FUNCT__ 3117 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3118 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3119 { 3120 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3121 PetscErrorCode ierr; 3122 3123 PetscFunctionBegin; 3124 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3125 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3126 PetscFunctionReturn(0); 3127 } 3128 EXTERN_C_END 3129 3130 EXTERN_C_BEGIN 3131 #undef __FUNCT__ 3132 #define __FUNCT__ 
"MatRetrieveValues_MPIAIJ" 3133 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3134 { 3135 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3136 PetscErrorCode ierr; 3137 3138 PetscFunctionBegin; 3139 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3140 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3141 PetscFunctionReturn(0); 3142 } 3143 EXTERN_C_END 3144 3145 EXTERN_C_BEGIN 3146 #undef __FUNCT__ 3147 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3148 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3149 { 3150 Mat_MPIAIJ *b; 3151 PetscErrorCode ierr; 3152 PetscInt i; 3153 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3154 3155 PetscFunctionBegin; 3156 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3157 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3158 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3159 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3160 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3161 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3162 3163 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3164 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3165 if (d_nnz) { 3166 for (i=0; i<B->rmap->n; i++) { 3167 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3168 } 3169 } 3170 if (o_nnz) { 3171 for (i=0; i<B->rmap->n; i++) { 3172 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3173 } 3174 } 3175 b = (Mat_MPIAIJ*)B->data; 3176 3177 if (!B->preallocated) { 3178 /* Explicitly create 2 MATSEQAIJ matrices. */ 3179 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3180 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3181 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3182 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3183 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3184 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3185 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3186 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3187 } 3188 3189 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3190 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3191 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). 
*/ 3192 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3193 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3194 B->preallocated = PETSC_TRUE; 3195 PetscFunctionReturn(0); 3196 } 3197 EXTERN_C_END 3198 3199 #undef __FUNCT__ 3200 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3201 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3202 { 3203 Mat mat; 3204 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3205 PetscErrorCode ierr; 3206 3207 PetscFunctionBegin; 3208 *newmat = 0; 3209 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3210 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3211 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3212 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3213 a = (Mat_MPIAIJ*)mat->data; 3214 3215 mat->factortype = matin->factortype; 3216 mat->rmap->bs = matin->rmap->bs; 3217 mat->assembled = PETSC_TRUE; 3218 mat->insertmode = NOT_SET_VALUES; 3219 mat->preallocated = PETSC_TRUE; 3220 3221 a->size = oldmat->size; 3222 a->rank = oldmat->rank; 3223 a->donotstash = oldmat->donotstash; 3224 a->roworiented = oldmat->roworiented; 3225 a->rowindices = 0; 3226 a->rowvalues = 0; 3227 a->getrowactive = PETSC_FALSE; 3228 3229 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3230 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3231 3232 if (oldmat->colmap) { 3233 #if defined (PETSC_USE_CTABLE) 3234 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3235 #else 3236 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3237 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3238 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3239 #endif 3240 } else a->colmap = 0; 3241 if (oldmat->garray) { 3242 PetscInt len; 3243 len = oldmat->B->cmap->n; 3244 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3245 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3246 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3247 } else a->garray = 0; 3248 3249 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3250 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3251 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3252 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3253 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3254 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3255 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3256 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3257 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3258 *newmat = mat; 3259 PetscFunctionReturn(0); 3260 } 3261 3262 3263 3264 #undef __FUNCT__ 3265 #define __FUNCT__ "MatLoad_MPIAIJ" 3266 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3267 { 3268 PetscScalar *vals,*svals; 3269 MPI_Comm comm = ((PetscObject)viewer)->comm; 3270 PetscErrorCode ierr; 3271 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3272 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3273 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3274 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = 
PETSC_NULL,jj,*mycols,*smycols; 3275 PetscInt cend,cstart,n,*rowners,sizesset=1; 3276 int fd; 3277 3278 PetscFunctionBegin; 3279 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3280 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3281 if (!rank) { 3282 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3283 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3284 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3285 } 3286 3287 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3288 3289 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3290 M = header[1]; N = header[2]; 3291 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3292 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3293 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3294 3295 /* If global sizes are set, check if they are consistent with that given in the file */ 3296 if (sizesset) { 3297 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3298 } 3299 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3300 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3301 3302 /* determine ownership of all rows */ 3303 if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */ 3304 else m = newMat->rmap->n; /* Set by user */ 3305 3306 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3307 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3308 3309 /* First process needs enough room for process with most rows */ 3310 if (!rank) { 3311 mmax = rowners[1]; 3312 for (i=2; i<size; i++) { 3313 mmax = PetscMax(mmax,rowners[i]); 3314 } 3315 } else mmax = m; 3316 3317 rowners[0] = 0; 3318 for (i=2; i<=size; i++) { 3319 rowners[i] += rowners[i-1]; 3320 } 3321 rstart = rowners[rank]; 3322 rend = rowners[rank+1]; 3323 3324 /* distribute row lengths to all processors */ 3325 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3326 if (!rank) { 3327 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3328 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3329 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3330 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3331 for (j=0; j<m; j++) { 3332 procsnz[0] += ourlens[j]; 3333 } 3334 for (i=1; i<size; i++) { 3335 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3336 /* calculate the number of nonzeros on each processor */ 3337 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3338 procsnz[i] += rowlengths[j]; 3339 } 3340 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3341 } 3342 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3343 } else { 3344 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3345 } 3346 3347 if (!rank) { 3348 /* determine max buffer needed and allocate it */ 3349 maxnz = 0; 3350 for (i=0; i<size; i++) { 3351 maxnz = PetscMax(maxnz,procsnz[i]); 3352 } 3353 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3354 3355 /* read in my part of the matrix column indices */ 3356 
nz = procsnz[0]; 3357 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3358 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3359 3360 /* read in every one elses and ship off */ 3361 for (i=1; i<size; i++) { 3362 nz = procsnz[i]; 3363 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3364 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3365 } 3366 ierr = PetscFree(cols);CHKERRQ(ierr); 3367 } else { 3368 /* determine buffer space needed for message */ 3369 nz = 0; 3370 for (i=0; i<m; i++) { 3371 nz += ourlens[i]; 3372 } 3373 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3374 3375 /* receive message of column indices*/ 3376 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3377 } 3378 3379 /* determine column ownership if matrix is not square */ 3380 if (N != M) { 3381 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3382 else n = newMat->cmap->n; 3383 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3384 cstart = cend - n; 3385 } else { 3386 cstart = rstart; 3387 cend = rend; 3388 n = cend - cstart; 3389 } 3390 3391 /* loop over local rows, determining number of off diagonal entries */ 3392 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3393 jj = 0; 3394 for (i=0; i<m; i++) { 3395 for (j=0; j<ourlens[i]; j++) { 3396 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3397 jj++; 3398 } 3399 } 3400 3401 for (i=0; i<m; i++) { 3402 ourlens[i] -= offlens[i]; 3403 } 3404 if (!sizesset) { 3405 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3406 } 3407 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3408 3409 for (i=0; i<m; i++) { 3410 ourlens[i] += offlens[i]; 3411 } 3412 3413 if (!rank) { 3414 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3415 3416 /* read in my part of the matrix numerical values */ 3417 nz = procsnz[0]; 3418 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3419 3420 /* insert into matrix */ 3421 jj = rstart; 3422 smycols = mycols; 3423 svals = vals; 3424 for (i=0; i<m; i++) { 3425 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3426 smycols += ourlens[i]; 3427 svals += ourlens[i]; 3428 jj++; 3429 } 3430 3431 /* read in other processors and ship out */ 3432 for (i=1; i<size; i++) { 3433 nz = procsnz[i]; 3434 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3435 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3436 } 3437 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3438 } else { 3439 /* receive numeric values */ 3440 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3441 3442 /* receive message of values*/ 3443 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3444 3445 /* insert into matrix */ 3446 jj = rstart; 3447 smycols = mycols; 3448 svals = vals; 3449 for (i=0; i<m; i++) { 3450 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3451 smycols += ourlens[i]; 3452 svals += ourlens[i]; 3453 jj++; 3454 } 3455 } 3456 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3457 ierr = PetscFree(vals);CHKERRQ(ierr); 3458 ierr = PetscFree(mycols);CHKERRQ(ierr); 3459 ierr = PetscFree(rowners);CHKERRQ(ierr); 3460 3461 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3462 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3463 PetscFunctionReturn(0); 3464 } 
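/*
   Illustrative usage sketch (not part of the original source): MatLoad_MPIAIJ() is normally reached
   through the generic MatLoad() interface once the matrix type has been set; the communicator and
   file name below are placeholders.

      Mat         A;
      PetscViewer viewer;
      ierr = PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatLoad(A,viewer);CHKERRQ(ierr);             (dispatches to MatLoad_MPIAIJ() for MATMPIAIJ)
      ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

   Local or global sizes may be fixed with MatSetSizes() before MatLoad(); otherwise they are taken
   from the file header, as the routine above does.
*/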
3465 3466 #undef __FUNCT__ 3467 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3468 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3469 { 3470 PetscErrorCode ierr; 3471 IS iscol_local; 3472 PetscInt csize; 3473 3474 PetscFunctionBegin; 3475 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3476 if (call == MAT_REUSE_MATRIX) { 3477 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3478 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3479 } else { 3480 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3481 } 3482 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3483 if (call == MAT_INITIAL_MATRIX) { 3484 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3485 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3486 } 3487 PetscFunctionReturn(0); 3488 } 3489 3490 #undef __FUNCT__ 3491 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3492 /* 3493 Not great since it makes two copies of the submatrix, first an SeqAIJ 3494 in local and then by concatenating the local matrices the end result. 3495 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3496 3497 Note: This requires a sequential iscol with all indices. 3498 */ 3499 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3500 { 3501 PetscErrorCode ierr; 3502 PetscMPIInt rank,size; 3503 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3504 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3505 Mat *local,M,Mreuse; 3506 MatScalar *vwork,*aa; 3507 MPI_Comm comm = ((PetscObject)mat)->comm; 3508 Mat_SeqAIJ *aij; 3509 3510 3511 PetscFunctionBegin; 3512 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3513 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3514 3515 if (call == MAT_REUSE_MATRIX) { 3516 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3517 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3518 local = &Mreuse; 3519 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3520 } else { 3521 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3522 Mreuse = *local; 3523 ierr = PetscFree(local);CHKERRQ(ierr); 3524 } 3525 3526 /* 3527 m - number of local rows 3528 n - number of columns (same on all processors) 3529 rstart - first row in new global matrix generated 3530 */ 3531 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3532 if (call == MAT_INITIAL_MATRIX) { 3533 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3534 ii = aij->i; 3535 jj = aij->j; 3536 3537 /* 3538 Determine the number of non-zeros in the diagonal and off-diagonal 3539 portions of the matrix in order to do correct preallocation 3540 */ 3541 3542 /* first get start and end of "diagonal" columns */ 3543 if (csize == PETSC_DECIDE) { 3544 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3545 if (mglobal == n) { /* square matrix */ 3546 nlocal = m; 3547 } else { 3548 nlocal = n/size + ((n % size) > rank); 3549 } 3550 } else { 3551 nlocal = csize; 3552 } 3553 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3554 rstart = rend - nlocal; 3555 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up 
to total number of columns %D",rend,n);
3556
3557 /* next, compute all the lengths */
3558 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3559 olens = dlens + m;
3560 for (i=0; i<m; i++) {
3561 jend = ii[i+1] - ii[i];
3562 olen = 0;
3563 dlen = 0;
3564 for (j=0; j<jend; j++) {
3565 if (*jj < rstart || *jj >= rend) olen++;
3566 else dlen++;
3567 jj++;
3568 }
3569 olens[i] = olen;
3570 dlens[i] = dlen;
3571 }
3572 ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3573 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3574 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3575 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3576 ierr = PetscFree(dlens);CHKERRQ(ierr);
3577 } else {
3578 PetscInt ml,nl;
3579
3580 M = *newmat;
3581 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3582 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3583 ierr = MatZeroEntries(M);CHKERRQ(ierr);
3584 /*
3585 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3586 rather than the slower MatSetValues().
3587 */
3588 M->was_assembled = PETSC_TRUE;
3589 M->assembled = PETSC_FALSE;
3590 }
3591 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3592 aij = (Mat_SeqAIJ*)(Mreuse)->data;
3593 ii = aij->i;
3594 jj = aij->j;
3595 aa = aij->a;
3596 for (i=0; i<m; i++) {
3597 row = rstart + i;
3598 nz = ii[i+1] - ii[i];
3599 cwork = jj; jj += nz;
3600 vwork = aa; aa += nz;
3601 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3602 }
3603
3604 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3605 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3606 *newmat = M;
3607
3608 /* save submatrix used in processor for next request */
3609 if (call == MAT_INITIAL_MATRIX) {
3610 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3611 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3612 }
3613
3614 PetscFunctionReturn(0);
3615 }
3616
3617 EXTERN_C_BEGIN
3618 #undef __FUNCT__
3619 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3620 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3621 {
3622 PetscInt m,cstart, cend,j,nnz,i,d;
3623 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3624 const PetscInt *JJ;
3625 PetscScalar *values;
3626 PetscErrorCode ierr;
3627
3628 PetscFunctionBegin;
3629 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3630
3631 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3632 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3633 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3634 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3635 m = B->rmap->n;
3636 cstart = B->cmap->rstart;
3637 cend = B->cmap->rend;
3638 rstart = B->rmap->rstart;
3639
3640 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3641
3642 #if defined(PETSC_USE_DEBUG)
3643 for (i=0; i<m; i++) {
3644 nnz = Ii[i+1]- Ii[i];
3645 JJ = J + Ii[i];
3646 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3647 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3648 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3649 }
3650 #endif
3651
3652 for (i=0; i<m; i++) {
3653 nnz = Ii[i+1]- Ii[i];
3654 JJ = J + Ii[i];
3655 nnz_max = PetscMax(nnz_max,nnz);
3656 d = 0;
3657 for (j=0; j<nnz; j++) {
3658 if (cstart <= JJ[j] && JJ[j] < cend) d++;
3659 }
3660 d_nnz[i] = d;
3661 o_nnz[i] = nnz - d;
3662 }
3663 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3664 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3665
3666 if (v) values = (PetscScalar*)v;
3667 else {
3668 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3669 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3670 }
3671
3672 for (i=0; i<m; i++) {
3673 ii = i + rstart;
3674 nnz = Ii[i+1]- Ii[i];
3675 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3676 }
3677 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3678 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3679
3680 if (!v) {
3681 ierr = PetscFree(values);CHKERRQ(ierr);
3682 }
3683 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3684 PetscFunctionReturn(0);
3685 }
3686 EXTERN_C_END
3687
3688 #undef __FUNCT__
3689 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3690 /*@
3691 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3692 (the default parallel PETSc format).
3693
3694 Collective on MPI_Comm
3695
3696 Input Parameters:
3697 + B - the matrix
3698 . i - the indices into j for the start of each local row (starts with zero)
3699 . j - the column indices for each local row (starts with zero)
3700 - v - optional values in the matrix
3701
3702 Level: developer
3703
3704 Notes:
3705 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3706 thus you CANNOT change the matrix entries by changing the values of v[] after you have
3707 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3708
3709 The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3710
3711 The format used for the sparse matrix input is equivalent to a
3712 row-major ordering, i.e., for the following matrix, the input data expected is
3713 as shown:
3714
3715 1 0 0
3716 2 0 3 P0
3717 -------
3718 4 5 6 P1
3719
3720 Process0 [P0]: rows_owned=[0,1]
3721 i = {0,1,3} [size = nrow+1 = 2+1]
3722 j = {0,0,2} [size = nz = 3]
3723 v = {1,2,3} [size = nz = 3]
3724
3725 Process1 [P1]: rows_owned=[2]
3726 i = {0,3} [size = nrow+1 = 1+1]
3727 j = {0,1,2} [size = nz = 3]
3728 v = {4,5,6} [size = nz = 3]
3729
3730 .keywords: matrix, aij, compressed row, sparse, parallel
3731
3732 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3733 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3734 @*/
3735 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3736 {
3737 PetscErrorCode ierr;
3738
3739 PetscFunctionBegin;
3740 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3741 PetscFunctionReturn(0);
3742 }
3743
3744 #undef __FUNCT__
3745 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3746 /*@C
3747 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3748 (the default parallel PETSc format).
For good matrix assembly performance
3749 the user should preallocate the matrix storage by setting the parameters
3750 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
3751 performance can be increased by more than a factor of 50.
3752
3753 Collective on MPI_Comm
3754
3755 Input Parameters:
3756 + A - the matrix
3757 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
3758 (same value is used for all local rows)
3759 . d_nnz - array containing the number of nonzeros in the various rows of the
3760 DIAGONAL portion of the local submatrix (possibly different for each row)
3761 or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3762 The size of this array is equal to the number of local rows, i.e., 'm'.
3763 For matrices that will be factored, you must leave room for (and set)
3764 the diagonal entry even if it is zero.
3765 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
3766 submatrix (same value is used for all local rows).
3767 - o_nnz - array containing the number of nonzeros in the various rows of the
3768 OFF-DIAGONAL portion of the local submatrix (possibly different for
3769 each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3770 structure. The size of this array is equal to the number
3771 of local rows, i.e., 'm'.
3772
3773 If the *_nnz parameter is given then the *_nz parameter is ignored.
3774
3775 The AIJ format (also called the Yale sparse matrix format or
3776 compressed row storage (CSR)) is fully compatible with standard Fortran 77
3777 storage. The stored row and column indices begin with zero.
3778 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3779
3780 The parallel matrix is partitioned such that the first m0 rows belong to
3781 process 0, the next m1 rows belong to process 1, the next m2 rows belong
3782 to process 2 etc., where m0,m1,m2... are the input parameter 'm'.
3783
3784 The DIAGONAL portion of the local submatrix of a processor can be defined
3785 as the submatrix which is obtained by extracting the part corresponding to
3786 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3787 first row that belongs to the processor, r2 is the last row belonging to
3788 this processor, and c1-c2 is the range of indices of the local part of a
3789 vector suitable for applying the matrix to. This is an mxn matrix. In the
3790 common case of a square matrix, the row and column ranges are the same and
3791 the DIAGONAL part is also square. The remaining portion of the local
3792 submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3793
3794 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
3795
3796 You can call MatGetInfo() to get information on how effective the preallocation was;
3797 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3798 You can also run with the option -info and look for messages with the string
3799 malloc in them to see if additional memory allocation was needed.
3800
3801 Example usage:
3802
3803 Consider the following 8x8 matrix with 34 non-zero values, that is
3804 assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3805 proc1 owns 3 rows, proc2 owns 2 rows.
This division can be shown
3806 as follows:
3807
3808 .vb
3809 1 2 0 | 0 3 0 | 0 4
3810 Proc0 0 5 6 | 7 0 0 | 8 0
3811 9 0 10 | 11 0 0 | 12 0
3812 -------------------------------------
3813 13 0 14 | 15 16 17 | 0 0
3814 Proc1 0 18 0 | 19 20 21 | 0 0
3815 0 0 0 | 22 23 0 | 24 0
3816 -------------------------------------
3817 Proc2 25 26 27 | 0 0 28 | 29 0
3818 30 0 0 | 31 32 33 | 0 34
3819 .ve
3820
3821 This can be represented as a collection of submatrices as:
3822
3823 .vb
3824 A B C
3825 D E F
3826 G H I
3827 .ve
3828
3829 Where the submatrices A,B,C are owned by proc0, D,E,F are
3830 owned by proc1, G,H,I are owned by proc2.
3831
3832 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3833 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3834 The 'M','N' parameters are 8,8, and have the same values on all procs.
3835
3836 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3837 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3838 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3839 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3840 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3841 matrix, and [DF] as another SeqAIJ matrix.
3842
3843 When d_nz, o_nz parameters are specified, d_nz storage elements are
3844 allocated for every row of the local diagonal submatrix, and o_nz
3845 storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3846 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
3847 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3848 In this case, the values of d_nz,o_nz are:
3849 .vb
3850 proc0 : d_nz = 2, o_nz = 2
3851 proc1 : d_nz = 3, o_nz = 2
3852 proc2 : d_nz = 1, o_nz = 4
3853 .ve
3854 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3855 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3856 for proc2, i.e., we are using 12+15+10=37 storage locations to store
3857 34 values.
3858
3859 When d_nnz, o_nnz parameters are specified, the storage is specified
3860 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3861 In the above case the values for d_nnz,o_nnz are:
3862 .vb
3863 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3864 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3865 proc2: d_nnz = [1,1] and o_nnz = [4,4]
3866 .ve
3867 Here the space allocated is the sum of all the above values, i.e., 34, and
3868 hence pre-allocation is perfect.
3869
3870 Level: intermediate
3871
3872 .keywords: matrix, aij, compressed row, sparse, parallel
3873
3874 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3875 MPIAIJ, MatGetInfo()
3876 @*/
3877 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3878 {
3879 PetscErrorCode ierr;
3880
3881 PetscFunctionBegin;
3882 PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3883 PetscValidType(B,1);
3884 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3885 PetscFunctionReturn(0);
3886 }
3887
3888 #undef __FUNCT__
3889 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3890 /*@
3891 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
3892 in standard CSR format.
3893
3894 Collective on MPI_Comm
3895
3896 Input Parameters:
3897 + comm - MPI communicator
3898 . m - number of local rows (Cannot be PETSC_DECIDE)
3899 . n - This value should be the same as the local size used in creating the
3900 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3901 calculated if N is given) For square matrices n is almost always m.
3902 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3903 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3904 . i - row indices
3905 . j - column indices
3906 - a - matrix values
3907
3908 Output Parameter:
3909 . mat - the matrix
3910
3911 Level: intermediate
3912
3913 Notes:
3914 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3915 thus you CANNOT change the matrix entries by changing the values of a[] after you have
3916 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3917
3918 The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3919
3920 The format used for the sparse matrix input is equivalent to a
3921 row-major ordering, i.e., for the following matrix, the input data expected is
3922 as shown:
3923
3924 1 0 0
3925 2 0 3 P0
3926 -------
3927 4 5 6 P1
3928
3929 Process0 [P0]: rows_owned=[0,1]
3930 i = {0,1,3} [size = nrow+1 = 2+1]
3931 j = {0,0,2} [size = nz = 3]
3932 v = {1,2,3} [size = nz = 3]
3933
3934 Process1 [P1]: rows_owned=[2]
3935 i = {0,3} [size = nrow+1 = 1+1]
3936 j = {0,1,2} [size = nz = 3]
3937 v = {4,5,6} [size = nz = 3]
3938
3939 .keywords: matrix, aij, compressed row, sparse, parallel
3940
3941 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3942 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3943 @*/
3944 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3945 {
3946 PetscErrorCode ierr;
3947
3948 PetscFunctionBegin;
3949 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3950 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3951 ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3952 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3953 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3954 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3955 PetscFunctionReturn(0);
3956 }
3957
3958 #undef __FUNCT__
3959 #define __FUNCT__ "MatCreateAIJ"
3960 /*@C
3961 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
3962 (the default parallel PETSc format). For good matrix assembly performance
3963 the user should preallocate the matrix storage by setting the parameters
3964 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
3965 performance can be increased by more than a factor of 50.
3966
3967 Collective on MPI_Comm
3968
3969 Input Parameters:
3970 + comm - MPI communicator
3971 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3972 This value should be the same as the local size used in creating the
3973 y vector for the matrix-vector product y = Ax.
3974 . n - This value should be the same as the local size used in creating the
3975 x vector for the matrix-vector product y = Ax.
(or PETSC_DECIDE to have
3976 calculated if N is given) For square matrices n is almost always m.
3977 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3978 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3979 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
3980 (same value is used for all local rows)
3981 . d_nnz - array containing the number of nonzeros in the various rows of the
3982 DIAGONAL portion of the local submatrix (possibly different for each row)
3983 or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3984 The size of this array is equal to the number of local rows, i.e., 'm'.
3985 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
3986 submatrix (same value is used for all local rows).
3987 - o_nnz - array containing the number of nonzeros in the various rows of the
3988 OFF-DIAGONAL portion of the local submatrix (possibly different for
3989 each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3990 structure. The size of this array is equal to the number
3991 of local rows, i.e., 'm'.
3992
3993 Output Parameter:
3994 . A - the matrix
3995
3996 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3997 MatXXXXSetPreallocation() paradigm instead of this routine directly.
3998 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3999
4000 Notes:
4001 If the *_nnz parameter is given then the *_nz parameter is ignored.
4002
4003 m,n,M,N parameters specify the size of the matrix, and its partitioning across
4004 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4005 storage requirements for this matrix.
4006
4007 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4008 processor then it must be used on all processors that share the object for
4009 that argument.
4010
4011 The user MUST specify either the local or global matrix dimensions
4012 (possibly both).
4013
4014 The parallel matrix is partitioned across processors such that the
4015 first m0 rows belong to process 0, the next m1 rows belong to
4016 process 1, the next m2 rows belong to process 2 etc., where
4017 m0,m1,m2,.. are the input parameter 'm', i.e., each processor stores
4018 values corresponding to an [m x N] submatrix.
4019
4020 The columns are logically partitioned with the n0 columns belonging
4021 to the 0th partition, the next n1 columns belonging to the next
4022 partition etc., where n0,n1,n2... are the input parameter 'n'.
4023
4024 The DIAGONAL portion of the local submatrix on any given processor
4025 is the submatrix corresponding to the rows and columns m,n
4026 of the given processor, i.e., the diagonal matrix on
4027 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1]
4028 etc. The remaining portion of the local submatrix [m x (N-n)]
4029 constitutes the OFF-DIAGONAL portion. The example below better
4030 illustrates this concept.
4031
4032 For a square global matrix we define each processor's diagonal portion
4033 to be its local rows and the corresponding columns (a square submatrix);
4034 each processor's off-diagonal portion encompasses the remainder of the
4035 local matrix (a rectangular submatrix).
4036
4037 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
4038
4039 When calling this routine with a single process communicator, a matrix of
4040 type SEQAIJ is returned.
If a matrix of type MPIAIJ is desired for this
4041 type of communicator, use the construction mechanism:
4042 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4043
4044 By default, this format uses inodes (identical nodes) when possible.
4045 We search for consecutive rows with the same nonzero structure, thereby
4046 reusing matrix information to achieve increased efficiency.
4047
4048 Options Database Keys:
4049 + -mat_no_inode - Do not use inodes
4050 . -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4051 - -mat_aij_oneindex - Internally use indexing starting at 1
4052 rather than 0. Note that when calling MatSetValues(),
4053 the user still MUST index entries starting at 0!
4054
4055
4056 Example usage:
4057
4058 Consider the following 8x8 matrix with 34 non-zero values, that is
4059 assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4060 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4061 as follows:
4062
4063 .vb
4064 1 2 0 | 0 3 0 | 0 4
4065 Proc0 0 5 6 | 7 0 0 | 8 0
4066 9 0 10 | 11 0 0 | 12 0
4067 -------------------------------------
4068 13 0 14 | 15 16 17 | 0 0
4069 Proc1 0 18 0 | 19 20 21 | 0 0
4070 0 0 0 | 22 23 0 | 24 0
4071 -------------------------------------
4072 Proc2 25 26 27 | 0 0 28 | 29 0
4073 30 0 0 | 31 32 33 | 0 34
4074 .ve
4075
4076 This can be represented as a collection of submatrices as:
4077
4078 .vb
4079 A B C
4080 D E F
4081 G H I
4082 .ve
4083
4084 Where the submatrices A,B,C are owned by proc0, D,E,F are
4085 owned by proc1, G,H,I are owned by proc2.
4086
4087 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4088 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4089 The 'M','N' parameters are 8,8, and have the same values on all procs.
4090
4091 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4092 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4093 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4094 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4095 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4096 matrix, and [DF] as another SeqAIJ matrix.
4097
4098 When d_nz, o_nz parameters are specified, d_nz storage elements are
4099 allocated for every row of the local diagonal submatrix, and o_nz
4100 storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4101 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
4102 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4103 In this case, the values of d_nz,o_nz are:
4104 .vb
4105 proc0 : d_nz = 2, o_nz = 2
4106 proc1 : d_nz = 3, o_nz = 2
4107 proc2 : d_nz = 1, o_nz = 4
4108 .ve
4109 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4110 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4111 for proc2, i.e., we are using 12+15+10=37 storage locations to store
4112 34 values.
4113
4114 When d_nnz, o_nnz parameters are specified, the storage is specified
4115 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4116 In the above case the values for d_nnz,o_nnz are:
4117 .vb
4118 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4119 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4120 proc2: d_nnz = [1,1] and o_nnz = [4,4]
4121 .ve
4122 Here the space allocated is the sum of all the above values, i.e., 34, and
4123 hence pre-allocation is perfect.
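 As an illustrative sketch only, proc0 in the example above could create its portion of the
 matrix with exact per-row preallocation as
.vb
 PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
 ierr = MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
 with each process passing its own m, n, d_nnz, and o_nnz values; passing d_nz=2 and o_nz=2
 together with PETSC_NULL arrays would instead use the simpler d_nz/o_nz variant discussed above.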
4124 4125 Level: intermediate 4126 4127 .keywords: matrix, aij, compressed row, sparse, parallel 4128 4129 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4130 MPIAIJ, MatCreateMPIAIJWithArrays() 4131 @*/ 4132 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4133 { 4134 PetscErrorCode ierr; 4135 PetscMPIInt size; 4136 4137 PetscFunctionBegin; 4138 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4139 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4140 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4141 if (size > 1) { 4142 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4143 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4144 } else { 4145 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4146 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4147 } 4148 PetscFunctionReturn(0); 4149 } 4150 4151 #undef __FUNCT__ 4152 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4153 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 4154 { 4155 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4156 4157 PetscFunctionBegin; 4158 *Ad = a->A; 4159 *Ao = a->B; 4160 *colmap = a->garray; 4161 PetscFunctionReturn(0); 4162 } 4163 4164 #undef __FUNCT__ 4165 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4166 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4167 { 4168 PetscErrorCode ierr; 4169 PetscInt i; 4170 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4171 4172 PetscFunctionBegin; 4173 if (coloring->ctype == IS_COLORING_GLOBAL) { 4174 ISColoringValue *allcolors,*colors; 4175 ISColoring ocoloring; 4176 4177 /* set coloring for diagonal portion */ 4178 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4179 4180 /* set coloring for off-diagonal portion */ 4181 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4182 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4183 for (i=0; i<a->B->cmap->n; i++) { 4184 colors[i] = allcolors[a->garray[i]]; 4185 } 4186 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4187 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4188 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4189 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4190 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4191 ISColoringValue *colors; 4192 PetscInt *larray; 4193 ISColoring ocoloring; 4194 4195 /* set coloring for diagonal portion */ 4196 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4197 for (i=0; i<a->A->cmap->n; i++) { 4198 larray[i] = i + A->cmap->rstart; 4199 } 4200 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4201 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4202 for (i=0; i<a->A->cmap->n; i++) { 4203 colors[i] = coloring->colors[larray[i]]; 4204 } 4205 ierr = PetscFree(larray);CHKERRQ(ierr); 4206 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4207 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4208 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4209 4210 /* set coloring for off-diagonal portion */ 4211 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4212 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4213 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4214 for (i=0; i<a->B->cmap->n; i++) { 4215 colors[i] = coloring->colors[larray[i]]; 4216 } 4217 ierr = PetscFree(larray);CHKERRQ(ierr); 4218 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4219 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4220 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4221 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4222 4223 PetscFunctionReturn(0); 4224 } 4225 4226 #if defined(PETSC_HAVE_ADIC) 4227 #undef __FUNCT__ 4228 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 4229 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 4230 { 4231 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4232 PetscErrorCode ierr; 4233 4234 PetscFunctionBegin; 4235 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 4236 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 4237 PetscFunctionReturn(0); 4238 } 4239 #endif 4240 4241 #undef __FUNCT__ 4242 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4243 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4244 { 4245 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4246 PetscErrorCode ierr; 4247 4248 PetscFunctionBegin; 4249 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4250 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4251 PetscFunctionReturn(0); 4252 } 4253 4254 #undef __FUNCT__ 4255 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4256 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4257 { 4258 PetscErrorCode ierr; 4259 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum; 4260 PetscInt *indx; 4261 4262 PetscFunctionBegin; 4263 /* This routine will ONLY return MPIAIJ type matrix */ 4264 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4265 if (n == PETSC_DECIDE){ 4266 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4267 } 4268 /* Check sum(n) = N */ 4269 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPIU_SUM,comm);CHKERRQ(ierr); 4270 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4271 4272 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4273 rstart -= m; 4274 4275 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4276 for (i=0;i<m;i++) { 4277 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4278 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4279 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4280 } 4281 4282 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4283 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4284 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4285 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4286 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4287 PetscFunctionReturn(0); 4288 } 4289 4290 #undef __FUNCT__ 4291 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4292 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4293 { 4294 PetscErrorCode ierr; 4295 PetscInt m,N,i,rstart,nnz,Ii; 4296 PetscInt *indx; 4297 PetscScalar *values; 4298 4299 PetscFunctionBegin; 4300 ierr = 
MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4301 ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4302 for (i=0;i<m;i++) { 4303 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4304 Ii = i + rstart; 4305 ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4306 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4307 } 4308 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4309 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4310 PetscFunctionReturn(0); 4311 } 4312 4313 #undef __FUNCT__ 4314 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4315 /*@ 4316 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4317 matrices from each processor 4318 4319 Collective on MPI_Comm 4320 4321 Input Parameters: 4322 + comm - the communicators the parallel matrix will live on 4323 . inmat - the input sequential matrices 4324 . n - number of local columns (or PETSC_DECIDE) 4325 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4326 4327 Output Parameter: 4328 . outmat - the parallel matrix generated 4329 4330 Level: advanced 4331 4332 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4333 4334 @*/ 4335 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4336 { 4337 PetscErrorCode ierr; 4338 4339 PetscFunctionBegin; 4340 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4341 if (scall == MAT_INITIAL_MATRIX){ 4342 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4343 } 4344 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4345 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4346 PetscFunctionReturn(0); 4347 } 4348 4349 #undef __FUNCT__ 4350 #define __FUNCT__ "MatFileSplit" 4351 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4352 { 4353 PetscErrorCode ierr; 4354 PetscMPIInt rank; 4355 PetscInt m,N,i,rstart,nnz; 4356 size_t len; 4357 const PetscInt *indx; 4358 PetscViewer out; 4359 char *name; 4360 Mat B; 4361 const PetscScalar *values; 4362 4363 PetscFunctionBegin; 4364 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4365 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4366 /* Should this be the type of the diagonal block of A? 
*/ 4367 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4368 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4369 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4370 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4371 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4372 for (i=0;i<m;i++) { 4373 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4374 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4375 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4376 } 4377 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4378 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4379 4380 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4381 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4382 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4383 sprintf(name,"%s.%d",outfile,rank); 4384 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4385 ierr = PetscFree(name); 4386 ierr = MatView(B,out);CHKERRQ(ierr); 4387 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4388 ierr = MatDestroy(&B);CHKERRQ(ierr); 4389 PetscFunctionReturn(0); 4390 } 4391 4392 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4393 #undef __FUNCT__ 4394 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4395 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4396 { 4397 PetscErrorCode ierr; 4398 Mat_Merge_SeqsToMPI *merge; 4399 PetscContainer container; 4400 4401 PetscFunctionBegin; 4402 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4403 if (container) { 4404 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4405 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4406 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4407 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4408 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4409 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4410 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4411 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4412 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4413 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4414 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4415 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4416 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4417 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4418 ierr = PetscFree(merge);CHKERRQ(ierr); 4419 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4420 } 4421 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4422 PetscFunctionReturn(0); 4423 } 4424 4425 #include <../src/mat/utils/freespace.h> 4426 #include <petscbt.h> 4427 4428 #undef __FUNCT__ 4429 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4430 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4431 { 4432 PetscErrorCode ierr; 4433 MPI_Comm comm=((PetscObject)mpimat)->comm; 4434 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4435 PetscMPIInt size,rank,taga,*len_s; 4436 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4437 PetscInt proc,m; 4438 PetscInt **buf_ri,**buf_rj; 4439 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4440 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4441 MPI_Request *s_waits,*r_waits; 4442 MPI_Status *status; 4443 MatScalar *aa=a->a; 4444 MatScalar **abuf_r,*ba_i; 4445 Mat_Merge_SeqsToMPI *merge; 4446 PetscContainer container; 4447 4448 PetscFunctionBegin; 4449 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4450 4451 ierr = 
MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4452 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4453 4454 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4455 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4456 4457 bi = merge->bi; 4458 bj = merge->bj; 4459 buf_ri = merge->buf_ri; 4460 buf_rj = merge->buf_rj; 4461 4462 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4463 owners = merge->rowmap->range; 4464 len_s = merge->len_s; 4465 4466 /* send and recv matrix values */ 4467 /*-----------------------------*/ 4468 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4469 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4470 4471 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4472 for (proc=0,k=0; proc<size; proc++){ 4473 if (!len_s[proc]) continue; 4474 i = owners[proc]; 4475 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4476 k++; 4477 } 4478 4479 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4480 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4481 ierr = PetscFree(status);CHKERRQ(ierr); 4482 4483 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4484 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4485 4486 /* insert mat values of mpimat */ 4487 /*----------------------------*/ 4488 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4489 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4490 4491 for (k=0; k<merge->nrecv; k++){ 4492 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4493 nrows = *(buf_ri_k[k]); 4494 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4495 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4496 } 4497 4498 /* set values of ba */ 4499 m = merge->rowmap->n; 4500 for (i=0; i<m; i++) { 4501 arow = owners[rank] + i; 4502 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4503 bnzi = bi[i+1] - bi[i]; 4504 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4505 4506 /* add local non-zero vals of this proc's seqmat into ba */ 4507 anzi = ai[arow+1] - ai[arow]; 4508 aj = a->j + ai[arow]; 4509 aa = a->a + ai[arow]; 4510 nextaj = 0; 4511 for (j=0; nextaj<anzi; j++){ 4512 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4513 ba_i[j] += aa[nextaj++]; 4514 } 4515 } 4516 4517 /* add received vals into ba */ 4518 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4519 /* i-th row */ 4520 if (i == *nextrow[k]) { 4521 anzi = *(nextai[k]+1) - *nextai[k]; 4522 aj = buf_rj[k] + *(nextai[k]); 4523 aa = abuf_r[k] + *(nextai[k]); 4524 nextaj = 0; 4525 for (j=0; nextaj<anzi; j++){ 4526 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4527 ba_i[j] += aa[nextaj++]; 4528 } 4529 } 4530 nextrow[k]++; nextai[k]++; 4531 } 4532 } 4533 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4534 } 4535 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4536 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4537 4538 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4539 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4540 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4541 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 
4542 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4543 PetscFunctionReturn(0); 4544 } 4545 4546 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4547 4548 #undef __FUNCT__ 4549 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4550 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4551 { 4552 PetscErrorCode ierr; 4553 Mat B_mpi; 4554 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4555 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4556 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4557 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4558 PetscInt len,proc,*dnz,*onz; 4559 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4560 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4561 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4562 MPI_Status *status; 4563 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4564 PetscBT lnkbt; 4565 Mat_Merge_SeqsToMPI *merge; 4566 PetscContainer container; 4567 4568 PetscFunctionBegin; 4569 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4570 4571 /* make sure it is a PETSc comm */ 4572 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4573 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4574 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4575 4576 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4577 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4578 4579 /* determine row ownership */ 4580 /*---------------------------------------------------------*/ 4581 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4582 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4583 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4584 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4585 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4586 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4587 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4588 4589 m = merge->rowmap->n; 4590 M = merge->rowmap->N; 4591 owners = merge->rowmap->range; 4592 4593 /* determine the number of messages to send, their lengths */ 4594 /*---------------------------------------------------------*/ 4595 len_s = merge->len_s; 4596 4597 len = 0; /* length of buf_si[] */ 4598 merge->nsend = 0; 4599 for (proc=0; proc<size; proc++){ 4600 len_si[proc] = 0; 4601 if (proc == rank){ 4602 len_s[proc] = 0; 4603 } else { 4604 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4605 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4606 } 4607 if (len_s[proc]) { 4608 merge->nsend++; 4609 nrows = 0; 4610 for (i=owners[proc]; i<owners[proc+1]; i++){ 4611 if (ai[i+1] > ai[i]) nrows++; 4612 } 4613 len_si[proc] = 2*(nrows+1); 4614 len += len_si[proc]; 4615 } 4616 } 4617 4618 /* determine the number and length of messages to receive for ij-structure */ 4619 /*-------------------------------------------------------------------------*/ 4620 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4621 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4622 4623 /* post the Irecv of j-structure */ 4624 /*-------------------------------*/ 4625 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4626 ierr = 
PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4627 4628 /* post the Isend of j-structure */ 4629 /*--------------------------------*/ 4630 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4631 4632 for (proc=0, k=0; proc<size; proc++){ 4633 if (!len_s[proc]) continue; 4634 i = owners[proc]; 4635 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4636 k++; 4637 } 4638 4639 /* receives and sends of j-structure are complete */ 4640 /*------------------------------------------------*/ 4641 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4642 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4643 4644 /* send and recv i-structure */ 4645 /*---------------------------*/ 4646 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4647 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4648 4649 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4650 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4651 for (proc=0,k=0; proc<size; proc++){ 4652 if (!len_s[proc]) continue; 4653 /* form outgoing message for i-structure: 4654 buf_si[0]: nrows to be sent 4655 [1:nrows]: row index (global) 4656 [nrows+1:2*nrows+1]: i-structure index 4657 */ 4658 /*-------------------------------------------*/ 4659 nrows = len_si[proc]/2 - 1; 4660 buf_si_i = buf_si + nrows+1; 4661 buf_si[0] = nrows; 4662 buf_si_i[0] = 0; 4663 nrows = 0; 4664 for (i=owners[proc]; i<owners[proc+1]; i++){ 4665 anzi = ai[i+1] - ai[i]; 4666 if (anzi) { 4667 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4668 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4669 nrows++; 4670 } 4671 } 4672 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4673 k++; 4674 buf_si += len_si[proc]; 4675 } 4676 4677 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4678 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4679 4680 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4681 for (i=0; i<merge->nrecv; i++){ 4682 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4683 } 4684 4685 ierr = PetscFree(len_si);CHKERRQ(ierr); 4686 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4687 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4688 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4689 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4690 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4691 ierr = PetscFree(status);CHKERRQ(ierr); 4692 4693 /* compute a local seq matrix in each processor */ 4694 /*----------------------------------------------*/ 4695 /* allocate bi array and free space for accumulating nonzero column info */ 4696 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4697 bi[0] = 0; 4698 4699 /* create and initialize a linked list */ 4700 nlnk = N+1; 4701 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4702 4703 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4704 len = 0; 4705 len = ai[owners[rank+1]] - ai[owners[rank]]; 4706 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4707 current_space = free_space; 4708 4709 /* determine symbolic info for each local row */ 4710 ierr = 
PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4711 4712 for (k=0; k<merge->nrecv; k++){ 4713 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 4714 nrows = *buf_ri_k[k]; 4715 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */ 4716 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */ 4717 } 4718 4719 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4720 len = 0; 4721 for (i=0;i<m;i++) { 4722 bnzi = 0; 4723 /* add local non-zero cols of this proc's seqmat into lnk */ 4724 arow = owners[rank] + i; 4725 anzi = ai[arow+1] - ai[arow]; 4726 aj = a->j + ai[arow]; 4727 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4728 bnzi += nlnk; 4729 /* add received col data into lnk */ 4730 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4731 if (i == *nextrow[k]) { /* i-th row */ 4732 anzi = *(nextai[k]+1) - *nextai[k]; 4733 aj = buf_rj[k] + *nextai[k]; 4734 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4735 bnzi += nlnk; 4736 nextrow[k]++; nextai[k]++; 4737 } 4738 } 4739 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4740 4741 /* if free space is not available, make more free space */ 4742 if (current_space->local_remaining<bnzi) { 4743 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr); 4744 nspacedouble++; 4745 } 4746 /* copy data into free space, then initialize lnk */ 4747 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4748 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4749 4750 current_space->array += bnzi; 4751 current_space->local_used += bnzi; 4752 current_space->local_remaining -= bnzi; 4753 4754 bi[i+1] = bi[i] + bnzi; 4755 } 4756 4757 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4758 4759 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4760 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4761 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4762 4763 /* create symbolic parallel matrix B_mpi */ 4764 /*---------------------------------------*/ 4765 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4766 if (n==PETSC_DECIDE) { 4767 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4768 } else { 4769 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4770 } 4771 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4772 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4773 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4774 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4775 4776 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4777 B_mpi->assembled = PETSC_FALSE; 4778 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4779 merge->bi = bi; 4780 merge->bj = bj; 4781 merge->buf_ri = buf_ri; 4782 merge->buf_rj = buf_rj; 4783 merge->coi = PETSC_NULL; 4784 merge->coj = PETSC_NULL; 4785 merge->owners_co = PETSC_NULL; 4786 4787 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4788 4789 /* attach the supporting struct to B_mpi for reuse */ 4790 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4791 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4792 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4793
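  /* the Mat_Merge_SeqsToMPI structure now travels with B_mpi: MatCreateMPIAIJSumSeqAIJNumeric()
     retrieves it with PetscObjectQuery()/PetscContainerGetPointer(), and MatDestroy_MPIAIJ_SeqsToMPI()
     releases it when the matrix is destroyed */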
ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4794 *mpimat = B_mpi; 4795 4796 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4797 PetscFunctionReturn(0); 4798 } 4799 4800 #undef __FUNCT__ 4801 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4802 /*@C 4803 MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding together the sequential 4804 matrices from each processor 4805 4806 Collective on MPI_Comm 4807 4808 Input Parameters: 4809 + comm - the communicator the parallel matrix will live on 4810 . seqmat - the input sequential matrix 4811 . m - number of local rows (or PETSC_DECIDE) 4812 . n - number of local columns (or PETSC_DECIDE) 4813 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4814 4815 Output Parameter: 4816 . mpimat - the parallel matrix generated 4817 4818 Level: advanced 4819 4820 Notes: 4821 The dimensions of the sequential matrix in each processor MUST be the same. 4822 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be 4823 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4824 @*/ 4825 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4826 { 4827 PetscErrorCode ierr; 4828 PetscMPIInt size; 4829 4830 PetscFunctionBegin; 4831 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4832 if (size == 1){ 4833 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4834 if (scall == MAT_INITIAL_MATRIX){ 4835 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4836 } else { 4837 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4838 } 4839 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4840 PetscFunctionReturn(0); 4841 } 4842 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4843 if (scall == MAT_INITIAL_MATRIX){ 4844 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4845 } 4846 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4847 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4848 PetscFunctionReturn(0); 4849 } 4850 4851 #undef __FUNCT__ 4852 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4853 /*@ 4854 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4855 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4856 with MatGetSize() 4857 4858 Not Collective 4859 4860 Input Parameters: 4861 + A - the matrix 4862 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4863 4864 Output Parameter: 4865 .
A_loc - the local sequential matrix generated 4866 4867 Level: developer 4868 4869 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 4870 4871 @*/ 4872 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4873 { 4874 PetscErrorCode ierr; 4875 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4876 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4877 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4878 MatScalar *aa=a->a,*ba=b->a,*cam; 4879 PetscScalar *ca; 4880 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4881 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4882 PetscBool match; 4883 4884 PetscFunctionBegin; 4885 ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4886 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4887 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4888 if (scall == MAT_INITIAL_MATRIX){ 4889 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4890 ci[0] = 0; 4891 for (i=0; i<am; i++){ 4892 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4893 } 4894 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4895 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4896 k = 0; 4897 for (i=0; i<am; i++) { 4898 ncols_o = bi[i+1] - bi[i]; 4899 ncols_d = ai[i+1] - ai[i]; 4900 /* off-diagonal portion of A (columns before the diagonal block) */ 4901 for (jo=0; jo<ncols_o; jo++) { 4902 col = cmap[*bj]; 4903 if (col >= cstart) break; 4904 cj[k] = col; bj++; 4905 ca[k++] = *ba++; 4906 } 4907 /* diagonal portion of A */ 4908 for (j=0; j<ncols_d; j++) { 4909 cj[k] = cstart + *aj++; 4910 ca[k++] = *aa++; 4911 } 4912 /* off-diagonal portion of A (columns after the diagonal block) */ 4913 for (j=jo; j<ncols_o; j++) { 4914 cj[k] = cmap[*bj++]; 4915 ca[k++] = *ba++; 4916 } 4917 } 4918 /* put together the new matrix */ 4919 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4920 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4921 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4922 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4923 mat->free_a = PETSC_TRUE; 4924 mat->free_ij = PETSC_TRUE; 4925 mat->nonew = 0; 4926 } else if (scall == MAT_REUSE_MATRIX){ 4927 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4928 ci = mat->i; cj = mat->j; cam = mat->a; 4929 for (i=0; i<am; i++) { 4930 /* off-diagonal portion of A (columns before the diagonal block) */ 4931 ncols_o = bi[i+1] - bi[i]; 4932 for (jo=0; jo<ncols_o; jo++) { 4933 col = cmap[*bj]; 4934 if (col >= cstart) break; 4935 *cam++ = *ba++; bj++; 4936 } 4937 /* diagonal portion of A */ 4938 ncols_d = ai[i+1] - ai[i]; 4939 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4940 /* off-diagonal portion of A (columns after the diagonal block) */ 4941 for (j=jo; j<ncols_o; j++) { 4942 *cam++ = *ba++; bj++; 4943 } 4944 } 4945 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4946 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4947 PetscFunctionReturn(0); 4948 } 4949 4950 #undef __FUNCT__ 4951 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 4952 /*@C 4953 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 4954 4955 Not Collective 4956 4957 Input Parameters: 4958 + A - the matrix 4959 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4960 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4961 4962 Output Parameter: 4963 .
A_loc - the local sequential matrix generated 4964 4965 Level: developer 4966 4967 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 4968 4969 @*/ 4970 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4971 { 4972 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4973 PetscErrorCode ierr; 4974 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4975 IS isrowa,iscola; 4976 Mat *aloc; 4977 PetscBool match; 4978 4979 PetscFunctionBegin; 4980 ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4981 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4982 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4983 if (!row){ 4984 start = A->rmap->rstart; end = A->rmap->rend; 4985 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4986 } else { 4987 isrowa = *row; 4988 } 4989 if (!col){ 4990 start = A->cmap->rstart; 4991 cmap = a->garray; 4992 nzA = a->A->cmap->n; 4993 nzB = a->B->cmap->n; 4994 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4995 ncols = 0; 4996 for (i=0; i<nzB; i++) { 4997 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4998 else break; 4999 } 5000 imark = i; 5001 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5002 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5003 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5004 } else { 5005 iscola = *col; 5006 } 5007 if (scall != MAT_INITIAL_MATRIX){ 5008 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5009 aloc[0] = *A_loc; 5010 } 5011 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5012 *A_loc = aloc[0]; 5013 ierr = PetscFree(aloc);CHKERRQ(ierr); 5014 if (!row){ 5015 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5016 } 5017 if (!col){ 5018 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5019 } 5020 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5021 PetscFunctionReturn(0); 5022 } 5023 5024 #undef __FUNCT__ 5025 #define __FUNCT__ "MatGetBrowsOfAcols" 5026 /*@C 5027 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A 5028 5029 Collective on Mat 5030 5031 Input Parameters: 5032 + A,B - the matrices in mpiaij format 5033 .
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5034 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 5035 5036 Output Parameter: 5037 + rowb, colb - index sets of rows and columns of B to extract 5038 - B_seq - the sequential matrix generated 5039 5040 Level: developer 5041 5042 @*/ 5043 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5044 { 5045 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5046 PetscErrorCode ierr; 5047 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5048 IS isrowb,iscolb; 5049 Mat *bseq=PETSC_NULL; 5050 5051 PetscFunctionBegin; 5052 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5053 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5054 } 5055 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5056 5057 if (scall == MAT_INITIAL_MATRIX){ 5058 start = A->cmap->rstart; 5059 cmap = a->garray; 5060 nzA = a->A->cmap->n; 5061 nzB = a->B->cmap->n; 5062 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5063 ncols = 0; 5064 for (i=0; i<nzB; i++) { /* row < local row index */ 5065 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5066 else break; 5067 } 5068 imark = i; 5069 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5070 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5071 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5072 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5073 } else { 5074 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5075 isrowb = *rowb; iscolb = *colb; 5076 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5077 bseq[0] = *B_seq; 5078 } 5079 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5080 *B_seq = bseq[0]; 5081 ierr = PetscFree(bseq);CHKERRQ(ierr); 5082 if (!rowb){ 5083 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5084 } else { 5085 *rowb = isrowb; 5086 } 5087 if (!colb){ 5088 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5089 } else { 5090 *colb = iscolb; 5091 } 5092 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5093 PetscFunctionReturn(0); 5094 } 5095 5096 #undef __FUNCT__ 5097 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5098 /* 5099 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5100 of the OFF-DIAGONAL portion of local A 5101 5102 Collective on Mat 5103 5104 Input Parameters: 5105 + A,B - the matrices in mpiaij format 5106 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5107 5108 Output Parameter: 5109 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5110 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5111 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 5112 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5113 5114 Level: developer 5115 5116 */ 5117 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5118 { 5119 VecScatter_MPI_General *gen_to,*gen_from; 5120 PetscErrorCode ierr; 5121 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5122 Mat_SeqAIJ *b_oth; 5123 VecScatter ctx=a->Mvctx; 5124 MPI_Comm comm=((PetscObject)ctx)->comm; 5125 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5126 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5127 PetscScalar *rvalues,*svalues; 5128 MatScalar *b_otha,*bufa,*bufA; 5129 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5130 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 5131 MPI_Status *sstatus,rstatus; 5132 PetscMPIInt jj; 5133 PetscInt *cols,sbs,rbs; 5134 PetscScalar *vals; 5135 5136 PetscFunctionBegin; 5137 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5138 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5139 } 5140 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5141 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5142 5143 gen_to = (VecScatter_MPI_General*)ctx->todata; 5144 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5145 rvalues = gen_from->values; /* holds the length of receiving row */ 5146 svalues = gen_to->values; /* holds the length of sending row */ 5147 nrecvs = gen_from->n; 5148 nsends = gen_to->n; 5149 5150 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5151 srow = gen_to->indices; /* local row index to be sent */ 5152 sstarts = gen_to->starts; 5153 sprocs = gen_to->procs; 5154 sstatus = gen_to->sstatus; 5155 sbs = gen_to->bs; 5156 rstarts = gen_from->starts; 5157 rprocs = gen_from->procs; 5158 rbs = gen_from->bs; 5159 5160 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5161 if (scall == MAT_INITIAL_MATRIX){ 5162 /* i-array */ 5163 /*---------*/ 5164 /* post receives */ 5165 for (i=0; i<nrecvs; i++){ 5166 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5167 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5168 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5169 } 5170 5171 /* pack the outgoing message */ 5172 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5173 sstartsj[0] = 0; rstartsj[0] = 0; 5174 len = 0; /* total length of j or a array to be sent */ 5175 k = 0; 5176 for (i=0; i<nsends; i++){ 5177 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5178 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5179 for (j=0; j<nrows; j++) { 5180 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5181 for (l=0; l<sbs; l++){ 5182 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5183 rowlen[j*sbs+l] = ncols; 5184 len += ncols; 5185 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5186 } 5187 k++; 5188 } 5189 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5190 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5191 } 5192 /* 
recvs and sends of i-array are completed */ 5193 i = nrecvs; 5194 while (i--) { 5195 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5196 } 5197 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5198 5199 /* allocate buffers for sending j and a arrays */ 5200 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5201 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5202 5203 /* create i-array of B_oth */ 5204 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5205 b_othi[0] = 0; 5206 len = 0; /* total length of j or a array to be received */ 5207 k = 0; 5208 for (i=0; i<nrecvs; i++){ 5209 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5210 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 5211 for (j=0; j<nrows; j++) { 5212 b_othi[k+1] = b_othi[k] + rowlen[j]; 5213 len += rowlen[j]; k++; 5214 } 5215 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5216 } 5217 5218 /* allocate space for j and a arrays of B_oth */ 5219 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5220 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5221 5222 /* j-array */ 5223 /*---------*/ 5224 /* post receives of j-array */ 5225 for (i=0; i<nrecvs; i++){ 5226 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5227 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5228 } 5229 5230 /* pack the outgoing message j-array */ 5231 k = 0; 5232 for (i=0; i<nsends; i++){ 5233 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5234 bufJ = bufj+sstartsj[i]; 5235 for (j=0; j<nrows; j++) { 5236 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5237 for (ll=0; ll<sbs; ll++){ 5238 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5239 for (l=0; l<ncols; l++){ 5240 *bufJ++ = cols[l]; 5241 } 5242 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5243 } 5244 } 5245 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5246 } 5247 5248 /* recvs and sends of j-array are completed */ 5249 i = nrecvs; 5250 while (i--) { 5251 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5252 } 5253 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5254 } else if (scall == MAT_REUSE_MATRIX){ 5255 sstartsj = *startsj_s; 5256 rstartsj = *startsj_r; 5257 bufa = *bufa_ptr; 5258 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5259 b_otha = b_oth->a; 5260 } else { 5261 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Only MAT_INITIAL_MATRIX and MAT_REUSE_MATRIX are supported"); 5262 } 5263 5264 /* a-array */ 5265 /*---------*/ 5266 /* post receives of a-array */ 5267 for (i=0; i<nrecvs; i++){ 5268 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5269 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5270 } 5271 5272 /* pack the outgoing message a-array */ 5273 k = 0; 5274 for (i=0; i<nsends; i++){ 5275 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5276 bufA = bufa+sstartsj[i]; 5277 for (j=0; j<nrows; j++) { 5278 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5279 for (ll=0; ll<sbs; ll++){ 5280 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5281 for (l=0; l<ncols; l++){ 5282 *bufA++ = vals[l]; 5283 } 5284 ierr =
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5285 } 5286 } 5287 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5288 } 5289 /* recvs and sends of a-array are completed */ 5290 i = nrecvs; 5291 while (i--) { 5292 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5293 } 5294 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5295 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5296 5297 if (scall == MAT_INITIAL_MATRIX){ 5298 /* put together the new matrix */ 5299 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5300 5301 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5302 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5303 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5304 b_oth->free_a = PETSC_TRUE; 5305 b_oth->free_ij = PETSC_TRUE; 5306 b_oth->nonew = 0; 5307 5308 ierr = PetscFree(bufj);CHKERRQ(ierr); 5309 if (!startsj_s || !bufa_ptr){ 5310 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5311 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5312 } else { 5313 *startsj_s = sstartsj; 5314 *startsj_r = rstartsj; 5315 *bufa_ptr = bufa; 5316 } 5317 } 5318 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5319 PetscFunctionReturn(0); 5320 } 5321 5322 #undef __FUNCT__ 5323 #define __FUNCT__ "MatGetCommunicationStructs" 5324 /*@C 5325 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5326 5327 Not Collective 5328 5329 Input Parameters: 5330 . A - The matrix in mpiaij format 5331 5332 Output Parameter: 5333 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5334 . 
colmap - A map from global column index to local index into lvec 5335 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5336 5337 Level: developer 5338 5339 @*/ 5340 #if defined (PETSC_USE_CTABLE) 5341 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5342 #else 5343 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5344 #endif 5345 { 5346 Mat_MPIAIJ *a; 5347 5348 PetscFunctionBegin; 5349 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5350 PetscValidPointer(lvec, 2); 5351 PetscValidPointer(colmap, 3); 5352 PetscValidPointer(multScatter, 4); 5353 a = (Mat_MPIAIJ *) A->data; 5354 if (lvec) *lvec = a->lvec; 5355 if (colmap) *colmap = a->colmap; 5356 if (multScatter) *multScatter = a->Mvctx; 5357 PetscFunctionReturn(0); 5358 } 5359 5360 EXTERN_C_BEGIN 5361 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*); 5362 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*); 5363 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 5364 EXTERN_C_END 5365 5366 #undef __FUNCT__ 5367 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5368 /* 5369 Computes (B'*A')' since computing B*A directly is untenable 5370 5371 n p p 5372 ( ) ( ) ( ) 5373 m ( A ) * n ( B ) = m ( C ) 5374 ( ) ( ) ( ) 5375 5376 */ 5377 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5378 { 5379 PetscErrorCode ierr; 5380 Mat At,Bt,Ct; 5381 5382 PetscFunctionBegin; 5383 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5384 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5385 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5386 ierr = MatDestroy(&At);CHKERRQ(ierr); 5387 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5388 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5389 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5390 PetscFunctionReturn(0); 5391 } 5392 5393 #undef __FUNCT__ 5394 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5395 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5396 { 5397 PetscErrorCode ierr; 5398 PetscInt m=A->rmap->n,n=B->cmap->n; 5399 Mat Cmat; 5400 5401 PetscFunctionBegin; 5402 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5403 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5404 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5405 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5406 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5407 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5408 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5409 *C = Cmat; 5410 (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ; 5411 PetscFunctionReturn(0); 5412 } 5413 5414 /* ----------------------------------------------------------------*/ 5415 #undef __FUNCT__ 5416 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5417 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5418 { 5419 PetscErrorCode ierr; 5420 5421 PetscFunctionBegin; 5422 if (scall == MAT_INITIAL_MATRIX){ 5423 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5424 } 5425 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5426 PetscFunctionReturn(0); 5427 } 5428 5429 EXTERN_C_BEGIN 5430 #if 
defined(PETSC_HAVE_MUMPS) 5431 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5432 #endif 5433 #if defined(PETSC_HAVE_PASTIX) 5434 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5435 #endif 5436 #if defined(PETSC_HAVE_SUPERLU_DIST) 5437 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5438 #endif 5439 #if defined(PETSC_HAVE_SPOOLES) 5440 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5441 #endif 5442 EXTERN_C_END 5443 5444 /*MC 5445 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5446 5447 Options Database Keys: 5448 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5449 5450 Level: beginner 5451 5452 .seealso: MatCreateAIJ() 5453 M*/ 5454 5455 EXTERN_C_BEGIN 5456 #undef __FUNCT__ 5457 #define __FUNCT__ "MatCreate_MPIAIJ" 5458 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5459 { 5460 Mat_MPIAIJ *b; 5461 PetscErrorCode ierr; 5462 PetscMPIInt size; 5463 5464 PetscFunctionBegin; 5465 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5466 5467 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5468 B->data = (void*)b; 5469 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5470 B->assembled = PETSC_FALSE; 5471 5472 B->insertmode = NOT_SET_VALUES; 5473 b->size = size; 5474 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5475 5476 /* build cache for off array entries formed */ 5477 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5478 b->donotstash = PETSC_FALSE; 5479 b->colmap = 0; 5480 b->garray = 0; 5481 b->roworiented = PETSC_TRUE; 5482 5483 /* stuff used for matrix vector multiply */ 5484 b->lvec = PETSC_NULL; 5485 b->Mvctx = PETSC_NULL; 5486 5487 /* stuff for MatGetRow() */ 5488 b->rowindices = 0; 5489 b->rowvalues = 0; 5490 b->getrowactive = PETSC_FALSE; 5491 5492 #if defined(PETSC_HAVE_SPOOLES) 5493 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5494 "MatGetFactor_mpiaij_spooles", 5495 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5496 #endif 5497 #if defined(PETSC_HAVE_MUMPS) 5498 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5499 "MatGetFactor_aij_mumps", 5500 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5501 #endif 5502 #if defined(PETSC_HAVE_PASTIX) 5503 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5504 "MatGetFactor_mpiaij_pastix", 5505 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5506 #endif 5507 #if defined(PETSC_HAVE_SUPERLU_DIST) 5508 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5509 "MatGetFactor_mpiaij_superlu_dist", 5510 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5511 #endif 5512 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5513 "MatStoreValues_MPIAIJ", 5514 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5515 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5516 "MatRetrieveValues_MPIAIJ", 5517 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5518 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5519 "MatGetDiagonalBlock_MPIAIJ", 5520 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5521 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5522 "MatIsTranspose_MPIAIJ", 5523 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5524 ierr = 
PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5525 "MatMPIAIJSetPreallocation_MPIAIJ", 5526 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5527 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5528 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5529 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5530 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5531 "MatDiagonalScaleLocal_MPIAIJ", 5532 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5533 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5534 "MatConvert_MPIAIJ_MPIAIJPERM", 5535 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5536 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5537 "MatConvert_MPIAIJ_MPIAIJCRL", 5538 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5539 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5540 "MatConvert_MPIAIJ_MPISBAIJ", 5541 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5542 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5543 "MatMatMult_MPIDense_MPIAIJ", 5544 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5545 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5546 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5547 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5548 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5549 "MatMatMultNumeric_MPIDense_MPIAIJ", 5550 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5551 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5552 PetscFunctionReturn(0); 5553 } 5554 EXTERN_C_END 5555 5556 #undef __FUNCT__ 5557 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5558 /*@ 5559 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5560 and "off-diagonal" part of the matrix in CSR format. 5561 5562 Collective on MPI_Comm 5563 5564 Input Parameters: 5565 + comm - MPI communicator 5566 . m - number of local rows (Cannot be PETSC_DECIDE) 5567 . n - This value should be the same as the local size used in creating the 5568 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5569 calculated if N is given) For square matrices n is almost always m. 5570 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5571 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5572 . i - row indices for "diagonal" portion of matrix 5573 . j - column indices 5574 . a - matrix values 5575 . oi - row indices for "off-diagonal" portion of matrix 5576 . oj - column indices 5577 - oa - matrix values 5578 5579 Output Parameter: 5580 . mat - the matrix 5581 5582 Level: advanced 5583 5584 Notes: 5585 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5586 must free the arrays once the matrix has been destroyed and not before. 5587 5588 The i and j indices are 0 based 5589 5590 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5591 5592 This sets local rows and cannot be used to set off-processor values. 5593 5594 You cannot later use MatSetValues() to change values in this matrix. 
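
   Example usage (a minimal illustrative sketch, not taken from the PETSc examples: each process owns
   two rows forming a dense 2 by 2 diagonal block and contributes no off-diagonal entries; all array
   contents below are made up for the illustration):

.vb
     Mat            mat;
     PetscErrorCode ierr;
     PetscInt       i[]  = {0,2,4};             /* row pointers of the "diagonal" part (2 local rows) */
     PetscInt       j[]  = {0,1,0,1};           /* local column indices of the "diagonal" part */
     PetscScalar    a[]  = {4.0,-1.0,-1.0,4.0};
     PetscInt       oi[] = {0,0,0};             /* empty "off-diagonal" part */
     PetscInt       oj[] = {0};                 /* never referenced since oi[] is all zeros */
     PetscScalar    oa[] = {0.0};

     /* the arrays above must remain valid until mat has been destroyed */
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,
                                           i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);
.ve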
5595 5596 .keywords: matrix, aij, compressed row, sparse, parallel 5597 5598 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5599 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5600 @*/ 5601 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5602 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5603 { 5604 PetscErrorCode ierr; 5605 Mat_MPIAIJ *maij; 5606 5607 PetscFunctionBegin; 5608 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5609 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5610 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5611 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5612 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5613 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5614 maij = (Mat_MPIAIJ*) (*mat)->data; 5615 maij->donotstash = PETSC_TRUE; 5616 (*mat)->preallocated = PETSC_TRUE; 5617 5618 ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5619 ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5620 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5621 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5622 5623 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5624 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5625 5626 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5627 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5628 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5629 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5630 5631 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5632 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5633 PetscFunctionReturn(0); 5634 } 5635 5636 /* 5637 Special version for direct calls from Fortran 5638 */ 5639 #include <petsc-private/fortranimpl.h> 5640 5641 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5642 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5643 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5644 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5645 #endif 5646 5647 /* Change these macros so can be used in void function */ 5648 #undef CHKERRQ 5649 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5650 #undef SETERRQ2 5651 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5652 #undef SETERRQ3 5653 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5654 #undef SETERRQ 5655 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5656 5657 EXTERN_C_BEGIN 5658 #undef __FUNCT__ 5659 #define __FUNCT__ "matsetvaluesmpiaij_" 5660 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5661 { 5662 Mat mat = *mmat; 5663 PetscInt m = *mm, n = *mn; 5664 InsertMode addv = *maddv; 5665 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5666 PetscScalar value; 5667 PetscErrorCode ierr; 5668 5669 MatCheckPreallocated(mat,1); 5670 if (mat->insertmode == NOT_SET_VALUES) { 5671 mat->insertmode = addv; 5672 } 5673 #if defined(PETSC_USE_DEBUG) 5674 else if (mat->insertmode != addv) { 5675 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert 
values"); 5676 } 5677 #endif 5678 { 5679 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5680 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5681 PetscBool roworiented = aij->roworiented; 5682 5683 /* Some Variables required in the macro */ 5684 Mat A = aij->A; 5685 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5686 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5687 MatScalar *aa = a->a; 5688 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5689 Mat B = aij->B; 5690 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5691 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5692 MatScalar *ba = b->a; 5693 5694 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5695 PetscInt nonew = a->nonew; 5696 MatScalar *ap1,*ap2; 5697 5698 PetscFunctionBegin; 5699 for (i=0; i<m; i++) { 5700 if (im[i] < 0) continue; 5701 #if defined(PETSC_USE_DEBUG) 5702 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5703 #endif 5704 if (im[i] >= rstart && im[i] < rend) { 5705 row = im[i] - rstart; 5706 lastcol1 = -1; 5707 rp1 = aj + ai[row]; 5708 ap1 = aa + ai[row]; 5709 rmax1 = aimax[row]; 5710 nrow1 = ailen[row]; 5711 low1 = 0; 5712 high1 = nrow1; 5713 lastcol2 = -1; 5714 rp2 = bj + bi[row]; 5715 ap2 = ba + bi[row]; 5716 rmax2 = bimax[row]; 5717 nrow2 = bilen[row]; 5718 low2 = 0; 5719 high2 = nrow2; 5720 5721 for (j=0; j<n; j++) { 5722 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5723 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5724 if (in[j] >= cstart && in[j] < cend){ 5725 col = in[j] - cstart; 5726 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5727 } else if (in[j] < 0) continue; 5728 #if defined(PETSC_USE_DEBUG) 5729 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5730 #endif 5731 else { 5732 if (mat->was_assembled) { 5733 if (!aij->colmap) { 5734 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5735 } 5736 #if defined (PETSC_USE_CTABLE) 5737 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5738 col--; 5739 #else 5740 col = aij->colmap[in[j]] - 1; 5741 #endif 5742 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5743 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5744 col = in[j]; 5745 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5746 B = aij->B; 5747 b = (Mat_SeqAIJ*)B->data; 5748 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5749 rp2 = bj + bi[row]; 5750 ap2 = ba + bi[row]; 5751 rmax2 = bimax[row]; 5752 nrow2 = bilen[row]; 5753 low2 = 0; 5754 high2 = nrow2; 5755 bm = aij->B->rmap->n; 5756 ba = b->a; 5757 } 5758 } else col = in[j]; 5759 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5760 } 5761 } 5762 } else { 5763 if (!aij->donotstash) { 5764 if (roworiented) { 5765 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5766 } else { 5767 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5768 } 5769 } 5770 } 5771 }} 5772 PetscFunctionReturnVoid(); 5773 } 5774 EXTERN_C_END 5775 5776