#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petscblaslapack.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
.  -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, and MATAIJCRL; the type also automatically
   switches over to using inodes when enough of them exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
.  -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

#undef __FUNCT__
#define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
    ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    ok2:;
  }
  ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray =
aij->garray; 112 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 113 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 114 PetscReal *work; 115 116 PetscFunctionBegin; 117 ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr); 118 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 119 ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr); 120 if (type == NORM_2) { 121 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 122 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 123 } 124 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 125 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 126 } 127 } else if (type == NORM_1) { 128 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 129 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 130 } 131 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 132 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 133 } 134 } else if (type == NORM_INFINITY) { 135 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 136 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 137 } 138 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 139 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 140 } 141 142 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType"); 143 if (type == NORM_INFINITY) { 144 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 145 } else { 146 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 147 } 148 ierr = PetscFree(work);CHKERRQ(ierr); 149 if (type == NORM_2) { 150 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 151 } 152 PetscFunctionReturn(0); 153 } 154 155 #undef __FUNCT__ 156 #define __FUNCT__ "MatDistribute_MPIAIJ" 157 /* 158 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 159 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 
160 161 Only for square matrices 162 */ 163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 164 { 165 PetscMPIInt rank,size; 166 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 167 PetscErrorCode ierr; 168 Mat mat; 169 Mat_SeqAIJ *gmata; 170 PetscMPIInt tag; 171 MPI_Status status; 172 PetscBool aij; 173 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 174 175 PetscFunctionBegin; 176 CHKMEMQ; 177 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 178 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 179 if (!rank) { 180 ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 181 if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 182 } 183 if (reuse == MAT_INITIAL_MATRIX) { 184 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 185 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 186 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 187 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 188 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 189 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 190 rowners[0] = 0; 191 for (i=2; i<=size; i++) { 192 rowners[i] += rowners[i-1]; 193 } 194 rstart = rowners[rank]; 195 rend = rowners[rank+1]; 196 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 197 if (!rank) { 198 gmata = (Mat_SeqAIJ*) gmat->data; 199 /* send row lengths to all processors */ 200 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 201 for (i=1; i<size; i++) { 202 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 203 } 204 /* determine number diagonal and off-diagonal counts */ 205 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 206 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 207 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 208 jj = 0; 209 for (i=0; i<m; i++) { 210 for (j=0; j<dlens[i]; j++) { 211 if (gmata->j[jj] < rstart) ld[i]++; 212 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 213 jj++; 214 } 215 } 216 /* send column indices to other processes */ 217 for (i=1; i<size; i++) { 218 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 219 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 220 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 221 } 222 223 /* send numerical values to other processes */ 224 for (i=1; i<size; i++) { 225 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 226 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 227 } 228 gmataa = gmata->a; 229 gmataj = gmata->j; 230 231 } else { 232 /* receive row lengths */ 233 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 234 /* receive column indices */ 235 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 236 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 237 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 238 /* determine number diagonal and off-diagonal counts */ 239 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 240 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 241 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 242 jj = 0; 243 for (i=0; i<m; i++) { 244 for (j=0; j<dlens[i]; j++) { 245 if (gmataj[jj] < rstart) ld[i]++; 246 if (gmataj[jj] < 
rstart || gmataj[jj] >= rend) olens[i]++; 247 jj++; 248 } 249 } 250 /* receive numerical values */ 251 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 252 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 253 } 254 /* set preallocation */ 255 for (i=0; i<m; i++) { 256 dlens[i] -= olens[i]; 257 } 258 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 259 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 260 261 for (i=0; i<m; i++) { 262 dlens[i] += olens[i]; 263 } 264 cnt = 0; 265 for (i=0; i<m; i++) { 266 row = rstart + i; 267 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 268 cnt += dlens[i]; 269 } 270 if (rank) { 271 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 272 } 273 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 274 ierr = PetscFree(rowners);CHKERRQ(ierr); 275 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 276 *inmat = mat; 277 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 278 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 279 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 280 mat = *inmat; 281 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 282 if (!rank) { 283 /* send numerical values to other processes */ 284 gmata = (Mat_SeqAIJ*) gmat->data; 285 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 286 gmataa = gmata->a; 287 for (i=1; i<size; i++) { 288 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 289 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 290 } 291 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 292 } else { 293 /* receive numerical values from process 0*/ 294 nz = Ad->nz + Ao->nz; 295 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 296 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 297 } 298 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 299 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 300 ad = Ad->a; 301 ao = Ao->a; 302 if (mat->rmap->n) { 303 i = 0; 304 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 305 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 306 } 307 for (i=1; i<mat->rmap->n; i++) { 308 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 309 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 310 } 311 i--; 312 if (mat->rmap->n) { 313 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 314 } 315 if (rank) { 316 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 317 } 318 } 319 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 320 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 321 CHKMEMQ; 322 PetscFunctionReturn(0); 323 } 324 325 /* 326 Local utility routine that creates a mapping from the global column 327 number to the local number in the off-diagonal part of the local 328 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 329 a slightly higher hash table cost; without it it is not scalable (each processor 330 has an order N integer array but is fast to acess. 
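
   For illustration only (the real lookups are in MatSetValues_MPIAIJ() and
   MatGetValues_MPIAIJ() below; gcol and lcol are placeholder names): once the
   colmap exists, translating a global column gcol into a local off-diagonal
   column lcol proceeds roughly as

     #if defined(PETSC_USE_CTABLE)
       PetscTableFind(aij->colmap,gcol+1,&lcol); lcol--;
     #else
       lcol = aij->colmap[gcol] - 1;
     #endif

   The +1/-1 shift keeps 0 available as the "column not present on this
   process" marker in both storage schemes.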
331 */ 332 #undef __FUNCT__ 333 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 334 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 335 { 336 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 337 PetscErrorCode ierr; 338 PetscInt n = aij->B->cmap->n,i; 339 340 PetscFunctionBegin; 341 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 342 #if defined (PETSC_USE_CTABLE) 343 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 344 for (i=0; i<n; i++){ 345 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 346 } 347 #else 348 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 349 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 350 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 351 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 352 #endif 353 PetscFunctionReturn(0); 354 } 355 356 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 357 { \ 358 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 359 lastcol1 = col;\ 360 while (high1-low1 > 5) { \ 361 t = (low1+high1)/2; \ 362 if (rp1[t] > col) high1 = t; \ 363 else low1 = t; \ 364 } \ 365 for (_i=low1; _i<high1; _i++) { \ 366 if (rp1[_i] > col) break; \ 367 if (rp1[_i] == col) { \ 368 if (addv == ADD_VALUES) ap1[_i] += value; \ 369 else ap1[_i] = value; \ 370 goto a_noinsert; \ 371 } \ 372 } \ 373 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 374 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 375 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 376 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 377 N = nrow1++ - 1; a->nz++; high1++; \ 378 /* shift up all the later entries in this row */ \ 379 for (ii=N; ii>=_i; ii--) { \ 380 rp1[ii+1] = rp1[ii]; \ 381 ap1[ii+1] = ap1[ii]; \ 382 } \ 383 rp1[_i] = col; \ 384 ap1[_i] = value; \ 385 a_noinsert: ; \ 386 ailen[row] = nrow1; \ 387 } 388 389 390 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 391 { \ 392 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 393 lastcol2 = col;\ 394 while (high2-low2 > 5) { \ 395 t = (low2+high2)/2; \ 396 if (rp2[t] > col) high2 = t; \ 397 else low2 = t; \ 398 } \ 399 for (_i=low2; _i<high2; _i++) { \ 400 if (rp2[_i] > col) break; \ 401 if (rp2[_i] == col) { \ 402 if (addv == ADD_VALUES) ap2[_i] += value; \ 403 else ap2[_i] = value; \ 404 goto b_noinsert; \ 405 } \ 406 } \ 407 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 408 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 409 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 410 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 411 N = nrow2++ - 1; b->nz++; high2++; \ 412 /* shift up all the later entries in this row */ \ 413 for (ii=N; ii>=_i; ii--) { \ 414 rp2[ii+1] = rp2[ii]; \ 415 ap2[ii+1] = ap2[ii]; \ 416 } \ 417 rp2[_i] = col; \ 418 ap2[_i] = value; \ 419 b_noinsert: ; \ 420 bilen[row] = nrow2; \ 421 } 422 423 #undef __FUNCT__ 424 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 425 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 426 { 427 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 428 Mat_SeqAIJ *a = 
(Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 429 PetscErrorCode ierr; 430 PetscInt l,*garray = mat->garray,diag; 431 432 PetscFunctionBegin; 433 /* code only works for square matrices A */ 434 435 /* find size of row to the left of the diagonal part */ 436 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 437 row = row - diag; 438 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 439 if (garray[b->j[b->i[row]+l]] > diag) break; 440 } 441 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 442 443 /* diagonal part */ 444 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 445 446 /* right of diagonal part */ 447 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 448 PetscFunctionReturn(0); 449 } 450 451 #undef __FUNCT__ 452 #define __FUNCT__ "MatSetValues_MPIAIJ" 453 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 454 { 455 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 456 PetscScalar value; 457 PetscErrorCode ierr; 458 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 459 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 460 PetscBool roworiented = aij->roworiented; 461 462 /* Some Variables required in the macro */ 463 Mat A = aij->A; 464 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 465 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 466 MatScalar *aa = a->a; 467 PetscBool ignorezeroentries = a->ignorezeroentries; 468 Mat B = aij->B; 469 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 470 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 471 MatScalar *ba = b->a; 472 473 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 474 PetscInt nonew; 475 MatScalar *ap1,*ap2; 476 477 PetscFunctionBegin; 478 if (v) PetscValidScalarPointer(v,6); 479 for (i=0; i<m; i++) { 480 if (im[i] < 0) continue; 481 #if defined(PETSC_USE_DEBUG) 482 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 483 #endif 484 if (im[i] >= rstart && im[i] < rend) { 485 row = im[i] - rstart; 486 lastcol1 = -1; 487 rp1 = aj + ai[row]; 488 ap1 = aa + ai[row]; 489 rmax1 = aimax[row]; 490 nrow1 = ailen[row]; 491 low1 = 0; 492 high1 = nrow1; 493 lastcol2 = -1; 494 rp2 = bj + bi[row]; 495 ap2 = ba + bi[row]; 496 rmax2 = bimax[row]; 497 nrow2 = bilen[row]; 498 low2 = 0; 499 high2 = nrow2; 500 501 for (j=0; j<n; j++) { 502 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 503 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 504 if (in[j] >= cstart && in[j] < cend){ 505 col = in[j] - cstart; 506 nonew = a->nonew; 507 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 508 } else if (in[j] < 0) continue; 509 #if defined(PETSC_USE_DEBUG) 510 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 511 #endif 512 else { 513 if (mat->was_assembled) { 514 if (!aij->colmap) { 515 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 516 } 517 #if defined (PETSC_USE_CTABLE) 518 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 519 col--; 520 #else 521 col = aij->colmap[in[j]] - 1; 522 #endif 523 if (col < 0 && 
!((Mat_SeqAIJ*)(aij->A->data))->nonew) { 524 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 525 col = in[j]; 526 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 527 B = aij->B; 528 b = (Mat_SeqAIJ*)B->data; 529 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 530 rp2 = bj + bi[row]; 531 ap2 = ba + bi[row]; 532 rmax2 = bimax[row]; 533 nrow2 = bilen[row]; 534 low2 = 0; 535 high2 = nrow2; 536 bm = aij->B->rmap->n; 537 ba = b->a; 538 } 539 } else col = in[j]; 540 nonew = b->nonew; 541 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 542 } 543 } 544 } else { 545 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 546 if (!aij->donotstash) { 547 mat->assembled = PETSC_FALSE; 548 if (roworiented) { 549 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 550 } else { 551 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 552 } 553 } 554 } 555 } 556 PetscFunctionReturn(0); 557 } 558 559 #undef __FUNCT__ 560 #define __FUNCT__ "MatGetValues_MPIAIJ" 561 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 562 { 563 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 564 PetscErrorCode ierr; 565 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 566 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 567 568 PetscFunctionBegin; 569 for (i=0; i<m; i++) { 570 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 571 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 572 if (idxm[i] >= rstart && idxm[i] < rend) { 573 row = idxm[i] - rstart; 574 for (j=0; j<n; j++) { 575 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 576 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 577 if (idxn[j] >= cstart && idxn[j] < cend){ 578 col = idxn[j] - cstart; 579 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 580 } else { 581 if (!aij->colmap) { 582 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 583 } 584 #if defined (PETSC_USE_CTABLE) 585 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 586 col --; 587 #else 588 col = aij->colmap[idxn[j]] - 1; 589 #endif 590 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 591 else { 592 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 593 } 594 } 595 } 596 } else { 597 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 598 } 599 } 600 PetscFunctionReturn(0); 601 } 602 603 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 604 605 #undef __FUNCT__ 606 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 607 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 608 { 609 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 610 PetscErrorCode ierr; 611 PetscInt nstash,reallocs; 612 InsertMode addv; 613 614 PetscFunctionBegin; 615 if (aij->donotstash || mat->nooffprocentries) { 616 PetscFunctionReturn(0); 617 } 618 619 /* make sure all processors are either in 
INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble.
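
     For example (illustrative numbers only): if three processes reach this
     point with was_assembled = 1, 0, 1, the MPI_PROD reduction below returns
     other_disassembled = 0 on every process, so the two still-assembled
     processes call MatDisAssemble_MPIAIJ() and all three then rebuild B with
     a consistent nonzero structure.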
*/ 668 /* 669 if nonzero structure of submatrix B cannot change then we know that 670 no processor disassembled thus we can skip this stuff 671 */ 672 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 673 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 674 if (mat->was_assembled && !other_disassembled) { 675 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 676 } 677 } 678 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 679 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 680 } 681 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 682 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 683 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 684 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 685 686 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 687 aij->rowvalues = 0; 688 689 /* used by MatAXPY() */ 690 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 691 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 692 693 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 694 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 695 PetscFunctionReturn(0); 696 } 697 698 #undef __FUNCT__ 699 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 700 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 701 { 702 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 703 PetscErrorCode ierr; 704 705 PetscFunctionBegin; 706 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 707 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 708 PetscFunctionReturn(0); 709 } 710 711 #undef __FUNCT__ 712 #define __FUNCT__ "MatZeroRows_MPIAIJ" 713 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 714 { 715 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 716 PetscErrorCode ierr; 717 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 718 PetscInt i,*owners = A->rmap->range; 719 PetscInt *nprocs,j,idx,nsends,row; 720 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 721 PetscInt *rvalues,count,base,slen,*source; 722 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 723 MPI_Comm comm = ((PetscObject)A)->comm; 724 MPI_Request *send_waits,*recv_waits; 725 MPI_Status recv_status,*send_status; 726 const PetscScalar *xx; 727 PetscScalar *bb; 728 #if defined(PETSC_DEBUG) 729 PetscBool found = PETSC_FALSE; 730 #endif 731 732 PetscFunctionBegin; 733 /* first count number of contributors to each processor */ 734 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 735 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 736 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 737 j = 0; 738 for (i=0; i<N; i++) { 739 if (lastidx > (idx = rows[i])) j = 0; 740 lastidx = idx; 741 for (; j<size; j++) { 742 if (idx >= owners[j] && idx < owners[j+1]) { 743 nprocs[2*j]++; 744 nprocs[2*j+1] = 1; 745 owner[i] = j; 746 #if defined(PETSC_DEBUG) 747 found = PETSC_TRUE; 748 #endif 749 break; 750 } 751 } 752 #if defined(PETSC_DEBUG) 753 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 754 found = PETSC_FALSE; 755 #endif 756 } 757 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 758 759 if (A->nooffproczerorows) { 760 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 761 nrecvs = nsends; 762 nmax = N; 763 } 
else { 764 /* inform other processors of number of messages and max length*/ 765 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 766 } 767 768 /* post receives: */ 769 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 770 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 771 for (i=0; i<nrecvs; i++) { 772 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 773 } 774 775 /* do sends: 776 1) starts[i] gives the starting index in svalues for stuff going to 777 the ith processor 778 */ 779 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 780 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 781 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 782 starts[0] = 0; 783 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 784 for (i=0; i<N; i++) { 785 svalues[starts[owner[i]]++] = rows[i]; 786 } 787 788 starts[0] = 0; 789 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 790 count = 0; 791 for (i=0; i<size; i++) { 792 if (nprocs[2*i+1]) { 793 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 794 } 795 } 796 ierr = PetscFree(starts);CHKERRQ(ierr); 797 798 base = owners[rank]; 799 800 /* wait on receives */ 801 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 802 count = nrecvs; slen = 0; 803 while (count) { 804 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 805 /* unpack receives into our local space */ 806 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 807 source[imdex] = recv_status.MPI_SOURCE; 808 lens[imdex] = n; 809 slen += n; 810 count--; 811 } 812 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 813 814 /* move the data into the send scatter */ 815 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 816 count = 0; 817 for (i=0; i<nrecvs; i++) { 818 values = rvalues + i*nmax; 819 for (j=0; j<lens[i]; j++) { 820 lrows[count++] = values[j] - base; 821 } 822 } 823 ierr = PetscFree(rvalues);CHKERRQ(ierr); 824 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 825 ierr = PetscFree(owner);CHKERRQ(ierr); 826 ierr = PetscFree(nprocs);CHKERRQ(ierr); 827 828 /* fix right hand side if needed */ 829 if (x && b) { 830 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 831 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 832 for (i=0; i<slen; i++) { 833 bb[lrows[i]] = diag*xx[lrows[i]]; 834 } 835 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 836 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 837 } 838 /* 839 Zero the required rows. If the "diagonal block" of the matrix 840 is square and the user wishes to set the diagonal we use separate 841 code so that MatSetValues() is not called for each diagonal allocating 842 new memory, thus calling lots of mallocs and slowing things down. 
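
       A note on the right-hand-side fix above: when both x and b are supplied,
       each zeroed global row r ends up representing the trivial equation
       diag * x[r] = b[r]; that is why the loop sets
       bb[lrows[i]] = diag*xx[lrows[i]] before any matrix entries are cleared.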
843 844 */ 845 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 846 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 847 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 848 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 849 } else if (diag != 0.0) { 850 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 851 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 852 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 853 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 854 } 855 for (i = 0; i < slen; i++) { 856 row = lrows[i] + rstart; 857 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 858 } 859 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 860 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 861 } else { 862 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 863 } 864 ierr = PetscFree(lrows);CHKERRQ(ierr); 865 866 /* wait on sends */ 867 if (nsends) { 868 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 869 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 870 ierr = PetscFree(send_status);CHKERRQ(ierr); 871 } 872 ierr = PetscFree(send_waits);CHKERRQ(ierr); 873 ierr = PetscFree(svalues);CHKERRQ(ierr); 874 PetscFunctionReturn(0); 875 } 876 877 #undef __FUNCT__ 878 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 879 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 880 { 881 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 882 PetscErrorCode ierr; 883 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 884 PetscInt i,*owners = A->rmap->range; 885 PetscInt *nprocs,j,idx,nsends; 886 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 887 PetscInt *rvalues,count,base,slen,*source; 888 PetscInt *lens,*lrows,*values,m; 889 MPI_Comm comm = ((PetscObject)A)->comm; 890 MPI_Request *send_waits,*recv_waits; 891 MPI_Status recv_status,*send_status; 892 const PetscScalar *xx; 893 PetscScalar *bb,*mask; 894 Vec xmask,lmask; 895 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 896 const PetscInt *aj, *ii,*ridx; 897 PetscScalar *aa; 898 #if defined(PETSC_DEBUG) 899 PetscBool found = PETSC_FALSE; 900 #endif 901 902 PetscFunctionBegin; 903 /* first count number of contributors to each processor */ 904 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 905 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 906 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 907 j = 0; 908 for (i=0; i<N; i++) { 909 if (lastidx > (idx = rows[i])) j = 0; 910 lastidx = idx; 911 for (; j<size; j++) { 912 if (idx >= owners[j] && idx < owners[j+1]) { 913 nprocs[2*j]++; 914 nprocs[2*j+1] = 1; 915 owner[i] = j; 916 #if defined(PETSC_DEBUG) 917 found = PETSC_TRUE; 918 #endif 919 break; 920 } 921 } 922 #if defined(PETSC_DEBUG) 923 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 924 found = PETSC_FALSE; 925 #endif 926 } 927 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 928 929 /* inform other processors of number of messages and max length*/ 930 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 931 932 /* post receives: */ 933 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 934 ierr = 
PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 935 for (i=0; i<nrecvs; i++) { 936 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 937 } 938 939 /* do sends: 940 1) starts[i] gives the starting index in svalues for stuff going to 941 the ith processor 942 */ 943 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 944 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 945 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 946 starts[0] = 0; 947 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 948 for (i=0; i<N; i++) { 949 svalues[starts[owner[i]]++] = rows[i]; 950 } 951 952 starts[0] = 0; 953 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 954 count = 0; 955 for (i=0; i<size; i++) { 956 if (nprocs[2*i+1]) { 957 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 958 } 959 } 960 ierr = PetscFree(starts);CHKERRQ(ierr); 961 962 base = owners[rank]; 963 964 /* wait on receives */ 965 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 966 count = nrecvs; slen = 0; 967 while (count) { 968 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 969 /* unpack receives into our local space */ 970 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 971 source[imdex] = recv_status.MPI_SOURCE; 972 lens[imdex] = n; 973 slen += n; 974 count--; 975 } 976 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 977 978 /* move the data into the send scatter */ 979 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 980 count = 0; 981 for (i=0; i<nrecvs; i++) { 982 values = rvalues + i*nmax; 983 for (j=0; j<lens[i]; j++) { 984 lrows[count++] = values[j] - base; 985 } 986 } 987 ierr = PetscFree(rvalues);CHKERRQ(ierr); 988 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 989 ierr = PetscFree(owner);CHKERRQ(ierr); 990 ierr = PetscFree(nprocs);CHKERRQ(ierr); 991 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 992 993 /* zero diagonal part of matrix */ 994 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 995 996 /* handle off diagonal part of matrix */ 997 ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr); 998 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 999 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1000 for (i=0; i<slen; i++) { 1001 bb[lrows[i]] = 1; 1002 } 1003 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1004 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1005 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1006 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1007 if (x) { 1008 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1009 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1010 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1011 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1012 } 1013 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1014 1015 /* remove zeroed rows of off diagonal matrix */ 1016 ii = aij->i; 1017 for (i=0; i<slen; i++) { 1018 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1019 } 1020 1021 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1022 if (aij->compressedrow.use){ 1023 m = aij->compressedrow.nrows; 1024 ii = 
aij->compressedrow.i; 1025 ridx = aij->compressedrow.rindex; 1026 for (i=0; i<m; i++){ 1027 n = ii[i+1] - ii[i]; 1028 aj = aij->j + ii[i]; 1029 aa = aij->a + ii[i]; 1030 1031 for (j=0; j<n; j++) { 1032 if (PetscAbsScalar(mask[*aj])) { 1033 if (b) bb[*ridx] -= *aa*xx[*aj]; 1034 *aa = 0.0; 1035 } 1036 aa++; 1037 aj++; 1038 } 1039 ridx++; 1040 } 1041 } else { /* do not use compressed row format */ 1042 m = l->B->rmap->n; 1043 for (i=0; i<m; i++) { 1044 n = ii[i+1] - ii[i]; 1045 aj = aij->j + ii[i]; 1046 aa = aij->a + ii[i]; 1047 for (j=0; j<n; j++) { 1048 if (PetscAbsScalar(mask[*aj])) { 1049 if (b) bb[i] -= *aa*xx[*aj]; 1050 *aa = 0.0; 1051 } 1052 aa++; 1053 aj++; 1054 } 1055 } 1056 } 1057 if (x) { 1058 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1059 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1060 } 1061 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1062 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1063 ierr = PetscFree(lrows);CHKERRQ(ierr); 1064 1065 /* wait on sends */ 1066 if (nsends) { 1067 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1068 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1069 ierr = PetscFree(send_status);CHKERRQ(ierr); 1070 } 1071 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1072 ierr = PetscFree(svalues);CHKERRQ(ierr); 1073 1074 PetscFunctionReturn(0); 1075 } 1076 1077 #undef __FUNCT__ 1078 #define __FUNCT__ "MatMult_MPIAIJ" 1079 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1080 { 1081 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1082 PetscErrorCode ierr; 1083 PetscInt nt; 1084 1085 PetscFunctionBegin; 1086 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1087 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1088 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1089 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1090 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1091 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1092 PetscFunctionReturn(0); 1093 } 1094 1095 #undef __FUNCT__ 1096 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1097 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1098 { 1099 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1100 PetscErrorCode ierr; 1101 1102 PetscFunctionBegin; 1103 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1104 PetscFunctionReturn(0); 1105 } 1106 1107 #undef __FUNCT__ 1108 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1109 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1110 { 1111 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1112 PetscErrorCode ierr; 1113 1114 PetscFunctionBegin; 1115 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1116 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1117 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1118 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1119 PetscFunctionReturn(0); 1120 } 1121 1122 #undef __FUNCT__ 1123 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1124 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1125 { 1126 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1127 PetscErrorCode ierr; 1128 PetscBool merged; 1129 1130 PetscFunctionBegin; 1131 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1132 /* do nondiagonal part */ 1133 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1134 if (!merged) { 1135 /* 
send it on its way */ 1136 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1137 /* do local part */ 1138 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1139 /* receive remote parts: note this assumes the values are not actually */ 1140 /* added in yy until the next line, */ 1141 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1142 } else { 1143 /* do local part */ 1144 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1145 /* send it on its way */ 1146 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1147 /* values actually were received in the Begin() but we need to call this nop */ 1148 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1149 } 1150 PetscFunctionReturn(0); 1151 } 1152 1153 EXTERN_C_BEGIN 1154 #undef __FUNCT__ 1155 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1156 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1157 { 1158 MPI_Comm comm; 1159 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 1160 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1161 IS Me,Notme; 1162 PetscErrorCode ierr; 1163 PetscInt M,N,first,last,*notme,i; 1164 PetscMPIInt size; 1165 1166 PetscFunctionBegin; 1167 1168 /* Easy test: symmetric diagonal block */ 1169 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 1170 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1171 if (!*f) PetscFunctionReturn(0); 1172 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1173 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1174 if (size == 1) PetscFunctionReturn(0); 1175 1176 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1177 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1178 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1179 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1180 for (i=0; i<first; i++) notme[i] = i; 1181 for (i=last; i<M; i++) notme[i-last+first] = i; 1182 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1183 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1184 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1185 Aoff = Aoffs[0]; 1186 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1187 Boff = Boffs[0]; 1188 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1189 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1190 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1191 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1192 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1193 ierr = PetscFree(notme);CHKERRQ(ierr); 1194 PetscFunctionReturn(0); 1195 } 1196 EXTERN_C_END 1197 1198 #undef __FUNCT__ 1199 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1200 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1201 { 1202 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1203 PetscErrorCode ierr; 1204 1205 PetscFunctionBegin; 1206 /* do nondiagonal part */ 1207 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1208 /* send it on its way */ 1209 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1210 /* do local part */ 1211 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1212 /* receive remote parts */ 1213 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1214 PetscFunctionReturn(0); 1215 } 
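
/*
   Summary of the multiply kernels above (notation only; nothing here is called):
   on each process the parallel matrix is stored as a diagonal block A_d (columns
   owned locally) and an off-diagonal block B_o (all other columns), with the
   needed ghost values of x gathered into a->lvec by the Mvctx scatter.  The
   routines then compute

       MatMult:              y  = A_d x_loc + B_o x_ghost
       MatMultAdd:           z  = y + A_d x_loc + B_o x_ghost
       MatMultTranspose:     y  = A_d^T x_loc + (scatter-add of B_o^T x_loc)
       MatMultTransposeAdd:  z  = y + A_d^T x_loc + (scatter-add of B_o^T x_loc)

   and, where possible, the scatter is started before the local product so that
   communication and computation overlap.
*/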
1216 1217 /* 1218 This only works correctly for square matrices where the subblock A->A is the 1219 diagonal block 1220 */ 1221 #undef __FUNCT__ 1222 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1223 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1224 { 1225 PetscErrorCode ierr; 1226 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1227 1228 PetscFunctionBegin; 1229 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1230 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1231 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1232 PetscFunctionReturn(0); 1233 } 1234 1235 #undef __FUNCT__ 1236 #define __FUNCT__ "MatScale_MPIAIJ" 1237 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1238 { 1239 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1240 PetscErrorCode ierr; 1241 1242 PetscFunctionBegin; 1243 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1244 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1245 PetscFunctionReturn(0); 1246 } 1247 1248 #undef __FUNCT__ 1249 #define __FUNCT__ "MatDestroy_MPIAIJ" 1250 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1251 { 1252 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1253 PetscErrorCode ierr; 1254 1255 PetscFunctionBegin; 1256 #if defined(PETSC_USE_LOG) 1257 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1258 #endif 1259 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1260 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1261 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1262 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1263 #if defined (PETSC_USE_CTABLE) 1264 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1265 #else 1266 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1267 #endif 1268 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1269 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1270 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1271 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1272 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1273 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1274 1275 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1276 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1277 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1278 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1279 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1280 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1281 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1282 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1283 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1284 PetscFunctionReturn(0); 1285 } 1286 1287 #undef __FUNCT__ 1288 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1289 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1290 { 1291 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1292 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 1293 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 1294 PetscErrorCode ierr; 1295 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1296 int 
fd; 1297 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1298 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 1299 PetscScalar *column_values; 1300 PetscInt message_count,flowcontrolcount; 1301 1302 PetscFunctionBegin; 1303 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1304 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1305 nz = A->nz + B->nz; 1306 if (!rank) { 1307 header[0] = MAT_FILE_CLASSID; 1308 header[1] = mat->rmap->N; 1309 header[2] = mat->cmap->N; 1310 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1311 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1312 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1313 /* get largest number of rows any processor has */ 1314 rlen = mat->rmap->n; 1315 range = mat->rmap->range; 1316 for (i=1; i<size; i++) { 1317 rlen = PetscMax(rlen,range[i+1] - range[i]); 1318 } 1319 } else { 1320 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1321 rlen = mat->rmap->n; 1322 } 1323 1324 /* load up the local row counts */ 1325 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 1326 for (i=0; i<mat->rmap->n; i++) { 1327 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1328 } 1329 1330 /* store the row lengths to the file */ 1331 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1332 if (!rank) { 1333 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1334 for (i=1; i<size; i++) { 1335 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1336 rlen = range[i+1] - range[i]; 1337 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1338 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1339 } 1340 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1341 } else { 1342 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1343 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1344 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1345 } 1346 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1347 1348 /* load up the local column indices */ 1349 nzmax = nz; /* )th processor needs space a largest processor needs */ 1350 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1351 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1352 cnt = 0; 1353 for (i=0; i<mat->rmap->n; i++) { 1354 for (j=B->i[i]; j<B->i[i+1]; j++) { 1355 if ( (col = garray[B->j[j]]) > cstart) break; 1356 column_indices[cnt++] = col; 1357 } 1358 for (k=A->i[i]; k<A->i[i+1]; k++) { 1359 column_indices[cnt++] = A->j[k] + cstart; 1360 } 1361 for (; j<B->i[i+1]; j++) { 1362 column_indices[cnt++] = garray[B->j[j]]; 1363 } 1364 } 1365 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1366 1367 /* store the column indices to the file */ 1368 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1369 if (!rank) { 1370 MPI_Status status; 1371 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1372 for (i=1; i<size; i++) { 1373 ierr = 
PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1374 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1375 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1376 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1377 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1378 } 1379 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1380 } else { 1381 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1382 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1383 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1384 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1385 } 1386 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1387 1388 /* load up the local column values */ 1389 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1390 cnt = 0; 1391 for (i=0; i<mat->rmap->n; i++) { 1392 for (j=B->i[i]; j<B->i[i+1]; j++) { 1393 if ( garray[B->j[j]] > cstart) break; 1394 column_values[cnt++] = B->a[j]; 1395 } 1396 for (k=A->i[i]; k<A->i[i+1]; k++) { 1397 column_values[cnt++] = A->a[k]; 1398 } 1399 for (; j<B->i[i+1]; j++) { 1400 column_values[cnt++] = B->a[j]; 1401 } 1402 } 1403 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1404 1405 /* store the column values to the file */ 1406 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1407 if (!rank) { 1408 MPI_Status status; 1409 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1410 for (i=1; i<size; i++) { 1411 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1412 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1413 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1414 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1415 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1416 } 1417 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1418 } else { 1419 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1420 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1421 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1422 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1423 } 1424 ierr = PetscFree(column_values);CHKERRQ(ierr); 1425 PetscFunctionReturn(0); 1426 } 1427 1428 #undef __FUNCT__ 1429 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1430 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1431 { 1432 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1433 PetscErrorCode ierr; 1434 PetscMPIInt rank = aij->rank,size = aij->size; 1435 PetscBool isdraw,iascii,isbinary; 1436 PetscViewer sviewer; 1437 PetscViewerFormat format; 1438 1439 PetscFunctionBegin; 1440 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1441 ierr = 
PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1442 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1443 if (iascii) { 1444 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1445 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1446 MatInfo info; 1447 PetscBool inodes; 1448 1449 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1450 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1451 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1452 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1453 if (!inodes) { 1454 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1455 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1456 } else { 1457 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1458 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1459 } 1460 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1461 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1462 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1463 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1464 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1465 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1466 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1467 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1468 PetscFunctionReturn(0); 1469 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1470 PetscInt inodecount,inodelimit,*inodes; 1471 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1472 if (inodes) { 1473 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1474 } else { 1475 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1476 } 1477 PetscFunctionReturn(0); 1478 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1479 PetscFunctionReturn(0); 1480 } 1481 } else if (isbinary) { 1482 if (size == 1) { 1483 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1484 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1485 } else { 1486 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1487 } 1488 PetscFunctionReturn(0); 1489 } else if (isdraw) { 1490 PetscDraw draw; 1491 PetscBool isnull; 1492 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1493 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1494 } 1495 1496 if (size == 1) { 1497 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1498 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1499 } else { 1500 /* assemble the entire matrix onto first processor. 
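       (Sketch of what follows: a temporary parallel matrix A is created with all
        M rows placed on process 0 and zero rows elsewhere; every process then
        copies its local diagonal and off-diagonal parts into it with MatSetValues(),
        so after assembly the full matrix sits in the rank-0 diagonal block and can
        be handed to the sequential viewer obtained from PetscViewerGetSingleton().)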
*/ 1501 Mat A; 1502 Mat_SeqAIJ *Aloc; 1503 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1504 MatScalar *a; 1505 1506 if (mat->rmap->N > 1024) { 1507 PetscBool flg = PETSC_FALSE; 1508 1509 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1510 if (!flg) { 1511 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1512 } 1513 } 1514 1515 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1516 if (!rank) { 1517 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1518 } else { 1519 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1520 } 1521 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1522 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1523 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1524 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1525 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1526 1527 /* copy over the A part */ 1528 Aloc = (Mat_SeqAIJ*)aij->A->data; 1529 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1530 row = mat->rmap->rstart; 1531 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1532 for (i=0; i<m; i++) { 1533 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1534 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1535 } 1536 aj = Aloc->j; 1537 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1538 1539 /* copy over the B part */ 1540 Aloc = (Mat_SeqAIJ*)aij->B->data; 1541 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1542 row = mat->rmap->rstart; 1543 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1544 ct = cols; 1545 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1546 for (i=0; i<m; i++) { 1547 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1548 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1549 } 1550 ierr = PetscFree(ct);CHKERRQ(ierr); 1551 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1552 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1553 /* 1554 Everyone has to call to draw the matrix since the graphics waits are 1555 synchronized across all processors that share the PetscDraw object 1556 */ 1557 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1558 if (!rank) { 1559 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1560 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1561 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1562 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1563 } 1564 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1565 ierr = MatDestroy(&A);CHKERRQ(ierr); 1566 } 1567 PetscFunctionReturn(0); 1568 } 1569 1570 #undef __FUNCT__ 1571 #define __FUNCT__ "MatView_MPIAIJ" 1572 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1573 { 1574 PetscErrorCode ierr; 1575 PetscBool iascii,isdraw,issocket,isbinary; 1576 1577 PetscFunctionBegin; 1578 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1579 ierr = 
PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1580 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1581 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1582 if (iascii || isdraw || isbinary || issocket) { 1583 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1584 } else { 1585 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1586 } 1587 PetscFunctionReturn(0); 1588 } 1589 1590 #undef __FUNCT__ 1591 #define __FUNCT__ "MatSOR_MPIAIJ" 1592 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1593 { 1594 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1595 PetscErrorCode ierr; 1596 Vec bb1 = 0; 1597 PetscBool hasop; 1598 1599 PetscFunctionBegin; 1600 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1601 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1602 } 1603 1604 if (flag == SOR_APPLY_UPPER) { 1605 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1606 PetscFunctionReturn(0); 1607 } 1608 1609 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1610 if (flag & SOR_ZERO_INITIAL_GUESS) { 1611 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1612 its--; 1613 } 1614 1615 while (its--) { 1616 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1617 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1618 1619 /* update rhs: bb1 = bb - B*x */ 1620 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1621 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1622 1623 /* local sweep */ 1624 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1625 } 1626 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1627 if (flag & SOR_ZERO_INITIAL_GUESS) { 1628 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1629 its--; 1630 } 1631 while (its--) { 1632 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1633 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1634 1635 /* update rhs: bb1 = bb - B*x */ 1636 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1637 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1638 1639 /* local sweep */ 1640 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1641 } 1642 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1643 if (flag & SOR_ZERO_INITIAL_GUESS) { 1644 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1645 its--; 1646 } 1647 while (its--) { 1648 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1649 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1650 1651 /* update rhs: bb1 = bb - B*x */ 1652 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1653 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1654 1655 /* local sweep */ 1656 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1657 } 1658 } else if (flag & SOR_EISENSTAT) { 1659 Vec xx1; 1660 1661 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1662 ierr = 
(*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1663 1664 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1665 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1666 if (!mat->diag) { 1667 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1668 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1669 } 1670 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1671 if (hasop) { 1672 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1673 } else { 1674 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1675 } 1676 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1677 1678 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1679 1680 /* local sweep */ 1681 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1682 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1683 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1684 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1685 1686 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1687 PetscFunctionReturn(0); 1688 } 1689 1690 #undef __FUNCT__ 1691 #define __FUNCT__ "MatPermute_MPIAIJ" 1692 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1693 { 1694 MPI_Comm comm; 1695 PetscInt first,local_rowsize,local_colsize; 1696 const PetscInt *rows; 1697 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1698 PetscErrorCode ierr; 1699 1700 PetscFunctionBegin; 1701 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1702 /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */ 1703 ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr); 1704 /* collect the global row permutation and invert it */ 1705 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1706 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1707 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 1708 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1709 ierr = ISDestroy(&growp);CHKERRQ(ierr); 1710 /* get the local target indices */ 1711 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1712 ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr); 1713 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1714 ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 1715 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1716 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 1717 /* the column permutation is so much easier; 1718 make a local version of 'colp' and invert it */ 1719 ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr); 1720 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1721 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1722 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 1723 /* now we just get the submatrix */ 1724 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1725 /* clean up */ 1726 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 1727 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 1728 PetscFunctionReturn(0); 1729 } 1730 1731 #undef __FUNCT__ 1732 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1733 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1734 { 1735 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1736 Mat A = mat->A,B = 
mat->B; 1737 PetscErrorCode ierr; 1738 PetscReal isend[5],irecv[5]; 1739 1740 PetscFunctionBegin; 1741 info->block_size = 1.0; 1742 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1743 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1744 isend[3] = info->memory; isend[4] = info->mallocs; 1745 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1746 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1747 isend[3] += info->memory; isend[4] += info->mallocs; 1748 if (flag == MAT_LOCAL) { 1749 info->nz_used = isend[0]; 1750 info->nz_allocated = isend[1]; 1751 info->nz_unneeded = isend[2]; 1752 info->memory = isend[3]; 1753 info->mallocs = isend[4]; 1754 } else if (flag == MAT_GLOBAL_MAX) { 1755 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1756 info->nz_used = irecv[0]; 1757 info->nz_allocated = irecv[1]; 1758 info->nz_unneeded = irecv[2]; 1759 info->memory = irecv[3]; 1760 info->mallocs = irecv[4]; 1761 } else if (flag == MAT_GLOBAL_SUM) { 1762 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1763 info->nz_used = irecv[0]; 1764 info->nz_allocated = irecv[1]; 1765 info->nz_unneeded = irecv[2]; 1766 info->memory = irecv[3]; 1767 info->mallocs = irecv[4]; 1768 } 1769 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1770 info->fill_ratio_needed = 0; 1771 info->factor_mallocs = 0; 1772 1773 PetscFunctionReturn(0); 1774 } 1775 1776 #undef __FUNCT__ 1777 #define __FUNCT__ "MatSetOption_MPIAIJ" 1778 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1779 { 1780 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1781 PetscErrorCode ierr; 1782 1783 PetscFunctionBegin; 1784 switch (op) { 1785 case MAT_NEW_NONZERO_LOCATIONS: 1786 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1787 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1788 case MAT_KEEP_NONZERO_PATTERN: 1789 case MAT_NEW_NONZERO_LOCATION_ERR: 1790 case MAT_USE_INODES: 1791 case MAT_IGNORE_ZERO_ENTRIES: 1792 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1793 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1794 break; 1795 case MAT_ROW_ORIENTED: 1796 a->roworiented = flg; 1797 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1798 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1799 break; 1800 case MAT_NEW_DIAGONALS: 1801 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1802 break; 1803 case MAT_IGNORE_OFF_PROC_ENTRIES: 1804 a->donotstash = flg; 1805 break; 1806 case MAT_SPD: 1807 A->spd_set = PETSC_TRUE; 1808 A->spd = flg; 1809 if (flg) { 1810 A->symmetric = PETSC_TRUE; 1811 A->structurally_symmetric = PETSC_TRUE; 1812 A->symmetric_set = PETSC_TRUE; 1813 A->structurally_symmetric_set = PETSC_TRUE; 1814 } 1815 break; 1816 case MAT_SYMMETRIC: 1817 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1818 break; 1819 case MAT_STRUCTURALLY_SYMMETRIC: 1820 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1821 break; 1822 case MAT_HERMITIAN: 1823 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1824 break; 1825 case MAT_SYMMETRY_ETERNAL: 1826 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1827 break; 1828 default: 1829 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1830 } 1831 PetscFunctionReturn(0); 1832 } 1833 1834 #undef __FUNCT__ 1835 #define __FUNCT__ "MatGetRow_MPIAIJ" 1836 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1837 { 1838 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1839 
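  /*
     This routine merges one local row of the diagonal block (mat->A) and of the
     off-diagonal block (mat->B) into a single row with global column indices,
     returned through the scratch arrays mat->rowvalues/mat->rowindices that are
     allocated on first use.  A minimal caller sketch (standard
     MatGetRow()/MatRestoreRow() usage; the variable names are illustrative only):

         PetscInt          row,rstart,rend,ncols;
         const PetscInt    *cols;
         const PetscScalar *vals;
         ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
         for (row=rstart; row<rend; row++) {
           ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
           ... use ncols, cols[], vals[] ...
           ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
         }

     Only rows in [rstart,rend) owned by this process may be requested.
  */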
PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1840 PetscErrorCode ierr; 1841 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1842 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1843 PetscInt *cmap,*idx_p; 1844 1845 PetscFunctionBegin; 1846 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1847 mat->getrowactive = PETSC_TRUE; 1848 1849 if (!mat->rowvalues && (idx || v)) { 1850 /* 1851 allocate enough space to hold information from the longest row. 1852 */ 1853 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1854 PetscInt max = 1,tmp; 1855 for (i=0; i<matin->rmap->n; i++) { 1856 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1857 if (max < tmp) { max = tmp; } 1858 } 1859 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1860 } 1861 1862 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1863 lrow = row - rstart; 1864 1865 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1866 if (!v) {pvA = 0; pvB = 0;} 1867 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1868 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1869 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1870 nztot = nzA + nzB; 1871 1872 cmap = mat->garray; 1873 if (v || idx) { 1874 if (nztot) { 1875 /* Sort by increasing column numbers, assuming A and B already sorted */ 1876 PetscInt imark = -1; 1877 if (v) { 1878 *v = v_p = mat->rowvalues; 1879 for (i=0; i<nzB; i++) { 1880 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1881 else break; 1882 } 1883 imark = i; 1884 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1885 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1886 } 1887 if (idx) { 1888 *idx = idx_p = mat->rowindices; 1889 if (imark > -1) { 1890 for (i=0; i<imark; i++) { 1891 idx_p[i] = cmap[cworkB[i]]; 1892 } 1893 } else { 1894 for (i=0; i<nzB; i++) { 1895 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1896 else break; 1897 } 1898 imark = i; 1899 } 1900 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1901 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1902 } 1903 } else { 1904 if (idx) *idx = 0; 1905 if (v) *v = 0; 1906 } 1907 } 1908 *nz = nztot; 1909 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1910 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1911 PetscFunctionReturn(0); 1912 } 1913 1914 #undef __FUNCT__ 1915 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1916 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1917 { 1918 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1919 1920 PetscFunctionBegin; 1921 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1922 aij->getrowactive = PETSC_FALSE; 1923 PetscFunctionReturn(0); 1924 } 1925 1926 #undef __FUNCT__ 1927 #define __FUNCT__ "MatNorm_MPIAIJ" 1928 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1929 { 1930 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1931 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1932 PetscErrorCode ierr; 1933 PetscInt i,j,cstart = mat->cmap->rstart; 1934 PetscReal sum = 0.0; 1935 MatScalar *v; 1936 1937 PetscFunctionBegin; 1938 if (aij->size == 1) { 1939 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1940 } else { 1941 if (type == 
NORM_FROBENIUS) { 1942 v = amat->a; 1943 for (i=0; i<amat->nz; i++) { 1944 #if defined(PETSC_USE_COMPLEX) 1945 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1946 #else 1947 sum += (*v)*(*v); v++; 1948 #endif 1949 } 1950 v = bmat->a; 1951 for (i=0; i<bmat->nz; i++) { 1952 #if defined(PETSC_USE_COMPLEX) 1953 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1954 #else 1955 sum += (*v)*(*v); v++; 1956 #endif 1957 } 1958 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1959 *norm = PetscSqrtReal(*norm); 1960 } else if (type == NORM_1) { /* max column norm */ 1961 PetscReal *tmp,*tmp2; 1962 PetscInt *jj,*garray = aij->garray; 1963 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1964 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1965 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1966 *norm = 0.0; 1967 v = amat->a; jj = amat->j; 1968 for (j=0; j<amat->nz; j++) { 1969 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1970 } 1971 v = bmat->a; jj = bmat->j; 1972 for (j=0; j<bmat->nz; j++) { 1973 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1974 } 1975 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1976 for (j=0; j<mat->cmap->N; j++) { 1977 if (tmp2[j] > *norm) *norm = tmp2[j]; 1978 } 1979 ierr = PetscFree(tmp);CHKERRQ(ierr); 1980 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1981 } else if (type == NORM_INFINITY) { /* max row norm */ 1982 PetscReal ntemp = 0.0; 1983 for (j=0; j<aij->A->rmap->n; j++) { 1984 v = amat->a + amat->i[j]; 1985 sum = 0.0; 1986 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1987 sum += PetscAbsScalar(*v); v++; 1988 } 1989 v = bmat->a + bmat->i[j]; 1990 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1991 sum += PetscAbsScalar(*v); v++; 1992 } 1993 if (sum > ntemp) ntemp = sum; 1994 } 1995 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1996 } else { 1997 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 1998 } 1999 } 2000 PetscFunctionReturn(0); 2001 } 2002 2003 #undef __FUNCT__ 2004 #define __FUNCT__ "MatTranspose_MPIAIJ" 2005 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2006 { 2007 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2008 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2009 PetscErrorCode ierr; 2010 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 2011 PetscInt cstart=A->cmap->rstart,ncol; 2012 Mat B; 2013 MatScalar *array; 2014 2015 PetscFunctionBegin; 2016 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2017 2018 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 2019 ai = Aloc->i; aj = Aloc->j; 2020 bi = Bloc->i; bj = Bloc->j; 2021 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2022 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 2023 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2024 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 2025 for (i=0; i<ai[ma]; i++){ 2026 d_nnz[aj[i]] ++; 2027 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2028 } 2029 2030 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2031 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2032 ierr = 
MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2033 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 2034 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2035 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2036 } else { 2037 B = *matout; 2038 } 2039 2040 /* copy over the A part */ 2041 array = Aloc->a; 2042 row = A->rmap->rstart; 2043 for (i=0; i<ma; i++) { 2044 ncol = ai[i+1]-ai[i]; 2045 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2046 row++; array += ncol; aj += ncol; 2047 } 2048 aj = Aloc->j; 2049 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2050 2051 /* copy over the B part */ 2052 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2053 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2054 array = Bloc->a; 2055 row = A->rmap->rstart; 2056 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 2057 cols_tmp = cols; 2058 for (i=0; i<mb; i++) { 2059 ncol = bi[i+1]-bi[i]; 2060 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2061 row++; array += ncol; cols_tmp += ncol; 2062 } 2063 ierr = PetscFree(cols);CHKERRQ(ierr); 2064 2065 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2066 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2067 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2068 *matout = B; 2069 } else { 2070 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2071 } 2072 PetscFunctionReturn(0); 2073 } 2074 2075 #undef __FUNCT__ 2076 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2077 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2078 { 2079 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2080 Mat a = aij->A,b = aij->B; 2081 PetscErrorCode ierr; 2082 PetscInt s1,s2,s3; 2083 2084 PetscFunctionBegin; 2085 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2086 if (rr) { 2087 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2088 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2089 /* Overlap communication with computation. 
*/ 2090 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2091 } 2092 if (ll) { 2093 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2094 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2095 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2096 } 2097 /* scale the diagonal block */ 2098 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2099 2100 if (rr) { 2101 /* Do a scatter end and then right scale the off-diagonal block */ 2102 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2103 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2104 } 2105 2106 PetscFunctionReturn(0); 2107 } 2108 2109 #undef __FUNCT__ 2110 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2111 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2112 { 2113 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2114 PetscErrorCode ierr; 2115 2116 PetscFunctionBegin; 2117 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2118 PetscFunctionReturn(0); 2119 } 2120 2121 #undef __FUNCT__ 2122 #define __FUNCT__ "MatEqual_MPIAIJ" 2123 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2124 { 2125 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2126 Mat a,b,c,d; 2127 PetscBool flg; 2128 PetscErrorCode ierr; 2129 2130 PetscFunctionBegin; 2131 a = matA->A; b = matA->B; 2132 c = matB->A; d = matB->B; 2133 2134 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2135 if (flg) { 2136 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2137 } 2138 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2139 PetscFunctionReturn(0); 2140 } 2141 2142 #undef __FUNCT__ 2143 #define __FUNCT__ "MatCopy_MPIAIJ" 2144 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2145 { 2146 PetscErrorCode ierr; 2147 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2148 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2149 2150 PetscFunctionBegin; 2151 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2152 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2153 /* because of the column compression in the off-processor part of the matrix a->B, 2154 the number of columns in a->B and b->B may be different, hence we cannot call 2155 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2156 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2157 then copying the submatrices */ 2158 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2159 } else { 2160 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2161 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2162 } 2163 PetscFunctionReturn(0); 2164 } 2165 2166 #undef __FUNCT__ 2167 #define __FUNCT__ "MatSetUp_MPIAIJ" 2168 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2169 { 2170 PetscErrorCode ierr; 2171 2172 PetscFunctionBegin; 2173 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2174 PetscFunctionReturn(0); 2175 } 2176 2177 #undef __FUNCT__ 2178 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2179 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2180 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz) 2181 { 2182 PetscInt i,m=Y->rmap->N; 2183 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2184 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2185 const PetscInt *xi = x->i,*yi = y->i; 2186 2187 PetscFunctionBegin; 2188 /* Set the number of nonzeros in the new matrix */ 2189 for(i=0; i<m; i++) { 2190 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2191 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2192 nnz[i] = 0; 2193 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2194 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2195 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2196 nnz[i]++; 2197 } 2198 for (; k<nzy; k++) nnz[i]++; 2199 } 2200 PetscFunctionReturn(0); 2201 } 2202 2203 #undef __FUNCT__ 2204 #define __FUNCT__ "MatAXPY_MPIAIJ" 2205 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2206 { 2207 PetscErrorCode ierr; 2208 PetscInt i; 2209 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2210 PetscBLASInt bnz,one=1; 2211 Mat_SeqAIJ *x,*y; 2212 2213 PetscFunctionBegin; 2214 if (str == SAME_NONZERO_PATTERN) { 2215 PetscScalar alpha = a; 2216 x = (Mat_SeqAIJ *)xx->A->data; 2217 y = (Mat_SeqAIJ *)yy->A->data; 2218 bnz = PetscBLASIntCast(x->nz); 2219 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2220 x = (Mat_SeqAIJ *)xx->B->data; 2221 y = (Mat_SeqAIJ *)yy->B->data; 2222 bnz = PetscBLASIntCast(x->nz); 2223 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2224 } else if (str == SUBSET_NONZERO_PATTERN) { 2225 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2226 2227 x = (Mat_SeqAIJ *)xx->B->data; 2228 y = (Mat_SeqAIJ *)yy->B->data; 2229 if (y->xtoy && y->XtoY != xx->B) { 2230 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2231 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2232 } 2233 if (!y->xtoy) { /* get xtoy */ 2234 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2235 y->XtoY = xx->B; 2236 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2237 } 2238 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2239 } else { 2240 Mat B; 2241 PetscInt *nnz_d,*nnz_o; 2242 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2243 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2244 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2245 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2246 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2247 ierr = 
MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    ierr = MatHeaderReplace(Y,B);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatConjugate_SeqAIJ(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatConjugate_MPIAIJ"
PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
{
#if defined(PETSC_USE_COMPLEX)
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
  ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
#else
  PetscFunctionBegin;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPIAIJ"
PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPIAIJ"
PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#ifdef PETSC_HAVE_PBGL

#include <boost/parallel/mpi/bsp_process_group.hpp>
#include <boost/graph/distributed/ilu_default_graph.hpp>
#include <boost/graph/distributed/ilu_0_block.hpp>
#include <boost/graph/distributed/ilu_preconditioner.hpp>
#include <boost/graph/distributed/petsc/interface.hpp>
#include <boost/multi_array.hpp>
#include <boost/parallel/distributed_property_map.hpp>

#undef __FUNCT__
#define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
/*
  This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
*/
PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
{
  namespace petsc = boost::distributed::petsc;

  namespace graph_dist = boost::graph::distributed;
  using boost::graph::distributed::ilu_default::process_group_type;
  using boost::graph::ilu_permuted;

  PetscBool      row_identity, col_identity;
  PetscContainer c;
  PetscInt       m, n, M, N;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
  ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
  ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
  if (!row_identity || !col_identity) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
  }

  process_group_type pg;
  typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
  lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
  lgraph_type& level_graph = *lgraph_p;
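  /*
     The remaining steps read the PETSc matrix into the distributed Boost graph,
     run the permuted ILU(0) on it, and attach the resulting level graph to the
     factor matrix through a PetscContainer composed under the key "graph".
     MatSolve_MPIAIJ() later retrieves it with the standard query pattern
     (a sketch of what that routine does; see its definition further below):

         PetscContainer c;
         lgraph_type    *lgraph_p;
         ierr = PetscObjectQuery((PetscObject)A,"graph",(PetscObject*)&c);CHKERRQ(ierr);
         ierr = PetscContainerGetPointer(c,(void**)&lgraph_p);CHKERRQ(ierr);
  */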
graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2345 2346 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2347 ilu_permuted(level_graph); 2348 2349 /* put together the new matrix */ 2350 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2351 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2352 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2353 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2354 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2355 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2356 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2357 2358 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 2359 ierr = PetscContainerSetPointer(c, lgraph_p); 2360 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2361 ierr = PetscContainerDestroy(&c); 2362 PetscFunctionReturn(0); 2363 } 2364 2365 #undef __FUNCT__ 2366 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2367 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2368 { 2369 PetscFunctionBegin; 2370 PetscFunctionReturn(0); 2371 } 2372 2373 #undef __FUNCT__ 2374 #define __FUNCT__ "MatSolve_MPIAIJ" 2375 /* 2376 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2377 */ 2378 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2379 { 2380 namespace graph_dist = boost::graph::distributed; 2381 2382 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2383 lgraph_type* lgraph_p; 2384 PetscContainer c; 2385 PetscErrorCode ierr; 2386 2387 PetscFunctionBegin; 2388 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2389 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2390 ierr = VecCopy(b, x);CHKERRQ(ierr); 2391 2392 PetscScalar* array_x; 2393 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2394 PetscInt sx; 2395 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2396 2397 PetscScalar* array_b; 2398 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2399 PetscInt sb; 2400 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2401 2402 lgraph_type& level_graph = *lgraph_p; 2403 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2404 2405 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2406 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2407 ref_x(array_x, boost::extents[num_vertices(graph)]); 2408 2409 typedef boost::iterator_property_map<array_ref_type::iterator, 2410 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2411 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2412 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2413 2414 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2415 2416 PetscFunctionReturn(0); 2417 } 2418 #endif 2419 2420 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2421 PetscInt nzlocal,nsends,nrecvs; 2422 PetscMPIInt *send_rank,*recv_rank; 2423 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2424 PetscScalar *sbuf_a,**rbuf_a; 2425 PetscErrorCode (*Destroy)(Mat); 2426 } Mat_Redundant; 2427 2428 #undef __FUNCT__ 2429 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2430 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2431 { 2432 PetscErrorCode ierr; 2433 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2434 PetscInt i; 2435 2436 PetscFunctionBegin; 2437 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2438 
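  /*
     Free the per-message receive buffers before releasing the arrays of buffer
     pointers: each rbuf_j[i] and rbuf_a[i] was obtained with an individual
     PetscMalloc() in MatGetRedundantMatrix_MPIAIJ(), while sbuf_nz, rbuf_nz,
     rbuf_j and rbuf_a themselves come from a single PetscMalloc4() and must be
     released together by the matching PetscFree4() below.
  */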
ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
  ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
  for (i=0; i<redund->nrecvs; i++){
    ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
    ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
  }
  ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
  ierr = PetscFree(redund);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MatRedundant"
PetscErrorCode MatDestroy_MatRedundant(Mat A)
{
  PetscErrorCode ierr;
  PetscContainer container;
  Mat_Redundant  *redund=PETSC_NULL;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
  if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
  ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
  A->ops->destroy = redund->Destroy;
  ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
  if (A->ops->destroy) {
    ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
{
  PetscMPIInt    rank,size;
  MPI_Comm       comm=((PetscObject)mat)->comm;
  PetscErrorCode ierr;
  PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
  PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
  PetscInt       *rowrange=mat->rmap->range;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            A=aij->A,B=aij->B,C=*matredundant;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
  PetscScalar    *sbuf_a;
  PetscInt       nzlocal=a->nz+b->nz;
  PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
  PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
  PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
  MatScalar      *aworkA,*aworkB;
  PetscScalar    *vals;
  PetscMPIInt    tag1,tag2,tag3,imdex;
  MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
                 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
  MPI_Status     recv_status,*send_status;
  PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
  PetscInt       **rbuf_j=PETSC_NULL;
  PetscScalar    **rbuf_a=PETSC_NULL;
  Mat_Redundant  *redund=PETSC_NULL;
  PetscContainer container;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
    if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
    ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
    if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
    ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
    if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
    ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
    if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix.
Wrong nzlocal"); 2512 2513 nsends = redund->nsends; 2514 nrecvs = redund->nrecvs; 2515 send_rank = redund->send_rank; 2516 recv_rank = redund->recv_rank; 2517 sbuf_nz = redund->sbuf_nz; 2518 rbuf_nz = redund->rbuf_nz; 2519 sbuf_j = redund->sbuf_j; 2520 sbuf_a = redund->sbuf_a; 2521 rbuf_j = redund->rbuf_j; 2522 rbuf_a = redund->rbuf_a; 2523 } 2524 2525 if (reuse == MAT_INITIAL_MATRIX){ 2526 PetscMPIInt subrank,subsize; 2527 PetscInt nleftover,np_subcomm; 2528 /* get the destination processors' id send_rank, nsends and nrecvs */ 2529 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2530 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2531 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2532 np_subcomm = size/nsubcomm; 2533 nleftover = size - nsubcomm*np_subcomm; 2534 nsends = 0; nrecvs = 0; 2535 for (i=0; i<size; i++){ /* i=rank*/ 2536 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2537 send_rank[nsends] = i; nsends++; 2538 recv_rank[nrecvs++] = i; 2539 } 2540 } 2541 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2542 i = size-nleftover-1; 2543 j = 0; 2544 while (j < nsubcomm - nleftover){ 2545 send_rank[nsends++] = i; 2546 i--; j++; 2547 } 2548 } 2549 2550 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2551 for (i=0; i<nleftover; i++){ 2552 recv_rank[nrecvs++] = size-nleftover+i; 2553 } 2554 } 2555 2556 /* allocate sbuf_j, sbuf_a */ 2557 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2558 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2559 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2560 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2561 2562 /* copy mat's local entries into the buffers */ 2563 if (reuse == MAT_INITIAL_MATRIX){ 2564 rownz_max = 0; 2565 rptr = sbuf_j; 2566 cols = sbuf_j + rend-rstart + 1; 2567 vals = sbuf_a; 2568 rptr[0] = 0; 2569 for (i=0; i<rend-rstart; i++){ 2570 row = i + rstart; 2571 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2572 ncols = nzA + nzB; 2573 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2574 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2575 /* load the column indices for this row into cols */ 2576 lwrite = 0; 2577 for (l=0; l<nzB; l++) { 2578 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2579 vals[lwrite] = aworkB[l]; 2580 cols[lwrite++] = ctmp; 2581 } 2582 } 2583 for (l=0; l<nzA; l++){ 2584 vals[lwrite] = aworkA[l]; 2585 cols[lwrite++] = cstart + cworkA[l]; 2586 } 2587 for (l=0; l<nzB; l++) { 2588 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2589 vals[lwrite] = aworkB[l]; 2590 cols[lwrite++] = ctmp; 2591 } 2592 } 2593 vals += ncols; 2594 cols += ncols; 2595 rptr[i+1] = rptr[i] + ncols; 2596 if (rownz_max < ncols) rownz_max = ncols; 2597 } 2598 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2599 } else { /* only copy matrix values into sbuf_a */ 2600 rptr = sbuf_j; 2601 vals = sbuf_a; 2602 rptr[0] = 0; 2603 for (i=0; i<rend-rstart; i++){ 2604 row = i + rstart; 2605 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2606 ncols = nzA + nzB; 2607 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2608 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2609 lwrite = 0; 2610 for (l=0; l<nzB; l++) { 2611 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2612 } 2613 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2614 for 
(l=0; l<nzB; l++) { 2615 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2616 } 2617 vals += ncols; 2618 rptr[i+1] = rptr[i] + ncols; 2619 } 2620 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2621 2622 /* send nzlocal to others, and recv other's nzlocal */ 2623 /*--------------------------------------------------*/ 2624 if (reuse == MAT_INITIAL_MATRIX){ 2625 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2626 s_waits2 = s_waits3 + nsends; 2627 s_waits1 = s_waits2 + nsends; 2628 r_waits1 = s_waits1 + nsends; 2629 r_waits2 = r_waits1 + nrecvs; 2630 r_waits3 = r_waits2 + nrecvs; 2631 } else { 2632 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2633 r_waits3 = s_waits3 + nsends; 2634 } 2635 2636 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2637 if (reuse == MAT_INITIAL_MATRIX){ 2638 /* get new tags to keep the communication clean */ 2639 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2640 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2641 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2642 2643 /* post receives of other's nzlocal */ 2644 for (i=0; i<nrecvs; i++){ 2645 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2646 } 2647 /* send nzlocal to others */ 2648 for (i=0; i<nsends; i++){ 2649 sbuf_nz[i] = nzlocal; 2650 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2651 } 2652 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2653 count = nrecvs; 2654 while (count) { 2655 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2656 recv_rank[imdex] = recv_status.MPI_SOURCE; 2657 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2658 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2659 2660 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2661 rbuf_nz[imdex] += i + 2; 2662 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2663 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2664 count--; 2665 } 2666 /* wait on sends of nzlocal */ 2667 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2668 /* send mat->i,j to others, and recv from other's */ 2669 /*------------------------------------------------*/ 2670 for (i=0; i<nsends; i++){ 2671 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2672 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2673 } 2674 /* wait on receives of mat->i,j */ 2675 /*------------------------------*/ 2676 count = nrecvs; 2677 while (count) { 2678 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2679 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2680 count--; 2681 } 2682 /* wait on sends of mat->i,j */ 2683 /*---------------------------*/ 2684 if (nsends) { 2685 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2686 } 2687 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2688 2689 /* post receives, send and receive mat->a */ 2690 
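  /*
     Communication layout used by this routine (tag1, tag2 and tag3 were obtained
     above with PetscObjectGetNewTag()):
       tag1 - a single MPIU_INT per message: this process' nzlocal
       tag2 - sbuf_j: the row pointers rptr[0..nrows] followed by the global
              column indices of every local row (lower off-diagonal part, then
              the diagonal block, then the upper off-diagonal part), with
              nrows = rend-rstart
       tag3 - sbuf_a: the nzlocal numerical values in exactly the same ordering
     The receives posted below use the sizes collected during the tag1/tag2
     exchange, so only the values need to be re-sent when reuse == MAT_REUSE_MATRIX.
  */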
/*----------------------------------------*/ 2691 for (imdex=0; imdex<nrecvs; imdex++) { 2692 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2693 } 2694 for (i=0; i<nsends; i++){ 2695 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2696 } 2697 count = nrecvs; 2698 while (count) { 2699 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2700 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2701 count--; 2702 } 2703 if (nsends) { 2704 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2705 } 2706 2707 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2708 2709 /* create redundant matrix */ 2710 /*-------------------------*/ 2711 if (reuse == MAT_INITIAL_MATRIX){ 2712 /* compute rownz_max for preallocation */ 2713 for (imdex=0; imdex<nrecvs; imdex++){ 2714 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2715 rptr = rbuf_j[imdex]; 2716 for (i=0; i<j; i++){ 2717 ncols = rptr[i+1] - rptr[i]; 2718 if (rownz_max < ncols) rownz_max = ncols; 2719 } 2720 } 2721 2722 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2723 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2724 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2725 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2726 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2727 } else { 2728 C = *matredundant; 2729 } 2730 2731 /* insert local matrix entries */ 2732 rptr = sbuf_j; 2733 cols = sbuf_j + rend-rstart + 1; 2734 vals = sbuf_a; 2735 for (i=0; i<rend-rstart; i++){ 2736 row = i + rstart; 2737 ncols = rptr[i+1] - rptr[i]; 2738 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2739 vals += ncols; 2740 cols += ncols; 2741 } 2742 /* insert received matrix entries */ 2743 for (imdex=0; imdex<nrecvs; imdex++){ 2744 rstart = rowrange[recv_rank[imdex]]; 2745 rend = rowrange[recv_rank[imdex]+1]; 2746 rptr = rbuf_j[imdex]; 2747 cols = rbuf_j[imdex] + rend-rstart + 1; 2748 vals = rbuf_a[imdex]; 2749 for (i=0; i<rend-rstart; i++){ 2750 row = i + rstart; 2751 ncols = rptr[i+1] - rptr[i]; 2752 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2753 vals += ncols; 2754 cols += ncols; 2755 } 2756 } 2757 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2758 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2759 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2760 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2761 if (reuse == MAT_INITIAL_MATRIX) { 2762 PetscContainer container; 2763 *matredundant = C; 2764 /* create a supporting struct and attach it to C for reuse */ 2765 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2766 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2767 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2768 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2769 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2770 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2771 2772 redund->nzlocal = nzlocal; 2773 redund->nsends = nsends; 2774 redund->nrecvs = nrecvs; 2775 
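    /*
       Cache the communication pattern and the send/receive buffers in the
       container attached to C, so that a later call with MAT_REUSE_MATRIX only
       has to resend the numerical values (see the checks at the top of this
       routine).  A caller sketch, with illustrative names only:

           Mat redmat = PETSC_NULL;
           ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_INITIAL_MATRIX,&redmat);CHKERRQ(ierr);
           ... change the entries of mat, keeping its nonzero pattern ...
           ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_REUSE_MATRIX,&redmat);CHKERRQ(ierr);
    */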
redund->send_rank = send_rank; 2776 redund->recv_rank = recv_rank; 2777 redund->sbuf_nz = sbuf_nz; 2778 redund->rbuf_nz = rbuf_nz; 2779 redund->sbuf_j = sbuf_j; 2780 redund->sbuf_a = sbuf_a; 2781 redund->rbuf_j = rbuf_j; 2782 redund->rbuf_a = rbuf_a; 2783 2784 redund->Destroy = C->ops->destroy; 2785 C->ops->destroy = MatDestroy_MatRedundant; 2786 } 2787 PetscFunctionReturn(0); 2788 } 2789 2790 #undef __FUNCT__ 2791 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2792 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2793 { 2794 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2795 PetscErrorCode ierr; 2796 PetscInt i,*idxb = 0; 2797 PetscScalar *va,*vb; 2798 Vec vtmp; 2799 2800 PetscFunctionBegin; 2801 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2802 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2803 if (idx) { 2804 for (i=0; i<A->rmap->n; i++) { 2805 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2806 } 2807 } 2808 2809 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2810 if (idx) { 2811 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2812 } 2813 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2814 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2815 2816 for (i=0; i<A->rmap->n; i++){ 2817 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2818 va[i] = vb[i]; 2819 if (idx) idx[i] = a->garray[idxb[i]]; 2820 } 2821 } 2822 2823 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2824 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2825 ierr = PetscFree(idxb);CHKERRQ(ierr); 2826 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2827 PetscFunctionReturn(0); 2828 } 2829 2830 #undef __FUNCT__ 2831 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2832 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2833 { 2834 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2835 PetscErrorCode ierr; 2836 PetscInt i,*idxb = 0; 2837 PetscScalar *va,*vb; 2838 Vec vtmp; 2839 2840 PetscFunctionBegin; 2841 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2842 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2843 if (idx) { 2844 for (i=0; i<A->cmap->n; i++) { 2845 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2846 } 2847 } 2848 2849 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2850 if (idx) { 2851 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2852 } 2853 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2854 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2855 2856 for (i=0; i<A->rmap->n; i++){ 2857 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2858 va[i] = vb[i]; 2859 if (idx) idx[i] = a->garray[idxb[i]]; 2860 } 2861 } 2862 2863 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2864 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2865 ierr = PetscFree(idxb);CHKERRQ(ierr); 2866 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2867 PetscFunctionReturn(0); 2868 } 2869 2870 #undef __FUNCT__ 2871 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2872 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2873 { 2874 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2875 PetscInt n = A->rmap->n; 2876 PetscInt cstart = A->cmap->rstart; 2877 PetscInt *cmap = mat->garray; 2878 PetscInt *diagIdx, *offdiagIdx; 2879 Vec diagV, offdiagV; 2880 PetscScalar *a, *diagA, *offdiagA; 2881 PetscInt r; 2882 PetscErrorCode ierr; 2883 2884 PetscFunctionBegin; 2885 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2886 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2887 ierr = VecCreateSeq(((PetscObject)A)->comm, n, 
&offdiagV);CHKERRQ(ierr); 2888 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2889 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2890 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2891 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2892 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2893 for(r = 0; r < n; ++r) { 2894 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2895 a[r] = diagA[r]; 2896 idx[r] = cstart + diagIdx[r]; 2897 } else { 2898 a[r] = offdiagA[r]; 2899 idx[r] = cmap[offdiagIdx[r]]; 2900 } 2901 } 2902 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2903 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2904 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2905 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2906 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2907 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2908 PetscFunctionReturn(0); 2909 } 2910 2911 #undef __FUNCT__ 2912 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2913 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2914 { 2915 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2916 PetscInt n = A->rmap->n; 2917 PetscInt cstart = A->cmap->rstart; 2918 PetscInt *cmap = mat->garray; 2919 PetscInt *diagIdx, *offdiagIdx; 2920 Vec diagV, offdiagV; 2921 PetscScalar *a, *diagA, *offdiagA; 2922 PetscInt r; 2923 PetscErrorCode ierr; 2924 2925 PetscFunctionBegin; 2926 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2927 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2928 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2929 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2930 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2931 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2932 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2933 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2934 for(r = 0; r < n; ++r) { 2935 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2936 a[r] = diagA[r]; 2937 idx[r] = cstart + diagIdx[r]; 2938 } else { 2939 a[r] = offdiagA[r]; 2940 idx[r] = cmap[offdiagIdx[r]]; 2941 } 2942 } 2943 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2944 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2945 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2946 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2947 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2948 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2949 PetscFunctionReturn(0); 2950 } 2951 2952 #undef __FUNCT__ 2953 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2954 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2955 { 2956 PetscErrorCode ierr; 2957 Mat *dummy; 2958 2959 PetscFunctionBegin; 2960 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2961 *newmat = *dummy; 2962 ierr = PetscFree(dummy);CHKERRQ(ierr); 2963 PetscFunctionReturn(0); 2964 } 2965 2966 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2967 2968 #undef __FUNCT__ 2969 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 2970 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,PetscScalar **values) 2971 { 2972 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2973 PetscErrorCode ierr; 2974 2975 PetscFunctionBegin; 2976 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2977 PetscFunctionReturn(0); 2978 } 2979 2980 2981 /* -------------------------------------------------------------------*/ 2982 static 
struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2983 MatGetRow_MPIAIJ, 2984 MatRestoreRow_MPIAIJ, 2985 MatMult_MPIAIJ, 2986 /* 4*/ MatMultAdd_MPIAIJ, 2987 MatMultTranspose_MPIAIJ, 2988 MatMultTransposeAdd_MPIAIJ, 2989 #ifdef PETSC_HAVE_PBGL 2990 MatSolve_MPIAIJ, 2991 #else 2992 0, 2993 #endif 2994 0, 2995 0, 2996 /*10*/ 0, 2997 0, 2998 0, 2999 MatSOR_MPIAIJ, 3000 MatTranspose_MPIAIJ, 3001 /*15*/ MatGetInfo_MPIAIJ, 3002 MatEqual_MPIAIJ, 3003 MatGetDiagonal_MPIAIJ, 3004 MatDiagonalScale_MPIAIJ, 3005 MatNorm_MPIAIJ, 3006 /*20*/ MatAssemblyBegin_MPIAIJ, 3007 MatAssemblyEnd_MPIAIJ, 3008 MatSetOption_MPIAIJ, 3009 MatZeroEntries_MPIAIJ, 3010 /*24*/ MatZeroRows_MPIAIJ, 3011 0, 3012 #ifdef PETSC_HAVE_PBGL 3013 0, 3014 #else 3015 0, 3016 #endif 3017 0, 3018 0, 3019 /*29*/ MatSetUp_MPIAIJ, 3020 #ifdef PETSC_HAVE_PBGL 3021 0, 3022 #else 3023 0, 3024 #endif 3025 0, 3026 0, 3027 0, 3028 /*34*/ MatDuplicate_MPIAIJ, 3029 0, 3030 0, 3031 0, 3032 0, 3033 /*39*/ MatAXPY_MPIAIJ, 3034 MatGetSubMatrices_MPIAIJ, 3035 MatIncreaseOverlap_MPIAIJ, 3036 MatGetValues_MPIAIJ, 3037 MatCopy_MPIAIJ, 3038 /*44*/ MatGetRowMax_MPIAIJ, 3039 MatScale_MPIAIJ, 3040 0, 3041 0, 3042 MatZeroRowsColumns_MPIAIJ, 3043 /*49*/ 0, 3044 0, 3045 0, 3046 0, 3047 0, 3048 /*54*/ MatFDColoringCreate_MPIAIJ, 3049 0, 3050 MatSetUnfactored_MPIAIJ, 3051 MatPermute_MPIAIJ, 3052 0, 3053 /*59*/ MatGetSubMatrix_MPIAIJ, 3054 MatDestroy_MPIAIJ, 3055 MatView_MPIAIJ, 3056 0, 3057 0, 3058 /*64*/ 0, 3059 0, 3060 0, 3061 0, 3062 0, 3063 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3064 MatGetRowMinAbs_MPIAIJ, 3065 0, 3066 MatSetColoring_MPIAIJ, 3067 #if defined(PETSC_HAVE_ADIC) 3068 MatSetValuesAdic_MPIAIJ, 3069 #else 3070 0, 3071 #endif 3072 MatSetValuesAdifor_MPIAIJ, 3073 /*75*/ MatFDColoringApply_AIJ, 3074 0, 3075 0, 3076 0, 3077 0, 3078 /*80*/ 0, 3079 0, 3080 0, 3081 /*83*/ MatLoad_MPIAIJ, 3082 0, 3083 0, 3084 0, 3085 0, 3086 0, 3087 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3088 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3089 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3090 MatPtAP_Basic, 3091 MatPtAPSymbolic_MPIAIJ, 3092 /*94*/ MatPtAPNumeric_MPIAIJ, 3093 0, 3094 0, 3095 0, 3096 0, 3097 /*99*/ 0, 3098 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3099 MatPtAPNumeric_MPIAIJ_MPIAIJ, 3100 MatConjugate_MPIAIJ, 3101 0, 3102 /*104*/MatSetValuesRow_MPIAIJ, 3103 MatRealPart_MPIAIJ, 3104 MatImaginaryPart_MPIAIJ, 3105 0, 3106 0, 3107 /*109*/0, 3108 MatGetRedundantMatrix_MPIAIJ, 3109 MatGetRowMin_MPIAIJ, 3110 0, 3111 0, 3112 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3113 0, 3114 0, 3115 0, 3116 0, 3117 /*119*/0, 3118 0, 3119 0, 3120 0, 3121 MatGetMultiProcBlock_MPIAIJ, 3122 /*124*/MatFindNonZeroRows_MPIAIJ, 3123 MatGetColumnNorms_MPIAIJ, 3124 MatInvertBlockDiagonal_MPIAIJ, 3125 0, 3126 MatGetSubMatricesParallel_MPIAIJ, 3127 /*129*/0, 3128 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3129 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3130 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3131 0, 3132 /*134*/0, 3133 0, 3134 0, 3135 0, 3136 0 3137 }; 3138 3139 /* ----------------------------------------------------------------------------------------*/ 3140 3141 EXTERN_C_BEGIN 3142 #undef __FUNCT__ 3143 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3144 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3145 { 3146 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3147 PetscErrorCode ierr; 3148 3149 PetscFunctionBegin; 3150 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3151 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3152 PetscFunctionReturn(0); 3153 } 3154 EXTERN_C_END 3155 3156 EXTERN_C_BEGIN 3157 #undef __FUNCT__ 3158 #define __FUNCT__ 
"MatRetrieveValues_MPIAIJ" 3159 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3160 { 3161 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3162 PetscErrorCode ierr; 3163 3164 PetscFunctionBegin; 3165 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3166 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3167 PetscFunctionReturn(0); 3168 } 3169 EXTERN_C_END 3170 3171 EXTERN_C_BEGIN 3172 #undef __FUNCT__ 3173 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3174 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3175 { 3176 Mat_MPIAIJ *b; 3177 PetscErrorCode ierr; 3178 PetscInt i; 3179 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3180 3181 PetscFunctionBegin; 3182 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3183 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3184 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3185 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3186 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3187 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3188 3189 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3190 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3191 if (d_nnz) { 3192 for (i=0; i<B->rmap->n; i++) { 3193 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3194 } 3195 } 3196 if (o_nnz) { 3197 for (i=0; i<B->rmap->n; i++) { 3198 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3199 } 3200 } 3201 b = (Mat_MPIAIJ*)B->data; 3202 3203 if (!B->preallocated) { 3204 /* Explicitly create 2 MATSEQAIJ matrices. */ 3205 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3206 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3207 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3208 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3209 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3210 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3211 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3212 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3213 } 3214 3215 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3216 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3217 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). 
*/ 3218 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3219 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3220 B->preallocated = PETSC_TRUE; 3221 PetscFunctionReturn(0); 3222 } 3223 EXTERN_C_END 3224 3225 #undef __FUNCT__ 3226 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3227 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3228 { 3229 Mat mat; 3230 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3231 PetscErrorCode ierr; 3232 3233 PetscFunctionBegin; 3234 *newmat = 0; 3235 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3236 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3237 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3238 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3239 a = (Mat_MPIAIJ*)mat->data; 3240 3241 mat->factortype = matin->factortype; 3242 mat->rmap->bs = matin->rmap->bs; 3243 mat->assembled = PETSC_TRUE; 3244 mat->insertmode = NOT_SET_VALUES; 3245 mat->preallocated = PETSC_TRUE; 3246 3247 a->size = oldmat->size; 3248 a->rank = oldmat->rank; 3249 a->donotstash = oldmat->donotstash; 3250 a->roworiented = oldmat->roworiented; 3251 a->rowindices = 0; 3252 a->rowvalues = 0; 3253 a->getrowactive = PETSC_FALSE; 3254 3255 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3256 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3257 3258 if (oldmat->colmap) { 3259 #if defined (PETSC_USE_CTABLE) 3260 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3261 #else 3262 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3263 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3264 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3265 #endif 3266 } else a->colmap = 0; 3267 if (oldmat->garray) { 3268 PetscInt len; 3269 len = oldmat->B->cmap->n; 3270 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3271 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3272 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3273 } else a->garray = 0; 3274 3275 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3276 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3277 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3278 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3279 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3280 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3281 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3282 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3283 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3284 *newmat = mat; 3285 PetscFunctionReturn(0); 3286 } 3287 3288 3289 3290 #undef __FUNCT__ 3291 #define __FUNCT__ "MatLoad_MPIAIJ" 3292 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3293 { 3294 PetscScalar *vals,*svals; 3295 MPI_Comm comm = ((PetscObject)viewer)->comm; 3296 PetscErrorCode ierr; 3297 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3298 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3299 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3300 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = 
PETSC_NULL,jj,*mycols,*smycols; 3301 PetscInt cend,cstart,n,*rowners,sizesset=1; 3302 int fd; 3303 3304 PetscFunctionBegin; 3305 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3306 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3307 if (!rank) { 3308 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3309 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3310 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3311 } 3312 3313 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3314 3315 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3316 M = header[1]; N = header[2]; 3317 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3318 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3319 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3320 3321 /* If global sizes are set, check if they are consistent with that given in the file */ 3322 if (sizesset) { 3323 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3324 } 3325 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3326 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3327 3328 /* determine ownership of all rows */ 3329 if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */ 3330 else m = newMat->rmap->n; /* Set by user */ 3331 3332 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3333 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3334 3335 /* First process needs enough room for process with most rows */ 3336 if (!rank) { 3337 mmax = rowners[1]; 3338 for (i=2; i<size; i++) { 3339 mmax = PetscMax(mmax,rowners[i]); 3340 } 3341 } else mmax = m; 3342 3343 rowners[0] = 0; 3344 for (i=2; i<=size; i++) { 3345 rowners[i] += rowners[i-1]; 3346 } 3347 rstart = rowners[rank]; 3348 rend = rowners[rank+1]; 3349 3350 /* distribute row lengths to all processors */ 3351 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3352 if (!rank) { 3353 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3354 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3355 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3356 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3357 for (j=0; j<m; j++) { 3358 procsnz[0] += ourlens[j]; 3359 } 3360 for (i=1; i<size; i++) { 3361 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3362 /* calculate the number of nonzeros on each processor */ 3363 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3364 procsnz[i] += rowlengths[j]; 3365 } 3366 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3367 } 3368 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3369 } else { 3370 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3371 } 3372 3373 if (!rank) { 3374 /* determine max buffer needed and allocate it */ 3375 maxnz = 0; 3376 for (i=0; i<size; i++) { 3377 maxnz = PetscMax(maxnz,procsnz[i]); 3378 } 3379 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3380 3381 /* read in my part of the matrix column indices */ 3382 
nz = procsnz[0]; 3383 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3384 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3385 3386 /* read in every one elses and ship off */ 3387 for (i=1; i<size; i++) { 3388 nz = procsnz[i]; 3389 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3390 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3391 } 3392 ierr = PetscFree(cols);CHKERRQ(ierr); 3393 } else { 3394 /* determine buffer space needed for message */ 3395 nz = 0; 3396 for (i=0; i<m; i++) { 3397 nz += ourlens[i]; 3398 } 3399 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3400 3401 /* receive message of column indices*/ 3402 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3403 } 3404 3405 /* determine column ownership if matrix is not square */ 3406 if (N != M) { 3407 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3408 else n = newMat->cmap->n; 3409 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3410 cstart = cend - n; 3411 } else { 3412 cstart = rstart; 3413 cend = rend; 3414 n = cend - cstart; 3415 } 3416 3417 /* loop over local rows, determining number of off diagonal entries */ 3418 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3419 jj = 0; 3420 for (i=0; i<m; i++) { 3421 for (j=0; j<ourlens[i]; j++) { 3422 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3423 jj++; 3424 } 3425 } 3426 3427 for (i=0; i<m; i++) { 3428 ourlens[i] -= offlens[i]; 3429 } 3430 if (!sizesset) { 3431 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3432 } 3433 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3434 3435 for (i=0; i<m; i++) { 3436 ourlens[i] += offlens[i]; 3437 } 3438 3439 if (!rank) { 3440 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3441 3442 /* read in my part of the matrix numerical values */ 3443 nz = procsnz[0]; 3444 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3445 3446 /* insert into matrix */ 3447 jj = rstart; 3448 smycols = mycols; 3449 svals = vals; 3450 for (i=0; i<m; i++) { 3451 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3452 smycols += ourlens[i]; 3453 svals += ourlens[i]; 3454 jj++; 3455 } 3456 3457 /* read in other processors and ship out */ 3458 for (i=1; i<size; i++) { 3459 nz = procsnz[i]; 3460 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3461 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3462 } 3463 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3464 } else { 3465 /* receive numeric values */ 3466 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3467 3468 /* receive message of values*/ 3469 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3470 3471 /* insert into matrix */ 3472 jj = rstart; 3473 smycols = mycols; 3474 svals = vals; 3475 for (i=0; i<m; i++) { 3476 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3477 smycols += ourlens[i]; 3478 svals += ourlens[i]; 3479 jj++; 3480 } 3481 } 3482 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3483 ierr = PetscFree(vals);CHKERRQ(ierr); 3484 ierr = PetscFree(mycols);CHKERRQ(ierr); 3485 ierr = PetscFree(rowners);CHKERRQ(ierr); 3486 3487 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3488 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3489 PetscFunctionReturn(0); 3490 } 
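/*
   Illustrative usage sketch (an editorial addition, not part of the original source): MatLoad_MPIAIJ()
   above is normally reached through the generic MatLoad() interface after the matrix type has been set.
   Assuming a binary file previously written with MatView() - the file name "matrix.dat" here is only a
   placeholder - a typical calling sequence on PETSC_COMM_WORLD is

      Mat         A;
      PetscViewer viewer;

      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatLoad(A,viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

   Local sizes may be fixed with MatSetSizes() before MatLoad(); otherwise the row distribution is taken
   from the file header, as computed in the routine above.
*/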
3491 3492 #undef __FUNCT__ 3493 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3494 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3495 { 3496 PetscErrorCode ierr; 3497 IS iscol_local; 3498 PetscInt csize; 3499 3500 PetscFunctionBegin; 3501 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3502 if (call == MAT_REUSE_MATRIX) { 3503 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3504 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3505 } else { 3506 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3507 } 3508 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3509 if (call == MAT_INITIAL_MATRIX) { 3510 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3511 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3512 } 3513 PetscFunctionReturn(0); 3514 } 3515 3516 #undef __FUNCT__ 3517 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3518 /* 3519 Not great since it makes two copies of the submatrix, first an SeqAIJ 3520 in local and then by concatenating the local matrices the end result. 3521 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3522 3523 Note: This requires a sequential iscol with all indices. 3524 */ 3525 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3526 { 3527 PetscErrorCode ierr; 3528 PetscMPIInt rank,size; 3529 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3530 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3531 Mat *local,M,Mreuse; 3532 MatScalar *vwork,*aa; 3533 MPI_Comm comm = ((PetscObject)mat)->comm; 3534 Mat_SeqAIJ *aij; 3535 3536 3537 PetscFunctionBegin; 3538 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3539 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3540 3541 if (call == MAT_REUSE_MATRIX) { 3542 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3543 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3544 local = &Mreuse; 3545 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3546 } else { 3547 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3548 Mreuse = *local; 3549 ierr = PetscFree(local);CHKERRQ(ierr); 3550 } 3551 3552 /* 3553 m - number of local rows 3554 n - number of columns (same on all processors) 3555 rstart - first row in new global matrix generated 3556 */ 3557 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3558 if (call == MAT_INITIAL_MATRIX) { 3559 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3560 ii = aij->i; 3561 jj = aij->j; 3562 3563 /* 3564 Determine the number of non-zeros in the diagonal and off-diagonal 3565 portions of the matrix in order to do correct preallocation 3566 */ 3567 3568 /* first get start and end of "diagonal" columns */ 3569 if (csize == PETSC_DECIDE) { 3570 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3571 if (mglobal == n) { /* square matrix */ 3572 nlocal = m; 3573 } else { 3574 nlocal = n/size + ((n % size) > rank); 3575 } 3576 } else { 3577 nlocal = csize; 3578 } 3579 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3580 rstart = rend - nlocal; 3581 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up 
to total number of columns %D",rend,n); 3582 3583 /* next, compute all the lengths */ 3584 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3585 olens = dlens + m; 3586 for (i=0; i<m; i++) { 3587 jend = ii[i+1] - ii[i]; 3588 olen = 0; 3589 dlen = 0; 3590 for (j=0; j<jend; j++) { 3591 if (*jj < rstart || *jj >= rend) olen++; 3592 else dlen++; 3593 jj++; 3594 } 3595 olens[i] = olen; 3596 dlens[i] = dlen; 3597 } 3598 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3599 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3600 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3601 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3602 ierr = PetscFree(dlens);CHKERRQ(ierr); 3603 } else { 3604 PetscInt ml,nl; 3605 3606 M = *newmat; 3607 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3608 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3609 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3610 /* 3611 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3612 rather than the slower MatSetValues(). 3613 */ 3614 M->was_assembled = PETSC_TRUE; 3615 M->assembled = PETSC_FALSE; 3616 } 3617 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3618 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3619 ii = aij->i; 3620 jj = aij->j; 3621 aa = aij->a; 3622 for (i=0; i<m; i++) { 3623 row = rstart + i; 3624 nz = ii[i+1] - ii[i]; 3625 cwork = jj; jj += nz; 3626 vwork = aa; aa += nz; 3627 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3628 } 3629 3630 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3631 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3632 *newmat = M; 3633 3634 /* save submatrix used in processor for next request */ 3635 if (call == MAT_INITIAL_MATRIX) { 3636 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3637 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3638 } 3639 3640 PetscFunctionReturn(0); 3641 } 3642 3643 EXTERN_C_BEGIN 3644 #undef __FUNCT__ 3645 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3646 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3647 { 3648 PetscInt m,cstart, cend,j,nnz,i,d; 3649 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3650 const PetscInt *JJ; 3651 PetscScalar *values; 3652 PetscErrorCode ierr; 3653 3654 PetscFunctionBegin; 3655 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3656 3657 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3658 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3659 m = B->rmap->n; 3660 cstart = B->cmap->rstart; 3661 cend = B->cmap->rend; 3662 rstart = B->rmap->rstart; 3663 3664 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3665 3666 #if defined(PETSC_USE_DEBUG) 3667 for (i=0; i<m; i++) { 3668 nnz = Ii[i+1]- Ii[i]; 3669 JJ = J + Ii[i]; 3670 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3671 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 3672 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3673 } 3674 #endif 3675 3676 for (i=0; i<m; i++) { 3677 nnz = Ii[i+1]- Ii[i]; 3678 JJ = J + Ii[i]; 3679 nnz_max =
PetscMax(nnz_max,nnz); 3680 d = 0; 3681 for (j=0; j<nnz; j++) { 3682 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3683 } 3684 d_nnz[i] = d; 3685 o_nnz[i] = nnz - d; 3686 } 3687 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3688 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3689 3690 if (v) values = (PetscScalar*)v; 3691 else { 3692 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3693 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3694 } 3695 3696 for (i=0; i<m; i++) { 3697 ii = i + rstart; 3698 nnz = Ii[i+1]- Ii[i]; 3699 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3700 } 3701 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3702 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3703 3704 if (!v) { 3705 ierr = PetscFree(values);CHKERRQ(ierr); 3706 } 3707 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3708 PetscFunctionReturn(0); 3709 } 3710 EXTERN_C_END 3711 3712 #undef __FUNCT__ 3713 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3714 /*@ 3715 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3716 (the default parallel PETSc format). 3717 3718 Collective on MPI_Comm 3719 3720 Input Parameters: 3721 + B - the matrix 3722 . i - the indices into j for the start of each local row (starts with zero) 3723 . j - the column indices for each local row (starts with zero) 3724 - v - optional values in the matrix 3725 3726 Level: developer 3727 3728 Notes: 3729 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3730 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3731 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3732 3733 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3734 3735 The format which is used for the sparse matrix input, is equivalent to a 3736 row-major ordering.. i.e for the following matrix, the input data expected is 3737 as shown: 3738 3739 1 0 0 3740 2 0 3 P0 3741 ------- 3742 4 5 6 P1 3743 3744 Process0 [P0]: rows_owned=[0,1] 3745 i = {0,1,3} [size = nrow+1 = 2+1] 3746 j = {0,0,2} [size = nz = 6] 3747 v = {1,2,3} [size = nz = 6] 3748 3749 Process1 [P1]: rows_owned=[2] 3750 i = {0,3} [size = nrow+1 = 1+1] 3751 j = {0,1,2} [size = nz = 6] 3752 v = {4,5,6} [size = nz = 6] 3753 3754 .keywords: matrix, aij, compressed row, sparse, parallel 3755 3756 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3757 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3758 @*/ 3759 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3760 { 3761 PetscErrorCode ierr; 3762 3763 PetscFunctionBegin; 3764 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3765 PetscFunctionReturn(0); 3766 } 3767 3768 #undef __FUNCT__ 3769 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3770 /*@C 3771 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3772 (the default parallel PETSc format). For good matrix assembly performance 3773 the user should preallocate the matrix storage by setting the parameters 3774 d_nz (or d_nnz) and o_nz (or o_nnz). 
By setting these parameters accurately, 3775 performance can be increased by more than a factor of 50. 3776 3777 Collective on MPI_Comm 3778 3779 Input Parameters: 3780 + A - the matrix 3781 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3782 (same value is used for all local rows) 3783 . d_nnz - array containing the number of nonzeros in the various rows of the 3784 DIAGONAL portion of the local submatrix (possibly different for each row) 3785 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3786 The size of this array is equal to the number of local rows, i.e. 'm'. 3787 For matrices that will be factored, you must leave room for (and set) 3788 the diagonal entry even if it is zero. 3789 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3790 submatrix (same value is used for all local rows). 3791 - o_nnz - array containing the number of nonzeros in the various rows of the 3792 OFF-DIAGONAL portion of the local submatrix (possibly different for 3793 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3794 structure. The size of this array is equal to the number 3795 of local rows, i.e. 'm'. 3796 3797 If the *_nnz parameter is given then the *_nz parameter is ignored. 3798 3799 The AIJ format (also called the Yale sparse matrix format or 3800 compressed row storage (CSR)) is fully compatible with standard Fortran 77 3801 storage. The stored row and column indices begin with zero. 3802 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3803 3804 The parallel matrix is partitioned such that the first m0 rows belong to 3805 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3806 to process 2 etc., where m0,m1,m2... are the input parameter 'm'. 3807 3808 The DIAGONAL portion of the local submatrix of a processor can be defined 3809 as the submatrix which is obtained by extracting the part corresponding to 3810 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3811 first row that belongs to the processor, r2 is the last row belonging to 3812 this processor, and c1-c2 is the range of indices of the local part of a 3813 vector suitable for applying the matrix to. This is an m x n matrix. In the 3814 common case of a square matrix, the row and column ranges are the same and 3815 the DIAGONAL part is also square. The remaining portion of the local 3816 submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion. 3817 3818 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 3819 3820 You can call MatGetInfo() to get information on how effective the preallocation was; 3821 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3822 You can also run with the option -info and look for messages with the string 3823 malloc in them to see if additional memory allocation was needed. 3824 3825 Example usage: 3826 3827 Consider the following 8x8 matrix with 34 non-zero values that is 3828 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 3829 proc1 owns 3 rows, proc2 owns 2 rows.
This division can be shown 3830 as follows: 3831 3832 .vb 3833 1 2 0 | 0 3 0 | 0 4 3834 Proc0 0 5 6 | 7 0 0 | 8 0 3835 9 0 10 | 11 0 0 | 12 0 3836 ------------------------------------- 3837 13 0 14 | 15 16 17 | 0 0 3838 Proc1 0 18 0 | 19 20 21 | 0 0 3839 0 0 0 | 22 23 0 | 24 0 3840 ------------------------------------- 3841 Proc2 25 26 27 | 0 0 28 | 29 0 3842 30 0 0 | 31 32 33 | 0 34 3843 .ve 3844 3845 This can be represented as a collection of submatrices as: 3846 3847 .vb 3848 A B C 3849 D E F 3850 G H I 3851 .ve 3852 3853 Here the submatrices A,B,C are owned by proc0, D,E,F are 3854 owned by proc1, and G,H,I are owned by proc2. 3855 3856 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3857 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3858 The 'M','N' parameters are 8,8, and have the same values on all procs. 3859 3860 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3861 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3862 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3863 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 3864 part as SeqAIJ matrices, e.g. proc1 will store [E] as a SeqAIJ 3865 matrix, and [DF] as another SeqAIJ matrix. 3866 3867 When the d_nz, o_nz parameters are specified, d_nz storage elements are 3868 allocated for every row of the local diagonal submatrix, and o_nz 3869 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3870 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 3871 local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 3872 In this case, the values of d_nz,o_nz are: 3873 .vb 3874 proc0 : d_nz = 2, o_nz = 2 3875 proc1 : d_nz = 3, o_nz = 2 3876 proc2 : d_nz = 1, o_nz = 4 3877 .ve 3878 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3879 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3880 for proc2, i.e. we are using 12+15+10=37 storage locations to store 3881 34 values. 3882 3883 When the d_nnz, o_nnz parameters are specified, the storage is specified 3884 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3885 In the above case the values for d_nnz,o_nnz are: 3886 .vb 3887 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3888 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3889 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3890 .ve 3891 Here the space allocated is the sum of all the above values, i.e. 34, and 3892 hence the preallocation is perfect. 3893 3894 Level: intermediate 3895 3896 .keywords: matrix, aij, compressed row, sparse, parallel 3897 3898 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3899 MPIAIJ, MatGetInfo() 3900 @*/ 3901 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3902 { 3903 PetscErrorCode ierr; 3904 3905 PetscFunctionBegin; 3906 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3907 PetscValidType(B,1); 3908 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3909 PetscFunctionReturn(0); 3910 } 3911 3912 #undef __FUNCT__ 3913 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3914 /*@ 3915 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in 3916 standard CSR format.
3917 3918 Collective on MPI_Comm 3919 3920 Input Parameters: 3921 + comm - MPI communicator 3922 . m - number of local rows (Cannot be PETSC_DECIDE) 3923 . n - This value should be the same as the local size used in creating the 3924 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3925 calculated if N is given) For square matrices n is almost always m. 3926 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3927 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3928 . i - row indices 3929 . j - column indices 3930 - a - matrix values 3931 3932 Output Parameter: 3933 . mat - the matrix 3934 3935 Level: intermediate 3936 3937 Notes: 3938 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3939 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3940 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3941 3942 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3943 3944 The format which is used for the sparse matrix input, is equivalent to a 3945 row-major ordering.. i.e for the following matrix, the input data expected is 3946 as shown: 3947 3948 1 0 0 3949 2 0 3 P0 3950 ------- 3951 4 5 6 P1 3952 3953 Process0 [P0]: rows_owned=[0,1] 3954 i = {0,1,3} [size = nrow+1 = 2+1] 3955 j = {0,0,2} [size = nz = 6] 3956 v = {1,2,3} [size = nz = 6] 3957 3958 Process1 [P1]: rows_owned=[2] 3959 i = {0,3} [size = nrow+1 = 1+1] 3960 j = {0,1,2} [size = nz = 6] 3961 v = {4,5,6} [size = nz = 6] 3962 3963 .keywords: matrix, aij, compressed row, sparse, parallel 3964 3965 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3966 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3967 @*/ 3968 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3969 { 3970 PetscErrorCode ierr; 3971 3972 PetscFunctionBegin; 3973 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3974 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3975 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3976 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3977 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3978 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3979 PetscFunctionReturn(0); 3980 } 3981 3982 #undef __FUNCT__ 3983 #define __FUNCT__ "MatCreateAIJ" 3984 /*@C 3985 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 3986 (the default parallel PETSc format). For good matrix assembly performance 3987 the user should preallocate the matrix storage by setting the parameters 3988 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3989 performance can be increased by more than a factor of 50. 3990 3991 Collective on MPI_Comm 3992 3993 Input Parameters: 3994 + comm - MPI communicator 3995 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3996 This value should be the same as the local size used in creating the 3997 y vector for the matrix-vector product y = Ax. 3998 . n - This value should be the same as the local size used in creating the 3999 x vector for the matrix-vector product y = Ax. 
(or PETSC_DECIDE to have 4000 calculated if N is given) For square matrices n is almost always m. 4001 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4002 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4003 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4004 (same value is used for all local rows) 4005 . d_nnz - array containing the number of nonzeros in the various rows of the 4006 DIAGONAL portion of the local submatrix (possibly different for each row) 4007 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 4008 The size of this array is equal to the number of local rows, i.e. 'm'. 4009 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4010 submatrix (same value is used for all local rows). 4011 - o_nnz - array containing the number of nonzeros in the various rows of the 4012 OFF-DIAGONAL portion of the local submatrix (possibly different for 4013 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 4014 structure. The size of this array is equal to the number 4015 of local rows, i.e. 'm'. 4016 4017 Output Parameter: 4018 . A - the matrix 4019 4020 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4021 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4022 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4023 4024 Notes: 4025 If the *_nnz parameter is given then the *_nz parameter is ignored. 4026 4027 The m,n,M,N parameters specify the size of the matrix, and its partitioning across 4028 processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4029 storage requirements for this matrix. 4030 4031 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4032 processor then it must be used on all processors that share the object for 4033 that argument. 4034 4035 The user MUST specify either the local or global matrix dimensions 4036 (possibly both). 4037 4038 The parallel matrix is partitioned across processors such that the 4039 first m0 rows belong to process 0, the next m1 rows belong to 4040 process 1, the next m2 rows belong to process 2 etc., where 4041 m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores 4042 values corresponding to an [m x N] submatrix. 4043 4044 The columns are logically partitioned with the n0 columns belonging 4045 to the 0th partition, the next n1 columns belonging to the next 4046 partition etc., where n0,n1,n2... are the input parameter 'n'. 4047 4048 The DIAGONAL portion of the local submatrix on any given processor 4049 is the submatrix corresponding to the rows and columns m,n 4050 corresponding to the given processor, i.e. the diagonal matrix on 4051 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1] 4052 etc. The remaining portion of the local submatrix [m x (N-n)] 4053 constitutes the OFF-DIAGONAL portion. The example below better 4054 illustrates this concept. 4055 4056 For a square global matrix we define each processor's diagonal portion 4057 to be its local rows and the corresponding columns (a square submatrix); 4058 each processor's off-diagonal portion encompasses the remainder of the 4059 local matrix (a rectangular submatrix). 4060 4061 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 4062 4063 When calling this routine with a single process communicator, a matrix of 4064 type SEQAIJ is returned.
If a matrix of type MPIAIJ is desired for this 4065 type of communicator, use the construction mechanism: 4066 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4067 4068 By default, this format uses inodes (identical nodes) when possible. 4069 We search for consecutive rows with the same nonzero structure, thereby 4070 reusing matrix information to achieve increased efficiency. 4071 4072 Options Database Keys: 4073 + -mat_no_inode - Do not use inodes 4074 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4075 - -mat_aij_oneindex - Internally use indexing starting at 1 4076 rather than 0. Note that when calling MatSetValues(), 4077 the user still MUST index entries starting at 0! 4078 4079 4080 Example usage: 4081 4082 Consider the following 8x8 matrix with 34 non-zero values that is 4083 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4084 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4085 as follows: 4086 4087 .vb 4088 1 2 0 | 0 3 0 | 0 4 4089 Proc0 0 5 6 | 7 0 0 | 8 0 4090 9 0 10 | 11 0 0 | 12 0 4091 ------------------------------------- 4092 13 0 14 | 15 16 17 | 0 0 4093 Proc1 0 18 0 | 19 20 21 | 0 0 4094 0 0 0 | 22 23 0 | 24 0 4095 ------------------------------------- 4096 Proc2 25 26 27 | 0 0 28 | 29 0 4097 30 0 0 | 31 32 33 | 0 34 4098 .ve 4099 4100 This can be represented as a collection of submatrices as: 4101 4102 .vb 4103 A B C 4104 D E F 4105 G H I 4106 .ve 4107 4108 Here the submatrices A,B,C are owned by proc0, D,E,F are 4109 owned by proc1, and G,H,I are owned by proc2. 4110 4111 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4112 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4113 The 'M','N' parameters are 8,8, and have the same values on all procs. 4114 4115 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4116 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4117 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4118 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 4119 part as SeqAIJ matrices, e.g. proc1 will store [E] as a SeqAIJ 4120 matrix, and [DF] as another SeqAIJ matrix. 4121 4122 When the d_nz, o_nz parameters are specified, d_nz storage elements are 4123 allocated for every row of the local diagonal submatrix, and o_nz 4124 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4125 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 4126 local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4127 In this case, the values of d_nz,o_nz are: 4128 .vb 4129 proc0 : d_nz = 2, o_nz = 2 4130 proc1 : d_nz = 3, o_nz = 2 4131 proc2 : d_nz = 1, o_nz = 4 4132 .ve 4133 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4134 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4135 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4136 34 values. 4137 4138 When the d_nnz, o_nnz parameters are specified, the storage is specified 4139 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4140 In the above case the values for d_nnz,o_nnz are: 4141 .vb 4142 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4143 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4144 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4145 .ve 4146 Here the space allocated is the sum of all the above values, i.e. 34, and 4147 hence the preallocation is perfect.
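   An illustrative call sketch for the example above (an editorial addition, not part of the original
   manual page; error checking is omitted). Each process passes its own local sizes and per-row counts;
   for instance, proc0, which owns the first three rows, could call:

.vb
      PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
      Mat      A;
      MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve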
4148 4149 Level: intermediate 4150 4151 .keywords: matrix, aij, compressed row, sparse, parallel 4152 4153 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4154 MPIAIJ, MatCreateMPIAIJWithArrays() 4155 @*/ 4156 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4157 { 4158 PetscErrorCode ierr; 4159 PetscMPIInt size; 4160 4161 PetscFunctionBegin; 4162 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4163 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4164 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4165 if (size > 1) { 4166 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4167 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4168 } else { 4169 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4170 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4171 } 4172 PetscFunctionReturn(0); 4173 } 4174 4175 #undef __FUNCT__ 4176 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4177 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 4178 { 4179 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4180 4181 PetscFunctionBegin; 4182 *Ad = a->A; 4183 *Ao = a->B; 4184 *colmap = a->garray; 4185 PetscFunctionReturn(0); 4186 } 4187 4188 #undef __FUNCT__ 4189 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4190 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4191 { 4192 PetscErrorCode ierr; 4193 PetscInt i; 4194 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4195 4196 PetscFunctionBegin; 4197 if (coloring->ctype == IS_COLORING_GLOBAL) { 4198 ISColoringValue *allcolors,*colors; 4199 ISColoring ocoloring; 4200 4201 /* set coloring for diagonal portion */ 4202 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4203 4204 /* set coloring for off-diagonal portion */ 4205 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4206 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4207 for (i=0; i<a->B->cmap->n; i++) { 4208 colors[i] = allcolors[a->garray[i]]; 4209 } 4210 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4211 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4212 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4213 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4214 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4215 ISColoringValue *colors; 4216 PetscInt *larray; 4217 ISColoring ocoloring; 4218 4219 /* set coloring for diagonal portion */ 4220 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4221 for (i=0; i<a->A->cmap->n; i++) { 4222 larray[i] = i + A->cmap->rstart; 4223 } 4224 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4225 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4226 for (i=0; i<a->A->cmap->n; i++) { 4227 colors[i] = coloring->colors[larray[i]]; 4228 } 4229 ierr = PetscFree(larray);CHKERRQ(ierr); 4230 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4231 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4232 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4233 4234 /* set coloring for off-diagonal portion */ 4235 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4236 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4237 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4238 for (i=0; i<a->B->cmap->n; i++) { 4239 colors[i] = coloring->colors[larray[i]]; 4240 } 4241 ierr = PetscFree(larray);CHKERRQ(ierr); 4242 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4243 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4244 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4245 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4246 4247 PetscFunctionReturn(0); 4248 } 4249 4250 #if defined(PETSC_HAVE_ADIC) 4251 #undef __FUNCT__ 4252 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 4253 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 4254 { 4255 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4256 PetscErrorCode ierr; 4257 4258 PetscFunctionBegin; 4259 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 4260 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 4261 PetscFunctionReturn(0); 4262 } 4263 #endif 4264 4265 #undef __FUNCT__ 4266 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4267 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4268 { 4269 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4270 PetscErrorCode ierr; 4271 4272 PetscFunctionBegin; 4273 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4274 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4275 PetscFunctionReturn(0); 4276 } 4277 4278 #undef __FUNCT__ 4279 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4280 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4281 { 4282 PetscErrorCode ierr; 4283 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum; 4284 PetscInt *indx; 4285 4286 PetscFunctionBegin; 4287 /* This routine will ONLY return MPIAIJ type matrix */ 4288 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4289 if (n == PETSC_DECIDE){ 4290 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4291 } 4292 /* Check sum(n) = N */ 4293 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4294 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4295 4296 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4297 rstart -= m; 4298 4299 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4300 for (i=0;i<m;i++) { 4301 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4302 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4303 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4304 } 4305 4306 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4307 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4308 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4309 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4310 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4311 PetscFunctionReturn(0); 4312 } 4313 4314 #undef __FUNCT__ 4315 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4316 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4317 { 4318 PetscErrorCode ierr; 4319 PetscInt m,N,i,rstart,nnz,Ii; 4320 PetscInt *indx; 4321 PetscScalar *values; 4322 4323 PetscFunctionBegin; 4324 ierr = 
MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4325 ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4326 for (i=0;i<m;i++) { 4327 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4328 Ii = i + rstart; 4329 ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4330 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4331 } 4332 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4333 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4334 PetscFunctionReturn(0); 4335 } 4336 4337 #undef __FUNCT__ 4338 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4339 /*@ 4340 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4341 matrices from each processor 4342 4343 Collective on MPI_Comm 4344 4345 Input Parameters: 4346 + comm - the communicators the parallel matrix will live on 4347 . inmat - the input sequential matrices 4348 . n - number of local columns (or PETSC_DECIDE) 4349 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4350 4351 Output Parameter: 4352 . outmat - the parallel matrix generated 4353 4354 Level: advanced 4355 4356 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4357 4358 @*/ 4359 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4360 { 4361 PetscErrorCode ierr; 4362 4363 PetscFunctionBegin; 4364 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4365 if (scall == MAT_INITIAL_MATRIX){ 4366 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4367 } 4368 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4369 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4370 PetscFunctionReturn(0); 4371 } 4372 4373 #undef __FUNCT__ 4374 #define __FUNCT__ "MatFileSplit" 4375 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4376 { 4377 PetscErrorCode ierr; 4378 PetscMPIInt rank; 4379 PetscInt m,N,i,rstart,nnz; 4380 size_t len; 4381 const PetscInt *indx; 4382 PetscViewer out; 4383 char *name; 4384 Mat B; 4385 const PetscScalar *values; 4386 4387 PetscFunctionBegin; 4388 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4389 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4390 /* Should this be the type of the diagonal block of A? 
*/ 4391 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4392 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4393 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4394 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4395 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4396 for (i=0;i<m;i++) { 4397 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4398 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4399 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4400 } 4401 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4402 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4403 4404 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4405 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4406 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4407 sprintf(name,"%s.%d",outfile,rank); 4408 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4409 ierr = PetscFree(name); 4410 ierr = MatView(B,out);CHKERRQ(ierr); 4411 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4412 ierr = MatDestroy(&B);CHKERRQ(ierr); 4413 PetscFunctionReturn(0); 4414 } 4415 4416 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4417 #undef __FUNCT__ 4418 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4419 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4420 { 4421 PetscErrorCode ierr; 4422 Mat_Merge_SeqsToMPI *merge; 4423 PetscContainer container; 4424 4425 PetscFunctionBegin; 4426 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4427 if (container) { 4428 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4429 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4430 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4431 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4432 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4433 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4434 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4435 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4436 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4437 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4438 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4439 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4440 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4441 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4442 ierr = PetscFree(merge);CHKERRQ(ierr); 4443 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4444 } 4445 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4446 PetscFunctionReturn(0); 4447 } 4448 4449 #include <../src/mat/utils/freespace.h> 4450 #include <petscbt.h> 4451 4452 #undef __FUNCT__ 4453 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4454 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4455 { 4456 PetscErrorCode ierr; 4457 MPI_Comm comm=((PetscObject)mpimat)->comm; 4458 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4459 PetscMPIInt size,rank,taga,*len_s; 4460 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4461 PetscInt proc,m; 4462 PetscInt **buf_ri,**buf_rj; 4463 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4464 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4465 MPI_Request *s_waits,*r_waits; 4466 MPI_Status *status; 4467 MatScalar *aa=a->a; 4468 MatScalar **abuf_r,*ba_i; 4469 Mat_Merge_SeqsToMPI *merge; 4470 PetscContainer container; 4471 4472 PetscFunctionBegin; 4473 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4474 4475 ierr = 
MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4476 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4477 4478 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4479 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4480 4481 bi = merge->bi; 4482 bj = merge->bj; 4483 buf_ri = merge->buf_ri; 4484 buf_rj = merge->buf_rj; 4485 4486 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4487 owners = merge->rowmap->range; 4488 len_s = merge->len_s; 4489 4490 /* send and recv matrix values */ 4491 /*-----------------------------*/ 4492 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4493 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4494 4495 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4496 for (proc=0,k=0; proc<size; proc++){ 4497 if (!len_s[proc]) continue; 4498 i = owners[proc]; 4499 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4500 k++; 4501 } 4502 4503 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4504 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4505 ierr = PetscFree(status);CHKERRQ(ierr); 4506 4507 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4508 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4509 4510 /* insert mat values of mpimat */ 4511 /*----------------------------*/ 4512 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4513 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4514 4515 for (k=0; k<merge->nrecv; k++){ 4516 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 4517 nrows = *(buf_ri_k[k]); 4518 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th received i-structure */ 4519 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */ 4520 } 4521 4522 /* set values of ba */ 4523 m = merge->rowmap->n; 4524 for (i=0; i<m; i++) { 4525 arow = owners[rank] + i; 4526 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4527 bnzi = bi[i+1] - bi[i]; 4528 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4529 4530 /* add local non-zero vals of this proc's seqmat into ba */ 4531 anzi = ai[arow+1] - ai[arow]; 4532 aj = a->j + ai[arow]; 4533 aa = a->a + ai[arow]; 4534 nextaj = 0; 4535 for (j=0; nextaj<anzi; j++){ 4536 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4537 ba_i[j] += aa[nextaj++]; 4538 } 4539 } 4540 4541 /* add received vals into ba */ 4542 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4543 /* i-th row */ 4544 if (i == *nextrow[k]) { 4545 anzi = *(nextai[k]+1) - *nextai[k]; 4546 aj = buf_rj[k] + *(nextai[k]); 4547 aa = abuf_r[k] + *(nextai[k]); 4548 nextaj = 0; 4549 for (j=0; nextaj<anzi; j++){ 4550 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4551 ba_i[j] += aa[nextaj++]; 4552 } 4553 } 4554 nextrow[k]++; nextai[k]++; 4555 } 4556 } 4557 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4558 } 4559 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4560 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4561 4562 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4563 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4564 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4565 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
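  /* communication buffers used only by this numeric phase have been freed above; the ij-structure kept in the Mat_Merge_SeqsToMPI container is retained so the numeric phase can be repeated with new values in seqmat */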
4566 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4567 PetscFunctionReturn(0); 4568 } 4569 4570 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4571 4572 #undef __FUNCT__ 4573 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4574 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4575 { 4576 PetscErrorCode ierr; 4577 Mat B_mpi; 4578 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4579 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4580 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4581 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4582 PetscInt len,proc,*dnz,*onz; 4583 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4584 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4585 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4586 MPI_Status *status; 4587 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4588 PetscBT lnkbt; 4589 Mat_Merge_SeqsToMPI *merge; 4590 PetscContainer container; 4591 4592 PetscFunctionBegin; 4593 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4594 4595 /* make sure it is a PETSc comm */ 4596 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4597 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4598 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4599 4600 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4601 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4602 4603 /* determine row ownership */ 4604 /*---------------------------------------------------------*/ 4605 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4606 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4607 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4608 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4609 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4610 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4611 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4612 4613 m = merge->rowmap->n; 4614 M = merge->rowmap->N; 4615 owners = merge->rowmap->range; 4616 4617 /* determine the number of messages to send, their lengths */ 4618 /*---------------------------------------------------------*/ 4619 len_s = merge->len_s; 4620 4621 len = 0; /* length of buf_si[] */ 4622 merge->nsend = 0; 4623 for (proc=0; proc<size; proc++){ 4624 len_si[proc] = 0; 4625 if (proc == rank){ 4626 len_s[proc] = 0; 4627 } else { 4628 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4629 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4630 } 4631 if (len_s[proc]) { 4632 merge->nsend++; 4633 nrows = 0; 4634 for (i=owners[proc]; i<owners[proc+1]; i++){ 4635 if (ai[i+1] > ai[i]) nrows++; 4636 } 4637 len_si[proc] = 2*(nrows+1); 4638 len += len_si[proc]; 4639 } 4640 } 4641 4642 /* determine the number and length of messages to receive for ij-structure */ 4643 /*-------------------------------------------------------------------------*/ 4644 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4645 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4646 4647 /* post the Irecv of j-structure */ 4648 /*-------------------------------*/ 4649 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4650 ierr = 
PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4651 4652 /* post the Isend of j-structure */ 4653 /*--------------------------------*/ 4654 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4655 4656 for (proc=0, k=0; proc<size; proc++){ 4657 if (!len_s[proc]) continue; 4658 i = owners[proc]; 4659 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4660 k++; 4661 } 4662 4663 /* receives and sends of j-structure are complete */ 4664 /*------------------------------------------------*/ 4665 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4666 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4667 4668 /* send and recv i-structure */ 4669 /*---------------------------*/ 4670 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4671 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4672 4673 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4674 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4675 for (proc=0,k=0; proc<size; proc++){ 4676 if (!len_s[proc]) continue; 4677 /* form outgoing message for i-structure: 4678 buf_si[0]: nrows to be sent 4679 [1:nrows]: row index (global) 4680 [nrows+1:2*nrows+1]: i-structure index 4681 */ 4682 /*-------------------------------------------*/ 4683 nrows = len_si[proc]/2 - 1; 4684 buf_si_i = buf_si + nrows+1; 4685 buf_si[0] = nrows; 4686 buf_si_i[0] = 0; 4687 nrows = 0; 4688 for (i=owners[proc]; i<owners[proc+1]; i++){ 4689 anzi = ai[i+1] - ai[i]; 4690 if (anzi) { 4691 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4692 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4693 nrows++; 4694 } 4695 } 4696 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4697 k++; 4698 buf_si += len_si[proc]; 4699 } 4700 4701 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4702 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4703 4704 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4705 for (i=0; i<merge->nrecv; i++){ 4706 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4707 } 4708 4709 ierr = PetscFree(len_si);CHKERRQ(ierr); 4710 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4711 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4712 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4713 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4714 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4715 ierr = PetscFree(status);CHKERRQ(ierr); 4716 4717 /* compute a local seq matrix in each processor */ 4718 /*----------------------------------------------*/ 4719 /* allocate bi array and free space for accumulating nonzero column info */ 4720 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4721 bi[0] = 0; 4722 4723 /* create and initialize a linked list */ 4724 nlnk = N+1; 4725 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4726 4727 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4728 len = 0; 4729 len = ai[owners[rank+1]] - ai[owners[rank]]; 4730 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4731 current_space = free_space; 4732 4733 /* determine symbolic info for each local row */ 4734 ierr = 
PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4735 4736 for (k=0; k<merge->nrecv; k++){ 4737 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 4738 nrows = *buf_ri_k[k]; 4739 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */ 4740 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */ 4741 } 4742 4743 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4744 len = 0; 4745 for (i=0;i<m;i++) { 4746 bnzi = 0; 4747 /* add local non-zero cols of this proc's seqmat into lnk */ 4748 arow = owners[rank] + i; 4749 anzi = ai[arow+1] - ai[arow]; 4750 aj = a->j + ai[arow]; 4751 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4752 bnzi += nlnk; 4753 /* add received col data into lnk */ 4754 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4755 if (i == *nextrow[k]) { /* i-th row */ 4756 anzi = *(nextai[k]+1) - *nextai[k]; 4757 aj = buf_rj[k] + *nextai[k]; 4758 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4759 bnzi += nlnk; 4760 nextrow[k]++; nextai[k]++; 4761 } 4762 } 4763 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4764 4765 /* if free space is not available, make more free space */ 4766 if (current_space->local_remaining<bnzi) { 4767 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr); 4768 nspacedouble++; 4769 } 4770 /* copy data into free space, then initialize lnk */ 4771 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4772 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4773 4774 current_space->array += bnzi; 4775 current_space->local_used += bnzi; 4776 current_space->local_remaining -= bnzi; 4777 4778 bi[i+1] = bi[i] + bnzi; 4779 } 4780 4781 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4782 4783 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4784 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4785 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4786 4787 /* create symbolic parallel matrix B_mpi */ 4788 /*---------------------------------------*/ 4789 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4790 if (n==PETSC_DECIDE) { 4791 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4792 } else { 4793 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4794 } 4795 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4796 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4797 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4798 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4799 4800 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4801 B_mpi->assembled = PETSC_FALSE; 4802 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4803 merge->bi = bi; 4804 merge->bj = bj; 4805 merge->buf_ri = buf_ri; 4806 merge->buf_rj = buf_rj; 4807 merge->coi = PETSC_NULL; 4808 merge->coj = PETSC_NULL; 4809 merge->owners_co = PETSC_NULL; 4810 4811 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4812 4813 /* attach the supporting struct to B_mpi for reuse */ 4814 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4815 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4816 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4817
ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4818 *mpimat = B_mpi; 4819 4820 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4821 PetscFunctionReturn(0); 4822 } 4823 4824 #undef __FUNCT__ 4825 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4826 /*@C 4827 MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding the sequential 4828 matrices from each processor 4829 4830 Collective on MPI_Comm 4831 4832 Input Parameters: 4833 + comm - the communicator the parallel matrix will live on 4834 . seqmat - the input sequential matrix on each process 4835 . m - number of local rows (or PETSC_DECIDE) 4836 . n - number of local columns (or PETSC_DECIDE) 4837 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4838 4839 Output Parameter: 4840 . mpimat - the parallel matrix generated 4841 4842 Level: advanced 4843 4844 Notes: 4845 The dimensions of the sequential matrix in each processor MUST be the same. 4846 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be 4847 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4848 @*/ 4849 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4850 { 4851 PetscErrorCode ierr; 4852 PetscMPIInt size; 4853 4854 PetscFunctionBegin; 4855 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4856 if (size == 1){ 4857 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4858 if (scall == MAT_INITIAL_MATRIX){ 4859 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4860 } else { 4861 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4862 } 4863 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4864 PetscFunctionReturn(0); 4865 } 4866 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4867 if (scall == MAT_INITIAL_MATRIX){ 4868 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4869 } 4870 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4871 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4872 PetscFunctionReturn(0); 4873 } 4874 4875 #undef __FUNCT__ 4876 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4877 /*@ 4878 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4879 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4880 with MatGetSize() 4881 4882 Not Collective 4883 4884 Input Parameters: 4885 + A - the matrix 4886 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4887 4888 Output Parameter: 4889 .
A_loc - the local sequential matrix generated 4890 4891 Level: developer 4892 4893 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 4894 4895 @*/ 4896 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4897 { 4898 PetscErrorCode ierr; 4899 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4900 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4901 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4902 MatScalar *aa=a->a,*ba=b->a,*cam; 4903 PetscScalar *ca; 4904 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4905 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4906 PetscBool match; 4907 4908 PetscFunctionBegin; 4909 ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4910 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4911 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4912 if (scall == MAT_INITIAL_MATRIX){ 4913 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4914 ci[0] = 0; 4915 for (i=0; i<am; i++){ 4916 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4917 } 4918 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4919 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4920 k = 0; 4921 for (i=0; i<am; i++) { 4922 ncols_o = bi[i+1] - bi[i]; 4923 ncols_d = ai[i+1] - ai[i]; 4924 /* off-diagonal portion of A */ 4925 for (jo=0; jo<ncols_o; jo++) { 4926 col = cmap[*bj]; 4927 if (col >= cstart) break; 4928 cj[k] = col; bj++; 4929 ca[k++] = *ba++; 4930 } 4931 /* diagonal portion of A */ 4932 for (j=0; j<ncols_d; j++) { 4933 cj[k] = cstart + *aj++; 4934 ca[k++] = *aa++; 4935 } 4936 /* off-diagonal portion of A */ 4937 for (j=jo; j<ncols_o; j++) { 4938 cj[k] = cmap[*bj++]; 4939 ca[k++] = *ba++; 4940 } 4941 } 4942 /* put together the new matrix */ 4943 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4944 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4945 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4946 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4947 mat->free_a = PETSC_TRUE; 4948 mat->free_ij = PETSC_TRUE; 4949 mat->nonew = 0; 4950 } else if (scall == MAT_REUSE_MATRIX){ 4951 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4952 ci = mat->i; cj = mat->j; cam = mat->a; 4953 for (i=0; i<am; i++) { 4954 /* off-diagonal portion of A */ 4955 ncols_o = bi[i+1] - bi[i]; 4956 for (jo=0; jo<ncols_o; jo++) { 4957 col = cmap[*bj]; 4958 if (col >= cstart) break; 4959 *cam++ = *ba++; bj++; 4960 } 4961 /* diagonal portion of A */ 4962 ncols_d = ai[i+1] - ai[i]; 4963 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4964 /* off-diagonal portion of A */ 4965 for (j=jo; j<ncols_o; j++) { 4966 *cam++ = *ba++; bj++; 4967 } 4968 } 4969 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4970 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4971 PetscFunctionReturn(0); 4972 } 4973 4974 #undef __FUNCT__ 4975 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 4976 /*@C 4977 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 4978 4979 Not Collective 4980 4981 Input Parameters: 4982 + A - the matrix 4983 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4984 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4985 4986 Output Parameter: 4987 .
A_loc - the local sequential matrix generated 4988 4989 Level: developer 4990 4991 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 4992 4993 @*/ 4994 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4995 { 4996 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4997 PetscErrorCode ierr; 4998 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4999 IS isrowa,iscola; 5000 Mat *aloc; 5001 PetscBool match; 5002 5003 PetscFunctionBegin; 5004 ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5005 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5006 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5007 if (!row){ 5008 start = A->rmap->rstart; end = A->rmap->rend; 5009 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5010 } else { 5011 isrowa = *row; 5012 } 5013 if (!col){ 5014 start = A->cmap->rstart; 5015 cmap = a->garray; 5016 nzA = a->A->cmap->n; 5017 nzB = a->B->cmap->n; 5018 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5019 ncols = 0; 5020 for (i=0; i<nzB; i++) { 5021 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5022 else break; 5023 } 5024 imark = i; 5025 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5026 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5027 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5028 } else { 5029 iscola = *col; 5030 } 5031 if (scall != MAT_INITIAL_MATRIX){ 5032 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5033 aloc[0] = *A_loc; 5034 } 5035 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5036 *A_loc = aloc[0]; 5037 ierr = PetscFree(aloc);CHKERRQ(ierr); 5038 if (!row){ 5039 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5040 } 5041 if (!col){ 5042 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5043 } 5044 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5045 PetscFunctionReturn(0); 5046 } 5047 5048 #undef __FUNCT__ 5049 #define __FUNCT__ "MatGetBrowsOfAcols" 5050 /*@C 5051 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that correspond to nonzero columns of local A 5052 5053 Collective on Mat 5054 5055 Input Parameters: 5056 + A,B - the matrices in mpiaij format 5057 .
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5058 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 5059 5060 Output Parameter: 5061 + rowb, colb - index sets of rows and columns of B to extract 5062 - B_seq - the sequential matrix generated 5063 5064 Level: developer 5065 5066 @*/ 5067 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5068 { 5069 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5070 PetscErrorCode ierr; 5071 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5072 IS isrowb,iscolb; 5073 Mat *bseq=PETSC_NULL; 5074 5075 PetscFunctionBegin; 5076 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5077 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5078 } 5079 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5080 5081 if (scall == MAT_INITIAL_MATRIX){ 5082 start = A->cmap->rstart; 5083 cmap = a->garray; 5084 nzA = a->A->cmap->n; 5085 nzB = a->B->cmap->n; 5086 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5087 ncols = 0; 5088 for (i=0; i<nzB; i++) { /* row < local row index */ 5089 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5090 else break; 5091 } 5092 imark = i; 5093 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5094 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5095 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5096 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5097 } else { 5098 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5099 isrowb = *rowb; iscolb = *colb; 5100 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5101 bseq[0] = *B_seq; 5102 } 5103 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5104 *B_seq = bseq[0]; 5105 ierr = PetscFree(bseq);CHKERRQ(ierr); 5106 if (!rowb){ 5107 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5108 } else { 5109 *rowb = isrowb; 5110 } 5111 if (!colb){ 5112 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5113 } else { 5114 *colb = iscolb; 5115 } 5116 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5117 PetscFunctionReturn(0); 5118 } 5119 5120 #undef __FUNCT__ 5121 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5122 /* 5123 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that correspond to nonzero columns 5124 of the OFF-DIAGONAL portion of local A 5125 5126 Collective on Mat 5127 5128 Input Parameters: 5129 + A,B - the matrices in mpiaij format 5130 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5131 5132 Output Parameter: 5133 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5134 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5135 .
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 5136 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5137 5138 Level: developer 5139 5140 */ 5141 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5142 { 5143 VecScatter_MPI_General *gen_to,*gen_from; 5144 PetscErrorCode ierr; 5145 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5146 Mat_SeqAIJ *b_oth; 5147 VecScatter ctx=a->Mvctx; 5148 MPI_Comm comm=((PetscObject)ctx)->comm; 5149 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5150 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5151 PetscScalar *rvalues,*svalues; 5152 MatScalar *b_otha,*bufa,*bufA; 5153 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5154 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 5155 MPI_Status *sstatus,rstatus; 5156 PetscMPIInt jj; 5157 PetscInt *cols,sbs,rbs; 5158 PetscScalar *vals; 5159 5160 PetscFunctionBegin; 5161 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5162 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5163 } 5164 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5165 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5166 5167 gen_to = (VecScatter_MPI_General*)ctx->todata; 5168 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5169 rvalues = gen_from->values; /* holds the length of receiving row */ 5170 svalues = gen_to->values; /* holds the length of sending row */ 5171 nrecvs = gen_from->n; 5172 nsends = gen_to->n; 5173 5174 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5175 srow = gen_to->indices; /* local row index to be sent */ 5176 sstarts = gen_to->starts; 5177 sprocs = gen_to->procs; 5178 sstatus = gen_to->sstatus; 5179 sbs = gen_to->bs; 5180 rstarts = gen_from->starts; 5181 rprocs = gen_from->procs; 5182 rbs = gen_from->bs; 5183 5184 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5185 if (scall == MAT_INITIAL_MATRIX){ 5186 /* i-array */ 5187 /*---------*/ 5188 /* post receives */ 5189 for (i=0; i<nrecvs; i++){ 5190 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5191 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5192 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5193 } 5194 5195 /* pack the outgoing message */ 5196 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5197 sstartsj[0] = 0; rstartsj[0] = 0; 5198 len = 0; /* total length of j or a array to be sent */ 5199 k = 0; 5200 for (i=0; i<nsends; i++){ 5201 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5202 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5203 for (j=0; j<nrows; j++) { 5204 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5205 for (l=0; l<sbs; l++){ 5206 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5207 rowlen[j*sbs+l] = ncols; 5208 len += ncols; 5209 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5210 } 5211 k++; 5212 } 5213 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5214 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5215 } 5216 /* 
recvs and sends of i-array are completed */ 5217 i = nrecvs; 5218 while (i--) { 5219 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5220 } 5221 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5222 5223 /* allocate buffers for sending j and a arrays */ 5224 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5225 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5226 5227 /* create i-array of B_oth */ 5228 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5229 b_othi[0] = 0; 5230 len = 0; /* total length of j or a array to be received */ 5231 k = 0; 5232 for (i=0; i<nrecvs; i++){ 5233 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5234 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 5235 for (j=0; j<nrows; j++) { 5236 b_othi[k+1] = b_othi[k] + rowlen[j]; 5237 len += rowlen[j]; k++; 5238 } 5239 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5240 } 5241 5242 /* allocate space for j and a arrays of B_oth */ 5243 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5244 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5245 5246 /* j-array */ 5247 /*---------*/ 5248 /* post receives of j-array */ 5249 for (i=0; i<nrecvs; i++){ 5250 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5251 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5252 } 5253 5254 /* pack the outgoing message j-array */ 5255 k = 0; 5256 for (i=0; i<nsends; i++){ 5257 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5258 bufJ = bufj+sstartsj[i]; 5259 for (j=0; j<nrows; j++) { 5260 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5261 for (ll=0; ll<sbs; ll++){ 5262 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5263 for (l=0; l<ncols; l++){ 5264 *bufJ++ = cols[l]; 5265 } 5266 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5267 } 5268 } 5269 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5270 } 5271 5272 /* recvs and sends of j-array are completed */ 5273 i = nrecvs; 5274 while (i--) { 5275 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5276 } 5277 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5278 } else if (scall == MAT_REUSE_MATRIX){ 5279 sstartsj = *startsj_s; 5280 rstartsj = *startsj_r; 5281 bufa = *bufa_ptr; 5282 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5283 b_otha = b_oth->a; 5284 } else { 5285 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5286 } 5287 5288 /* a-array */ 5289 /*---------*/ 5290 /* post receives of a-array */ 5291 for (i=0; i<nrecvs; i++){ 5292 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5293 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5294 } 5295 5296 /* pack the outgoing message a-array */ 5297 k = 0; 5298 for (i=0; i<nsends; i++){ 5299 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5300 bufA = bufa+sstartsj[i]; 5301 for (j=0; j<nrows; j++) { 5302 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5303 for (ll=0; ll<sbs; ll++){ 5304 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5305 for (l=0; l<ncols; l++){ 5306 *bufA++ = vals[l]; 5307 } 5308 ierr =
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5309 } 5310 } 5311 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5312 } 5313 /* recvs and sends of a-array are completed */ 5314 i = nrecvs; 5315 while (i--) { 5316 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5317 } 5318 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5319 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5320 5321 if (scall == MAT_INITIAL_MATRIX){ 5322 /* put together the new matrix */ 5323 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5324 5325 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5326 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5327 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5328 b_oth->free_a = PETSC_TRUE; 5329 b_oth->free_ij = PETSC_TRUE; 5330 b_oth->nonew = 0; 5331 5332 ierr = PetscFree(bufj);CHKERRQ(ierr); 5333 if (!startsj_s || !bufa_ptr){ 5334 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5335 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5336 } else { 5337 *startsj_s = sstartsj; 5338 *startsj_r = rstartsj; 5339 *bufa_ptr = bufa; 5340 } 5341 } 5342 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5343 PetscFunctionReturn(0); 5344 } 5345 5346 #undef __FUNCT__ 5347 #define __FUNCT__ "MatGetCommunicationStructs" 5348 /*@C 5349 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5350 5351 Not Collective 5352 5353 Input Parameters: 5354 . A - The matrix in mpiaij format 5355 5356 Output Parameter: 5357 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5358 . 
colmap - A map from global column index to local index into lvec 5359 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5360 5361 Level: developer 5362 5363 @*/ 5364 #if defined (PETSC_USE_CTABLE) 5365 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5366 #else 5367 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5368 #endif 5369 { 5370 Mat_MPIAIJ *a; 5371 5372 PetscFunctionBegin; 5373 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5374 PetscValidPointer(lvec, 2); 5375 PetscValidPointer(colmap, 3); 5376 PetscValidPointer(multScatter, 4); 5377 a = (Mat_MPIAIJ *) A->data; 5378 if (lvec) *lvec = a->lvec; 5379 if (colmap) *colmap = a->colmap; 5380 if (multScatter) *multScatter = a->Mvctx; 5381 PetscFunctionReturn(0); 5382 } 5383 5384 EXTERN_C_BEGIN 5385 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*); 5386 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*); 5387 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 5388 EXTERN_C_END 5389 5390 #undef __FUNCT__ 5391 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5392 /* 5393 Computes (B'*A')' since computing B*A directly is untenable 5394 5395 n p p 5396 ( ) ( ) ( ) 5397 m ( A ) * n ( B ) = m ( C ) 5398 ( ) ( ) ( ) 5399 5400 */ 5401 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5402 { 5403 PetscErrorCode ierr; 5404 Mat At,Bt,Ct; 5405 5406 PetscFunctionBegin; 5407 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5408 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5409 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5410 ierr = MatDestroy(&At);CHKERRQ(ierr); 5411 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5412 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5413 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5414 PetscFunctionReturn(0); 5415 } 5416 5417 #undef __FUNCT__ 5418 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5419 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5420 { 5421 PetscErrorCode ierr; 5422 PetscInt m=A->rmap->n,n=B->cmap->n; 5423 Mat Cmat; 5424 5425 PetscFunctionBegin; 5426 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5427 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5428 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5429 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5430 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5431 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5432 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5433 *C = Cmat; 5434 (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ; 5435 PetscFunctionReturn(0); 5436 } 5437 5438 /* ----------------------------------------------------------------*/ 5439 #undef __FUNCT__ 5440 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5441 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5442 { 5443 PetscErrorCode ierr; 5444 5445 PetscFunctionBegin; 5446 if (scall == MAT_INITIAL_MATRIX){ 5447 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5448 } 5449 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5450 PetscFunctionReturn(0); 5451 } 5452 5453 EXTERN_C_BEGIN 5454 #if 
defined(PETSC_HAVE_MUMPS) 5455 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5456 #endif 5457 #if defined(PETSC_HAVE_PASTIX) 5458 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5459 #endif 5460 #if defined(PETSC_HAVE_SUPERLU_DIST) 5461 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5462 #endif 5463 #if defined(PETSC_HAVE_SPOOLES) 5464 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5465 #endif 5466 EXTERN_C_END 5467 5468 /*MC 5469 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5470 5471 Options Database Keys: 5472 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5473 5474 Level: beginner 5475 5476 .seealso: MatCreateAIJ() 5477 M*/ 5478 5479 EXTERN_C_BEGIN 5480 #undef __FUNCT__ 5481 #define __FUNCT__ "MatCreate_MPIAIJ" 5482 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5483 { 5484 Mat_MPIAIJ *b; 5485 PetscErrorCode ierr; 5486 PetscMPIInt size; 5487 5488 PetscFunctionBegin; 5489 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5490 5491 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5492 B->data = (void*)b; 5493 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5494 B->assembled = PETSC_FALSE; 5495 5496 B->insertmode = NOT_SET_VALUES; 5497 b->size = size; 5498 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5499 5500 /* build cache for off array entries formed */ 5501 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5502 b->donotstash = PETSC_FALSE; 5503 b->colmap = 0; 5504 b->garray = 0; 5505 b->roworiented = PETSC_TRUE; 5506 5507 /* stuff used for matrix vector multiply */ 5508 b->lvec = PETSC_NULL; 5509 b->Mvctx = PETSC_NULL; 5510 5511 /* stuff for MatGetRow() */ 5512 b->rowindices = 0; 5513 b->rowvalues = 0; 5514 b->getrowactive = PETSC_FALSE; 5515 5516 #if defined(PETSC_HAVE_SPOOLES) 5517 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5518 "MatGetFactor_mpiaij_spooles", 5519 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5520 #endif 5521 #if defined(PETSC_HAVE_MUMPS) 5522 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5523 "MatGetFactor_aij_mumps", 5524 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5525 #endif 5526 #if defined(PETSC_HAVE_PASTIX) 5527 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5528 "MatGetFactor_mpiaij_pastix", 5529 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5530 #endif 5531 #if defined(PETSC_HAVE_SUPERLU_DIST) 5532 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5533 "MatGetFactor_mpiaij_superlu_dist", 5534 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5535 #endif 5536 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5537 "MatStoreValues_MPIAIJ", 5538 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5539 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5540 "MatRetrieveValues_MPIAIJ", 5541 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5542 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5543 "MatGetDiagonalBlock_MPIAIJ", 5544 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5545 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5546 "MatIsTranspose_MPIAIJ", 5547 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5548 ierr = 
PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5549 "MatMPIAIJSetPreallocation_MPIAIJ", 5550 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5551 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5552 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5553 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5554 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5555 "MatDiagonalScaleLocal_MPIAIJ", 5556 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5557 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5558 "MatConvert_MPIAIJ_MPIAIJPERM", 5559 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5560 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5561 "MatConvert_MPIAIJ_MPIAIJCRL", 5562 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5563 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5564 "MatConvert_MPIAIJ_MPISBAIJ", 5565 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5566 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5567 "MatMatMult_MPIDense_MPIAIJ", 5568 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5569 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5570 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5571 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5572 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5573 "MatMatMultNumeric_MPIDense_MPIAIJ", 5574 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5575 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5576 PetscFunctionReturn(0); 5577 } 5578 EXTERN_C_END 5579 5580 #undef __FUNCT__ 5581 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5582 /*@ 5583 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5584 and "off-diagonal" part of the matrix in CSR format. 5585 5586 Collective on MPI_Comm 5587 5588 Input Parameters: 5589 + comm - MPI communicator 5590 . m - number of local rows (Cannot be PETSC_DECIDE) 5591 . n - This value should be the same as the local size used in creating the 5592 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5593 calculated if N is given) For square matrices n is almost always m. 5594 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5595 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5596 . i - row indices for "diagonal" portion of matrix 5597 . j - column indices 5598 . a - matrix values 5599 . oi - row indices for "off-diagonal" portion of matrix 5600 . oj - column indices 5601 - oa - matrix values 5602 5603 Output Parameter: 5604 . mat - the matrix 5605 5606 Level: advanced 5607 5608 Notes: 5609 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5610 must free the arrays once the matrix has been destroyed and not before. 5611 5612 The i and j indices are 0 based 5613 5614 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5615 5616 This sets local rows and cannot be used to set off-processor values. 5617 5618 You cannot later use MatSetValues() to change values in this matrix. 
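   Example:
   An illustrative sketch (not taken from the PETSc examples): rank 0 of a two-process run could describe its
   single local row of the 2 x 4 matrix whose first row is (1 2 0 3) as follows. The entries in global columns
   0 and 1 belong to the 2-column "diagonal" block (since this rank owns those columns, the indices in j coincide
   with the global column numbers), while the entry in global column 3 goes into the "off-diagonal" block with its
   global column index stored in oj, matching the MatCreateSeqAIJWithArrays() calls in the implementation below.
.vb
     PetscInt    i[]  = {0,2},  j[]  = {0,1};
     PetscScalar a[]  = {1.0,2.0};
     PetscInt    oi[] = {0,1},  oj[] = {3};
     PetscScalar oa[] = {3.0};
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,2,2,4,i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);
.ve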
5619 5620 .keywords: matrix, aij, compressed row, sparse, parallel 5621 5622 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5623 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5624 @*/ 5625 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5626 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5627 { 5628 PetscErrorCode ierr; 5629 Mat_MPIAIJ *maij; 5630 5631 PetscFunctionBegin; 5632 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5633 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5634 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5635 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5636 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5637 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5638 maij = (Mat_MPIAIJ*) (*mat)->data; 5639 maij->donotstash = PETSC_TRUE; 5640 (*mat)->preallocated = PETSC_TRUE; 5641 5642 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5643 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5644 5645 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5646 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5647 5648 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5649 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5650 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5651 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5652 5653 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5654 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5655 PetscFunctionReturn(0); 5656 } 5657 5658 /* 5659 Special version for direct calls from Fortran 5660 */ 5661 #include <petsc-private/fortranimpl.h> 5662 5663 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5664 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5665 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5666 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5667 #endif 5668 5669 /* Change these macros so can be used in void function */ 5670 #undef CHKERRQ 5671 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5672 #undef SETERRQ2 5673 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5674 #undef SETERRQ3 5675 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5676 #undef SETERRQ 5677 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5678 5679 EXTERN_C_BEGIN 5680 #undef __FUNCT__ 5681 #define __FUNCT__ "matsetvaluesmpiaij_" 5682 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5683 { 5684 Mat mat = *mmat; 5685 PetscInt m = *mm, n = *mn; 5686 InsertMode addv = *maddv; 5687 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5688 PetscScalar value; 5689 PetscErrorCode ierr; 5690 5691 MatCheckPreallocated(mat,1); 5692 if (mat->insertmode == NOT_SET_VALUES) { 5693 mat->insertmode = addv; 5694 } 5695 #if defined(PETSC_USE_DEBUG) 5696 else if (mat->insertmode != addv) { 5697 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5698 } 5699 #endif 5700 { 5701 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5702 PetscInt cstart = 
mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5703 PetscBool roworiented = aij->roworiented; 5704 5705 /* Some Variables required in the macro */ 5706 Mat A = aij->A; 5707 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5708 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5709 MatScalar *aa = a->a; 5710 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5711 Mat B = aij->B; 5712 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5713 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5714 MatScalar *ba = b->a; 5715 5716 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5717 PetscInt nonew = a->nonew; 5718 MatScalar *ap1,*ap2; 5719 5720 PetscFunctionBegin; 5721 for (i=0; i<m; i++) { 5722 if (im[i] < 0) continue; 5723 #if defined(PETSC_USE_DEBUG) 5724 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5725 #endif 5726 if (im[i] >= rstart && im[i] < rend) { 5727 row = im[i] - rstart; 5728 lastcol1 = -1; 5729 rp1 = aj + ai[row]; 5730 ap1 = aa + ai[row]; 5731 rmax1 = aimax[row]; 5732 nrow1 = ailen[row]; 5733 low1 = 0; 5734 high1 = nrow1; 5735 lastcol2 = -1; 5736 rp2 = bj + bi[row]; 5737 ap2 = ba + bi[row]; 5738 rmax2 = bimax[row]; 5739 nrow2 = bilen[row]; 5740 low2 = 0; 5741 high2 = nrow2; 5742 5743 for (j=0; j<n; j++) { 5744 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5745 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5746 if (in[j] >= cstart && in[j] < cend){ 5747 col = in[j] - cstart; 5748 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5749 } else if (in[j] < 0) continue; 5750 #if defined(PETSC_USE_DEBUG) 5751 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5752 #endif 5753 else { 5754 if (mat->was_assembled) { 5755 if (!aij->colmap) { 5756 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5757 } 5758 #if defined (PETSC_USE_CTABLE) 5759 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5760 col--; 5761 #else 5762 col = aij->colmap[in[j]] - 1; 5763 #endif 5764 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5765 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5766 col = in[j]; 5767 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5768 B = aij->B; 5769 b = (Mat_SeqAIJ*)B->data; 5770 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5771 rp2 = bj + bi[row]; 5772 ap2 = ba + bi[row]; 5773 rmax2 = bimax[row]; 5774 nrow2 = bilen[row]; 5775 low2 = 0; 5776 high2 = nrow2; 5777 bm = aij->B->rmap->n; 5778 ba = b->a; 5779 } 5780 } else col = in[j]; 5781 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5782 } 5783 } 5784 } else { 5785 if (!aij->donotstash) { 5786 if (roworiented) { 5787 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5788 } else { 5789 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5790 } 5791 } 5792 } 5793 }} 5794 PetscFunctionReturnVoid(); 5795 } 5796 EXTERN_C_END 5797 5798
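/*
   Illustrative use of the sequential-to-MPI merge path implemented earlier in this file (a minimal sketch,
   assuming every process has assembled a SeqAIJ matrix of the same global size in the hypothetical variable
   localmat and that C is the resulting parallel matrix; this is not taken from the PETSc examples):

       ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,localmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
       ... update the numerical values of localmat without changing its nonzero pattern ...
       ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,localmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);

   The MAT_REUSE_MATRIX call skips the symbolic phase and only re-runs MatCreateMPIAIJSumSeqAIJNumeric(),
   reusing the Mat_Merge_SeqsToMPI container attached to C by the symbolic phase.
*/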