#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petscblaslapack.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, and MATAIJCRL. The type also automatically
   switches over to using inodes when enough of them exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

#undef __FUNCT__
#define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt = 0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
    ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    ok2:;
  }
  ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]),work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
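
    A rough usage sketch (illustrative only; gmat_seq, mlocal, and A are placeholder
    names, not code from this file): the numerical data of gmat is read only on
    process 0, which assembles it as a MATSEQAIJ; all processes then call

       ierr = MatDistribute_MPIAIJ(comm,gmat_seq,mlocal,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);

    collectively, where mlocal is the number of rows each process is to own.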
160 161 Only for square matrices 162 */ 163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 164 { 165 PetscMPIInt rank,size; 166 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 167 PetscErrorCode ierr; 168 Mat mat; 169 Mat_SeqAIJ *gmata; 170 PetscMPIInt tag; 171 MPI_Status status; 172 PetscBool aij; 173 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 174 175 PetscFunctionBegin; 176 CHKMEMQ; 177 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 178 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 179 if (!rank) { 180 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 181 if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 182 } 183 if (reuse == MAT_INITIAL_MATRIX) { 184 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 185 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 186 ierr = MatSetBlockSizes(mat,gmat->rmap->bs,gmat->cmap->bs);CHKERRQ(ierr); 187 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 188 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 189 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 190 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 191 rowners[0] = 0; 192 for (i=2; i<=size; i++) { 193 rowners[i] += rowners[i-1]; 194 } 195 rstart = rowners[rank]; 196 rend = rowners[rank+1]; 197 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 198 if (!rank) { 199 gmata = (Mat_SeqAIJ*) gmat->data; 200 /* send row lengths to all processors */ 201 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 202 for (i=1; i<size; i++) { 203 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 204 } 205 /* determine number diagonal and off-diagonal counts */ 206 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 207 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 208 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 209 jj = 0; 210 for (i=0; i<m; i++) { 211 for (j=0; j<dlens[i]; j++) { 212 if (gmata->j[jj] < rstart) ld[i]++; 213 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 214 jj++; 215 } 216 } 217 /* send column indices to other processes */ 218 for (i=1; i<size; i++) { 219 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 220 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 221 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 222 } 223 224 /* send numerical values to other processes */ 225 for (i=1; i<size; i++) { 226 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 227 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 228 } 229 gmataa = gmata->a; 230 gmataj = gmata->j; 231 232 } else { 233 /* receive row lengths */ 234 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 235 /* receive column indices */ 236 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 237 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 238 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 239 /* determine number diagonal and off-diagonal counts */ 240 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 241 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 242 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 243 jj = 0; 244 for (i=0; i<m; i++) { 245 for 
(j=0; j<dlens[i]; j++) { 246 if (gmataj[jj] < rstart) ld[i]++; 247 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 248 jj++; 249 } 250 } 251 /* receive numerical values */ 252 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 253 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 254 } 255 /* set preallocation */ 256 for (i=0; i<m; i++) { 257 dlens[i] -= olens[i]; 258 } 259 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 260 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 261 262 for (i=0; i<m; i++) { 263 dlens[i] += olens[i]; 264 } 265 cnt = 0; 266 for (i=0; i<m; i++) { 267 row = rstart + i; 268 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 269 cnt += dlens[i]; 270 } 271 if (rank) { 272 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 273 } 274 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 275 ierr = PetscFree(rowners);CHKERRQ(ierr); 276 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 277 *inmat = mat; 278 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 279 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 280 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 281 mat = *inmat; 282 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 283 if (!rank) { 284 /* send numerical values to other processes */ 285 gmata = (Mat_SeqAIJ*) gmat->data; 286 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 287 gmataa = gmata->a; 288 for (i=1; i<size; i++) { 289 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 290 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 291 } 292 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 293 } else { 294 /* receive numerical values from process 0*/ 295 nz = Ad->nz + Ao->nz; 296 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 297 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 298 } 299 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 300 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 301 ad = Ad->a; 302 ao = Ao->a; 303 if (mat->rmap->n) { 304 i = 0; 305 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 306 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 307 } 308 for (i=1; i<mat->rmap->n; i++) { 309 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 310 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 311 } 312 i--; 313 if (mat->rmap->n) { 314 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 315 } 316 if (rank) { 317 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 318 } 319 } 320 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 321 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 322 CHKMEMQ; 323 PetscFunctionReturn(0); 324 } 325 326 /* 327 Local utility routine that creates a mapping from the global column 328 number to the local number in the off-diagonal part of the local 329 storage of the matrix. 
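
    A small made-up example of the mapping: if the off-diagonal block B has
    garray = {3,7,12}, then global column 7 is local column 1 of B, so the map
    stores the value 1+1 = 2 for column 7 (both the array and the table variant
    shift by one because 0 means "not present"); callers subtract one from the
    looked-up value, so an absent column comes back as -1.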
    When PETSC_USE_CTABLE is used this is scalable at
    a slightly higher hash table cost; without it, it is not scalable (each processor
    has an order N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
  if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
  lastcol1 = col;\
  while (high1-low1 > 5) { \
    t = (low1+high1)/2; \
    if (rp1[t] > col) high1 = t; \
    else              low1  = t; \
  } \
  for (_i=low1; _i<high1; _i++) { \
    if (rp1[_i] > col) break; \
    if (rp1[_i] == col) { \
      if (addv == ADD_VALUES) ap1[_i] += value; \
      else                    ap1[_i] = value; \
      goto a_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
  if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
  N = nrow1++ - 1; a->nz++; high1++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp1[ii+1] = rp1[ii]; \
    ap1[ii+1] = ap1[ii]; \
  } \
  rp1[_i] = col; \
  ap1[_i] = value; \
  a_noinsert: ; \
  ailen[row] = nrow1; \
}


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
  if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
  lastcol2 = col;\
  while (high2-low2 > 5) { \
    t = (low2+high2)/2; \
    if (rp2[t] > col) high2 = t; \
    else              low2  = t; \
  } \
  for (_i=low2; _i<high2; _i++) { \
    if (rp2[_i] > col) break; \
    if (rp2[_i] == col) { \
      if (addv == ADD_VALUES) ap2[_i] += value; \
      else                    ap2[_i] = value; \
      goto b_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
  if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
  N = nrow2++ - 1; b->nz++; high2++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp2[ii+1] = rp2[ii]; \
    ap2[ii+1] = ap2[ii]; \
  } \
  rp2[_i] = col; \
  ap2[_i] = value; \
  b_noinsert: ; \
  bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__
"MatSetValuesRow_MPIAIJ" 426 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 427 { 428 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 429 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 430 PetscErrorCode ierr; 431 PetscInt l,*garray = mat->garray,diag; 432 433 PetscFunctionBegin; 434 /* code only works for square matrices A */ 435 436 /* find size of row to the left of the diagonal part */ 437 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 438 row = row - diag; 439 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 440 if (garray[b->j[b->i[row]+l]] > diag) break; 441 } 442 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 443 444 /* diagonal part */ 445 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 446 447 /* right of diagonal part */ 448 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 449 PetscFunctionReturn(0); 450 } 451 452 #undef __FUNCT__ 453 #define __FUNCT__ "MatSetValues_MPIAIJ" 454 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 455 { 456 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 457 PetscScalar value; 458 PetscErrorCode ierr; 459 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 460 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 461 PetscBool roworiented = aij->roworiented; 462 463 /* Some Variables required in the macro */ 464 Mat A = aij->A; 465 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 466 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 467 MatScalar *aa = a->a; 468 PetscBool ignorezeroentries = a->ignorezeroentries; 469 Mat B = aij->B; 470 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 471 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 472 MatScalar *ba = b->a; 473 474 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 475 PetscInt nonew; 476 MatScalar *ap1,*ap2; 477 478 PetscFunctionBegin; 479 if (v) PetscValidScalarPointer(v,6); 480 for (i=0; i<m; i++) { 481 if (im[i] < 0) continue; 482 #if defined(PETSC_USE_DEBUG) 483 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 484 #endif 485 if (im[i] >= rstart && im[i] < rend) { 486 row = im[i] - rstart; 487 lastcol1 = -1; 488 rp1 = aj + ai[row]; 489 ap1 = aa + ai[row]; 490 rmax1 = aimax[row]; 491 nrow1 = ailen[row]; 492 low1 = 0; 493 high1 = nrow1; 494 lastcol2 = -1; 495 rp2 = bj + bi[row]; 496 ap2 = ba + bi[row]; 497 rmax2 = bimax[row]; 498 nrow2 = bilen[row]; 499 low2 = 0; 500 high2 = nrow2; 501 502 for (j=0; j<n; j++) { 503 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 504 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 505 if (in[j] >= cstart && in[j] < cend){ 506 col = in[j] - cstart; 507 nonew = a->nonew; 508 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 509 } else if (in[j] < 0) continue; 510 #if defined(PETSC_USE_DEBUG) 511 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 512 #endif 513 else { 514 if (mat->was_assembled) { 515 if (!aij->colmap) { 516 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 517 } 518 #if defined (PETSC_USE_CTABLE) 519 
ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 520 col--; 521 #else 522 col = aij->colmap[in[j]] - 1; 523 #endif 524 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 525 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 526 col = in[j]; 527 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 528 B = aij->B; 529 b = (Mat_SeqAIJ*)B->data; 530 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 531 rp2 = bj + bi[row]; 532 ap2 = ba + bi[row]; 533 rmax2 = bimax[row]; 534 nrow2 = bilen[row]; 535 low2 = 0; 536 high2 = nrow2; 537 bm = aij->B->rmap->n; 538 ba = b->a; 539 } 540 } else col = in[j]; 541 nonew = b->nonew; 542 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 543 } 544 } 545 } else { 546 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 547 if (!aij->donotstash) { 548 mat->assembled = PETSC_FALSE; 549 if (roworiented) { 550 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 551 } else { 552 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 553 } 554 } 555 } 556 } 557 PetscFunctionReturn(0); 558 } 559 560 #undef __FUNCT__ 561 #define __FUNCT__ "MatGetValues_MPIAIJ" 562 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 563 { 564 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 565 PetscErrorCode ierr; 566 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 567 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 568 569 PetscFunctionBegin; 570 for (i=0; i<m; i++) { 571 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 572 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 573 if (idxm[i] >= rstart && idxm[i] < rend) { 574 row = idxm[i] - rstart; 575 for (j=0; j<n; j++) { 576 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 577 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 578 if (idxn[j] >= cstart && idxn[j] < cend){ 579 col = idxn[j] - cstart; 580 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 581 } else { 582 if (!aij->colmap) { 583 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 584 } 585 #if defined (PETSC_USE_CTABLE) 586 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 587 col --; 588 #else 589 col = aij->colmap[idxn[j]] - 1; 590 #endif 591 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 592 else { 593 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 594 } 595 } 596 } 597 } else { 598 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 599 } 600 } 601 PetscFunctionReturn(0); 602 } 603 604 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 605 606 #undef __FUNCT__ 607 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 608 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 609 { 610 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 611 PetscErrorCode ierr; 612 PetscInt nstash,reallocs; 613 InsertMode addv; 614 615 
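  /* A minimal calling sketch (illustrative; mat, row, col and val are placeholders,
     not code from this file): any process may set entries it does not own, provided
     every process then makes the collective assembly calls that drive the stash
     machinery below, e.g.

       ierr = MatSetValues(mat,1,&row,1,&col,&val,ADD_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  */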
  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble.
*/ 669 /* 670 if nonzero structure of submatrix B cannot change then we know that 671 no processor disassembled thus we can skip this stuff 672 */ 673 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 674 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 675 if (mat->was_assembled && !other_disassembled) { 676 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 677 } 678 } 679 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 680 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 681 } 682 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 683 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 684 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 685 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 686 687 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 688 aij->rowvalues = 0; 689 690 /* used by MatAXPY() */ 691 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 692 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 693 694 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 695 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 696 PetscFunctionReturn(0); 697 } 698 699 #undef __FUNCT__ 700 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 701 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 702 { 703 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 704 PetscErrorCode ierr; 705 706 PetscFunctionBegin; 707 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 708 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 709 PetscFunctionReturn(0); 710 } 711 712 #undef __FUNCT__ 713 #define __FUNCT__ "MatZeroRows_MPIAIJ" 714 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 715 { 716 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 717 PetscErrorCode ierr; 718 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 719 PetscInt i,*owners = A->rmap->range; 720 PetscInt *nprocs,j,idx,nsends,row; 721 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 722 PetscInt *rvalues,count,base,slen,*source; 723 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 724 MPI_Comm comm = ((PetscObject)A)->comm; 725 MPI_Request *send_waits,*recv_waits; 726 MPI_Status recv_status,*send_status; 727 const PetscScalar *xx; 728 PetscScalar *bb; 729 #if defined(PETSC_DEBUG) 730 PetscBool found = PETSC_FALSE; 731 #endif 732 733 PetscFunctionBegin; 734 /* first count number of contributors to each processor */ 735 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 736 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 737 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 738 j = 0; 739 for (i=0; i<N; i++) { 740 if (lastidx > (idx = rows[i])) j = 0; 741 lastidx = idx; 742 for (; j<size; j++) { 743 if (idx >= owners[j] && idx < owners[j+1]) { 744 nprocs[2*j]++; 745 nprocs[2*j+1] = 1; 746 owner[i] = j; 747 #if defined(PETSC_DEBUG) 748 found = PETSC_TRUE; 749 #endif 750 break; 751 } 752 } 753 #if defined(PETSC_DEBUG) 754 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 755 found = PETSC_FALSE; 756 #endif 757 } 758 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 759 760 if (A->nooffproczerorows) { 761 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 762 nrecvs = nsends; 763 nmax = N; 764 } 
else { 765 /* inform other processors of number of messages and max length*/ 766 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 767 } 768 769 /* post receives: */ 770 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 771 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 772 for (i=0; i<nrecvs; i++) { 773 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 774 } 775 776 /* do sends: 777 1) starts[i] gives the starting index in svalues for stuff going to 778 the ith processor 779 */ 780 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 781 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 782 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 783 starts[0] = 0; 784 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 785 for (i=0; i<N; i++) { 786 svalues[starts[owner[i]]++] = rows[i]; 787 } 788 789 starts[0] = 0; 790 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 791 count = 0; 792 for (i=0; i<size; i++) { 793 if (nprocs[2*i+1]) { 794 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 795 } 796 } 797 ierr = PetscFree(starts);CHKERRQ(ierr); 798 799 base = owners[rank]; 800 801 /* wait on receives */ 802 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 803 count = nrecvs; slen = 0; 804 while (count) { 805 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 806 /* unpack receives into our local space */ 807 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 808 source[imdex] = recv_status.MPI_SOURCE; 809 lens[imdex] = n; 810 slen += n; 811 count--; 812 } 813 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 814 815 /* move the data into the send scatter */ 816 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 817 count = 0; 818 for (i=0; i<nrecvs; i++) { 819 values = rvalues + i*nmax; 820 for (j=0; j<lens[i]; j++) { 821 lrows[count++] = values[j] - base; 822 } 823 } 824 ierr = PetscFree(rvalues);CHKERRQ(ierr); 825 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 826 ierr = PetscFree(owner);CHKERRQ(ierr); 827 ierr = PetscFree(nprocs);CHKERRQ(ierr); 828 829 /* fix right hand side if needed */ 830 if (x && b) { 831 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 832 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 833 for (i=0; i<slen; i++) { 834 bb[lrows[i]] = diag*xx[lrows[i]]; 835 } 836 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 837 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 838 } 839 /* 840 Zero the required rows. If the "diagonal block" of the matrix 841 is square and the user wishes to set the diagonal we use separate 842 code so that MatSetValues() is not called for each diagonal allocating 843 new memory, thus calling lots of mallocs and slowing things down. 
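
     As an illustration of the calling side (not code from this file), a user
     imposing Dirichlet boundary conditions typically does

       ierr = MatZeroRows(A,nbc,bcrows,1.0,x,b);CHKERRQ(ierr);

     with hypothetical nbc/bcrows describing the boundary rows; the diag value of
     1.0 ends up on those diagonal entries and, when x and b are supplied, b[row]
     is reset to diag*x[row] as was done above.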
844 845 */ 846 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 847 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 848 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 849 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 850 } else if (diag != 0.0) { 851 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 852 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 853 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 854 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 855 } 856 for (i = 0; i < slen; i++) { 857 row = lrows[i] + rstart; 858 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 859 } 860 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 861 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 862 } else { 863 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 864 } 865 ierr = PetscFree(lrows);CHKERRQ(ierr); 866 867 /* wait on sends */ 868 if (nsends) { 869 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 870 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 871 ierr = PetscFree(send_status);CHKERRQ(ierr); 872 } 873 ierr = PetscFree(send_waits);CHKERRQ(ierr); 874 ierr = PetscFree(svalues);CHKERRQ(ierr); 875 PetscFunctionReturn(0); 876 } 877 878 #undef __FUNCT__ 879 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 880 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 881 { 882 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 883 PetscErrorCode ierr; 884 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 885 PetscInt i,*owners = A->rmap->range; 886 PetscInt *nprocs,j,idx,nsends; 887 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 888 PetscInt *rvalues,count,base,slen,*source; 889 PetscInt *lens,*lrows,*values,m; 890 MPI_Comm comm = ((PetscObject)A)->comm; 891 MPI_Request *send_waits,*recv_waits; 892 MPI_Status recv_status,*send_status; 893 const PetscScalar *xx; 894 PetscScalar *bb,*mask; 895 Vec xmask,lmask; 896 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 897 const PetscInt *aj, *ii,*ridx; 898 PetscScalar *aa; 899 #if defined(PETSC_DEBUG) 900 PetscBool found = PETSC_FALSE; 901 #endif 902 903 PetscFunctionBegin; 904 /* first count number of contributors to each processor */ 905 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 906 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 907 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 908 j = 0; 909 for (i=0; i<N; i++) { 910 if (lastidx > (idx = rows[i])) j = 0; 911 lastidx = idx; 912 for (; j<size; j++) { 913 if (idx >= owners[j] && idx < owners[j+1]) { 914 nprocs[2*j]++; 915 nprocs[2*j+1] = 1; 916 owner[i] = j; 917 #if defined(PETSC_DEBUG) 918 found = PETSC_TRUE; 919 #endif 920 break; 921 } 922 } 923 #if defined(PETSC_DEBUG) 924 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 925 found = PETSC_FALSE; 926 #endif 927 } 928 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 929 930 /* inform other processors of number of messages and max length*/ 931 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 932 933 /* post receives: */ 934 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 935 ierr = 
PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 936 for (i=0; i<nrecvs; i++) { 937 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 938 } 939 940 /* do sends: 941 1) starts[i] gives the starting index in svalues for stuff going to 942 the ith processor 943 */ 944 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 945 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 946 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 947 starts[0] = 0; 948 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 949 for (i=0; i<N; i++) { 950 svalues[starts[owner[i]]++] = rows[i]; 951 } 952 953 starts[0] = 0; 954 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 955 count = 0; 956 for (i=0; i<size; i++) { 957 if (nprocs[2*i+1]) { 958 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 959 } 960 } 961 ierr = PetscFree(starts);CHKERRQ(ierr); 962 963 base = owners[rank]; 964 965 /* wait on receives */ 966 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 967 count = nrecvs; slen = 0; 968 while (count) { 969 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 970 /* unpack receives into our local space */ 971 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 972 source[imdex] = recv_status.MPI_SOURCE; 973 lens[imdex] = n; 974 slen += n; 975 count--; 976 } 977 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 978 979 /* move the data into the send scatter */ 980 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 981 count = 0; 982 for (i=0; i<nrecvs; i++) { 983 values = rvalues + i*nmax; 984 for (j=0; j<lens[i]; j++) { 985 lrows[count++] = values[j] - base; 986 } 987 } 988 ierr = PetscFree(rvalues);CHKERRQ(ierr); 989 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 990 ierr = PetscFree(owner);CHKERRQ(ierr); 991 ierr = PetscFree(nprocs);CHKERRQ(ierr); 992 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 993 994 /* zero diagonal part of matrix */ 995 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 996 997 /* handle off diagonal part of matrix */ 998 ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr); 999 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1000 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1001 for (i=0; i<slen; i++) { 1002 bb[lrows[i]] = 1; 1003 } 1004 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1005 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1006 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1007 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1008 if (x) { 1009 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1010 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1011 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1012 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1013 } 1014 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1015 1016 /* remove zeroed rows of off diagonal matrix */ 1017 ii = aij->i; 1018 for (i=0; i<slen; i++) { 1019 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1020 } 1021 1022 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1023 if (aij->compressedrow.use){ 1024 m = aij->compressedrow.nrows; 1025 ii = 
aij->compressedrow.i; 1026 ridx = aij->compressedrow.rindex; 1027 for (i=0; i<m; i++){ 1028 n = ii[i+1] - ii[i]; 1029 aj = aij->j + ii[i]; 1030 aa = aij->a + ii[i]; 1031 1032 for (j=0; j<n; j++) { 1033 if (PetscAbsScalar(mask[*aj])) { 1034 if (b) bb[*ridx] -= *aa*xx[*aj]; 1035 *aa = 0.0; 1036 } 1037 aa++; 1038 aj++; 1039 } 1040 ridx++; 1041 } 1042 } else { /* do not use compressed row format */ 1043 m = l->B->rmap->n; 1044 for (i=0; i<m; i++) { 1045 n = ii[i+1] - ii[i]; 1046 aj = aij->j + ii[i]; 1047 aa = aij->a + ii[i]; 1048 for (j=0; j<n; j++) { 1049 if (PetscAbsScalar(mask[*aj])) { 1050 if (b) bb[i] -= *aa*xx[*aj]; 1051 *aa = 0.0; 1052 } 1053 aa++; 1054 aj++; 1055 } 1056 } 1057 } 1058 if (x) { 1059 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1060 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1061 } 1062 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1063 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1064 ierr = PetscFree(lrows);CHKERRQ(ierr); 1065 1066 /* wait on sends */ 1067 if (nsends) { 1068 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1069 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1070 ierr = PetscFree(send_status);CHKERRQ(ierr); 1071 } 1072 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1073 ierr = PetscFree(svalues);CHKERRQ(ierr); 1074 1075 PetscFunctionReturn(0); 1076 } 1077 1078 #undef __FUNCT__ 1079 #define __FUNCT__ "MatMult_MPIAIJ" 1080 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1081 { 1082 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1083 PetscErrorCode ierr; 1084 PetscInt nt; 1085 1086 PetscFunctionBegin; 1087 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1088 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1089 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1090 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1091 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1092 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1093 PetscFunctionReturn(0); 1094 } 1095 1096 #undef __FUNCT__ 1097 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1098 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1099 { 1100 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1101 PetscErrorCode ierr; 1102 1103 PetscFunctionBegin; 1104 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1105 PetscFunctionReturn(0); 1106 } 1107 1108 #undef __FUNCT__ 1109 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1110 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1111 { 1112 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1113 PetscErrorCode ierr; 1114 1115 PetscFunctionBegin; 1116 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1117 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1118 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1119 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1120 PetscFunctionReturn(0); 1121 } 1122 1123 #undef __FUNCT__ 1124 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1125 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1126 { 1127 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1128 PetscErrorCode ierr; 1129 PetscBool merged; 1130 1131 PetscFunctionBegin; 1132 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1133 /* do nondiagonal part */ 1134 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1135 if (!merged) { 1136 /* 
send it on its way */ 1137 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1138 /* do local part */ 1139 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1140 /* receive remote parts: note this assumes the values are not actually */ 1141 /* added in yy until the next line, */ 1142 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1143 } else { 1144 /* do local part */ 1145 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1146 /* send it on its way */ 1147 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1148 /* values actually were received in the Begin() but we need to call this nop */ 1149 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1150 } 1151 PetscFunctionReturn(0); 1152 } 1153 1154 EXTERN_C_BEGIN 1155 #undef __FUNCT__ 1156 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1157 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1158 { 1159 MPI_Comm comm; 1160 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 1161 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1162 IS Me,Notme; 1163 PetscErrorCode ierr; 1164 PetscInt M,N,first,last,*notme,i; 1165 PetscMPIInt size; 1166 1167 PetscFunctionBegin; 1168 1169 /* Easy test: symmetric diagonal block */ 1170 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 1171 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1172 if (!*f) PetscFunctionReturn(0); 1173 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1174 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1175 if (size == 1) PetscFunctionReturn(0); 1176 1177 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1178 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1179 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1180 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1181 for (i=0; i<first; i++) notme[i] = i; 1182 for (i=last; i<M; i++) notme[i-last+first] = i; 1183 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1184 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1185 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1186 Aoff = Aoffs[0]; 1187 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1188 Boff = Boffs[0]; 1189 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1190 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1191 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1192 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1193 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1194 ierr = PetscFree(notme);CHKERRQ(ierr); 1195 PetscFunctionReturn(0); 1196 } 1197 EXTERN_C_END 1198 1199 #undef __FUNCT__ 1200 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1201 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1202 { 1203 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1204 PetscErrorCode ierr; 1205 1206 PetscFunctionBegin; 1207 /* do nondiagonal part */ 1208 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1209 /* send it on its way */ 1210 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1211 /* do local part */ 1212 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1213 /* receive remote parts */ 1214 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1215 PetscFunctionReturn(0); 1216 } 
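
/*
   The multiply kernels above share one overlap pattern; roughly (a sketch using
   the same Mat_MPIAIJ fields as the routines above):

     VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);   start the ghost update
     (*a->A->ops->mult)(a->A,xx,yy);                                       local (diagonal block) product
     VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);     finish the ghost update
     (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);                            add the off-diagonal contribution

   so off-process entries of xx arrive in a->lvec while the purely local part of
   the product is being computed.
*/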
1217 1218 /* 1219 This only works correctly for square matrices where the subblock A->A is the 1220 diagonal block 1221 */ 1222 #undef __FUNCT__ 1223 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1224 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1225 { 1226 PetscErrorCode ierr; 1227 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1228 1229 PetscFunctionBegin; 1230 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1231 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1232 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1233 PetscFunctionReturn(0); 1234 } 1235 1236 #undef __FUNCT__ 1237 #define __FUNCT__ "MatScale_MPIAIJ" 1238 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1239 { 1240 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1241 PetscErrorCode ierr; 1242 1243 PetscFunctionBegin; 1244 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1245 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1246 PetscFunctionReturn(0); 1247 } 1248 1249 #undef __FUNCT__ 1250 #define __FUNCT__ "MatDestroy_MPIAIJ" 1251 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1252 { 1253 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1254 PetscErrorCode ierr; 1255 1256 PetscFunctionBegin; 1257 #if defined(PETSC_USE_LOG) 1258 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1259 #endif 1260 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1261 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1262 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1263 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1264 #if defined (PETSC_USE_CTABLE) 1265 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1266 #else 1267 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1268 #endif 1269 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1270 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1271 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1272 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1273 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1274 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1275 1276 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1277 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1278 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1279 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1280 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1281 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1282 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1283 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1284 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1285 PetscFunctionReturn(0); 1286 } 1287 1288 #undef __FUNCT__ 1289 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1290 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1291 { 1292 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1293 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 1294 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 1295 PetscErrorCode ierr; 1296 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1297 int 
                 fd;
  PetscInt       nz,header[4],*row_lengths,*range = 0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr =
PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1375 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1376 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1377 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1378 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1379 } 1380 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1381 } else { 1382 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1383 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1384 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1385 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1386 } 1387 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1388 1389 /* load up the local column values */ 1390 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1391 cnt = 0; 1392 for (i=0; i<mat->rmap->n; i++) { 1393 for (j=B->i[i]; j<B->i[i+1]; j++) { 1394 if ( garray[B->j[j]] > cstart) break; 1395 column_values[cnt++] = B->a[j]; 1396 } 1397 for (k=A->i[i]; k<A->i[i+1]; k++) { 1398 column_values[cnt++] = A->a[k]; 1399 } 1400 for (; j<B->i[i+1]; j++) { 1401 column_values[cnt++] = B->a[j]; 1402 } 1403 } 1404 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1405 1406 /* store the column values to the file */ 1407 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1408 if (!rank) { 1409 MPI_Status status; 1410 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1411 for (i=1; i<size; i++) { 1412 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1413 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1414 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1415 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1416 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1417 } 1418 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1419 } else { 1420 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1421 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1422 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1423 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1424 } 1425 ierr = PetscFree(column_values);CHKERRQ(ierr); 1426 PetscFunctionReturn(0); 1427 } 1428 1429 #undef __FUNCT__ 1430 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1431 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1432 { 1433 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1434 PetscErrorCode ierr; 1435 PetscMPIInt rank = aij->rank,size = aij->size; 1436 PetscBool isdraw,iascii,isbinary; 1437 PetscViewer sviewer; 1438 PetscViewerFormat format; 1439 1440 PetscFunctionBegin; 1441 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1442 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1443 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1444 if (iascii) { 1445 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1446 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1447 MatInfo info; 1448 PetscBool inodes; 1449 1450 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1451 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1452 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1453 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1454 if (!inodes) { 1455 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1456 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1457 } else { 1458 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1459 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1460 } 1461 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1462 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1463 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1464 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1465 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1466 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1467 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1468 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1469 PetscFunctionReturn(0); 1470 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1471 PetscInt inodecount,inodelimit,*inodes; 1472 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1473 if (inodes) { 1474 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1475 } else { 1476 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1477 } 1478 PetscFunctionReturn(0); 1479 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1480 PetscFunctionReturn(0); 1481 } 1482 } else if (isbinary) { 1483 if (size == 1) { 1484 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1485 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1486 } else { 1487 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1488 } 1489 PetscFunctionReturn(0); 1490 } else if (isdraw) { 1491 PetscDraw draw; 1492 PetscBool isnull; 1493 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1494 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1495 } 1496 1497 if (size == 1) { 1498 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1499 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1500 } else { 1501 /* assemble the entire matrix onto first processor. 
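         This is potentially very memory consuming, since the whole matrix is gathered
         onto one process; that is why output of matrices with more than 1024 rows is
         refused below unless -mat_ascii_output_large is given.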
*/ 1502 Mat A; 1503 Mat_SeqAIJ *Aloc; 1504 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1505 MatScalar *a; 1506 1507 if (mat->rmap->N > 1024) { 1508 PetscBool flg = PETSC_FALSE; 1509 1510 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1511 if (!flg) { 1512 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1513 } 1514 } 1515 1516 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1517 if (!rank) { 1518 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1519 } else { 1520 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1521 } 1522 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1523 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1524 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1525 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1526 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1527 1528 /* copy over the A part */ 1529 Aloc = (Mat_SeqAIJ*)aij->A->data; 1530 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1531 row = mat->rmap->rstart; 1532 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1533 for (i=0; i<m; i++) { 1534 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1535 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1536 } 1537 aj = Aloc->j; 1538 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1539 1540 /* copy over the B part */ 1541 Aloc = (Mat_SeqAIJ*)aij->B->data; 1542 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1543 row = mat->rmap->rstart; 1544 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1545 ct = cols; 1546 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1547 for (i=0; i<m; i++) { 1548 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1549 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1550 } 1551 ierr = PetscFree(ct);CHKERRQ(ierr); 1552 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1553 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1554 /* 1555 Everyone has to call to draw the matrix since the graphics waits are 1556 synchronized across all processors that share the PetscDraw object 1557 */ 1558 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1559 if (!rank) { 1560 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1561 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1562 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1563 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1564 } 1565 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1566 ierr = MatDestroy(&A);CHKERRQ(ierr); 1567 } 1568 PetscFunctionReturn(0); 1569 } 1570 1571 #undef __FUNCT__ 1572 #define __FUNCT__ "MatView_MPIAIJ" 1573 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1574 { 1575 PetscErrorCode ierr; 1576 PetscBool iascii,isdraw,issocket,isbinary; 1577 1578 PetscFunctionBegin; 1579 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1580 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1581 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1582 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1583 if (iascii || isdraw || isbinary || issocket) { 1584 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1585 } else { 1586 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1587 } 1588 PetscFunctionReturn(0); 1589 } 1590 1591 #undef __FUNCT__ 1592 #define __FUNCT__ "MatSOR_MPIAIJ" 1593 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1594 { 1595 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1596 PetscErrorCode ierr; 1597 Vec bb1 = 0; 1598 PetscBool hasop; 1599 1600 PetscFunctionBegin; 1601 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1602 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1603 } 1604 1605 if (flag == SOR_APPLY_UPPER) { 1606 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1607 PetscFunctionReturn(0); 1608 } 1609 1610 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1611 if (flag & SOR_ZERO_INITIAL_GUESS) { 1612 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1613 its--; 1614 } 1615 1616 while (its--) { 1617 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1618 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1619 1620 /* update rhs: bb1 = bb - B*x */ 1621 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1622 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1623 1624 /* local sweep */ 1625 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1626 } 1627 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1628 if (flag & SOR_ZERO_INITIAL_GUESS) { 1629 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1630 its--; 1631 } 1632 while (its--) { 1633 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1634 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1635 1636 /* update rhs: bb1 = bb - B*x */ 1637 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1638 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1639 1640 /* local sweep */ 1641 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1642 } 1643 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1644 if (flag & SOR_ZERO_INITIAL_GUESS) { 1645 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1646 its--; 1647 } 1648 while (its--) { 1649 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1650 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1651 1652 /* update rhs: bb1 = bb - B*x */ 1653 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1654 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1655 1656 /* local sweep */ 1657 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1658 } 1659 } else if (flag & SOR_EISENSTAT) { 1660 Vec xx1; 1661 1662 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1663 ierr = 
(*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1664 1665 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1666 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1667 if (!mat->diag) { 1668 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1669 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1670 } 1671 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1672 if (hasop) { 1673 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1674 } else { 1675 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1676 } 1677 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1678 1679 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1680 1681 /* local sweep */ 1682 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1683 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1684 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1685 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1686 1687 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1688 PetscFunctionReturn(0); 1689 } 1690 1691 #undef __FUNCT__ 1692 #define __FUNCT__ "MatPermute_MPIAIJ" 1693 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1694 { 1695 MPI_Comm comm; 1696 PetscInt first,local_rowsize,local_colsize; 1697 const PetscInt *rows; 1698 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1699 PetscErrorCode ierr; 1700 1701 PetscFunctionBegin; 1702 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1703 /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */ 1704 ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr); 1705 /* collect the global row permutation and invert it */ 1706 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1707 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1708 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 1709 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1710 ierr = ISDestroy(&growp);CHKERRQ(ierr); 1711 /* get the local target indices */ 1712 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1713 ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr); 1714 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1715 ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 1716 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1717 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 1718 /* the column permutation is so much easier; 1719 make a local version of 'colp' and invert it */ 1720 ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr); 1721 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1722 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1723 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 1724 /* now we just get the submatrix */ 1725 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1726 /* clean up */ 1727 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 1728 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 1729 PetscFunctionReturn(0); 1730 } 1731 1732 #undef __FUNCT__ 1733 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1734 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1735 { 1736 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1737 Mat A = mat->A,B = 
mat->B; 1738 PetscErrorCode ierr; 1739 PetscReal isend[5],irecv[5]; 1740 1741 PetscFunctionBegin; 1742 info->block_size = 1.0; 1743 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1744 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1745 isend[3] = info->memory; isend[4] = info->mallocs; 1746 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1747 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1748 isend[3] += info->memory; isend[4] += info->mallocs; 1749 if (flag == MAT_LOCAL) { 1750 info->nz_used = isend[0]; 1751 info->nz_allocated = isend[1]; 1752 info->nz_unneeded = isend[2]; 1753 info->memory = isend[3]; 1754 info->mallocs = isend[4]; 1755 } else if (flag == MAT_GLOBAL_MAX) { 1756 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1757 info->nz_used = irecv[0]; 1758 info->nz_allocated = irecv[1]; 1759 info->nz_unneeded = irecv[2]; 1760 info->memory = irecv[3]; 1761 info->mallocs = irecv[4]; 1762 } else if (flag == MAT_GLOBAL_SUM) { 1763 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1764 info->nz_used = irecv[0]; 1765 info->nz_allocated = irecv[1]; 1766 info->nz_unneeded = irecv[2]; 1767 info->memory = irecv[3]; 1768 info->mallocs = irecv[4]; 1769 } 1770 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1771 info->fill_ratio_needed = 0; 1772 info->factor_mallocs = 0; 1773 1774 PetscFunctionReturn(0); 1775 } 1776 1777 #undef __FUNCT__ 1778 #define __FUNCT__ "MatSetOption_MPIAIJ" 1779 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1780 { 1781 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1782 PetscErrorCode ierr; 1783 1784 PetscFunctionBegin; 1785 switch (op) { 1786 case MAT_NEW_NONZERO_LOCATIONS: 1787 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1788 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1789 case MAT_KEEP_NONZERO_PATTERN: 1790 case MAT_NEW_NONZERO_LOCATION_ERR: 1791 case MAT_USE_INODES: 1792 case MAT_IGNORE_ZERO_ENTRIES: 1793 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1794 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1795 break; 1796 case MAT_ROW_ORIENTED: 1797 a->roworiented = flg; 1798 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1799 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1800 break; 1801 case MAT_NEW_DIAGONALS: 1802 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1803 break; 1804 case MAT_IGNORE_OFF_PROC_ENTRIES: 1805 a->donotstash = flg; 1806 break; 1807 case MAT_SPD: 1808 A->spd_set = PETSC_TRUE; 1809 A->spd = flg; 1810 if (flg) { 1811 A->symmetric = PETSC_TRUE; 1812 A->structurally_symmetric = PETSC_TRUE; 1813 A->symmetric_set = PETSC_TRUE; 1814 A->structurally_symmetric_set = PETSC_TRUE; 1815 } 1816 break; 1817 case MAT_SYMMETRIC: 1818 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1819 break; 1820 case MAT_STRUCTURALLY_SYMMETRIC: 1821 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1822 break; 1823 case MAT_HERMITIAN: 1824 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1825 break; 1826 case MAT_SYMMETRY_ETERNAL: 1827 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1828 break; 1829 default: 1830 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1831 } 1832 PetscFunctionReturn(0); 1833 } 1834 1835 #undef __FUNCT__ 1836 #define __FUNCT__ "MatGetRow_MPIAIJ" 1837 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1838 { 1839 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1840 
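  /*
     Merges the requested local row of the diagonal block (mat->A) and the off-diagonal
     block (mat->B) into a single row in increasing global column order, translating B's
     compressed column indices back to global indices through garray.

     Illustrative caller-side sketch (callers use the public MatGetRow()/MatRestoreRow(),
     and row must lie in this process's ownership range):

       PetscInt          ncols;
       const PetscInt    *cols;
       const PetscScalar *vals;
       ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ... examine ncols, cols[], vals[] ...
       ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
  */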
PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1841 PetscErrorCode ierr; 1842 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1843 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1844 PetscInt *cmap,*idx_p; 1845 1846 PetscFunctionBegin; 1847 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1848 mat->getrowactive = PETSC_TRUE; 1849 1850 if (!mat->rowvalues && (idx || v)) { 1851 /* 1852 allocate enough space to hold information from the longest row. 1853 */ 1854 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1855 PetscInt max = 1,tmp; 1856 for (i=0; i<matin->rmap->n; i++) { 1857 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1858 if (max < tmp) { max = tmp; } 1859 } 1860 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1861 } 1862 1863 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1864 lrow = row - rstart; 1865 1866 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1867 if (!v) {pvA = 0; pvB = 0;} 1868 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1869 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1870 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1871 nztot = nzA + nzB; 1872 1873 cmap = mat->garray; 1874 if (v || idx) { 1875 if (nztot) { 1876 /* Sort by increasing column numbers, assuming A and B already sorted */ 1877 PetscInt imark = -1; 1878 if (v) { 1879 *v = v_p = mat->rowvalues; 1880 for (i=0; i<nzB; i++) { 1881 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1882 else break; 1883 } 1884 imark = i; 1885 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1886 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1887 } 1888 if (idx) { 1889 *idx = idx_p = mat->rowindices; 1890 if (imark > -1) { 1891 for (i=0; i<imark; i++) { 1892 idx_p[i] = cmap[cworkB[i]]; 1893 } 1894 } else { 1895 for (i=0; i<nzB; i++) { 1896 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1897 else break; 1898 } 1899 imark = i; 1900 } 1901 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1902 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1903 } 1904 } else { 1905 if (idx) *idx = 0; 1906 if (v) *v = 0; 1907 } 1908 } 1909 *nz = nztot; 1910 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1911 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1912 PetscFunctionReturn(0); 1913 } 1914 1915 #undef __FUNCT__ 1916 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1917 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1918 { 1919 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1920 1921 PetscFunctionBegin; 1922 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1923 aij->getrowactive = PETSC_FALSE; 1924 PetscFunctionReturn(0); 1925 } 1926 1927 #undef __FUNCT__ 1928 #define __FUNCT__ "MatNorm_MPIAIJ" 1929 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1930 { 1931 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1932 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1933 PetscErrorCode ierr; 1934 PetscInt i,j,cstart = mat->cmap->rstart; 1935 PetscReal sum = 0.0; 1936 MatScalar *v; 1937 1938 PetscFunctionBegin; 1939 if (aij->size == 1) { 1940 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1941 } else { 1942 if (type == 
NORM_FROBENIUS) { 1943 v = amat->a; 1944 for (i=0; i<amat->nz; i++) { 1945 #if defined(PETSC_USE_COMPLEX) 1946 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1947 #else 1948 sum += (*v)*(*v); v++; 1949 #endif 1950 } 1951 v = bmat->a; 1952 for (i=0; i<bmat->nz; i++) { 1953 #if defined(PETSC_USE_COMPLEX) 1954 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1955 #else 1956 sum += (*v)*(*v); v++; 1957 #endif 1958 } 1959 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1960 *norm = PetscSqrtReal(*norm); 1961 } else if (type == NORM_1) { /* max column norm */ 1962 PetscReal *tmp,*tmp2; 1963 PetscInt *jj,*garray = aij->garray; 1964 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1965 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1966 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1967 *norm = 0.0; 1968 v = amat->a; jj = amat->j; 1969 for (j=0; j<amat->nz; j++) { 1970 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1971 } 1972 v = bmat->a; jj = bmat->j; 1973 for (j=0; j<bmat->nz; j++) { 1974 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1975 } 1976 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1977 for (j=0; j<mat->cmap->N; j++) { 1978 if (tmp2[j] > *norm) *norm = tmp2[j]; 1979 } 1980 ierr = PetscFree(tmp);CHKERRQ(ierr); 1981 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1982 } else if (type == NORM_INFINITY) { /* max row norm */ 1983 PetscReal ntemp = 0.0; 1984 for (j=0; j<aij->A->rmap->n; j++) { 1985 v = amat->a + amat->i[j]; 1986 sum = 0.0; 1987 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1988 sum += PetscAbsScalar(*v); v++; 1989 } 1990 v = bmat->a + bmat->i[j]; 1991 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1992 sum += PetscAbsScalar(*v); v++; 1993 } 1994 if (sum > ntemp) ntemp = sum; 1995 } 1996 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1997 } else { 1998 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 1999 } 2000 } 2001 PetscFunctionReturn(0); 2002 } 2003 2004 #undef __FUNCT__ 2005 #define __FUNCT__ "MatTranspose_MPIAIJ" 2006 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2007 { 2008 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2009 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2010 PetscErrorCode ierr; 2011 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 2012 PetscInt cstart=A->cmap->rstart,ncol; 2013 Mat B; 2014 MatScalar *array; 2015 2016 PetscFunctionBegin; 2017 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2018 2019 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 2020 ai = Aloc->i; aj = Aloc->j; 2021 bi = Bloc->i; bj = Bloc->j; 2022 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2023 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 2024 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2025 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 2026 for (i=0; i<ai[ma]; i++){ 2027 d_nnz[aj[i]] ++; 2028 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2029 } 2030 2031 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2032 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2033 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs); 
CHKERRQ(ierr); 2034 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2035 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 2036 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2037 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2038 } else { 2039 B = *matout; 2040 } 2041 2042 /* copy over the A part */ 2043 array = Aloc->a; 2044 row = A->rmap->rstart; 2045 for (i=0; i<ma; i++) { 2046 ncol = ai[i+1]-ai[i]; 2047 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2048 row++; array += ncol; aj += ncol; 2049 } 2050 aj = Aloc->j; 2051 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2052 2053 /* copy over the B part */ 2054 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2055 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2056 array = Bloc->a; 2057 row = A->rmap->rstart; 2058 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 2059 cols_tmp = cols; 2060 for (i=0; i<mb; i++) { 2061 ncol = bi[i+1]-bi[i]; 2062 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2063 row++; array += ncol; cols_tmp += ncol; 2064 } 2065 ierr = PetscFree(cols);CHKERRQ(ierr); 2066 2067 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2068 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2069 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2070 *matout = B; 2071 } else { 2072 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2073 } 2074 PetscFunctionReturn(0); 2075 } 2076 2077 #undef __FUNCT__ 2078 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2079 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2080 { 2081 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2082 Mat a = aij->A,b = aij->B; 2083 PetscErrorCode ierr; 2084 PetscInt s1,s2,s3; 2085 2086 PetscFunctionBegin; 2087 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2088 if (rr) { 2089 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2090 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2091 /* Overlap communication with computation. 
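       The forward scatter of rr into aij->lvec is started here and completed only after the diagonal block has been scaled, so the left scaling of b and the scaling of a below proceed while the off-process entries of rr are still in flight.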
*/ 2092 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2093 } 2094 if (ll) { 2095 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2096 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2097 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2098 } 2099 /* scale the diagonal block */ 2100 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2101 2102 if (rr) { 2103 /* Do a scatter end and then right scale the off-diagonal block */ 2104 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2105 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2106 } 2107 2108 PetscFunctionReturn(0); 2109 } 2110 2111 #undef __FUNCT__ 2112 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2113 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2114 { 2115 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2116 PetscErrorCode ierr; 2117 2118 PetscFunctionBegin; 2119 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2120 PetscFunctionReturn(0); 2121 } 2122 2123 #undef __FUNCT__ 2124 #define __FUNCT__ "MatEqual_MPIAIJ" 2125 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2126 { 2127 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2128 Mat a,b,c,d; 2129 PetscBool flg; 2130 PetscErrorCode ierr; 2131 2132 PetscFunctionBegin; 2133 a = matA->A; b = matA->B; 2134 c = matB->A; d = matB->B; 2135 2136 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2137 if (flg) { 2138 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2139 } 2140 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2141 PetscFunctionReturn(0); 2142 } 2143 2144 #undef __FUNCT__ 2145 #define __FUNCT__ "MatCopy_MPIAIJ" 2146 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2147 { 2148 PetscErrorCode ierr; 2149 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2150 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2151 2152 PetscFunctionBegin; 2153 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2154 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2155 /* because of the column compression in the off-processor part of the matrix a->B, 2156 the number of columns in a->B and b->B may be different, hence we cannot call 2157 the MatCopy() directly on the two parts. 
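       Instead we fall back on MatCopy_Basic(), which copies row by row through MatGetRow()/MatSetValues() and is therefore valid for any pair of AIJ matrices.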
If need be, we can provide a more 2158 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2159 then copying the submatrices */ 2160 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2161 } else { 2162 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2163 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2164 } 2165 PetscFunctionReturn(0); 2166 } 2167 2168 #undef __FUNCT__ 2169 #define __FUNCT__ "MatSetUp_MPIAIJ" 2170 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2171 { 2172 PetscErrorCode ierr; 2173 2174 PetscFunctionBegin; 2175 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2176 PetscFunctionReturn(0); 2177 } 2178 2179 #undef __FUNCT__ 2180 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2181 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2182 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz) 2183 { 2184 PetscInt i,m=Y->rmap->N; 2185 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2186 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2187 const PetscInt *xi = x->i,*yi = y->i; 2188 2189 PetscFunctionBegin; 2190 /* Set the number of nonzeros in the new matrix */ 2191 for(i=0; i<m; i++) { 2192 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2193 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2194 nnz[i] = 0; 2195 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2196 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2197 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2198 nnz[i]++; 2199 } 2200 for (; k<nzy; k++) nnz[i]++; 2201 } 2202 PetscFunctionReturn(0); 2203 } 2204 2205 #undef __FUNCT__ 2206 #define __FUNCT__ "MatAXPY_MPIAIJ" 2207 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2208 { 2209 PetscErrorCode ierr; 2210 PetscInt i; 2211 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2212 PetscBLASInt bnz,one=1; 2213 Mat_SeqAIJ *x,*y; 2214 2215 PetscFunctionBegin; 2216 if (str == SAME_NONZERO_PATTERN) { 2217 PetscScalar alpha = a; 2218 x = (Mat_SeqAIJ *)xx->A->data; 2219 y = (Mat_SeqAIJ *)yy->A->data; 2220 bnz = PetscBLASIntCast(x->nz); 2221 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2222 x = (Mat_SeqAIJ *)xx->B->data; 2223 y = (Mat_SeqAIJ *)yy->B->data; 2224 bnz = PetscBLASIntCast(x->nz); 2225 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2226 } else if (str == SUBSET_NONZERO_PATTERN) { 2227 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2228 2229 x = (Mat_SeqAIJ *)xx->B->data; 2230 y = (Mat_SeqAIJ *)yy->B->data; 2231 if (y->xtoy && y->XtoY != xx->B) { 2232 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2233 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2234 } 2235 if (!y->xtoy) { /* get xtoy */ 2236 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2237 y->XtoY = xx->B; 2238 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2239 } 2240 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2241 } else { 2242 Mat B; 2243 PetscInt *nnz_d,*nnz_o; 2244 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2245 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2246 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2247 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2248 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2249 ierr = 
MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2250 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2251 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2252 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2253 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2254 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2255 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2256 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2257 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2258 } 2259 PetscFunctionReturn(0); 2260 } 2261 2262 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2263 2264 #undef __FUNCT__ 2265 #define __FUNCT__ "MatConjugate_MPIAIJ" 2266 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2267 { 2268 #if defined(PETSC_USE_COMPLEX) 2269 PetscErrorCode ierr; 2270 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2271 2272 PetscFunctionBegin; 2273 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2274 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2275 #else 2276 PetscFunctionBegin; 2277 #endif 2278 PetscFunctionReturn(0); 2279 } 2280 2281 #undef __FUNCT__ 2282 #define __FUNCT__ "MatRealPart_MPIAIJ" 2283 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2284 { 2285 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2286 PetscErrorCode ierr; 2287 2288 PetscFunctionBegin; 2289 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2290 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2291 PetscFunctionReturn(0); 2292 } 2293 2294 #undef __FUNCT__ 2295 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2296 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2297 { 2298 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2299 PetscErrorCode ierr; 2300 2301 PetscFunctionBegin; 2302 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2303 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2304 PetscFunctionReturn(0); 2305 } 2306 2307 #ifdef PETSC_HAVE_PBGL 2308 2309 #include <boost/parallel/mpi/bsp_process_group.hpp> 2310 #include <boost/graph/distributed/ilu_default_graph.hpp> 2311 #include <boost/graph/distributed/ilu_0_block.hpp> 2312 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2313 #include <boost/graph/distributed/petsc/interface.hpp> 2314 #include <boost/multi_array.hpp> 2315 #include <boost/parallel/distributed_property_map.hpp> 2316 2317 #undef __FUNCT__ 2318 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2319 /* 2320 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2321 */ 2322 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2323 { 2324 namespace petsc = boost::distributed::petsc; 2325 2326 namespace graph_dist = boost::graph::distributed; 2327 using boost::graph::distributed::ilu_default::process_group_type; 2328 using boost::graph::ilu_permuted; 2329 2330 PetscBool row_identity, col_identity; 2331 PetscContainer c; 2332 PetscInt m, n, M, N; 2333 PetscErrorCode ierr; 2334 2335 PetscFunctionBegin; 2336 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2337 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2338 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2339 if (!row_identity || !col_identity) { 2340 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2341 } 2342 2343 process_group_type pg; 2344 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2345 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg,
petsc::matrix_distribution(A, pg)); 2346 lgraph_type& level_graph = *lgraph_p; 2347 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2348 2349 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2350 ilu_permuted(level_graph); 2351 2352 /* put together the new matrix */ 2353 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2354 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2355 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2356 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2357 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs); CHKERRQ(ierr); 2358 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2359 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2360 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2361 2362 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 2363 ierr = PetscContainerSetPointer(c, lgraph_p); 2364 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2365 ierr = PetscContainerDestroy(&c); 2366 PetscFunctionReturn(0); 2367 } 2368 2369 #undef __FUNCT__ 2370 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2371 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2372 { 2373 PetscFunctionBegin; 2374 PetscFunctionReturn(0); 2375 } 2376 2377 #undef __FUNCT__ 2378 #define __FUNCT__ "MatSolve_MPIAIJ" 2379 /* 2380 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2381 */ 2382 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2383 { 2384 namespace graph_dist = boost::graph::distributed; 2385 2386 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2387 lgraph_type* lgraph_p; 2388 PetscContainer c; 2389 PetscErrorCode ierr; 2390 2391 PetscFunctionBegin; 2392 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2393 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2394 ierr = VecCopy(b, x);CHKERRQ(ierr); 2395 2396 PetscScalar* array_x; 2397 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2398 PetscInt sx; 2399 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2400 2401 PetscScalar* array_b; 2402 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2403 PetscInt sb; 2404 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2405 2406 lgraph_type& level_graph = *lgraph_p; 2407 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2408 2409 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2410 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2411 ref_x(array_x, boost::extents[num_vertices(graph)]); 2412 2413 typedef boost::iterator_property_map<array_ref_type::iterator, 2414 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2415 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2416 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2417 2418 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2419 2420 PetscFunctionReturn(0); 2421 } 2422 #endif 2423 2424 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2425 PetscInt nzlocal,nsends,nrecvs; 2426 PetscMPIInt *send_rank,*recv_rank; 2427 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2428 PetscScalar *sbuf_a,**rbuf_a; 2429 PetscErrorCode (*Destroy)(Mat); 2430 } Mat_Redundant; 2431 2432 #undef __FUNCT__ 2433 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2434 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2435 { 2436 PetscErrorCode ierr; 2437 Mat_Redundant 
*redund=(Mat_Redundant*)ptr; 2438 PetscInt i; 2439 2440 PetscFunctionBegin; 2441 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2442 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2443 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2444 for (i=0; i<redund->nrecvs; i++){ 2445 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2446 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2447 } 2448 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2449 ierr = PetscFree(redund);CHKERRQ(ierr); 2450 PetscFunctionReturn(0); 2451 } 2452 2453 #undef __FUNCT__ 2454 #define __FUNCT__ "MatDestroy_MatRedundant" 2455 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2456 { 2457 PetscErrorCode ierr; 2458 PetscContainer container; 2459 Mat_Redundant *redund=PETSC_NULL; 2460 2461 PetscFunctionBegin; 2462 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2463 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist"); 2464 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2465 A->ops->destroy = redund->Destroy; 2466 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2467 if (A->ops->destroy) { 2468 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2469 } 2470 PetscFunctionReturn(0); 2471 } 2472 2473 #undef __FUNCT__ 2474 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2475 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2476 { 2477 PetscMPIInt rank,size; 2478 MPI_Comm comm=((PetscObject)mat)->comm; 2479 PetscErrorCode ierr; 2480 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2481 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2482 PetscInt *rowrange=mat->rmap->range; 2483 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2484 Mat A=aij->A,B=aij->B,C=*matredundant; 2485 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2486 PetscScalar *sbuf_a; 2487 PetscInt nzlocal=a->nz+b->nz; 2488 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2489 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2490 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2491 MatScalar *aworkA,*aworkB; 2492 PetscScalar *vals; 2493 PetscMPIInt tag1,tag2,tag3,imdex; 2494 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2495 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2496 MPI_Status recv_status,*send_status; 2497 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2498 PetscInt **rbuf_j=PETSC_NULL; 2499 PetscScalar **rbuf_a=PETSC_NULL; 2500 Mat_Redundant *redund=PETSC_NULL; 2501 PetscContainer container; 2502 2503 PetscFunctionBegin; 2504 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2505 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2506 2507 if (reuse == MAT_REUSE_MATRIX) { 2508 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2509 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2510 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2511 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix.
Wrong local size"); 2512 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2513 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2514 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2515 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2516 2517 nsends = redund->nsends; 2518 nrecvs = redund->nrecvs; 2519 send_rank = redund->send_rank; 2520 recv_rank = redund->recv_rank; 2521 sbuf_nz = redund->sbuf_nz; 2522 rbuf_nz = redund->rbuf_nz; 2523 sbuf_j = redund->sbuf_j; 2524 sbuf_a = redund->sbuf_a; 2525 rbuf_j = redund->rbuf_j; 2526 rbuf_a = redund->rbuf_a; 2527 } 2528 2529 if (reuse == MAT_INITIAL_MATRIX){ 2530 PetscMPIInt subrank,subsize; 2531 PetscInt nleftover,np_subcomm; 2532 /* get the destination processors' id send_rank, nsends and nrecvs */ 2533 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2534 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2535 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2536 np_subcomm = size/nsubcomm; 2537 nleftover = size - nsubcomm*np_subcomm; 2538 nsends = 0; nrecvs = 0; 2539 for (i=0; i<size; i++){ /* i=rank*/ 2540 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2541 send_rank[nsends] = i; nsends++; 2542 recv_rank[nrecvs++] = i; 2543 } 2544 } 2545 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2546 i = size-nleftover-1; 2547 j = 0; 2548 while (j < nsubcomm - nleftover){ 2549 send_rank[nsends++] = i; 2550 i--; j++; 2551 } 2552 } 2553 2554 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2555 for (i=0; i<nleftover; i++){ 2556 recv_rank[nrecvs++] = size-nleftover+i; 2557 } 2558 } 2559 2560 /* allocate sbuf_j, sbuf_a */ 2561 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2562 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2563 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2564 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2565 2566 /* copy mat's local entries into the buffers */ 2567 if (reuse == MAT_INITIAL_MATRIX){ 2568 rownz_max = 0; 2569 rptr = sbuf_j; 2570 cols = sbuf_j + rend-rstart + 1; 2571 vals = sbuf_a; 2572 rptr[0] = 0; 2573 for (i=0; i<rend-rstart; i++){ 2574 row = i + rstart; 2575 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2576 ncols = nzA + nzB; 2577 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2578 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2579 /* load the column indices for this row into cols */ 2580 lwrite = 0; 2581 for (l=0; l<nzB; l++) { 2582 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2583 vals[lwrite] = aworkB[l]; 2584 cols[lwrite++] = ctmp; 2585 } 2586 } 2587 for (l=0; l<nzA; l++){ 2588 vals[lwrite] = aworkA[l]; 2589 cols[lwrite++] = cstart + cworkA[l]; 2590 } 2591 for (l=0; l<nzB; l++) { 2592 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2593 vals[lwrite] = aworkB[l]; 2594 cols[lwrite++] = ctmp; 2595 } 2596 } 2597 vals += ncols; 2598 cols += ncols; 2599 rptr[i+1] = rptr[i] + ncols; 2600 if (rownz_max < ncols) rownz_max = ncols; 2601 } 2602 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2603 } else { /* only copy matrix values into sbuf_a */ 2604 rptr = sbuf_j; 2605 vals = sbuf_a; 2606 rptr[0] = 0; 2607 for (i=0; i<rend-rstart; i++){ 2608 row = i + 
rstart; 2609 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2610 ncols = nzA + nzB; 2611 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2612 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2613 lwrite = 0; 2614 for (l=0; l<nzB; l++) { 2615 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2616 } 2617 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2618 for (l=0; l<nzB; l++) { 2619 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2620 } 2621 vals += ncols; 2622 rptr[i+1] = rptr[i] + ncols; 2623 } 2624 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2625 2626 /* send nzlocal to others, and recv other's nzlocal */ 2627 /*--------------------------------------------------*/ 2628 if (reuse == MAT_INITIAL_MATRIX){ 2629 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2630 s_waits2 = s_waits3 + nsends; 2631 s_waits1 = s_waits2 + nsends; 2632 r_waits1 = s_waits1 + nsends; 2633 r_waits2 = r_waits1 + nrecvs; 2634 r_waits3 = r_waits2 + nrecvs; 2635 } else { 2636 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2637 r_waits3 = s_waits3 + nsends; 2638 } 2639 2640 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2641 if (reuse == MAT_INITIAL_MATRIX){ 2642 /* get new tags to keep the communication clean */ 2643 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2644 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2645 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2646 2647 /* post receives of other's nzlocal */ 2648 for (i=0; i<nrecvs; i++){ 2649 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2650 } 2651 /* send nzlocal to others */ 2652 for (i=0; i<nsends; i++){ 2653 sbuf_nz[i] = nzlocal; 2654 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2655 } 2656 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2657 count = nrecvs; 2658 while (count) { 2659 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2660 recv_rank[imdex] = recv_status.MPI_SOURCE; 2661 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2662 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2663 2664 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2665 rbuf_nz[imdex] += i + 2; 2666 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2667 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2668 count--; 2669 } 2670 /* wait on sends of nzlocal */ 2671 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2672 /* send mat->i,j to others, and recv from other's */ 2673 /*------------------------------------------------*/ 2674 for (i=0; i<nsends; i++){ 2675 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2676 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2677 } 2678 /* wait on receives of mat->i,j */ 2679 /*------------------------------*/ 2680 count = nrecvs; 2681 while (count) { 2682 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2683 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank 
%d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2684 count--; 2685 } 2686 /* wait on sends of mat->i,j */ 2687 /*---------------------------*/ 2688 if (nsends) { 2689 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2690 } 2691 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2692 2693 /* post receives, send and receive mat->a */ 2694 /*----------------------------------------*/ 2695 for (imdex=0; imdex<nrecvs; imdex++) { 2696 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2697 } 2698 for (i=0; i<nsends; i++){ 2699 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2700 } 2701 count = nrecvs; 2702 while (count) { 2703 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2704 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2705 count--; 2706 } 2707 if (nsends) { 2708 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2709 } 2710 2711 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2712 2713 /* create redundant matrix */ 2714 /*-------------------------*/ 2715 if (reuse == MAT_INITIAL_MATRIX){ 2716 /* compute rownz_max for preallocation */ 2717 for (imdex=0; imdex<nrecvs; imdex++){ 2718 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2719 rptr = rbuf_j[imdex]; 2720 for (i=0; i<j; i++){ 2721 ncols = rptr[i+1] - rptr[i]; 2722 if (rownz_max < ncols) rownz_max = ncols; 2723 } 2724 } 2725 2726 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2727 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2728 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs); CHKERRQ(ierr); 2729 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2730 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2731 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2732 } else { 2733 C = *matredundant; 2734 } 2735 2736 /* insert local matrix entries */ 2737 rptr = sbuf_j; 2738 cols = sbuf_j + rend-rstart + 1; 2739 vals = sbuf_a; 2740 for (i=0; i<rend-rstart; i++){ 2741 row = i + rstart; 2742 ncols = rptr[i+1] - rptr[i]; 2743 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2744 vals += ncols; 2745 cols += ncols; 2746 } 2747 /* insert received matrix entries */ 2748 for (imdex=0; imdex<nrecvs; imdex++){ 2749 rstart = rowrange[recv_rank[imdex]]; 2750 rend = rowrange[recv_rank[imdex]+1]; 2751 rptr = rbuf_j[imdex]; 2752 cols = rbuf_j[imdex] + rend-rstart + 1; 2753 vals = rbuf_a[imdex]; 2754 for (i=0; i<rend-rstart; i++){ 2755 row = i + rstart; 2756 ncols = rptr[i+1] - rptr[i]; 2757 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2758 vals += ncols; 2759 cols += ncols; 2760 } 2761 } 2762 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2763 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2764 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2765 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2766 if (reuse == MAT_INITIAL_MATRIX) { 2767 PetscContainer container; 2768 *matredundant = C; 2769 /* create a supporting struct and attach it to C for reuse */ 2770 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2771 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 
2772 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2773 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2774 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2775 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2776 2777 redund->nzlocal = nzlocal; 2778 redund->nsends = nsends; 2779 redund->nrecvs = nrecvs; 2780 redund->send_rank = send_rank; 2781 redund->recv_rank = recv_rank; 2782 redund->sbuf_nz = sbuf_nz; 2783 redund->rbuf_nz = rbuf_nz; 2784 redund->sbuf_j = sbuf_j; 2785 redund->sbuf_a = sbuf_a; 2786 redund->rbuf_j = rbuf_j; 2787 redund->rbuf_a = rbuf_a; 2788 2789 redund->Destroy = C->ops->destroy; 2790 C->ops->destroy = MatDestroy_MatRedundant; 2791 } 2792 PetscFunctionReturn(0); 2793 } 2794 2795 #undef __FUNCT__ 2796 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2797 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2798 { 2799 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2800 PetscErrorCode ierr; 2801 PetscInt i,*idxb = 0; 2802 PetscScalar *va,*vb; 2803 Vec vtmp; 2804 2805 PetscFunctionBegin; 2806 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2807 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2808 if (idx) { 2809 for (i=0; i<A->rmap->n; i++) { 2810 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2811 } 2812 } 2813 2814 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2815 if (idx) { 2816 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2817 } 2818 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2819 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2820 2821 for (i=0; i<A->rmap->n; i++){ 2822 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2823 va[i] = vb[i]; 2824 if (idx) idx[i] = a->garray[idxb[i]]; 2825 } 2826 } 2827 2828 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2829 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2830 ierr = PetscFree(idxb);CHKERRQ(ierr); 2831 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2832 PetscFunctionReturn(0); 2833 } 2834 2835 #undef __FUNCT__ 2836 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2837 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2838 { 2839 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2840 PetscErrorCode ierr; 2841 PetscInt i,*idxb = 0; 2842 PetscScalar *va,*vb; 2843 Vec vtmp; 2844 2845 PetscFunctionBegin; 2846 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2847 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2848 if (idx) { 2849 for (i=0; i<A->cmap->n; i++) { 2850 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2851 } 2852 } 2853 2854 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2855 if (idx) { 2856 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2857 } 2858 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2859 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2860 2861 for (i=0; i<A->rmap->n; i++){ 2862 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2863 va[i] = vb[i]; 2864 if (idx) idx[i] = a->garray[idxb[i]]; 2865 } 2866 } 2867 2868 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2869 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2870 ierr = PetscFree(idxb);CHKERRQ(ierr); 2871 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2872 PetscFunctionReturn(0); 2873 } 2874 2875 #undef __FUNCT__ 2876 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2877 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2878 { 2879 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2880 PetscInt n = A->rmap->n; 2881 PetscInt cstart = 
A->cmap->rstart; 2882 PetscInt *cmap = mat->garray; 2883 PetscInt *diagIdx, *offdiagIdx; 2884 Vec diagV, offdiagV; 2885 PetscScalar *a, *diagA, *offdiagA; 2886 PetscInt r; 2887 PetscErrorCode ierr; 2888 2889 PetscFunctionBegin; 2890 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2891 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2892 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2893 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2894 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2895 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2896 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2897 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2898 for(r = 0; r < n; ++r) { 2899 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2900 a[r] = diagA[r]; 2901 idx[r] = cstart + diagIdx[r]; 2902 } else { 2903 a[r] = offdiagA[r]; 2904 idx[r] = cmap[offdiagIdx[r]]; 2905 } 2906 } 2907 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2908 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2909 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2910 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2911 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2912 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2913 PetscFunctionReturn(0); 2914 } 2915 2916 #undef __FUNCT__ 2917 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2918 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2919 { 2920 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2921 PetscInt n = A->rmap->n; 2922 PetscInt cstart = A->cmap->rstart; 2923 PetscInt *cmap = mat->garray; 2924 PetscInt *diagIdx, *offdiagIdx; 2925 Vec diagV, offdiagV; 2926 PetscScalar *a, *diagA, *offdiagA; 2927 PetscInt r; 2928 PetscErrorCode ierr; 2929 2930 PetscFunctionBegin; 2931 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2932 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2933 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2934 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2935 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2936 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2937 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2938 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2939 for(r = 0; r < n; ++r) { 2940 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2941 a[r] = diagA[r]; 2942 idx[r] = cstart + diagIdx[r]; 2943 } else { 2944 a[r] = offdiagA[r]; 2945 idx[r] = cmap[offdiagIdx[r]]; 2946 } 2947 } 2948 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2949 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2950 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2951 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2952 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2953 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2954 PetscFunctionReturn(0); 2955 } 2956 2957 #undef __FUNCT__ 2958 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2959 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2960 { 2961 PetscErrorCode ierr; 2962 Mat *dummy; 2963 2964 PetscFunctionBegin; 2965 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2966 *newmat = *dummy; 2967 ierr = PetscFree(dummy);CHKERRQ(ierr); 2968 PetscFunctionReturn(0); 2969 } 2970 2971 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2972 2973 #undef __FUNCT__ 
2974 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 2975 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,PetscScalar **values) 2976 { 2977 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2978 PetscErrorCode ierr; 2979 2980 PetscFunctionBegin; 2981 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2982 PetscFunctionReturn(0); 2983 } 2984 2985 2986 /* -------------------------------------------------------------------*/ 2987 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2988 MatGetRow_MPIAIJ, 2989 MatRestoreRow_MPIAIJ, 2990 MatMult_MPIAIJ, 2991 /* 4*/ MatMultAdd_MPIAIJ, 2992 MatMultTranspose_MPIAIJ, 2993 MatMultTransposeAdd_MPIAIJ, 2994 #ifdef PETSC_HAVE_PBGL 2995 MatSolve_MPIAIJ, 2996 #else 2997 0, 2998 #endif 2999 0, 3000 0, 3001 /*10*/ 0, 3002 0, 3003 0, 3004 MatSOR_MPIAIJ, 3005 MatTranspose_MPIAIJ, 3006 /*15*/ MatGetInfo_MPIAIJ, 3007 MatEqual_MPIAIJ, 3008 MatGetDiagonal_MPIAIJ, 3009 MatDiagonalScale_MPIAIJ, 3010 MatNorm_MPIAIJ, 3011 /*20*/ MatAssemblyBegin_MPIAIJ, 3012 MatAssemblyEnd_MPIAIJ, 3013 MatSetOption_MPIAIJ, 3014 MatZeroEntries_MPIAIJ, 3015 /*24*/ MatZeroRows_MPIAIJ, 3016 0, 3017 #ifdef PETSC_HAVE_PBGL 3018 0, 3019 #else 3020 0, 3021 #endif 3022 0, 3023 0, 3024 /*29*/ MatSetUp_MPIAIJ, 3025 #ifdef PETSC_HAVE_PBGL 3026 0, 3027 #else 3028 0, 3029 #endif 3030 0, 3031 0, 3032 0, 3033 /*34*/ MatDuplicate_MPIAIJ, 3034 0, 3035 0, 3036 0, 3037 0, 3038 /*39*/ MatAXPY_MPIAIJ, 3039 MatGetSubMatrices_MPIAIJ, 3040 MatIncreaseOverlap_MPIAIJ, 3041 MatGetValues_MPIAIJ, 3042 MatCopy_MPIAIJ, 3043 /*44*/ MatGetRowMax_MPIAIJ, 3044 MatScale_MPIAIJ, 3045 0, 3046 0, 3047 MatZeroRowsColumns_MPIAIJ, 3048 /*49*/ 0, 3049 0, 3050 0, 3051 0, 3052 0, 3053 /*54*/ MatFDColoringCreate_MPIAIJ, 3054 0, 3055 MatSetUnfactored_MPIAIJ, 3056 MatPermute_MPIAIJ, 3057 0, 3058 /*59*/ MatGetSubMatrix_MPIAIJ, 3059 MatDestroy_MPIAIJ, 3060 MatView_MPIAIJ, 3061 0, 3062 0, 3063 /*64*/ 0, 3064 0, 3065 0, 3066 0, 3067 0, 3068 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3069 MatGetRowMinAbs_MPIAIJ, 3070 0, 3071 MatSetColoring_MPIAIJ, 3072 #if defined(PETSC_HAVE_ADIC) 3073 MatSetValuesAdic_MPIAIJ, 3074 #else 3075 0, 3076 #endif 3077 MatSetValuesAdifor_MPIAIJ, 3078 /*75*/ MatFDColoringApply_AIJ, 3079 0, 3080 0, 3081 0, 3082 0, 3083 /*80*/ 0, 3084 0, 3085 0, 3086 /*83*/ MatLoad_MPIAIJ, 3087 0, 3088 0, 3089 0, 3090 0, 3091 0, 3092 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3093 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3094 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3095 MatPtAP_Basic, 3096 MatPtAPSymbolic_MPIAIJ, 3097 /*94*/ MatPtAPNumeric_MPIAIJ, 3098 0, 3099 0, 3100 0, 3101 0, 3102 /*99*/ 0, 3103 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3104 MatPtAPNumeric_MPIAIJ_MPIAIJ, 3105 MatConjugate_MPIAIJ, 3106 0, 3107 /*104*/MatSetValuesRow_MPIAIJ, 3108 MatRealPart_MPIAIJ, 3109 MatImaginaryPart_MPIAIJ, 3110 0, 3111 0, 3112 /*109*/0, 3113 MatGetRedundantMatrix_MPIAIJ, 3114 MatGetRowMin_MPIAIJ, 3115 0, 3116 0, 3117 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3118 0, 3119 0, 3120 0, 3121 0, 3122 /*119*/0, 3123 0, 3124 0, 3125 0, 3126 MatGetMultiProcBlock_MPIAIJ, 3127 /*124*/MatFindNonZeroRows_MPIAIJ, 3128 MatGetColumnNorms_MPIAIJ, 3129 MatInvertBlockDiagonal_MPIAIJ, 3130 0, 3131 MatGetSubMatricesParallel_MPIAIJ, 3132 /*129*/0, 3133 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3134 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3135 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3136 0, 3137 /*134*/0, 3138 0, 3139 0, 3140 0, 3141 0 3142 }; 3143 3144 /* ----------------------------------------------------------------------------------------*/ 3145 3146 EXTERN_C_BEGIN 3147 #undef __FUNCT__ 3148 
#define __FUNCT__ "MatStoreValues_MPIAIJ" 3149 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3150 { 3151 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3152 PetscErrorCode ierr; 3153 3154 PetscFunctionBegin; 3155 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3156 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3157 PetscFunctionReturn(0); 3158 } 3159 EXTERN_C_END 3160 3161 EXTERN_C_BEGIN 3162 #undef __FUNCT__ 3163 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3164 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3165 { 3166 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3167 PetscErrorCode ierr; 3168 3169 PetscFunctionBegin; 3170 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3171 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3172 PetscFunctionReturn(0); 3173 } 3174 EXTERN_C_END 3175 3176 EXTERN_C_BEGIN 3177 #undef __FUNCT__ 3178 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3179 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3180 { 3181 Mat_MPIAIJ *b; 3182 PetscErrorCode ierr; 3183 PetscInt i; 3184 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3185 3186 PetscFunctionBegin; 3187 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3188 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3189 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3190 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3191 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3192 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3193 3194 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3195 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3196 if (d_nnz) { 3197 for (i=0; i<B->rmap->n; i++) { 3198 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3199 } 3200 } 3201 if (o_nnz) { 3202 for (i=0; i<B->rmap->n; i++) { 3203 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3204 } 3205 } 3206 b = (Mat_MPIAIJ*)B->data; 3207 3208 if (!B->preallocated) { 3209 /* Explicitly create 2 MATSEQAIJ matrices. */ 3210 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3211 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3212 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3213 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3214 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3215 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3216 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3217 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3218 } 3219 3220 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3221 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3222 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). 
*/ 3223 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3224 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3225 B->preallocated = PETSC_TRUE; 3226 PetscFunctionReturn(0); 3227 } 3228 EXTERN_C_END 3229 3230 #undef __FUNCT__ 3231 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3232 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3233 { 3234 Mat mat; 3235 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3236 PetscErrorCode ierr; 3237 3238 PetscFunctionBegin; 3239 *newmat = 0; 3240 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3241 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3242 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3243 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3244 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3245 a = (Mat_MPIAIJ*)mat->data; 3246 3247 mat->factortype = matin->factortype; 3248 mat->rmap->bs = matin->rmap->bs; 3249 mat->cmap->bs = matin->cmap->bs; 3250 mat->assembled = PETSC_TRUE; 3251 mat->insertmode = NOT_SET_VALUES; 3252 mat->preallocated = PETSC_TRUE; 3253 3254 a->size = oldmat->size; 3255 a->rank = oldmat->rank; 3256 a->donotstash = oldmat->donotstash; 3257 a->roworiented = oldmat->roworiented; 3258 a->rowindices = 0; 3259 a->rowvalues = 0; 3260 a->getrowactive = PETSC_FALSE; 3261 3262 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3263 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3264 3265 if (oldmat->colmap) { 3266 #if defined (PETSC_USE_CTABLE) 3267 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3268 #else 3269 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3270 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3271 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3272 #endif 3273 } else a->colmap = 0; 3274 if (oldmat->garray) { 3275 PetscInt len; 3276 len = oldmat->B->cmap->n; 3277 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3278 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3279 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3280 } else a->garray = 0; 3281 3282 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3283 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3284 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3285 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3286 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3287 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3288 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3289 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3290 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3291 *newmat = mat; 3292 PetscFunctionReturn(0); 3293 } 3294 3295 3296 3297 #undef __FUNCT__ 3298 #define __FUNCT__ "MatLoad_MPIAIJ" 3299 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3300 { 3301 PetscScalar *vals,*svals; 3302 MPI_Comm comm = ((PetscObject)viewer)->comm; 3303 PetscErrorCode ierr; 3304 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3305 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3306 PetscInt 
header[4],*rowlengths = 0,M,N,m,*cols; 3307 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 3308 PetscInt cend,cstart,n,*rowners,sizesset=1; 3309 int fd; 3310 3311 PetscFunctionBegin; 3312 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3313 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3314 if (!rank) { 3315 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3316 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3317 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3318 } 3319 3320 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3321 3322 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3323 M = header[1]; N = header[2]; 3324 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3325 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3326 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3327 3328 /* If global sizes are set, check if they are consistent with that given in the file */ 3329 if (sizesset) { 3330 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3331 } 3332 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3333 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3334 3335 /* determine ownership of all rows */ 3336 if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */ 3337 else m = newMat->rmap->n; /* Set by user */ 3338 3339 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3340 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3341 3342 /* First process needs enough room for process with most rows */ 3343 if (!rank) { 3344 mmax = rowners[1]; 3345 for (i=2; i<size; i++) { 3346 mmax = PetscMax(mmax,rowners[i]); 3347 } 3348 } else mmax = m; 3349 3350 rowners[0] = 0; 3351 for (i=2; i<=size; i++) { 3352 rowners[i] += rowners[i-1]; 3353 } 3354 rstart = rowners[rank]; 3355 rend = rowners[rank+1]; 3356 3357 /* distribute row lengths to all processors */ 3358 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3359 if (!rank) { 3360 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3361 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3362 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3363 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3364 for (j=0; j<m; j++) { 3365 procsnz[0] += ourlens[j]; 3366 } 3367 for (i=1; i<size; i++) { 3368 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3369 /* calculate the number of nonzeros on each processor */ 3370 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3371 procsnz[i] += rowlengths[j]; 3372 } 3373 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3374 } 3375 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3376 } else { 3377 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3378 } 3379 3380 if (!rank) { 3381 /* determine max buffer needed and allocate it */ 3382 maxnz = 0; 3383 for (i=0; i<size; i++) { 3384 maxnz = PetscMax(maxnz,procsnz[i]); 3385 } 3386 ierr = 
PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3387 3388 /* read in my part of the matrix column indices */ 3389 nz = procsnz[0]; 3390 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3391 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3392 3393 /* read in every one elses and ship off */ 3394 for (i=1; i<size; i++) { 3395 nz = procsnz[i]; 3396 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3397 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3398 } 3399 ierr = PetscFree(cols);CHKERRQ(ierr); 3400 } else { 3401 /* determine buffer space needed for message */ 3402 nz = 0; 3403 for (i=0; i<m; i++) { 3404 nz += ourlens[i]; 3405 } 3406 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3407 3408 /* receive message of column indices*/ 3409 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3410 } 3411 3412 /* determine column ownership if matrix is not square */ 3413 if (N != M) { 3414 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3415 else n = newMat->cmap->n; 3416 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3417 cstart = cend - n; 3418 } else { 3419 cstart = rstart; 3420 cend = rend; 3421 n = cend - cstart; 3422 } 3423 3424 /* loop over local rows, determining number of off diagonal entries */ 3425 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3426 jj = 0; 3427 for (i=0; i<m; i++) { 3428 for (j=0; j<ourlens[i]; j++) { 3429 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3430 jj++; 3431 } 3432 } 3433 3434 for (i=0; i<m; i++) { 3435 ourlens[i] -= offlens[i]; 3436 } 3437 if (!sizesset) { 3438 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3439 } 3440 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3441 3442 for (i=0; i<m; i++) { 3443 ourlens[i] += offlens[i]; 3444 } 3445 3446 if (!rank) { 3447 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3448 3449 /* read in my part of the matrix numerical values */ 3450 nz = procsnz[0]; 3451 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3452 3453 /* insert into matrix */ 3454 jj = rstart; 3455 smycols = mycols; 3456 svals = vals; 3457 for (i=0; i<m; i++) { 3458 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3459 smycols += ourlens[i]; 3460 svals += ourlens[i]; 3461 jj++; 3462 } 3463 3464 /* read in other processors and ship out */ 3465 for (i=1; i<size; i++) { 3466 nz = procsnz[i]; 3467 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3468 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3469 } 3470 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3471 } else { 3472 /* receive numeric values */ 3473 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3474 3475 /* receive message of values*/ 3476 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3477 3478 /* insert into matrix */ 3479 jj = rstart; 3480 smycols = mycols; 3481 svals = vals; 3482 for (i=0; i<m; i++) { 3483 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3484 smycols += ourlens[i]; 3485 svals += ourlens[i]; 3486 jj++; 3487 } 3488 } 3489 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3490 ierr = PetscFree(vals);CHKERRQ(ierr); 3491 ierr = PetscFree(mycols);CHKERRQ(ierr); 3492 ierr = PetscFree(rowners);CHKERRQ(ierr); 3493 3494 ierr = 
MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3495 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3496 PetscFunctionReturn(0); 3497 } 3498 3499 #undef __FUNCT__ 3500 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3501 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3502 { 3503 PetscErrorCode ierr; 3504 IS iscol_local; 3505 PetscInt csize; 3506 3507 PetscFunctionBegin; 3508 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3509 if (call == MAT_REUSE_MATRIX) { 3510 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3511 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3512 } else { 3513 PetscInt cbs; 3514 ierr = ISGetBlockSize(iscol,&cbs); CHKERRQ(ierr); 3515 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3516 ierr = ISSetBlockSize(iscol_local,cbs); CHKERRQ(ierr); 3517 } 3518 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3519 if (call == MAT_INITIAL_MATRIX) { 3520 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3521 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3522 } 3523 PetscFunctionReturn(0); 3524 } 3525 3526 #undef __FUNCT__ 3527 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3528 /* 3529 Not great since it makes two copies of the submatrix, first an SeqAIJ 3530 in local and then by concatenating the local matrices the end result. 3531 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3532 3533 Note: This requires a sequential iscol with all indices. 3534 */ 3535 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3536 { 3537 PetscErrorCode ierr; 3538 PetscMPIInt rank,size; 3539 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3540 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3541 Mat *local,M,Mreuse; 3542 MatScalar *vwork,*aa; 3543 MPI_Comm comm = ((PetscObject)mat)->comm; 3544 Mat_SeqAIJ *aij; 3545 3546 3547 PetscFunctionBegin; 3548 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3549 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3550 3551 if (call == MAT_REUSE_MATRIX) { 3552 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3553 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3554 local = &Mreuse; 3555 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3556 } else { 3557 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3558 Mreuse = *local; 3559 ierr = PetscFree(local);CHKERRQ(ierr); 3560 } 3561 3562 /* 3563 m - number of local rows 3564 n - number of columns (same on all processors) 3565 rstart - first row in new global matrix generated 3566 */ 3567 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3568 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3569 if (call == MAT_INITIAL_MATRIX) { 3570 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3571 ii = aij->i; 3572 jj = aij->j; 3573 3574 /* 3575 Determine the number of non-zeros in the diagonal and off-diagonal 3576 portions of the matrix in order to do correct preallocation 3577 */ 3578 3579 /* first get start and end of "diagonal" columns */ 3580 if (csize == PETSC_DECIDE) { 3581 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3582 if (mglobal == n) { 
/* square matrix */ 3583 nlocal = m; 3584 } else { 3585 nlocal = n/size + ((n % size) > rank); 3586 } 3587 } else { 3588 nlocal = csize; 3589 } 3590 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3591 rstart = rend - nlocal; 3592 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3593 3594 /* next, compute all the lengths */ 3595 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3596 olens = dlens + m; 3597 for (i=0; i<m; i++) { 3598 jend = ii[i+1] - ii[i]; 3599 olen = 0; 3600 dlen = 0; 3601 for (j=0; j<jend; j++) { 3602 if (*jj < rstart || *jj >= rend) olen++; 3603 else dlen++; 3604 jj++; 3605 } 3606 olens[i] = olen; 3607 dlens[i] = dlen; 3608 } 3609 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3610 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3611 ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); 3612 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3613 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3614 ierr = PetscFree(dlens);CHKERRQ(ierr); 3615 } else { 3616 PetscInt ml,nl; 3617 3618 M = *newmat; 3619 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3620 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3621 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3622 /* 3623 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3624 rather than the slower MatSetValues(). 3625 */ 3626 M->was_assembled = PETSC_TRUE; 3627 M->assembled = PETSC_FALSE; 3628 } 3629 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3630 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3631 ii = aij->i; 3632 jj = aij->j; 3633 aa = aij->a; 3634 for (i=0; i<m; i++) { 3635 row = rstart + i; 3636 nz = ii[i+1] - ii[i]; 3637 cwork = jj; jj += nz; 3638 vwork = aa; aa += nz; 3639 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3640 } 3641 3642 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3643 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3644 *newmat = M; 3645 3646 /* save submatrix used in processor for next request */ 3647 if (call == MAT_INITIAL_MATRIX) { 3648 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3649 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3650 } 3651 3652 PetscFunctionReturn(0); 3653 } 3654 3655 EXTERN_C_BEGIN 3656 #undef __FUNCT__ 3657 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3658 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3659 { 3660 PetscInt m,cstart, cend,j,nnz,i,d; 3661 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3662 const PetscInt *JJ; 3663 PetscScalar *values; 3664 PetscErrorCode ierr; 3665 3666 PetscFunctionBegin; 3667 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3668 3669 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3670 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3671 m = B->rmap->n; 3672 cstart = B->cmap->rstart; 3673 cend = B->cmap->rend; 3674 rstart = B->rmap->rstart; 3675 3676 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3677 3678 #if defined(PETSC_USE_DEBUGGING) 3679 for (i=0; i<m; i++) { 3680 nnz = Ii[i+1]- Ii[i]; 3681 JJ = J + Ii[i]; 3682 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of 
columns",i,nnz); 3683 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3684 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3685 } 3686 #endif 3687 3688 for (i=0; i<m; i++) { 3689 nnz = Ii[i+1]- Ii[i]; 3690 JJ = J + Ii[i]; 3691 nnz_max = PetscMax(nnz_max,nnz); 3692 d = 0; 3693 for (j=0; j<nnz; j++) { 3694 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3695 } 3696 d_nnz[i] = d; 3697 o_nnz[i] = nnz - d; 3698 } 3699 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3700 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3701 3702 if (v) values = (PetscScalar*)v; 3703 else { 3704 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3705 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3706 } 3707 3708 for (i=0; i<m; i++) { 3709 ii = i + rstart; 3710 nnz = Ii[i+1]- Ii[i]; 3711 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3712 } 3713 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3714 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3715 3716 if (!v) { 3717 ierr = PetscFree(values);CHKERRQ(ierr); 3718 } 3719 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3720 PetscFunctionReturn(0); 3721 } 3722 EXTERN_C_END 3723 3724 #undef __FUNCT__ 3725 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3726 /*@ 3727 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3728 (the default parallel PETSc format). 3729 3730 Collective on MPI_Comm 3731 3732 Input Parameters: 3733 + B - the matrix 3734 . i - the indices into j for the start of each local row (starts with zero) 3735 . j - the column indices for each local row (starts with zero) 3736 - v - optional values in the matrix 3737 3738 Level: developer 3739 3740 Notes: 3741 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3742 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3743 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3744 3745 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3746 3747 The format which is used for the sparse matrix input, is equivalent to a 3748 row-major ordering.. 
i.e for the following matrix, the input data expected is 3749 as shown: 3750 3751 1 0 0 3752 2 0 3 P0 3753 ------- 3754 4 5 6 P1 3755 3756 Process0 [P0]: rows_owned=[0,1] 3757 i = {0,1,3} [size = nrow+1 = 2+1] 3758 j = {0,0,2} [size = nz = 6] 3759 v = {1,2,3} [size = nz = 6] 3760 3761 Process1 [P1]: rows_owned=[2] 3762 i = {0,3} [size = nrow+1 = 1+1] 3763 j = {0,1,2} [size = nz = 6] 3764 v = {4,5,6} [size = nz = 6] 3765 3766 .keywords: matrix, aij, compressed row, sparse, parallel 3767 3768 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3769 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3770 @*/ 3771 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3772 { 3773 PetscErrorCode ierr; 3774 3775 PetscFunctionBegin; 3776 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3777 PetscFunctionReturn(0); 3778 } 3779 3780 #undef __FUNCT__ 3781 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3782 /*@C 3783 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3784 (the default parallel PETSc format). For good matrix assembly performance 3785 the user should preallocate the matrix storage by setting the parameters 3786 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3787 performance can be increased by more than a factor of 50. 3788 3789 Collective on MPI_Comm 3790 3791 Input Parameters: 3792 + A - the matrix 3793 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3794 (same value is used for all local rows) 3795 . d_nnz - array containing the number of nonzeros in the various rows of the 3796 DIAGONAL portion of the local submatrix (possibly different for each row) 3797 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3798 The size of this array is equal to the number of local rows, i.e 'm'. 3799 For matrices that will be factored, you must leave room for (and set) 3800 the diagonal entry even if it is zero. 3801 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3802 submatrix (same value is used for all local rows). 3803 - o_nnz - array containing the number of nonzeros in the various rows of the 3804 OFF-DIAGONAL portion of the local submatrix (possibly different for 3805 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3806 structure. The size of this array is equal to the number 3807 of local rows, i.e 'm'. 3808 3809 If the *_nnz parameter is given then the *_nz parameter is ignored 3810 3811 The AIJ format (also called the Yale sparse matrix format or 3812 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3813 storage. The stored row and column indices begin with zero. 3814 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3815 3816 The parallel matrix is partitioned such that the first m0 rows belong to 3817 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3818 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 
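
   A typical assembly sequence with this routine looks like the following sketch, where the
   sizes m,n,M,N and the d_nz/d_nnz/o_nz/o_nnz arguments are placeholders to be chosen for the
   problem at hand:
.vb
      MatCreate(comm,&A);
      MatSetType(A,MATMPIAIJ);
      MatSetSizes(A,m,n,M,N);
      MatMPIAIJSetPreallocation(A,d_nz,d_nnz,o_nz,o_nnz);
      /* then MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() */
.ve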
3819 3820 The DIAGONAL portion of the local submatrix of a processor can be defined 3821 as the submatrix which is obtained by extracting the part corresponding to 3822 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3823 first row that belongs to the processor, r2 is the last row belonging to 3824 this processor, and c1-c2 is the range of indices of the local part of a 3825 vector suitable for applying the matrix to. This is an mxn matrix. In the 3826 common case of a square matrix, the row and column ranges are the same and 3827 the DIAGONAL part is also square. The remaining portion of the local 3828 submatrix (mxN) constitutes the OFF-DIAGONAL portion. 3829
3830 If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored. 3831
3832 You can call MatGetInfo() to get information on how effective the preallocation was; 3833 for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded. 3834 You can also run with the option -info and look for messages with the string 3835 malloc in them to see if additional memory allocation was needed. 3836
3837 Example usage: 3838
3839 Consider the following 8x8 matrix with 34 non-zero values that is 3840 assembled across 3 processors. Let us assume that proc0 owns 3 rows, 3841 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3842 as follows: 3843
3844 .vb 3845 1 2 0 | 0 3 0 | 0 4 3846 Proc0 0 5 6 | 7 0 0 | 8 0 3847 9 0 10 | 11 0 0 | 12 0 3848 ------------------------------------- 3849 13 0 14 | 15 16 17 | 0 0 3850 Proc1 0 18 0 | 19 20 21 | 0 0 3851 0 0 0 | 22 23 0 | 24 0 3852 ------------------------------------- 3853 Proc2 25 26 27 | 0 0 28 | 29 0 3854 30 0 0 | 31 32 33 | 0 34 3855 .ve 3856
3857 This can be represented as a collection of submatrices as: 3858
3859 .vb 3860 A B C 3861 D E F 3862 G H I 3863 .ve 3864
3865 Where the submatrices A,B,C are owned by proc0, D,E,F are 3866 owned by proc1, G,H,I are owned by proc2. 3867
3868 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3869 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3870 The 'M','N' parameters are 8,8, and have the same values on all procs. 3871
3872 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3873 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3874 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3875 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 3876 part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ 3877 matrix, and [DF] as another SeqAIJ matrix. 3878
3879 When the d_nz, o_nz parameters are specified, d_nz storage elements are 3880 allocated for every row of the local diagonal submatrix, and o_nz 3881 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3882 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 3883 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 3884 In this case, the values of d_nz,o_nz are: 3885 .vb 3886 proc0 : d_nz = 2, o_nz = 2 3887 proc1 : d_nz = 3, o_nz = 2 3888 proc2 : d_nz = 1, o_nz = 4 3889 .ve 3890 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3891 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10 3892 for proc2, i.e. we are using 12+15+10=37 storage locations to store 3893 34 values. 3894
3895 When the d_nnz, o_nnz parameters are specified, the storage is specified 3896 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3897 In the above case the values for d_nnz,o_nnz are: 3898 .vb 3899 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3900 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3901 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3902 .ve 3903 Here the space allocated is sum of all the above values i.e 34, and 3904 hence pre-allocation is perfect. 3905 3906 Level: intermediate 3907 3908 .keywords: matrix, aij, compressed row, sparse, parallel 3909 3910 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3911 MPIAIJ, MatGetInfo() 3912 @*/ 3913 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3914 { 3915 PetscErrorCode ierr; 3916 3917 PetscFunctionBegin; 3918 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3919 PetscValidType(B,1); 3920 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3921 PetscFunctionReturn(0); 3922 } 3923 3924 #undef __FUNCT__ 3925 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3926 /*@ 3927 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3928 CSR format the local rows. 3929 3930 Collective on MPI_Comm 3931 3932 Input Parameters: 3933 + comm - MPI communicator 3934 . m - number of local rows (Cannot be PETSC_DECIDE) 3935 . n - This value should be the same as the local size used in creating the 3936 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3937 calculated if N is given) For square matrices n is almost always m. 3938 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3939 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3940 . i - row indices 3941 . j - column indices 3942 - a - matrix values 3943 3944 Output Parameter: 3945 . mat - the matrix 3946 3947 Level: intermediate 3948 3949 Notes: 3950 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3951 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3952 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3953 3954 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3955 3956 The format which is used for the sparse matrix input, is equivalent to a 3957 row-major ordering.. 
i.e for the following matrix, the input data expected is 3958 as shown: 3959 3960 1 0 0 3961 2 0 3 P0 3962 ------- 3963 4 5 6 P1 3964 3965 Process0 [P0]: rows_owned=[0,1] 3966 i = {0,1,3} [size = nrow+1 = 2+1] 3967 j = {0,0,2} [size = nz = 6] 3968 v = {1,2,3} [size = nz = 6] 3969 3970 Process1 [P1]: rows_owned=[2] 3971 i = {0,3} [size = nrow+1 = 1+1] 3972 j = {0,1,2} [size = nz = 6] 3973 v = {4,5,6} [size = nz = 6] 3974 3975 .keywords: matrix, aij, compressed row, sparse, parallel 3976 3977 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3978 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3979 @*/ 3980 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3981 { 3982 PetscErrorCode ierr; 3983 3984 PetscFunctionBegin; 3985 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3986 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3987 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3988 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3989 /* ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); */ 3990 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3991 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3992 PetscFunctionReturn(0); 3993 } 3994 3995 #undef __FUNCT__ 3996 #define __FUNCT__ "MatCreateAIJ" 3997 /*@C 3998 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 3999 (the default parallel PETSc format). For good matrix assembly performance 4000 the user should preallocate the matrix storage by setting the parameters 4001 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4002 performance can be increased by more than a factor of 50. 4003 4004 Collective on MPI_Comm 4005 4006 Input Parameters: 4007 + comm - MPI communicator 4008 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4009 This value should be the same as the local size used in creating the 4010 y vector for the matrix-vector product y = Ax. 4011 . n - This value should be the same as the local size used in creating the 4012 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4013 calculated if N is given) For square matrices n is almost always m. 4014 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4015 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4016 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4017 (same value is used for all local rows) 4018 . d_nnz - array containing the number of nonzeros in the various rows of the 4019 DIAGONAL portion of the local submatrix (possibly different for each row) 4020 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 4021 The size of this array is equal to the number of local rows, i.e 'm'. 4022 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4023 submatrix (same value is used for all local rows). 4024 - o_nnz - array containing the number of nonzeros in the various rows of the 4025 OFF-DIAGONAL portion of the local submatrix (possibly different for 4026 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 4027 structure. The size of this array is equal to the number 4028 of local rows, i.e 'm'. 4029 4030 Output Parameter: 4031 . 
A - the matrix 4032 4033 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4034 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4035 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4036 4037 Notes: 4038 If the *_nnz parameter is given then the *_nz parameter is ignored 4039 4040 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4041 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4042 storage requirements for this matrix. 4043 4044 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4045 processor than it must be used on all processors that share the object for 4046 that argument. 4047 4048 The user MUST specify either the local or global matrix dimensions 4049 (possibly both). 4050 4051 The parallel matrix is partitioned across processors such that the 4052 first m0 rows belong to process 0, the next m1 rows belong to 4053 process 1, the next m2 rows belong to process 2 etc.. where 4054 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4055 values corresponding to [m x N] submatrix. 4056 4057 The columns are logically partitioned with the n0 columns belonging 4058 to 0th partition, the next n1 columns belonging to the next 4059 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4060 4061 The DIAGONAL portion of the local submatrix on any given processor 4062 is the submatrix corresponding to the rows and columns m,n 4063 corresponding to the given processor. i.e diagonal matrix on 4064 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4065 etc. The remaining portion of the local submatrix [m x (N-n)] 4066 constitute the OFF-DIAGONAL portion. The example below better 4067 illustrates this concept. 4068 4069 For a square global matrix we define each processor's diagonal portion 4070 to be its local rows and the corresponding columns (a square submatrix); 4071 each processor's off-diagonal portion encompasses the remainder of the 4072 local matrix (a rectangular submatrix). 4073 4074 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4075 4076 When calling this routine with a single process communicator, a matrix of 4077 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4078 type of communicator, use the construction mechanism: 4079 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4080 4081 By default, this format uses inodes (identical nodes) when possible. 4082 We search for consecutive rows with the same nonzero structure, thereby 4083 reusing matrix information to achieve increased efficiency. 4084 4085 Options Database Keys: 4086 + -mat_no_inode - Do not use inodes 4087 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4088 - -mat_aij_oneindex - Internally use indexing starting at 1 4089 rather than 0. Note that when calling MatSetValues(), 4090 the user still MUST index entries starting at 0! 4091 4092 4093 Example usage: 4094 4095 Consider the following 8x8 matrix with 34 non-zero values, that is 4096 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4097 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 4098 as follows: 4099 4100 .vb 4101 1 2 0 | 0 3 0 | 0 4 4102 Proc0 0 5 6 | 7 0 0 | 8 0 4103 9 0 10 | 11 0 0 | 12 0 4104 ------------------------------------- 4105 13 0 14 | 15 16 17 | 0 0 4106 Proc1 0 18 0 | 19 20 21 | 0 0 4107 0 0 0 | 22 23 0 | 24 0 4108 ------------------------------------- 4109 Proc2 25 26 27 | 0 0 28 | 29 0 4110 30 0 0 | 31 32 33 | 0 34 4111 .ve 4112 4113 This can be represented as a collection of submatrices as: 4114 4115 .vb 4116 A B C 4117 D E F 4118 G H I 4119 .ve 4120 4121 Where the submatrices A,B,C are owned by proc0, D,E,F are 4122 owned by proc1, G,H,I are owned by proc2. 4123 4124 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4125 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4126 The 'M','N' parameters are 8,8, and have the same values on all procs. 4127 4128 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4129 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4130 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4131 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4132 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4133 matrix, ans [DF] as another SeqAIJ matrix. 4134 4135 When d_nz, o_nz parameters are specified, d_nz storage elements are 4136 allocated for every row of the local diagonal submatrix, and o_nz 4137 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4138 One way to choose d_nz and o_nz is to use the max nonzerors per local 4139 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4140 In this case, the values of d_nz,o_nz are: 4141 .vb 4142 proc0 : dnz = 2, o_nz = 2 4143 proc1 : dnz = 3, o_nz = 2 4144 proc2 : dnz = 1, o_nz = 4 4145 .ve 4146 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4147 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4148 for proc3. i.e we are using 12+15+10=37 storage locations to store 4149 34 values. 4150 4151 When d_nnz, o_nnz parameters are specified, the storage is specified 4152 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4153 In the above case the values for d_nnz,o_nnz are: 4154 .vb 4155 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4156 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4157 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4158 .ve 4159 Here the space allocated is sum of all the above values i.e 34, and 4160 hence pre-allocation is perfect. 
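
   For instance, with the d_nnz and o_nnz values listed above, process 0 could create its share of
   this matrix with the following sketch (the variable names are illustrative only; processes 1 and
   2 would pass their own m, n, d_nnz, and o_nnz values):
.vb
      PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
      Mat      A;
      MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve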
4161 4162 Level: intermediate 4163 4164 .keywords: matrix, aij, compressed row, sparse, parallel 4165 4166 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4167 MPIAIJ, MatCreateMPIAIJWithArrays() 4168 @*/ 4169 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4170 { 4171 PetscErrorCode ierr; 4172 PetscMPIInt size; 4173 4174 PetscFunctionBegin; 4175 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4176 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4177 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4178 if (size > 1) { 4179 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4180 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4181 } else { 4182 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4183 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4184 } 4185 PetscFunctionReturn(0); 4186 } 4187 4188 #undef __FUNCT__ 4189 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4190 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 4191 { 4192 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4193 4194 PetscFunctionBegin; 4195 *Ad = a->A; 4196 *Ao = a->B; 4197 *colmap = a->garray; 4198 PetscFunctionReturn(0); 4199 } 4200 4201 #undef __FUNCT__ 4202 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4203 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4204 { 4205 PetscErrorCode ierr; 4206 PetscInt i; 4207 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4208 4209 PetscFunctionBegin; 4210 if (coloring->ctype == IS_COLORING_GLOBAL) { 4211 ISColoringValue *allcolors,*colors; 4212 ISColoring ocoloring; 4213 4214 /* set coloring for diagonal portion */ 4215 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4216 4217 /* set coloring for off-diagonal portion */ 4218 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4219 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4220 for (i=0; i<a->B->cmap->n; i++) { 4221 colors[i] = allcolors[a->garray[i]]; 4222 } 4223 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4224 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4225 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4226 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4227 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4228 ISColoringValue *colors; 4229 PetscInt *larray; 4230 ISColoring ocoloring; 4231 4232 /* set coloring for diagonal portion */ 4233 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4234 for (i=0; i<a->A->cmap->n; i++) { 4235 larray[i] = i + A->cmap->rstart; 4236 } 4237 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4238 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4239 for (i=0; i<a->A->cmap->n; i++) { 4240 colors[i] = coloring->colors[larray[i]]; 4241 } 4242 ierr = PetscFree(larray);CHKERRQ(ierr); 4243 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4244 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4245 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4246 4247 /* set coloring for off-diagonal portion */ 4248 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4249 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4250 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4251 for (i=0; i<a->B->cmap->n; i++) { 4252 colors[i] = coloring->colors[larray[i]]; 4253 } 4254 ierr = PetscFree(larray);CHKERRQ(ierr); 4255 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4256 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4257 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4258 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4259 4260 PetscFunctionReturn(0); 4261 } 4262 4263 #if defined(PETSC_HAVE_ADIC) 4264 #undef __FUNCT__ 4265 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 4266 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 4267 { 4268 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4269 PetscErrorCode ierr; 4270 4271 PetscFunctionBegin; 4272 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 4273 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 4274 PetscFunctionReturn(0); 4275 } 4276 #endif 4277 4278 #undef __FUNCT__ 4279 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4280 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4281 { 4282 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4283 PetscErrorCode ierr; 4284 4285 PetscFunctionBegin; 4286 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4287 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4288 PetscFunctionReturn(0); 4289 } 4290 4291 #undef __FUNCT__ 4292 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4293 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4294 { 4295 PetscErrorCode ierr; 4296 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4297 PetscInt *indx; 4298 4299 PetscFunctionBegin; 4300 /* This routine will ONLY return MPIAIJ type matrix */ 4301 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4302 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4303 if (n == PETSC_DECIDE){ 4304 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4305 } 4306 /* Check sum(n) = N */ 4307 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4308 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4309 4310 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4311 rstart -= m; 4312 4313 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4314 for (i=0;i<m;i++) { 4315 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4316 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4317 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4318 } 4319 4320 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4321 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE); CHKERRQ(ierr); 4322 ierr = MatSetBlockSizes(*outmat,bs,cbs); CHKERRQ(ierr); 4323 ierr = MatSetType(*outmat,MATMPIAIJ); CHKERRQ(ierr); 4324 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4325 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4326 PetscFunctionReturn(0); 4327 } 4328 4329 #undef __FUNCT__ 4330 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4331 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4332 { 4333 PetscErrorCode ierr; 4334 PetscInt 
m,N,i,rstart,nnz,Ii; 4335 PetscInt *indx; 4336 PetscScalar *values; 4337 4338 PetscFunctionBegin; 4339 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4340 ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4341 for (i=0;i<m;i++) { 4342 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4343 Ii = i + rstart; 4344 ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4345 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4346 } 4347 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4348 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4349 PetscFunctionReturn(0); 4350 } 4351 4352 #undef __FUNCT__ 4353 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4354 /*@ 4355 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4356 matrices from each processor 4357 4358 Collective on MPI_Comm 4359 4360 Input Parameters: 4361 + comm - the communicators the parallel matrix will live on 4362 . inmat - the input sequential matrices 4363 . n - number of local columns (or PETSC_DECIDE) 4364 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4365 4366 Output Parameter: 4367 . outmat - the parallel matrix generated 4368 4369 Level: advanced 4370 4371 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4372 4373 @*/ 4374 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4375 { 4376 PetscErrorCode ierr; 4377 4378 PetscFunctionBegin; 4379 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4380 if (scall == MAT_INITIAL_MATRIX){ 4381 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4382 } 4383 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4384 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4385 PetscFunctionReturn(0); 4386 } 4387 4388 #undef __FUNCT__ 4389 #define __FUNCT__ "MatFileSplit" 4390 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4391 { 4392 PetscErrorCode ierr; 4393 PetscMPIInt rank; 4394 PetscInt m,N,i,rstart,nnz; 4395 size_t len; 4396 const PetscInt *indx; 4397 PetscViewer out; 4398 char *name; 4399 Mat B; 4400 const PetscScalar *values; 4401 4402 PetscFunctionBegin; 4403 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4404 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4405 /* Should this be the type of the diagonal block of A? 
*/ 4406 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4407 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4408 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4409 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4410 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4411 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4412 for (i=0;i<m;i++) { 4413 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4414 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4415 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4416 } 4417 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4418 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4419 4420 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4421 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4422 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4423 sprintf(name,"%s.%d",outfile,rank); 4424 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4425 ierr = PetscFree(name); 4426 ierr = MatView(B,out);CHKERRQ(ierr); 4427 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4428 ierr = MatDestroy(&B);CHKERRQ(ierr); 4429 PetscFunctionReturn(0); 4430 } 4431 4432 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4433 #undef __FUNCT__ 4434 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4435 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4436 { 4437 PetscErrorCode ierr; 4438 Mat_Merge_SeqsToMPI *merge; 4439 PetscContainer container; 4440 4441 PetscFunctionBegin; 4442 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4443 if (container) { 4444 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4445 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4446 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4447 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4448 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4449 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4450 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4451 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4452 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4453 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4454 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4455 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4456 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4457 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4458 ierr = PetscFree(merge);CHKERRQ(ierr); 4459 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4460 } 4461 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4462 PetscFunctionReturn(0); 4463 } 4464 4465 #include <../src/mat/utils/freespace.h> 4466 #include <petscbt.h> 4467 4468 #undef __FUNCT__ 4469 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4470 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4471 { 4472 PetscErrorCode ierr; 4473 MPI_Comm comm=((PetscObject)mpimat)->comm; 4474 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4475 PetscMPIInt size,rank,taga,*len_s; 4476 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4477 PetscInt proc,m; 4478 PetscInt **buf_ri,**buf_rj; 4479 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4480 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4481 MPI_Request *s_waits,*r_waits; 4482 MPI_Status *status; 4483 MatScalar *aa=a->a; 4484 MatScalar **abuf_r,*ba_i; 4485 Mat_Merge_SeqsToMPI *merge; 4486 PetscContainer container; 4487 4488 PetscFunctionBegin; 4489 ierr = 
PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4490 4491 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4492 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4493 4494 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4495 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4496 4497 bi = merge->bi; 4498 bj = merge->bj; 4499 buf_ri = merge->buf_ri; 4500 buf_rj = merge->buf_rj; 4501 4502 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4503 owners = merge->rowmap->range; 4504 len_s = merge->len_s; 4505 4506 /* send and recv matrix values */ 4507 /*-----------------------------*/ 4508 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4509 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4510 4511 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4512 for (proc=0,k=0; proc<size; proc++){ 4513 if (!len_s[proc]) continue; 4514 i = owners[proc]; 4515 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4516 k++; 4517 } 4518 4519 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4520 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4521 ierr = PetscFree(status);CHKERRQ(ierr); 4522 4523 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4524 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4525 4526 /* insert mat values of mpimat */ 4527 /*----------------------------*/ 4528 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4529 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4530 4531 for (k=0; k<merge->nrecv; k++){ 4532 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4533 nrows = *(buf_ri_k[k]); 4534 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4535 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4536 } 4537 4538 /* set values of ba */ 4539 m = merge->rowmap->n; 4540 for (i=0; i<m; i++) { 4541 arow = owners[rank] + i; 4542 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4543 bnzi = bi[i+1] - bi[i]; 4544 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4545 4546 /* add local non-zero vals of this proc's seqmat into ba */ 4547 anzi = ai[arow+1] - ai[arow]; 4548 aj = a->j + ai[arow]; 4549 aa = a->a + ai[arow]; 4550 nextaj = 0; 4551 for (j=0; nextaj<anzi; j++){ 4552 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4553 ba_i[j] += aa[nextaj++]; 4554 } 4555 } 4556 4557 /* add received vals into ba */ 4558 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4559 /* i-th row */ 4560 if (i == *nextrow[k]) { 4561 anzi = *(nextai[k]+1) - *nextai[k]; 4562 aj = buf_rj[k] + *(nextai[k]); 4563 aa = abuf_r[k] + *(nextai[k]); 4564 nextaj = 0; 4565 for (j=0; nextaj<anzi; j++){ 4566 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4567 ba_i[j] += aa[nextaj++]; 4568 } 4569 } 4570 nextrow[k]++; nextai[k]++; 4571 } 4572 } 4573 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4574 } 4575 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4576 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4577 4578 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4579 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4580 ierr = 
PetscFree(ba_i);CHKERRQ(ierr); 4581 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4582 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4583 PetscFunctionReturn(0); 4584 } 4585 4586 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4587 4588 #undef __FUNCT__ 4589 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4590 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4591 { 4592 PetscErrorCode ierr; 4593 Mat B_mpi; 4594 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4595 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4596 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4597 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4598 PetscInt len,proc,*dnz,*onz,bs,cbs; 4599 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4600 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4601 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4602 MPI_Status *status; 4603 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4604 PetscBT lnkbt; 4605 Mat_Merge_SeqsToMPI *merge; 4606 PetscContainer container; 4607 4608 PetscFunctionBegin; 4609 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4610 4611 /* make sure it is a PETSc comm */ 4612 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4613 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4614 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4615 4616 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4617 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4618 4619 /* determine row ownership */ 4620 /*---------------------------------------------------------*/ 4621 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4622 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4623 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4624 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4625 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4626 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4627 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4628 4629 m = merge->rowmap->n; 4630 M = merge->rowmap->N; 4631 owners = merge->rowmap->range; 4632 4633 /* determine the number of messages to send, their lengths */ 4634 /*---------------------------------------------------------*/ 4635 len_s = merge->len_s; 4636 4637 len = 0; /* length of buf_si[] */ 4638 merge->nsend = 0; 4639 for (proc=0; proc<size; proc++){ 4640 len_si[proc] = 0; 4641 if (proc == rank){ 4642 len_s[proc] = 0; 4643 } else { 4644 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4645 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4646 } 4647 if (len_s[proc]) { 4648 merge->nsend++; 4649 nrows = 0; 4650 for (i=owners[proc]; i<owners[proc+1]; i++){ 4651 if (ai[i+1] > ai[i]) nrows++; 4652 } 4653 len_si[proc] = 2*(nrows+1); 4654 len += len_si[proc]; 4655 } 4656 } 4657 4658 /* determine the number and length of messages to receive for ij-structure */ 4659 /*-------------------------------------------------------------------------*/ 4660 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4661 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4662 4663 /* post the Irecv of j-structure */ 4664 /*-------------------------------*/ 4665 ierr = 
PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4666 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4667 4668 /* post the Isend of j-structure */ 4669 /*--------------------------------*/ 4670 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4671 4672 for (proc=0, k=0; proc<size; proc++){ 4673 if (!len_s[proc]) continue; 4674 i = owners[proc]; 4675 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4676 k++; 4677 } 4678 4679 /* receives and sends of j-structure are complete */ 4680 /*------------------------------------------------*/ 4681 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4682 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4683 4684 /* send and recv i-structure */ 4685 /*---------------------------*/ 4686 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4687 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4688 4689 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4690 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4691 for (proc=0,k=0; proc<size; proc++){ 4692 if (!len_s[proc]) continue; 4693 /* form outgoing message for i-structure: 4694 buf_si[0]: nrows to be sent 4695 [1:nrows]: row index (global) 4696 [nrows+1:2*nrows+1]: i-structure index 4697 */ 4698 /*-------------------------------------------*/ 4699 nrows = len_si[proc]/2 - 1; 4700 buf_si_i = buf_si + nrows+1; 4701 buf_si[0] = nrows; 4702 buf_si_i[0] = 0; 4703 nrows = 0; 4704 for (i=owners[proc]; i<owners[proc+1]; i++){ 4705 anzi = ai[i+1] - ai[i]; 4706 if (anzi) { 4707 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4708 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4709 nrows++; 4710 } 4711 } 4712 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4713 k++; 4714 buf_si += len_si[proc]; 4715 } 4716 4717 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4718 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4719 4720 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4721 for (i=0; i<merge->nrecv; i++){ 4722 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4723 } 4724 4725 ierr = PetscFree(len_si);CHKERRQ(ierr); 4726 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4727 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4728 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4729 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4730 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4731 ierr = PetscFree(status);CHKERRQ(ierr); 4732 4733 /* compute a local seq matrix in each processor */ 4734 /*----------------------------------------------*/ 4735 /* allocate bi array and free space for accumulating nonzero column info */ 4736 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4737 bi[0] = 0; 4738 4739 /* create and initialize a linked list */ 4740 nlnk = N+1; 4741 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4742 4743 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4744 len = 0; 4745 len = ai[owners[rank+1]] - ai[owners[rank]]; 4746 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4747 current_space = free_space; 4748 4749 /* determine 
symbolic info for each local row */ 4750 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4751 4752 for (k=0; k<merge->nrecv; k++){ 4753 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4754 nrows = *buf_ri_k[k]; 4755 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4756 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4757 } 4758 4759 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4760 len = 0; 4761 for (i=0;i<m;i++) { 4762 bnzi = 0; 4763 /* add local non-zero cols of this proc's seqmat into lnk */ 4764 arow = owners[rank] + i; 4765 anzi = ai[arow+1] - ai[arow]; 4766 aj = a->j + ai[arow]; 4767 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4768 bnzi += nlnk; 4769 /* add received col data into lnk */ 4770 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4771 if (i == *nextrow[k]) { /* i-th row */ 4772 anzi = *(nextai[k]+1) - *nextai[k]; 4773 aj = buf_rj[k] + *nextai[k]; 4774 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4775 bnzi += nlnk; 4776 nextrow[k]++; nextai[k]++; 4777 } 4778 } 4779 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4780 4781 /* if free space is not available, make more free space */ 4782 if (current_space->local_remaining<bnzi) { 4783 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4784 nspacedouble++; 4785 } 4786 /* copy data into free space, then initialize lnk */ 4787 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4788 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4789 4790 current_space->array += bnzi; 4791 current_space->local_used += bnzi; 4792 current_space->local_remaining -= bnzi; 4793 4794 bi[i+1] = bi[i] + bnzi; 4795 } 4796 4797 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4798 4799 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4800 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4801 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4802 4803 /* create symbolic parallel matrix B_mpi */ 4804 /*---------------------------------------*/ 4805 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4806 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4807 if (n==PETSC_DECIDE) { 4808 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4809 } else { 4810 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4811 } 4812 ierr = MatSetBlockSizes(B_mpi,bs,cbs); CHKERRQ(ierr); 4813 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4814 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4815 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4816 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4817 4818 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4819 B_mpi->assembled = PETSC_FALSE; 4820 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4821 merge->bi = bi; 4822 merge->bj = bj; 4823 merge->buf_ri = buf_ri; 4824 merge->buf_rj = buf_rj; 4825 merge->coi = PETSC_NULL; 4826 merge->coj = PETSC_NULL; 4827 merge->owners_co = PETSC_NULL; 4828 4829 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4830 4831 /* attach the supporting struct to B_mpi for reuse */ 4832 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4833 ierr = 
PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4834 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4835 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4836 *mpimat = B_mpi; 4837 4838 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4839 PetscFunctionReturn(0); 4840 } 4841 4842 #undef __FUNCT__ 4843 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4844 /*@C 4845 MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding sequential 4846 matrices from each processor 4847 4848 Collective on MPI_Comm 4849 4850 Input Parameters: 4851 + comm - the communicator the parallel matrix will live on 4852 . seqmat - the input sequential matrix on each process 4853 . m - number of local rows (or PETSC_DECIDE) 4854 . n - number of local columns (or PETSC_DECIDE) 4855 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4856 4857 Output Parameter: 4858 . mpimat - the parallel matrix generated 4859 4860 Level: advanced 4861 4862 Notes: 4863 The dimensions of the sequential matrix on each processor MUST be the same. 4864 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be 4865 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4866 @*/ 4867 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4868 { 4869 PetscErrorCode ierr; 4870 PetscMPIInt size; 4871 4872 PetscFunctionBegin; 4873 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4874 if (size == 1){ 4875 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4876 if (scall == MAT_INITIAL_MATRIX){ 4877 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4878 } else { 4879 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4880 } 4881 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4882 PetscFunctionReturn(0); 4883 } 4884 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4885 if (scall == MAT_INITIAL_MATRIX){ 4886 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4887 } 4888 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4889 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4890 PetscFunctionReturn(0); 4891 } 4892 4893 #undef __FUNCT__ 4894 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4895 /*@ 4896 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4897 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4898 with MatGetSize() 4899 4900 Not Collective 4901 4902 Input Parameters: 4903 + A - the matrix 4904 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4905 4906 Output Parameter: 4907 .
A_loc - the local sequential matrix generated 4908 4909 Level: developer 4910 4911 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4912 4913 @*/ 4914 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4915 { 4916 PetscErrorCode ierr; 4917 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4918 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4919 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4920 MatScalar *aa=a->a,*ba=b->a,*cam; 4921 PetscScalar *ca; 4922 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4923 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4924 PetscBool match; 4925 4926 PetscFunctionBegin; 4927 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4928 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4929 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4930 if (scall == MAT_INITIAL_MATRIX){ 4931 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4932 ci[0] = 0; 4933 for (i=0; i<am; i++){ 4934 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4935 } 4936 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4937 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4938 k = 0; 4939 for (i=0; i<am; i++) { 4940 ncols_o = bi[i+1] - bi[i]; 4941 ncols_d = ai[i+1] - ai[i]; 4942 /* off-diagonal portion of A */ 4943 for (jo=0; jo<ncols_o; jo++) { 4944 col = cmap[*bj]; 4945 if (col >= cstart) break; 4946 cj[k] = col; bj++; 4947 ca[k++] = *ba++; 4948 } 4949 /* diagonal portion of A */ 4950 for (j=0; j<ncols_d; j++) { 4951 cj[k] = cstart + *aj++; 4952 ca[k++] = *aa++; 4953 } 4954 /* off-diagonal portion of A */ 4955 for (j=jo; j<ncols_o; j++) { 4956 cj[k] = cmap[*bj++]; 4957 ca[k++] = *ba++; 4958 } 4959 } 4960 /* put together the new matrix */ 4961 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4962 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4963 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4964 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4965 mat->free_a = PETSC_TRUE; 4966 mat->free_ij = PETSC_TRUE; 4967 mat->nonew = 0; 4968 } else if (scall == MAT_REUSE_MATRIX){ 4969 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4970 ci = mat->i; cj = mat->j; cam = mat->a; 4971 for (i=0; i<am; i++) { 4972 /* off-diagonal portion of A */ 4973 ncols_o = bi[i+1] - bi[i]; 4974 for (jo=0; jo<ncols_o; jo++) { 4975 col = cmap[*bj]; 4976 if (col >= cstart) break; 4977 *cam++ = *ba++; bj++; 4978 } 4979 /* diagonal portion of A */ 4980 ncols_d = ai[i+1] - ai[i]; 4981 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4982 /* off-diagonal portion of A */ 4983 for (j=jo; j<ncols_o; j++) { 4984 *cam++ = *ba++; bj++; 4985 } 4986 } 4987 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4988 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4989 PetscFunctionReturn(0); 4990 } 4991 4992 #undef __FUNCT__ 4993 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 4994 /*@C 4995 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 4996 4997 Not Collective 4998 4999 Input Parameters: 5000 + A - the matrix 5001 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5002 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 5003 5004 Output Parameter: 5005 . 
A_loc - the local sequential matrix generated 5006 5007 Level: developer 5008 5009 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5010 5011 @*/ 5012 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5013 { 5014 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5015 PetscErrorCode ierr; 5016 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5017 IS isrowa,iscola; 5018 Mat *aloc; 5019 PetscBool match; 5020 5021 PetscFunctionBegin; 5022 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5023 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5024 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5025 if (!row){ 5026 start = A->rmap->rstart; end = A->rmap->rend; 5027 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5028 } else { 5029 isrowa = *row; 5030 } 5031 if (!col){ 5032 start = A->cmap->rstart; 5033 cmap = a->garray; 5034 nzA = a->A->cmap->n; 5035 nzB = a->B->cmap->n; 5036 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5037 ncols = 0; 5038 for (i=0; i<nzB; i++) { 5039 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5040 else break; 5041 } 5042 imark = i; 5043 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5044 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5045 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5046 } else { 5047 iscola = *col; 5048 } 5049 if (scall != MAT_INITIAL_MATRIX){ 5050 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5051 aloc[0] = *A_loc; 5052 } 5053 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5054 *A_loc = aloc[0]; 5055 ierr = PetscFree(aloc);CHKERRQ(ierr); 5056 if (!row){ 5057 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5058 } 5059 if (!col){ 5060 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5061 } 5062 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5063 PetscFunctionReturn(0); 5064 } 5065 5066 #undef __FUNCT__ 5067 #define __FUNCT__ "MatGetBrowsOfAcols" 5068 /*@C 5069 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5070 5071 Collective on Mat 5072 5073 Input Parameters: 5074 + A,B - the matrices in mpiaij format 5075 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5076 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 5077 5078 Output Parameter: 5079 + rowb, colb - index sets of rows and columns of B to extract 5080 - B_seq - the sequential matrix generated 5081 5082 Level: developer 5083 5084 @*/ 5085 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5086 { 5087 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5088 PetscErrorCode ierr; 5089 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5090 IS isrowb,iscolb; 5091 Mat *bseq=PETSC_NULL; 5092 5093 PetscFunctionBegin; 5094 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5095 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5096 } 5097 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5098 5099 if (scall == MAT_INITIAL_MATRIX){ 5100 start = A->cmap->rstart; 5101 cmap = a->garray; 5102 nzA = a->A->cmap->n; 5103 nzB = a->B->cmap->n; 5104 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5105 ncols = 0; 5106 for (i=0; i<nzB; i++) { /* row < local row index */ 5107 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5108 else break; 5109 } 5110 imark = i; 5111 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5112 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5113 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5114 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5115 } else { 5116 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5117 isrowb = *rowb; iscolb = *colb; 5118 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5119 bseq[0] = *B_seq; 5120 } 5121 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5122 *B_seq = bseq[0]; 5123 ierr = PetscFree(bseq);CHKERRQ(ierr); 5124 if (!rowb){ 5125 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5126 } else { 5127 *rowb = isrowb; 5128 } 5129 if (!colb){ 5130 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5131 } else { 5132 *colb = iscolb; 5133 } 5134 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5135 PetscFunctionReturn(0); 5136 } 5137 5138 #undef __FUNCT__ 5139 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5140 /* 5141 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5142 of the OFF-DIAGONAL portion of local A 5143 5144 Collective on Mat 5145 5146 Input Parameters: 5147 + A,B - the matrices in mpiaij format 5148 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5149 5150 Output Parameter: 5151 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5152 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5153 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 5154 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5155 5156 Level: developer 5157 5158 */ 5159 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5160 { 5161 VecScatter_MPI_General *gen_to,*gen_from; 5162 PetscErrorCode ierr; 5163 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5164 Mat_SeqAIJ *b_oth; 5165 VecScatter ctx=a->Mvctx; 5166 MPI_Comm comm=((PetscObject)ctx)->comm; 5167 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5168 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5169 PetscScalar *rvalues,*svalues; 5170 MatScalar *b_otha,*bufa,*bufA; 5171 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5172 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 5173 MPI_Status *sstatus,rstatus; 5174 PetscMPIInt jj; 5175 PetscInt *cols,sbs,rbs; 5176 PetscScalar *vals; 5177 5178 PetscFunctionBegin; 5179 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5180 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5181 } 5182 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5183 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5184 5185 gen_to = (VecScatter_MPI_General*)ctx->todata; 5186 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5187 rvalues = gen_from->values; /* holds the length of receiving row */ 5188 svalues = gen_to->values; /* holds the length of sending row */ 5189 nrecvs = gen_from->n; 5190 nsends = gen_to->n; 5191 5192 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5193 srow = gen_to->indices; /* local row index to be sent */ 5194 sstarts = gen_to->starts; 5195 sprocs = gen_to->procs; 5196 sstatus = gen_to->sstatus; 5197 sbs = gen_to->bs; 5198 rstarts = gen_from->starts; 5199 rprocs = gen_from->procs; 5200 rbs = gen_from->bs; 5201 5202 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5203 if (scall == MAT_INITIAL_MATRIX){ 5204 /* i-array */ 5205 /*---------*/ 5206 /* post receives */ 5207 for (i=0; i<nrecvs; i++){ 5208 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5209 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5210 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5211 } 5212 5213 /* pack the outgoing message */ 5214 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5215 sstartsj[0] = 0; rstartsj[0] = 0; 5216 len = 0; /* total length of j or a array to be sent */ 5217 k = 0; 5218 for (i=0; i<nsends; i++){ 5219 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5220 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5221 for (j=0; j<nrows; j++) { 5222 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5223 for (l=0; l<sbs; l++){ 5224 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5225 rowlen[j*sbs+l] = ncols; 5226 len += ncols; 5227 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5228 } 5229 k++; 5230 } 5231 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5232 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5233 } 5234 /* 
recvs and sends of i-array are completed */ 5235 i = nrecvs; 5236 while (i--) { 5237 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5238 } 5239 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5240 5241 /* allocate buffers for sending j and a arrays */ 5242 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5243 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5244 5245 /* create i-array of B_oth */ 5246 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5247 b_othi[0] = 0; 5248 len = 0; /* total length of j or a array to be received */ 5249 k = 0; 5250 for (i=0; i<nrecvs; i++){ 5251 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5252 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 5253 for (j=0; j<nrows; j++) { 5254 b_othi[k+1] = b_othi[k] + rowlen[j]; 5255 len += rowlen[j]; k++; 5256 } 5257 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5258 } 5259 5260 /* allocate space for j and a arrays of B_oth */ 5261 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5262 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5263 5264 /* j-array */ 5265 /*---------*/ 5266 /* post receives of j-array */ 5267 for (i=0; i<nrecvs; i++){ 5268 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5269 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5270 } 5271 5272 /* pack the outgoing message j-array */ 5273 k = 0; 5274 for (i=0; i<nsends; i++){ 5275 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5276 bufJ = bufj+sstartsj[i]; 5277 for (j=0; j<nrows; j++) { 5278 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5279 for (ll=0; ll<sbs; ll++){ 5280 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5281 for (l=0; l<ncols; l++){ 5282 *bufJ++ = cols[l]; 5283 } 5284 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5285 } 5286 } 5287 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5288 } 5289 5290 /* recvs and sends of j-array are completed */ 5291 i = nrecvs; 5292 while (i--) { 5293 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5294 } 5295 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5296 } else if (scall == MAT_REUSE_MATRIX){ 5297 sstartsj = *startsj_s; 5298 rstartsj = *startsj_r; 5299 bufa = *bufa_ptr; 5300 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5301 b_otha = b_oth->a; 5302 } else { 5303 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5304 } 5305 5306 /* a-array */ 5307 /*---------*/ 5308 /* post receives of a-array */ 5309 for (i=0; i<nrecvs; i++){ 5310 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5311 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5312 } 5313 5314 /* pack the outgoing message a-array */ 5315 k = 0; 5316 for (i=0; i<nsends; i++){ 5317 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5318 bufA = bufa+sstartsj[i]; 5319 for (j=0; j<nrows; j++) { 5320 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5321 for (ll=0; ll<sbs; ll++){ 5322 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5323 for (l=0; l<ncols; l++){ 5324 *bufA++ = vals[l]; 5325 } 5326 ierr =
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5327 } 5328 } 5329 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5330 } 5331 /* recvs and sends of a-array are completed */ 5332 i = nrecvs; 5333 while (i--) { 5334 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5335 } 5336 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5337 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5338 5339 if (scall == MAT_INITIAL_MATRIX){ 5340 /* put together the new matrix */ 5341 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5342 5343 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5344 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5345 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5346 b_oth->free_a = PETSC_TRUE; 5347 b_oth->free_ij = PETSC_TRUE; 5348 b_oth->nonew = 0; 5349 5350 ierr = PetscFree(bufj);CHKERRQ(ierr); 5351 if (!startsj_s || !bufa_ptr){ 5352 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5353 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5354 } else { 5355 *startsj_s = sstartsj; 5356 *startsj_r = rstartsj; 5357 *bufa_ptr = bufa; 5358 } 5359 } 5360 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5361 PetscFunctionReturn(0); 5362 } 5363 5364 #undef __FUNCT__ 5365 #define __FUNCT__ "MatGetCommunicationStructs" 5366 /*@C 5367 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5368 5369 Not Collective 5370 5371 Input Parameters: 5372 . A - The matrix in mpiaij format 5373 5374 Output Parameter: 5375 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5376 . 
colmap - A map from global column index to local index into lvec 5377 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5378 5379 Level: developer 5380 5381 @*/ 5382 #if defined (PETSC_USE_CTABLE) 5383 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5384 #else 5385 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5386 #endif 5387 { 5388 Mat_MPIAIJ *a; 5389 5390 PetscFunctionBegin; 5391 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5392 PetscValidPointer(lvec, 2); 5393 PetscValidPointer(colmap, 3); 5394 PetscValidPointer(multScatter, 4); 5395 a = (Mat_MPIAIJ *) A->data; 5396 if (lvec) *lvec = a->lvec; 5397 if (colmap) *colmap = a->colmap; 5398 if (multScatter) *multScatter = a->Mvctx; 5399 PetscFunctionReturn(0); 5400 } 5401 5402 EXTERN_C_BEGIN 5403 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*); 5404 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*); 5405 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 5406 EXTERN_C_END 5407 5408 #undef __FUNCT__ 5409 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5410 /* 5411 Computes (B'*A')' since computing B*A directly is untenable 5412 5413 n p p 5414 ( ) ( ) ( ) 5415 m ( A ) * n ( B ) = m ( C ) 5416 ( ) ( ) ( ) 5417 5418 */ 5419 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5420 { 5421 PetscErrorCode ierr; 5422 Mat At,Bt,Ct; 5423 5424 PetscFunctionBegin; 5425 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5426 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5427 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5428 ierr = MatDestroy(&At);CHKERRQ(ierr); 5429 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5430 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5431 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5432 PetscFunctionReturn(0); 5433 } 5434 5435 #undef __FUNCT__ 5436 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5437 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5438 { 5439 PetscErrorCode ierr; 5440 PetscInt m=A->rmap->n,n=B->cmap->n; 5441 Mat Cmat; 5442 5443 PetscFunctionBegin; 5444 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5445 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5446 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5447 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5448 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5449 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5450 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5451 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5452 *C = Cmat; 5453 (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ; 5454 PetscFunctionReturn(0); 5455 } 5456 5457 /* ----------------------------------------------------------------*/ 5458 #undef __FUNCT__ 5459 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5460 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5461 { 5462 PetscErrorCode ierr; 5463 5464 PetscFunctionBegin; 5465 if (scall == MAT_INITIAL_MATRIX){ 5466 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5467 } 5468 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 
5469 PetscFunctionReturn(0); 5470 } 5471 5472 EXTERN_C_BEGIN 5473 #if defined(PETSC_HAVE_MUMPS) 5474 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5475 #endif 5476 #if defined(PETSC_HAVE_PASTIX) 5477 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5478 #endif 5479 #if defined(PETSC_HAVE_SUPERLU_DIST) 5480 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5481 #endif 5482 #if defined(PETSC_HAVE_SPOOLES) 5483 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5484 #endif 5485 EXTERN_C_END 5486 5487 /*MC 5488 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5489 5490 Options Database Keys: 5491 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5492 5493 Level: beginner 5494 5495 .seealso: MatCreateAIJ() 5496 M*/ 5497 5498 EXTERN_C_BEGIN 5499 #undef __FUNCT__ 5500 #define __FUNCT__ "MatCreate_MPIAIJ" 5501 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5502 { 5503 Mat_MPIAIJ *b; 5504 PetscErrorCode ierr; 5505 PetscMPIInt size; 5506 5507 PetscFunctionBegin; 5508 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5509 5510 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5511 B->data = (void*)b; 5512 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5513 B->assembled = PETSC_FALSE; 5514 5515 B->insertmode = NOT_SET_VALUES; 5516 b->size = size; 5517 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5518 5519 /* build cache for off array entries formed */ 5520 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5521 b->donotstash = PETSC_FALSE; 5522 b->colmap = 0; 5523 b->garray = 0; 5524 b->roworiented = PETSC_TRUE; 5525 5526 /* stuff used for matrix vector multiply */ 5527 b->lvec = PETSC_NULL; 5528 b->Mvctx = PETSC_NULL; 5529 5530 /* stuff for MatGetRow() */ 5531 b->rowindices = 0; 5532 b->rowvalues = 0; 5533 b->getrowactive = PETSC_FALSE; 5534 5535 #if defined(PETSC_HAVE_SPOOLES) 5536 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5537 "MatGetFactor_mpiaij_spooles", 5538 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5539 #endif 5540 #if defined(PETSC_HAVE_MUMPS) 5541 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5542 "MatGetFactor_aij_mumps", 5543 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5544 #endif 5545 #if defined(PETSC_HAVE_PASTIX) 5546 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5547 "MatGetFactor_mpiaij_pastix", 5548 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5549 #endif 5550 #if defined(PETSC_HAVE_SUPERLU_DIST) 5551 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5552 "MatGetFactor_mpiaij_superlu_dist", 5553 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5554 #endif 5555 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5556 "MatStoreValues_MPIAIJ", 5557 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5558 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5559 "MatRetrieveValues_MPIAIJ", 5560 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5561 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5562 "MatGetDiagonalBlock_MPIAIJ", 5563 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5564 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5565 "MatIsTranspose_MPIAIJ", 5566 
MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5567 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5568 "MatMPIAIJSetPreallocation_MPIAIJ", 5569 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5570 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5571 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5572 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5573 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5574 "MatDiagonalScaleLocal_MPIAIJ", 5575 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5576 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5577 "MatConvert_MPIAIJ_MPIAIJPERM", 5578 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5579 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5580 "MatConvert_MPIAIJ_MPIAIJCRL", 5581 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5582 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5583 "MatConvert_MPIAIJ_MPISBAIJ", 5584 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5585 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5586 "MatMatMult_MPIDense_MPIAIJ", 5587 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5588 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5589 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5590 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5591 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5592 "MatMatMultNumeric_MPIDense_MPIAIJ", 5593 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5594 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5595 PetscFunctionReturn(0); 5596 } 5597 EXTERN_C_END 5598 5599 #undef __FUNCT__ 5600 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5601 /*@ 5602 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5603 and "off-diagonal" part of the matrix in CSR format. 5604 5605 Collective on MPI_Comm 5606 5607 Input Parameters: 5608 + comm - MPI communicator 5609 . m - number of local rows (Cannot be PETSC_DECIDE) 5610 . n - This value should be the same as the local size used in creating the 5611 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5612 calculated if N is given) For square matrices n is almost always m. 5613 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5614 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5615 . i - row indices for "diagonal" portion of matrix 5616 . j - column indices 5617 . a - matrix values 5618 . oi - row indices for "off-diagonal" portion of matrix 5619 . oj - column indices 5620 - oa - matrix values 5621 5622 Output Parameter: 5623 . mat - the matrix 5624 5625 Level: advanced 5626 5627 Notes: 5628 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5629 must free the arrays once the matrix has been destroyed and not before. 5630 5631 The i and j indices are 0 based 5632 5633 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5634 5635 This sets local rows and cannot be used to set off-processor values. 5636 5637 You cannot later use MatSetValues() to change values in this matrix. 
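   Example usage (a sketch added for illustration, with made-up sizes and values; it shows what the FIRST of two processes, owning rows 0-1 and columns 0-1 of a 4 x 4 matrix, might pass, and every process supplies its own arrays).  Here i, j, a describe one nonzero per local row in the owned columns 0 and 1, while oi, oj, oa describe one nonzero per local row in the remote columns 2 and 3; the j indices are local to the "diagonal" block and the oj indices are global, matching how the arrays are handed to MatCreateSeqAIJWithArrays() by this routine.
.vb
      PetscInt    i[]  = {0,1,2},  j[]  = {0,1},  oi[] = {0,1,2},  oj[] = {2,3};
      PetscScalar a[]  = {1.0,2.0},  oa[] = {3.0,4.0};
      Mat         A;

      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve
   The six arrays must remain valid until A has been destroyed.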
5638 5639 .keywords: matrix, aij, compressed row, sparse, parallel 5640 5641 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5642 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5643 @*/ 5644 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5645 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5646 { 5647 PetscErrorCode ierr; 5648 Mat_MPIAIJ *maij; 5649 5650 PetscFunctionBegin; 5651 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5652 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5653 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5654 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5655 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5656 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5657 maij = (Mat_MPIAIJ*) (*mat)->data; 5658 maij->donotstash = PETSC_TRUE; 5659 (*mat)->preallocated = PETSC_TRUE; 5660 5661 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5662 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5663 5664 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5665 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5666 5667 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5668 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5669 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5670 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5671 5672 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5673 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5674 PetscFunctionReturn(0); 5675 } 5676 5677 /* 5678 Special version for direct calls from Fortran 5679 */ 5680 #include <petsc-private/fortranimpl.h> 5681 5682 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5683 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5684 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5685 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5686 #endif 5687 5688 /* Change these macros so can be used in void function */ 5689 #undef CHKERRQ 5690 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5691 #undef SETERRQ2 5692 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5693 #undef SETERRQ3 5694 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5695 #undef SETERRQ 5696 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5697 5698 EXTERN_C_BEGIN 5699 #undef __FUNCT__ 5700 #define __FUNCT__ "matsetvaluesmpiaij_" 5701 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5702 { 5703 Mat mat = *mmat; 5704 PetscInt m = *mm, n = *mn; 5705 InsertMode addv = *maddv; 5706 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5707 PetscScalar value; 5708 PetscErrorCode ierr; 5709 5710 MatCheckPreallocated(mat,1); 5711 if (mat->insertmode == NOT_SET_VALUES) { 5712 mat->insertmode = addv; 5713 } 5714 #if defined(PETSC_USE_DEBUG) 5715 else if (mat->insertmode != addv) { 5716 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5717 } 5718 #endif 5719 { 5720 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5721 PetscInt cstart = 
mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5722 PetscBool roworiented = aij->roworiented; 5723 5724 /* Some Variables required in the macro */ 5725 Mat A = aij->A; 5726 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5727 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5728 MatScalar *aa = a->a; 5729 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5730 Mat B = aij->B; 5731 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5732 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5733 MatScalar *ba = b->a; 5734 5735 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5736 PetscInt nonew = a->nonew; 5737 MatScalar *ap1,*ap2; 5738 5739 PetscFunctionBegin; 5740 for (i=0; i<m; i++) { 5741 if (im[i] < 0) continue; 5742 #if defined(PETSC_USE_DEBUG) 5743 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5744 #endif 5745 if (im[i] >= rstart && im[i] < rend) { 5746 row = im[i] - rstart; 5747 lastcol1 = -1; 5748 rp1 = aj + ai[row]; 5749 ap1 = aa + ai[row]; 5750 rmax1 = aimax[row]; 5751 nrow1 = ailen[row]; 5752 low1 = 0; 5753 high1 = nrow1; 5754 lastcol2 = -1; 5755 rp2 = bj + bi[row]; 5756 ap2 = ba + bi[row]; 5757 rmax2 = bimax[row]; 5758 nrow2 = bilen[row]; 5759 low2 = 0; 5760 high2 = nrow2; 5761 5762 for (j=0; j<n; j++) { 5763 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5764 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5765 if (in[j] >= cstart && in[j] < cend){ 5766 col = in[j] - cstart; 5767 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5768 } else if (in[j] < 0) continue; 5769 #if defined(PETSC_USE_DEBUG) 5770 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5771 #endif 5772 else { 5773 if (mat->was_assembled) { 5774 if (!aij->colmap) { 5775 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5776 } 5777 #if defined (PETSC_USE_CTABLE) 5778 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5779 col--; 5780 #else 5781 col = aij->colmap[in[j]] - 1; 5782 #endif 5783 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5784 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5785 col = in[j]; 5786 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5787 B = aij->B; 5788 b = (Mat_SeqAIJ*)B->data; 5789 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5790 rp2 = bj + bi[row]; 5791 ap2 = ba + bi[row]; 5792 rmax2 = bimax[row]; 5793 nrow2 = bilen[row]; 5794 low2 = 0; 5795 high2 = nrow2; 5796 bm = aij->B->rmap->n; 5797 ba = b->a; 5798 } 5799 } else col = in[j]; 5800 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5801 } 5802 } 5803 } else { 5804 if (!aij->donotstash) { 5805 if (roworiented) { 5806 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5807 } else { 5808 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5809 } 5810 } 5811 } 5812 }} 5813 PetscFunctionReturnVoid(); 5814 } 5815 EXTERN_C_END 5816 5817
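/*
   Usage sketch (illustrative only, not compiled as part of the library): MatCreateMPIAIJSumSeqAIJ()
   defined above takes an assembled MATSEQAIJ of the same global size M x N from every process and
   sums the entries into one parallel MPIAIJ matrix.  A typical call might look like

      Mat            seqpart,C;
      PetscErrorCode ierr;

      ... assemble the local M x N MATSEQAIJ seqpart on each process ...

      ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqpart,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);

      ... later calls with MAT_REUSE_MATRIX and an unchanged nonzero pattern reuse the symbolic
          data kept in the "MatMergeSeqsToMPI" container attached to C ...

      ierr = MatDestroy(&C);CHKERRQ(ierr);
*/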