#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petscblaslapack.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, and MATAIJCRL; the implementation also
   switches over automatically to using inodes when enough of them exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
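/*
   Example (a sketch of the recommended preallocation pattern described above; comm, the sizes
   m, n, M, N and the arrays d_nnz, o_nnz are placeholders, error checking abbreviated):

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,0,d_nnz);CHKERRQ(ierr);          used when the communicator has one process
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);  used when it has more than one
     ... insert entries with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() ...
*/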
#undef __FUNCT__
#define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
    ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    ok2:;
  }
  ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]),work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
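/*
   Usage sketch for the routine above (reached through MatGetColumnNorms(); A and norms are
   placeholders): the output array must hold one entry per global column of the matrix.

     PetscReal *norms;
     PetscInt  N;
     ierr = MatGetSize(A,PETSC_NULL,&N);CHKERRQ(ierr);
     ierr = PetscMalloc(N*sizeof(PetscReal),&norms);CHKERRQ(ierr);
     ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
     ierr = PetscFree(norms);CHKERRQ(ierr);
*/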
#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,gmat->rmap->bs,gmat->cmap->bs);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}

/*
    Local utility routine that creates a mapping from the global column
    number to the local number in the off-diagonal part of the local
    storage of the matrix. When PETSC_USE_CTABLE is used this is scalable,
    at a slightly higher hash-table cost; without it, it is not scalable (each
    process holds an order-N integer array, but access is fast).
*/
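/*
   Lookup sketch (mirrors the way the colmap is consulted in MatSetValues_MPIAIJ() below;
   gcol and lcol are placeholder names): translate a global column index gcol into a local
   column index of the off-diagonal block B; a result of -1 means gcol has no entry in B.

     #if defined(PETSC_USE_CTABLE)
       ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
       lcol--;
     #else
       lcol = aij->colmap[gcol] - 1;
     #endif
*/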
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
  if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
  lastcol1 = col;\
  while (high1-low1 > 5) { \
    t = (low1+high1)/2; \
    if (rp1[t] > col) high1 = t; \
    else              low1  = t; \
  } \
  for (_i=low1; _i<high1; _i++) { \
    if (rp1[_i] > col) break; \
    if (rp1[_i] == col) { \
      if (addv == ADD_VALUES) ap1[_i] += value; \
      else                    ap1[_i]  = value; \
      goto a_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
  if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
  N = nrow1++ - 1; a->nz++; high1++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp1[ii+1] = rp1[ii]; \
    ap1[ii+1] = ap1[ii]; \
  } \
  rp1[_i] = col;  \
  ap1[_i] = value; \
  a_noinsert: ; \
  ailen[row] = nrow1; \
}


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
  if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
  lastcol2 = col;\
  while (high2-low2 > 5) { \
    t = (low2+high2)/2; \
    if (rp2[t] > col) high2 = t; \
    else              low2  = t; \
  } \
  for (_i=low2; _i<high2; _i++) { \
    if (rp2[_i] > col) break; \
    if (rp2[_i] == col) { \
      if (addv == ADD_VALUES) ap2[_i] += value; \
      else                    ap2[_i]  = value; \
      goto b_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
  if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
  N = nrow2++ - 1; b->nz++; high2++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp2[ii+1] = rp2[ii]; \
    ap2[ii+1] = ap2[ii]; \
  } \
  rp2[_i] = col; \
  ap2[_i] = value; \
  b_noinsert: ; \
  bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__ 
"MatSetValuesRow_MPIAIJ" 426 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 427 { 428 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 429 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 430 PetscErrorCode ierr; 431 PetscInt l,*garray = mat->garray,diag; 432 433 PetscFunctionBegin; 434 /* code only works for square matrices A */ 435 436 /* find size of row to the left of the diagonal part */ 437 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 438 row = row - diag; 439 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 440 if (garray[b->j[b->i[row]+l]] > diag) break; 441 } 442 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 443 444 /* diagonal part */ 445 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 446 447 /* right of diagonal part */ 448 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 449 PetscFunctionReturn(0); 450 } 451 452 #undef __FUNCT__ 453 #define __FUNCT__ "MatSetValues_MPIAIJ" 454 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 455 { 456 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 457 PetscScalar value; 458 PetscErrorCode ierr; 459 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 460 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 461 PetscBool roworiented = aij->roworiented; 462 463 /* Some Variables required in the macro */ 464 Mat A = aij->A; 465 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 466 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 467 MatScalar *aa = a->a; 468 PetscBool ignorezeroentries = a->ignorezeroentries; 469 Mat B = aij->B; 470 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 471 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 472 MatScalar *ba = b->a; 473 474 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 475 PetscInt nonew; 476 MatScalar *ap1,*ap2; 477 478 PetscFunctionBegin; 479 if (v) PetscValidScalarPointer(v,6); 480 for (i=0; i<m; i++) { 481 if (im[i] < 0) continue; 482 #if defined(PETSC_USE_DEBUG) 483 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 484 #endif 485 if (im[i] >= rstart && im[i] < rend) { 486 row = im[i] - rstart; 487 lastcol1 = -1; 488 rp1 = aj + ai[row]; 489 ap1 = aa + ai[row]; 490 rmax1 = aimax[row]; 491 nrow1 = ailen[row]; 492 low1 = 0; 493 high1 = nrow1; 494 lastcol2 = -1; 495 rp2 = bj + bi[row]; 496 ap2 = ba + bi[row]; 497 rmax2 = bimax[row]; 498 nrow2 = bilen[row]; 499 low2 = 0; 500 high2 = nrow2; 501 502 for (j=0; j<n; j++) { 503 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 504 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 505 if (in[j] >= cstart && in[j] < cend){ 506 col = in[j] - cstart; 507 nonew = a->nonew; 508 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 509 } else if (in[j] < 0) continue; 510 #if defined(PETSC_USE_DEBUG) 511 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 512 #endif 513 else { 514 if (mat->was_assembled) { 515 if (!aij->colmap) { 516 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 517 } 518 #if defined (PETSC_USE_CTABLE) 519 
ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 520 col--; 521 #else 522 col = aij->colmap[in[j]] - 1; 523 #endif 524 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 525 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 526 col = in[j]; 527 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 528 B = aij->B; 529 b = (Mat_SeqAIJ*)B->data; 530 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 531 rp2 = bj + bi[row]; 532 ap2 = ba + bi[row]; 533 rmax2 = bimax[row]; 534 nrow2 = bilen[row]; 535 low2 = 0; 536 high2 = nrow2; 537 bm = aij->B->rmap->n; 538 ba = b->a; 539 } 540 } else col = in[j]; 541 nonew = b->nonew; 542 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 543 } 544 } 545 } else { 546 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 547 if (!aij->donotstash) { 548 mat->assembled = PETSC_FALSE; 549 if (roworiented) { 550 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 551 } else { 552 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 553 } 554 } 555 } 556 } 557 PetscFunctionReturn(0); 558 } 559 560 #undef __FUNCT__ 561 #define __FUNCT__ "MatGetValues_MPIAIJ" 562 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 563 { 564 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 565 PetscErrorCode ierr; 566 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 567 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 568 569 PetscFunctionBegin; 570 for (i=0; i<m; i++) { 571 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 572 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 573 if (idxm[i] >= rstart && idxm[i] < rend) { 574 row = idxm[i] - rstart; 575 for (j=0; j<n; j++) { 576 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 577 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 578 if (idxn[j] >= cstart && idxn[j] < cend){ 579 col = idxn[j] - cstart; 580 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 581 } else { 582 if (!aij->colmap) { 583 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 584 } 585 #if defined (PETSC_USE_CTABLE) 586 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 587 col --; 588 #else 589 col = aij->colmap[idxn[j]] - 1; 590 #endif 591 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 592 else { 593 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 594 } 595 } 596 } 597 } else { 598 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 599 } 600 } 601 PetscFunctionReturn(0); 602 } 603 604 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 605 606 #undef __FUNCT__ 607 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 608 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 609 { 610 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 611 PetscErrorCode ierr; 612 PetscInt nstash,reallocs; 613 InsertMode addv; 614 615 
  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble.
*/ 669 /* 670 if nonzero structure of submatrix B cannot change then we know that 671 no processor disassembled thus we can skip this stuff 672 */ 673 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 674 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 675 if (mat->was_assembled && !other_disassembled) { 676 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 677 } 678 } 679 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 680 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 681 } 682 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 683 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 684 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 685 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 686 687 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 688 aij->rowvalues = 0; 689 690 /* used by MatAXPY() */ 691 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 692 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 693 694 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 695 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 696 PetscFunctionReturn(0); 697 } 698 699 #undef __FUNCT__ 700 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 701 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 702 { 703 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 704 PetscErrorCode ierr; 705 706 PetscFunctionBegin; 707 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 708 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 709 PetscFunctionReturn(0); 710 } 711 712 #undef __FUNCT__ 713 #define __FUNCT__ "MatZeroRows_MPIAIJ" 714 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 715 { 716 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 717 PetscErrorCode ierr; 718 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 719 PetscInt i,*owners = A->rmap->range; 720 PetscInt *nprocs,j,idx,nsends,row; 721 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 722 PetscInt *rvalues,count,base,slen,*source; 723 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 724 MPI_Comm comm = ((PetscObject)A)->comm; 725 MPI_Request *send_waits,*recv_waits; 726 MPI_Status recv_status,*send_status; 727 const PetscScalar *xx; 728 PetscScalar *bb; 729 #if defined(PETSC_DEBUG) 730 PetscBool found = PETSC_FALSE; 731 #endif 732 733 PetscFunctionBegin; 734 /* first count number of contributors to each processor */ 735 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 736 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 737 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 738 j = 0; 739 for (i=0; i<N; i++) { 740 if (lastidx > (idx = rows[i])) j = 0; 741 lastidx = idx; 742 for (; j<size; j++) { 743 if (idx >= owners[j] && idx < owners[j+1]) { 744 nprocs[2*j]++; 745 nprocs[2*j+1] = 1; 746 owner[i] = j; 747 #if defined(PETSC_DEBUG) 748 found = PETSC_TRUE; 749 #endif 750 break; 751 } 752 } 753 #if defined(PETSC_DEBUG) 754 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 755 found = PETSC_FALSE; 756 #endif 757 } 758 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 759 760 if (A->nooffproczerorows) { 761 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 762 nrecvs = nsends; 763 nmax = N; 764 } 
else { 765 /* inform other processors of number of messages and max length*/ 766 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 767 } 768 769 /* post receives: */ 770 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 771 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 772 for (i=0; i<nrecvs; i++) { 773 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 774 } 775 776 /* do sends: 777 1) starts[i] gives the starting index in svalues for stuff going to 778 the ith processor 779 */ 780 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 781 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 782 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 783 starts[0] = 0; 784 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 785 for (i=0; i<N; i++) { 786 svalues[starts[owner[i]]++] = rows[i]; 787 } 788 789 starts[0] = 0; 790 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 791 count = 0; 792 for (i=0; i<size; i++) { 793 if (nprocs[2*i+1]) { 794 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 795 } 796 } 797 ierr = PetscFree(starts);CHKERRQ(ierr); 798 799 base = owners[rank]; 800 801 /* wait on receives */ 802 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 803 count = nrecvs; slen = 0; 804 while (count) { 805 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 806 /* unpack receives into our local space */ 807 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 808 source[imdex] = recv_status.MPI_SOURCE; 809 lens[imdex] = n; 810 slen += n; 811 count--; 812 } 813 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 814 815 /* move the data into the send scatter */ 816 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 817 count = 0; 818 for (i=0; i<nrecvs; i++) { 819 values = rvalues + i*nmax; 820 for (j=0; j<lens[i]; j++) { 821 lrows[count++] = values[j] - base; 822 } 823 } 824 ierr = PetscFree(rvalues);CHKERRQ(ierr); 825 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 826 ierr = PetscFree(owner);CHKERRQ(ierr); 827 ierr = PetscFree(nprocs);CHKERRQ(ierr); 828 829 /* fix right hand side if needed */ 830 if (x && b) { 831 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 832 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 833 for (i=0; i<slen; i++) { 834 bb[lrows[i]] = diag*xx[lrows[i]]; 835 } 836 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 837 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 838 } 839 /* 840 Zero the required rows. If the "diagonal block" of the matrix 841 is square and the user wishes to set the diagonal we use separate 842 code so that MatSetValues() is not called for each diagonal allocating 843 new memory, thus calling lots of mallocs and slowing things down. 
844 845 */ 846 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 847 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 848 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 849 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 850 } else if (diag != 0.0) { 851 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 852 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 853 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 854 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 855 } 856 for (i = 0; i < slen; i++) { 857 row = lrows[i] + rstart; 858 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 859 } 860 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 861 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 862 } else { 863 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 864 } 865 ierr = PetscFree(lrows);CHKERRQ(ierr); 866 867 /* wait on sends */ 868 if (nsends) { 869 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 870 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 871 ierr = PetscFree(send_status);CHKERRQ(ierr); 872 } 873 ierr = PetscFree(send_waits);CHKERRQ(ierr); 874 ierr = PetscFree(svalues);CHKERRQ(ierr); 875 PetscFunctionReturn(0); 876 } 877 878 #undef __FUNCT__ 879 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 880 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 881 { 882 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 883 PetscErrorCode ierr; 884 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 885 PetscInt i,*owners = A->rmap->range; 886 PetscInt *nprocs,j,idx,nsends; 887 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 888 PetscInt *rvalues,count,base,slen,*source; 889 PetscInt *lens,*lrows,*values,m; 890 MPI_Comm comm = ((PetscObject)A)->comm; 891 MPI_Request *send_waits,*recv_waits; 892 MPI_Status recv_status,*send_status; 893 const PetscScalar *xx; 894 PetscScalar *bb,*mask; 895 Vec xmask,lmask; 896 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 897 const PetscInt *aj, *ii,*ridx; 898 PetscScalar *aa; 899 #if defined(PETSC_DEBUG) 900 PetscBool found = PETSC_FALSE; 901 #endif 902 903 PetscFunctionBegin; 904 /* first count number of contributors to each processor */ 905 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 906 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 907 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 908 j = 0; 909 for (i=0; i<N; i++) { 910 if (lastidx > (idx = rows[i])) j = 0; 911 lastidx = idx; 912 for (; j<size; j++) { 913 if (idx >= owners[j] && idx < owners[j+1]) { 914 nprocs[2*j]++; 915 nprocs[2*j+1] = 1; 916 owner[i] = j; 917 #if defined(PETSC_DEBUG) 918 found = PETSC_TRUE; 919 #endif 920 break; 921 } 922 } 923 #if defined(PETSC_DEBUG) 924 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 925 found = PETSC_FALSE; 926 #endif 927 } 928 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 929 930 /* inform other processors of number of messages and max length*/ 931 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 932 933 /* post receives: */ 934 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 935 ierr = 
PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 936 for (i=0; i<nrecvs; i++) { 937 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 938 } 939 940 /* do sends: 941 1) starts[i] gives the starting index in svalues for stuff going to 942 the ith processor 943 */ 944 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 945 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 946 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 947 starts[0] = 0; 948 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 949 for (i=0; i<N; i++) { 950 svalues[starts[owner[i]]++] = rows[i]; 951 } 952 953 starts[0] = 0; 954 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 955 count = 0; 956 for (i=0; i<size; i++) { 957 if (nprocs[2*i+1]) { 958 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 959 } 960 } 961 ierr = PetscFree(starts);CHKERRQ(ierr); 962 963 base = owners[rank]; 964 965 /* wait on receives */ 966 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 967 count = nrecvs; slen = 0; 968 while (count) { 969 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 970 /* unpack receives into our local space */ 971 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 972 source[imdex] = recv_status.MPI_SOURCE; 973 lens[imdex] = n; 974 slen += n; 975 count--; 976 } 977 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 978 979 /* move the data into the send scatter */ 980 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 981 count = 0; 982 for (i=0; i<nrecvs; i++) { 983 values = rvalues + i*nmax; 984 for (j=0; j<lens[i]; j++) { 985 lrows[count++] = values[j] - base; 986 } 987 } 988 ierr = PetscFree(rvalues);CHKERRQ(ierr); 989 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 990 ierr = PetscFree(owner);CHKERRQ(ierr); 991 ierr = PetscFree(nprocs);CHKERRQ(ierr); 992 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 993 994 /* zero diagonal part of matrix */ 995 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 996 997 /* handle off diagonal part of matrix */ 998 ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr); 999 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1000 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1001 for (i=0; i<slen; i++) { 1002 bb[lrows[i]] = 1; 1003 } 1004 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1005 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1006 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1007 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1008 if (x) { 1009 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1010 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1011 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1012 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1013 } 1014 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1015 1016 /* remove zeroed rows of off diagonal matrix */ 1017 ii = aij->i; 1018 for (i=0; i<slen; i++) { 1019 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1020 } 1021 1022 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1023 if (aij->compressedrow.use){ 1024 m = aij->compressedrow.nrows; 1025 ii = 
aij->compressedrow.i; 1026 ridx = aij->compressedrow.rindex; 1027 for (i=0; i<m; i++){ 1028 n = ii[i+1] - ii[i]; 1029 aj = aij->j + ii[i]; 1030 aa = aij->a + ii[i]; 1031 1032 for (j=0; j<n; j++) { 1033 if (PetscAbsScalar(mask[*aj])) { 1034 if (b) bb[*ridx] -= *aa*xx[*aj]; 1035 *aa = 0.0; 1036 } 1037 aa++; 1038 aj++; 1039 } 1040 ridx++; 1041 } 1042 } else { /* do not use compressed row format */ 1043 m = l->B->rmap->n; 1044 for (i=0; i<m; i++) { 1045 n = ii[i+1] - ii[i]; 1046 aj = aij->j + ii[i]; 1047 aa = aij->a + ii[i]; 1048 for (j=0; j<n; j++) { 1049 if (PetscAbsScalar(mask[*aj])) { 1050 if (b) bb[i] -= *aa*xx[*aj]; 1051 *aa = 0.0; 1052 } 1053 aa++; 1054 aj++; 1055 } 1056 } 1057 } 1058 if (x) { 1059 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1060 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1061 } 1062 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1063 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1064 ierr = PetscFree(lrows);CHKERRQ(ierr); 1065 1066 /* wait on sends */ 1067 if (nsends) { 1068 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1069 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1070 ierr = PetscFree(send_status);CHKERRQ(ierr); 1071 } 1072 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1073 ierr = PetscFree(svalues);CHKERRQ(ierr); 1074 1075 PetscFunctionReturn(0); 1076 } 1077 1078 #undef __FUNCT__ 1079 #define __FUNCT__ "MatMult_MPIAIJ" 1080 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1081 { 1082 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1083 PetscErrorCode ierr; 1084 PetscInt nt; 1085 1086 PetscFunctionBegin; 1087 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1088 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1089 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1090 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1091 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1092 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1093 PetscFunctionReturn(0); 1094 } 1095 1096 #undef __FUNCT__ 1097 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1098 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1099 { 1100 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1101 PetscErrorCode ierr; 1102 1103 PetscFunctionBegin; 1104 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1105 PetscFunctionReturn(0); 1106 } 1107 1108 #undef __FUNCT__ 1109 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1110 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1111 { 1112 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1113 PetscErrorCode ierr; 1114 1115 PetscFunctionBegin; 1116 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1117 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1118 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1119 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1120 PetscFunctionReturn(0); 1121 } 1122 1123 #undef __FUNCT__ 1124 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1125 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1126 { 1127 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1128 PetscErrorCode ierr; 1129 PetscBool merged; 1130 1131 PetscFunctionBegin; 1132 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1133 /* do nondiagonal part */ 1134 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1135 if (!merged) { 1136 /* 
send it on its way */ 1137 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1138 /* do local part */ 1139 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1140 /* receive remote parts: note this assumes the values are not actually */ 1141 /* added in yy until the next line, */ 1142 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1143 } else { 1144 /* do local part */ 1145 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1146 /* send it on its way */ 1147 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1148 /* values actually were received in the Begin() but we need to call this nop */ 1149 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1150 } 1151 PetscFunctionReturn(0); 1152 } 1153 1154 EXTERN_C_BEGIN 1155 #undef __FUNCT__ 1156 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1157 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1158 { 1159 MPI_Comm comm; 1160 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 1161 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1162 IS Me,Notme; 1163 PetscErrorCode ierr; 1164 PetscInt M,N,first,last,*notme,i; 1165 PetscMPIInt size; 1166 1167 PetscFunctionBegin; 1168 1169 /* Easy test: symmetric diagonal block */ 1170 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 1171 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1172 if (!*f) PetscFunctionReturn(0); 1173 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1174 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1175 if (size == 1) PetscFunctionReturn(0); 1176 1177 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1178 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1179 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1180 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1181 for (i=0; i<first; i++) notme[i] = i; 1182 for (i=last; i<M; i++) notme[i-last+first] = i; 1183 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1184 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1185 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1186 Aoff = Aoffs[0]; 1187 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1188 Boff = Boffs[0]; 1189 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1190 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1191 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1192 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1193 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1194 ierr = PetscFree(notme);CHKERRQ(ierr); 1195 PetscFunctionReturn(0); 1196 } 1197 EXTERN_C_END 1198 1199 #undef __FUNCT__ 1200 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1201 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1202 { 1203 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1204 PetscErrorCode ierr; 1205 1206 PetscFunctionBegin; 1207 /* do nondiagonal part */ 1208 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1209 /* send it on its way */ 1210 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1211 /* do local part */ 1212 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1213 /* receive remote parts */ 1214 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1215 PetscFunctionReturn(0); 1216 } 
1217 1218 /* 1219 This only works correctly for square matrices where the subblock A->A is the 1220 diagonal block 1221 */ 1222 #undef __FUNCT__ 1223 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1224 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1225 { 1226 PetscErrorCode ierr; 1227 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1228 1229 PetscFunctionBegin; 1230 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1231 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1232 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1233 PetscFunctionReturn(0); 1234 } 1235 1236 #undef __FUNCT__ 1237 #define __FUNCT__ "MatScale_MPIAIJ" 1238 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1239 { 1240 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1241 PetscErrorCode ierr; 1242 1243 PetscFunctionBegin; 1244 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1245 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1246 PetscFunctionReturn(0); 1247 } 1248 1249 #undef __FUNCT__ 1250 #define __FUNCT__ "MatDestroy_MPIAIJ" 1251 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1252 { 1253 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1254 PetscErrorCode ierr; 1255 1256 PetscFunctionBegin; 1257 #if defined(PETSC_USE_LOG) 1258 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1259 #endif 1260 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1261 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1262 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1263 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1264 #if defined (PETSC_USE_CTABLE) 1265 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1266 #else 1267 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1268 #endif 1269 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1270 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1271 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1272 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1273 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1274 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1275 1276 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1277 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1278 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1279 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1280 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1281 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1282 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1283 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1284 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1285 PetscFunctionReturn(0); 1286 } 1287 1288 #undef __FUNCT__ 1289 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1290 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1291 { 1292 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1293 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 1294 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 1295 PetscErrorCode ierr; 1296 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1297 int 
fd; 1298 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1299 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 1300 PetscScalar *column_values; 1301 PetscInt message_count,flowcontrolcount; 1302 1303 PetscFunctionBegin; 1304 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1305 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1306 nz = A->nz + B->nz; 1307 if (!rank) { 1308 header[0] = MAT_FILE_CLASSID; 1309 header[1] = mat->rmap->N; 1310 header[2] = mat->cmap->N; 1311 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1312 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1313 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1314 /* get largest number of rows any processor has */ 1315 rlen = mat->rmap->n; 1316 range = mat->rmap->range; 1317 for (i=1; i<size; i++) { 1318 rlen = PetscMax(rlen,range[i+1] - range[i]); 1319 } 1320 } else { 1321 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1322 rlen = mat->rmap->n; 1323 } 1324 1325 /* load up the local row counts */ 1326 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 1327 for (i=0; i<mat->rmap->n; i++) { 1328 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1329 } 1330 1331 /* store the row lengths to the file */ 1332 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1333 if (!rank) { 1334 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1335 for (i=1; i<size; i++) { 1336 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1337 rlen = range[i+1] - range[i]; 1338 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1339 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1340 } 1341 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1342 } else { 1343 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1344 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1345 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1346 } 1347 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1348 1349 /* load up the local column indices */ 1350 nzmax = nz; /* )th processor needs space a largest processor needs */ 1351 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1352 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1353 cnt = 0; 1354 for (i=0; i<mat->rmap->n; i++) { 1355 for (j=B->i[i]; j<B->i[i+1]; j++) { 1356 if ( (col = garray[B->j[j]]) > cstart) break; 1357 column_indices[cnt++] = col; 1358 } 1359 for (k=A->i[i]; k<A->i[i+1]; k++) { 1360 column_indices[cnt++] = A->j[k] + cstart; 1361 } 1362 for (; j<B->i[i+1]; j++) { 1363 column_indices[cnt++] = garray[B->j[j]]; 1364 } 1365 } 1366 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1367 1368 /* store the column indices to the file */ 1369 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1370 if (!rank) { 1371 MPI_Status status; 1372 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1373 for (i=1; i<size; i++) { 1374 ierr = 
PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1375 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1376 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1377 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1378 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1379 } 1380 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1381 } else { 1382 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1383 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1384 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1385 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1386 } 1387 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1388 1389 /* load up the local column values */ 1390 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1391 cnt = 0; 1392 for (i=0; i<mat->rmap->n; i++) { 1393 for (j=B->i[i]; j<B->i[i+1]; j++) { 1394 if ( garray[B->j[j]] > cstart) break; 1395 column_values[cnt++] = B->a[j]; 1396 } 1397 for (k=A->i[i]; k<A->i[i+1]; k++) { 1398 column_values[cnt++] = A->a[k]; 1399 } 1400 for (; j<B->i[i+1]; j++) { 1401 column_values[cnt++] = B->a[j]; 1402 } 1403 } 1404 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1405 1406 /* store the column values to the file */ 1407 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1408 if (!rank) { 1409 MPI_Status status; 1410 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1411 for (i=1; i<size; i++) { 1412 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1413 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1414 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1415 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1416 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1417 } 1418 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1419 } else { 1420 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1421 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1422 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1423 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1424 } 1425 ierr = PetscFree(column_values);CHKERRQ(ierr); 1426 PetscFunctionReturn(0); 1427 } 1428 1429 #undef __FUNCT__ 1430 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1431 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1432 { 1433 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1434 PetscErrorCode ierr; 1435 PetscMPIInt rank = aij->rank,size = aij->size; 1436 PetscBool isdraw,iascii,isbinary; 1437 PetscViewer sviewer; 1438 PetscViewerFormat format; 1439 1440 PetscFunctionBegin; 1441 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1442 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1443 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1444 if (iascii) { 1445 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1446 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1447 MatInfo info; 1448 PetscBool inodes; 1449 1450 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1451 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1452 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1453 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1454 if (!inodes) { 1455 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1456 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1457 } else { 1458 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1459 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1460 } 1461 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1462 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1463 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1464 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1465 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1466 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1467 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1468 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1469 PetscFunctionReturn(0); 1470 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1471 PetscInt inodecount,inodelimit,*inodes; 1472 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1473 if (inodes) { 1474 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1475 } else { 1476 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1477 } 1478 PetscFunctionReturn(0); 1479 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1480 PetscFunctionReturn(0); 1481 } 1482 } else if (isbinary) { 1483 if (size == 1) { 1484 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1485 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1486 } else { 1487 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1488 } 1489 PetscFunctionReturn(0); 1490 } else if (isdraw) { 1491 PetscDraw draw; 1492 PetscBool isnull; 1493 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1494 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1495 } 1496 1497 if (size == 1) { 1498 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1499 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1500 } else { 1501 /* assemble the entire matrix onto first processor. 
*/ 1502 Mat A; 1503 Mat_SeqAIJ *Aloc; 1504 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1505 MatScalar *a; 1506 1507 if (mat->rmap->N > 1024) { 1508 PetscBool flg = PETSC_FALSE; 1509 1510 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1511 if (!flg) { 1512 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1513 } 1514 } 1515 1516 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1517 if (!rank) { 1518 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1519 } else { 1520 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1521 } 1522 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1523 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1524 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1525 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1526 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1527 1528 /* copy over the A part */ 1529 Aloc = (Mat_SeqAIJ*)aij->A->data; 1530 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1531 row = mat->rmap->rstart; 1532 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1533 for (i=0; i<m; i++) { 1534 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1535 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1536 } 1537 aj = Aloc->j; 1538 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1539 1540 /* copy over the B part */ 1541 Aloc = (Mat_SeqAIJ*)aij->B->data; 1542 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1543 row = mat->rmap->rstart; 1544 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1545 ct = cols; 1546 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1547 for (i=0; i<m; i++) { 1548 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1549 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1550 } 1551 ierr = PetscFree(ct);CHKERRQ(ierr); 1552 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1553 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1554 /* 1555 Everyone has to call to draw the matrix since the graphics waits are 1556 synchronized across all processors that share the PetscDraw object 1557 */ 1558 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1559 if (!rank) { 1560 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1561 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1562 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1563 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1564 } 1565 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1566 ierr = MatDestroy(&A);CHKERRQ(ierr); 1567 } 1568 PetscFunctionReturn(0); 1569 } 1570 1571 #undef __FUNCT__ 1572 #define __FUNCT__ "MatView_MPIAIJ" 1573 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1574 { 1575 PetscErrorCode ierr; 1576 PetscBool iascii,isdraw,issocket,isbinary; 1577 1578 PetscFunctionBegin; 1579 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1580 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1581 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1582 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1583 if (iascii || isdraw || isbinary || issocket) { 1584 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1585 } else { 1586 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1587 } 1588 PetscFunctionReturn(0); 1589 } 1590 1591 #undef __FUNCT__ 1592 #define __FUNCT__ "MatSOR_MPIAIJ" 1593 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1594 { 1595 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1596 PetscErrorCode ierr; 1597 Vec bb1 = 0; 1598 PetscBool hasop; 1599 1600 PetscFunctionBegin; 1601 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1602 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1603 } 1604 1605 if (flag == SOR_APPLY_UPPER) { 1606 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1607 PetscFunctionReturn(0); 1608 } 1609 1610 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1611 if (flag & SOR_ZERO_INITIAL_GUESS) { 1612 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1613 its--; 1614 } 1615 1616 while (its--) { 1617 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1618 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1619 1620 /* update rhs: bb1 = bb - B*x */ 1621 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1622 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1623 1624 /* local sweep */ 1625 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1626 } 1627 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1628 if (flag & SOR_ZERO_INITIAL_GUESS) { 1629 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1630 its--; 1631 } 1632 while (its--) { 1633 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1634 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1635 1636 /* update rhs: bb1 = bb - B*x */ 1637 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1638 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1639 1640 /* local sweep */ 1641 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1642 } 1643 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1644 if (flag & SOR_ZERO_INITIAL_GUESS) { 1645 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1646 its--; 1647 } 1648 while (its--) { 1649 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1650 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1651 1652 /* update rhs: bb1 = bb - B*x */ 1653 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1654 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1655 1656 /* local sweep */ 1657 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1658 } 1659 } else if (flag & SOR_EISENSTAT) { 1660 Vec xx1; 1661 1662 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1663 ierr = 
(*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1664 1665 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1666 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1667 if (!mat->diag) { 1668 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1669 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1670 } 1671 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1672 if (hasop) { 1673 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1674 } else { 1675 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1676 } 1677 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1678 1679 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1680 1681 /* local sweep */ 1682 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1683 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1684 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1685 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1686 1687 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1688 PetscFunctionReturn(0); 1689 } 1690 1691 #undef __FUNCT__ 1692 #define __FUNCT__ "MatPermute_MPIAIJ" 1693 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1694 { 1695 MPI_Comm comm; 1696 PetscInt first,local_rowsize,local_colsize; 1697 const PetscInt *rows; 1698 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1699 PetscErrorCode ierr; 1700 1701 PetscFunctionBegin; 1702 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1703 /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */ 1704 ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr); 1705 /* collect the global row permutation and invert it */ 1706 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1707 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1708 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 1709 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1710 ierr = ISDestroy(&growp);CHKERRQ(ierr); 1711 /* get the local target indices */ 1712 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1713 ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr); 1714 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1715 ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 1716 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1717 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 1718 /* the column permutation is so much easier; 1719 make a local version of 'colp' and invert it */ 1720 ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr); 1721 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1722 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1723 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 1724 /* now we just get the submatrix */ 1725 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1726 /* clean up */ 1727 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 1728 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 1729 PetscFunctionReturn(0); 1730 } 1731 1732 #undef __FUNCT__ 1733 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1734 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1735 { 1736 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1737 Mat A = mat->A,B = 
mat->B; 1738 PetscErrorCode ierr; 1739 PetscReal isend[5],irecv[5]; 1740 1741 PetscFunctionBegin; 1742 info->block_size = 1.0; 1743 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1744 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1745 isend[3] = info->memory; isend[4] = info->mallocs; 1746 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1747 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1748 isend[3] += info->memory; isend[4] += info->mallocs; 1749 if (flag == MAT_LOCAL) { 1750 info->nz_used = isend[0]; 1751 info->nz_allocated = isend[1]; 1752 info->nz_unneeded = isend[2]; 1753 info->memory = isend[3]; 1754 info->mallocs = isend[4]; 1755 } else if (flag == MAT_GLOBAL_MAX) { 1756 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1757 info->nz_used = irecv[0]; 1758 info->nz_allocated = irecv[1]; 1759 info->nz_unneeded = irecv[2]; 1760 info->memory = irecv[3]; 1761 info->mallocs = irecv[4]; 1762 } else if (flag == MAT_GLOBAL_SUM) { 1763 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1764 info->nz_used = irecv[0]; 1765 info->nz_allocated = irecv[1]; 1766 info->nz_unneeded = irecv[2]; 1767 info->memory = irecv[3]; 1768 info->mallocs = irecv[4]; 1769 } 1770 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1771 info->fill_ratio_needed = 0; 1772 info->factor_mallocs = 0; 1773 1774 PetscFunctionReturn(0); 1775 } 1776 1777 #undef __FUNCT__ 1778 #define __FUNCT__ "MatSetOption_MPIAIJ" 1779 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1780 { 1781 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1782 PetscErrorCode ierr; 1783 1784 PetscFunctionBegin; 1785 switch (op) { 1786 case MAT_NEW_NONZERO_LOCATIONS: 1787 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1788 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1789 case MAT_KEEP_NONZERO_PATTERN: 1790 case MAT_NEW_NONZERO_LOCATION_ERR: 1791 case MAT_USE_INODES: 1792 case MAT_IGNORE_ZERO_ENTRIES: 1793 MatCheckPreallocated(A,1); 1794 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1795 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1796 break; 1797 case MAT_ROW_ORIENTED: 1798 a->roworiented = flg; 1799 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1800 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1801 break; 1802 case MAT_NEW_DIAGONALS: 1803 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1804 break; 1805 case MAT_IGNORE_OFF_PROC_ENTRIES: 1806 a->donotstash = flg; 1807 break; 1808 case MAT_SPD: 1809 A->spd_set = PETSC_TRUE; 1810 A->spd = flg; 1811 if (flg) { 1812 A->symmetric = PETSC_TRUE; 1813 A->structurally_symmetric = PETSC_TRUE; 1814 A->symmetric_set = PETSC_TRUE; 1815 A->structurally_symmetric_set = PETSC_TRUE; 1816 } 1817 break; 1818 case MAT_SYMMETRIC: 1819 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1820 break; 1821 case MAT_STRUCTURALLY_SYMMETRIC: 1822 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1823 break; 1824 case MAT_HERMITIAN: 1825 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1826 break; 1827 case MAT_SYMMETRY_ETERNAL: 1828 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1829 break; 1830 default: 1831 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1832 } 1833 PetscFunctionReturn(0); 1834 } 1835 1836 #undef __FUNCT__ 1837 #define __FUNCT__ "MatGetRow_MPIAIJ" 1838 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1839 { 1840 Mat_MPIAIJ *mat = 
(Mat_MPIAIJ*)matin->data; 1841 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1842 PetscErrorCode ierr; 1843 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1844 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1845 PetscInt *cmap,*idx_p; 1846 1847 PetscFunctionBegin; 1848 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1849 mat->getrowactive = PETSC_TRUE; 1850 1851 if (!mat->rowvalues && (idx || v)) { 1852 /* 1853 allocate enough space to hold information from the longest row. 1854 */ 1855 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1856 PetscInt max = 1,tmp; 1857 for (i=0; i<matin->rmap->n; i++) { 1858 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1859 if (max < tmp) { max = tmp; } 1860 } 1861 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1862 } 1863 1864 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1865 lrow = row - rstart; 1866 1867 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1868 if (!v) {pvA = 0; pvB = 0;} 1869 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1870 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1871 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1872 nztot = nzA + nzB; 1873 1874 cmap = mat->garray; 1875 if (v || idx) { 1876 if (nztot) { 1877 /* Sort by increasing column numbers, assuming A and B already sorted */ 1878 PetscInt imark = -1; 1879 if (v) { 1880 *v = v_p = mat->rowvalues; 1881 for (i=0; i<nzB; i++) { 1882 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1883 else break; 1884 } 1885 imark = i; 1886 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1887 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1888 } 1889 if (idx) { 1890 *idx = idx_p = mat->rowindices; 1891 if (imark > -1) { 1892 for (i=0; i<imark; i++) { 1893 idx_p[i] = cmap[cworkB[i]]; 1894 } 1895 } else { 1896 for (i=0; i<nzB; i++) { 1897 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1898 else break; 1899 } 1900 imark = i; 1901 } 1902 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1903 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1904 } 1905 } else { 1906 if (idx) *idx = 0; 1907 if (v) *v = 0; 1908 } 1909 } 1910 *nz = nztot; 1911 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1912 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1913 PetscFunctionReturn(0); 1914 } 1915 1916 #undef __FUNCT__ 1917 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1918 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1919 { 1920 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1921 1922 PetscFunctionBegin; 1923 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1924 aij->getrowactive = PETSC_FALSE; 1925 PetscFunctionReturn(0); 1926 } 1927 1928 #undef __FUNCT__ 1929 #define __FUNCT__ "MatNorm_MPIAIJ" 1930 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1931 { 1932 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1933 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1934 PetscErrorCode ierr; 1935 PetscInt i,j,cstart = mat->cmap->rstart; 1936 PetscReal sum = 0.0; 1937 MatScalar *v; 1938 1939 PetscFunctionBegin; 1940 if (aij->size == 1) { 1941 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1942 } 
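/* more than one process: accumulate contributions from the diagonal (A) and
   off-diagonal (B) blocks locally, then combine them with an MPI reduction */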
else { 1943 if (type == NORM_FROBENIUS) { 1944 v = amat->a; 1945 for (i=0; i<amat->nz; i++) { 1946 #if defined(PETSC_USE_COMPLEX) 1947 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1948 #else 1949 sum += (*v)*(*v); v++; 1950 #endif 1951 } 1952 v = bmat->a; 1953 for (i=0; i<bmat->nz; i++) { 1954 #if defined(PETSC_USE_COMPLEX) 1955 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1956 #else 1957 sum += (*v)*(*v); v++; 1958 #endif 1959 } 1960 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1961 *norm = PetscSqrtReal(*norm); 1962 } else if (type == NORM_1) { /* max column norm */ 1963 PetscReal *tmp,*tmp2; 1964 PetscInt *jj,*garray = aij->garray; 1965 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1966 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1967 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1968 *norm = 0.0; 1969 v = amat->a; jj = amat->j; 1970 for (j=0; j<amat->nz; j++) { 1971 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1972 } 1973 v = bmat->a; jj = bmat->j; 1974 for (j=0; j<bmat->nz; j++) { 1975 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1976 } 1977 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1978 for (j=0; j<mat->cmap->N; j++) { 1979 if (tmp2[j] > *norm) *norm = tmp2[j]; 1980 } 1981 ierr = PetscFree(tmp);CHKERRQ(ierr); 1982 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1983 } else if (type == NORM_INFINITY) { /* max row norm */ 1984 PetscReal ntemp = 0.0; 1985 for (j=0; j<aij->A->rmap->n; j++) { 1986 v = amat->a + amat->i[j]; 1987 sum = 0.0; 1988 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1989 sum += PetscAbsScalar(*v); v++; 1990 } 1991 v = bmat->a + bmat->i[j]; 1992 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1993 sum += PetscAbsScalar(*v); v++; 1994 } 1995 if (sum > ntemp) ntemp = sum; 1996 } 1997 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1998 } else { 1999 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 2000 } 2001 } 2002 PetscFunctionReturn(0); 2003 } 2004 2005 #undef __FUNCT__ 2006 #define __FUNCT__ "MatTranspose_MPIAIJ" 2007 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2008 { 2009 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2010 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2011 PetscErrorCode ierr; 2012 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 2013 PetscInt cstart=A->cmap->rstart,ncol; 2014 Mat B; 2015 MatScalar *array; 2016 2017 PetscFunctionBegin; 2018 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2019 2020 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 2021 ai = Aloc->i; aj = Aloc->j; 2022 bi = Bloc->i; bj = Bloc->j; 2023 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2024 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 2025 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2026 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 2027 for (i=0; i<ai[ma]; i++){ 2028 d_nnz[aj[i]] ++; 2029 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2030 } 2031 2032 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2033 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2034 ierr = 
MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs); CHKERRQ(ierr); 2035 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2036 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 2037 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2038 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2039 } else { 2040 B = *matout; 2041 } 2042 2043 /* copy over the A part */ 2044 array = Aloc->a; 2045 row = A->rmap->rstart; 2046 for (i=0; i<ma; i++) { 2047 ncol = ai[i+1]-ai[i]; 2048 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2049 row++; array += ncol; aj += ncol; 2050 } 2051 aj = Aloc->j; 2052 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2053 2054 /* copy over the B part */ 2055 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2056 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2057 array = Bloc->a; 2058 row = A->rmap->rstart; 2059 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 2060 cols_tmp = cols; 2061 for (i=0; i<mb; i++) { 2062 ncol = bi[i+1]-bi[i]; 2063 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2064 row++; array += ncol; cols_tmp += ncol; 2065 } 2066 ierr = PetscFree(cols);CHKERRQ(ierr); 2067 2068 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2069 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2070 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2071 *matout = B; 2072 } else { 2073 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2074 } 2075 PetscFunctionReturn(0); 2076 } 2077 2078 #undef __FUNCT__ 2079 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2080 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2081 { 2082 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2083 Mat a = aij->A,b = aij->B; 2084 PetscErrorCode ierr; 2085 PetscInt s1,s2,s3; 2086 2087 PetscFunctionBegin; 2088 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2089 if (rr) { 2090 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2091 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2092 /* Overlap communication with computation. 
*/ 2093 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2094 } 2095 if (ll) { 2096 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2097 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2098 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2099 } 2100 /* scale the diagonal block */ 2101 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2102 2103 if (rr) { 2104 /* Do a scatter end and then right scale the off-diagonal block */ 2105 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2106 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2107 } 2108 2109 PetscFunctionReturn(0); 2110 } 2111 2112 #undef __FUNCT__ 2113 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2114 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2115 { 2116 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2117 PetscErrorCode ierr; 2118 2119 PetscFunctionBegin; 2120 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2121 PetscFunctionReturn(0); 2122 } 2123 2124 #undef __FUNCT__ 2125 #define __FUNCT__ "MatEqual_MPIAIJ" 2126 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2127 { 2128 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2129 Mat a,b,c,d; 2130 PetscBool flg; 2131 PetscErrorCode ierr; 2132 2133 PetscFunctionBegin; 2134 a = matA->A; b = matA->B; 2135 c = matB->A; d = matB->B; 2136 2137 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2138 if (flg) { 2139 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2140 } 2141 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2142 PetscFunctionReturn(0); 2143 } 2144 2145 #undef __FUNCT__ 2146 #define __FUNCT__ "MatCopy_MPIAIJ" 2147 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2148 { 2149 PetscErrorCode ierr; 2150 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2151 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2152 2153 PetscFunctionBegin; 2154 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2155 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2156 /* because of the column compression in the off-processor part of the matrix a->B, 2157 the number of columns in a->B and b->B may be different, hence we cannot call 2158 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2159 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2160 then copying the submatrices */ 2161 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2162 } else { 2163 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2164 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2165 } 2166 PetscFunctionReturn(0); 2167 } 2168 2169 #undef __FUNCT__ 2170 #define __FUNCT__ "MatSetUp_MPIAIJ" 2171 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2172 { 2173 PetscErrorCode ierr; 2174 2175 PetscFunctionBegin; 2176 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2177 PetscFunctionReturn(0); 2178 } 2179 2180 #undef __FUNCT__ 2181 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2182 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2183 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz) 2184 { 2185 PetscInt i,m=Y->rmap->N; 2186 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2187 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2188 const PetscInt *xi = x->i,*yi = y->i; 2189 2190 PetscFunctionBegin; 2191 /* Set the number of nonzeros in the new matrix */ 2192 for(i=0; i<m; i++) { 2193 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2194 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2195 nnz[i] = 0; 2196 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2197 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2198 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2199 nnz[i]++; 2200 } 2201 for (; k<nzy; k++) nnz[i]++; 2202 } 2203 PetscFunctionReturn(0); 2204 } 2205 2206 #undef __FUNCT__ 2207 #define __FUNCT__ "MatAXPY_MPIAIJ" 2208 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2209 { 2210 PetscErrorCode ierr; 2211 PetscInt i; 2212 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2213 PetscBLASInt bnz,one=1; 2214 Mat_SeqAIJ *x,*y; 2215 2216 PetscFunctionBegin; 2217 if (str == SAME_NONZERO_PATTERN) { 2218 PetscScalar alpha = a; 2219 x = (Mat_SeqAIJ *)xx->A->data; 2220 y = (Mat_SeqAIJ *)yy->A->data; 2221 bnz = PetscBLASIntCast(x->nz); 2222 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2223 x = (Mat_SeqAIJ *)xx->B->data; 2224 y = (Mat_SeqAIJ *)yy->B->data; 2225 bnz = PetscBLASIntCast(x->nz); 2226 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2227 } else if (str == SUBSET_NONZERO_PATTERN) { 2228 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2229 2230 x = (Mat_SeqAIJ *)xx->B->data; 2231 y = (Mat_SeqAIJ *)yy->B->data; 2232 if (y->xtoy && y->XtoY != xx->B) { 2233 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2234 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2235 } 2236 if (!y->xtoy) { /* get xtoy */ 2237 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2238 y->XtoY = xx->B; 2239 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2240 } 2241 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2242 } else { 2243 Mat B; 2244 PetscInt *nnz_d,*nnz_o; 2245 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2246 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2247 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2248 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2249 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2250 ierr = 
MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2251 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2252 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2253 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2254 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2255 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2256 ierr = MatHeaderReplace(Y,B); 2257 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2258 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2259 } 2260 PetscFunctionReturn(0); 2261 } 2262 2263 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2264 2265 #undef __FUNCT__ 2266 #define __FUNCT__ "MatConjugate_MPIAIJ" 2267 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2268 { 2269 #if defined(PETSC_USE_COMPLEX) 2270 PetscErrorCode ierr; 2271 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2272 2273 PetscFunctionBegin; 2274 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2275 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2276 #else 2277 PetscFunctionBegin; 2278 #endif 2279 PetscFunctionReturn(0); 2280 } 2281 2282 #undef __FUNCT__ 2283 #define __FUNCT__ "MatRealPart_MPIAIJ" 2284 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2285 { 2286 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2287 PetscErrorCode ierr; 2288 2289 PetscFunctionBegin; 2290 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2291 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2292 PetscFunctionReturn(0); 2293 } 2294 2295 #undef __FUNCT__ 2296 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2297 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2298 { 2299 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2300 PetscErrorCode ierr; 2301 2302 PetscFunctionBegin; 2303 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2304 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2305 PetscFunctionReturn(0); 2306 } 2307 2308 #ifdef PETSC_HAVE_PBGL 2309 2310 #include <boost/parallel/mpi/bsp_process_group.hpp> 2311 #include <boost/graph/distributed/ilu_default_graph.hpp> 2312 #include <boost/graph/distributed/ilu_0_block.hpp> 2313 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2314 #include <boost/graph/distributed/petsc/interface.hpp> 2315 #include <boost/multi_array.hpp> 2316 #include <boost/parallel/distributed_property_map->hpp> 2317 2318 #undef __FUNCT__ 2319 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2320 /* 2321 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2322 */ 2323 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2324 { 2325 namespace petsc = boost::distributed::petsc; 2326 2327 namespace graph_dist = boost::graph::distributed; 2328 using boost::graph::distributed::ilu_default::process_group_type; 2329 using boost::graph::ilu_permuted; 2330 2331 PetscBool row_identity, col_identity; 2332 PetscContainer c; 2333 PetscInt m, n, M, N; 2334 PetscErrorCode ierr; 2335 2336 PetscFunctionBegin; 2337 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2338 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2339 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2340 if (!row_identity || !col_identity) { 2341 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2342 } 2343 2344 process_group_type pg; 2345 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2346 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, 
petsc::matrix_distribution(A, pg)); 2347 lgraph_type& level_graph = *lgraph_p; 2348 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2349 2350 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2351 ilu_permuted(level_graph); 2352 2353 /* put together the new matrix */ 2354 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2355 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2356 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2357 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2358 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs); CHKERRQ(ierr); 2359 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2360 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2361 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2362 2363 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 2364 ierr = PetscContainerSetPointer(c, lgraph_p); 2365 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2366 ierr = PetscContainerDestroy(&c); 2367 PetscFunctionReturn(0); 2368 } 2369 2370 #undef __FUNCT__ 2371 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2372 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2373 { 2374 PetscFunctionBegin; 2375 PetscFunctionReturn(0); 2376 } 2377 2378 #undef __FUNCT__ 2379 #define __FUNCT__ "MatSolve_MPIAIJ" 2380 /* 2381 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2382 */ 2383 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2384 { 2385 namespace graph_dist = boost::graph::distributed; 2386 2387 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2388 lgraph_type* lgraph_p; 2389 PetscContainer c; 2390 PetscErrorCode ierr; 2391 2392 PetscFunctionBegin; 2393 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2394 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2395 ierr = VecCopy(b, x);CHKERRQ(ierr); 2396 2397 PetscScalar* array_x; 2398 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2399 PetscInt sx; 2400 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2401 2402 PetscScalar* array_b; 2403 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2404 PetscInt sb; 2405 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2406 2407 lgraph_type& level_graph = *lgraph_p; 2408 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2409 2410 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2411 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2412 ref_x(array_x, boost::extents[num_vertices(graph)]); 2413 2414 typedef boost::iterator_property_map<array_ref_type::iterator, 2415 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2416 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2417 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2418 2419 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2420 2421 PetscFunctionReturn(0); 2422 } 2423 #endif 2424 2425 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2426 PetscInt nzlocal,nsends,nrecvs; 2427 PetscMPIInt *send_rank,*recv_rank; 2428 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2429 PetscScalar *sbuf_a,**rbuf_a; 2430 PetscErrorCode (*Destroy)(Mat); 2431 } Mat_Redundant; 2432 2433 #undef __FUNCT__ 2434 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2435 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2436 { 2437 PetscErrorCode ierr; 2438 Mat_Redundant 
*redund=(Mat_Redundant*)ptr; 2439 PetscInt i; 2440 2441 PetscFunctionBegin; 2442 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2443 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2444 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2445 for (i=0; i<redund->nrecvs; i++){ 2446 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2447 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2448 } 2449 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2450 ierr = PetscFree(redund);CHKERRQ(ierr); 2451 PetscFunctionReturn(0); 2452 } 2453 2454 #undef __FUNCT__ 2455 #define __FUNCT__ "MatDestroy_MatRedundant" 2456 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2457 { 2458 PetscErrorCode ierr; 2459 PetscContainer container; 2460 Mat_Redundant *redund=PETSC_NULL; 2461 2462 PetscFunctionBegin; 2463 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2464 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2465 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2466 A->ops->destroy = redund->Destroy; 2467 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2468 if (A->ops->destroy) { 2469 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2470 } 2471 PetscFunctionReturn(0); 2472 } 2473 2474 #undef __FUNCT__ 2475 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2476 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2477 { 2478 PetscMPIInt rank,size; 2479 MPI_Comm comm=((PetscObject)mat)->comm; 2480 PetscErrorCode ierr; 2481 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2482 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2483 PetscInt *rowrange=mat->rmap->range; 2484 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2485 Mat A=aij->A,B=aij->B,C=*matredundant; 2486 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2487 PetscScalar *sbuf_a; 2488 PetscInt nzlocal=a->nz+b->nz; 2489 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2490 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2491 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2492 MatScalar *aworkA,*aworkB; 2493 PetscScalar *vals; 2494 PetscMPIInt tag1,tag2,tag3,imdex; 2495 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2496 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2497 MPI_Status recv_status,*send_status; 2498 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2499 PetscInt **rbuf_j=PETSC_NULL; 2500 PetscScalar **rbuf_a=PETSC_NULL; 2501 Mat_Redundant *redund=PETSC_NULL; 2502 PetscContainer container; 2503 2504 PetscFunctionBegin; 2505 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2506 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2507 2508 if (reuse == MAT_REUSE_MATRIX) { 2509 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2510 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2511 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2512 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong local size"); 2513 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2514 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2515 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2516 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2517 2518 nsends = redund->nsends; 2519 nrecvs = redund->nrecvs; 2520 send_rank = redund->send_rank; 2521 recv_rank = redund->recv_rank; 2522 sbuf_nz = redund->sbuf_nz; 2523 rbuf_nz = redund->rbuf_nz; 2524 sbuf_j = redund->sbuf_j; 2525 sbuf_a = redund->sbuf_a; 2526 rbuf_j = redund->rbuf_j; 2527 rbuf_a = redund->rbuf_a; 2528 } 2529 2530 if (reuse == MAT_INITIAL_MATRIX){ 2531 PetscMPIInt subrank,subsize; 2532 PetscInt nleftover,np_subcomm; 2533 /* get the destination processors' id send_rank, nsends and nrecvs */ 2534 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2535 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2536 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2537 np_subcomm = size/nsubcomm; 2538 nleftover = size - nsubcomm*np_subcomm; 2539 nsends = 0; nrecvs = 0; 2540 for (i=0; i<size; i++){ /* i=rank*/ 2541 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2542 send_rank[nsends] = i; nsends++; 2543 recv_rank[nrecvs++] = i; 2544 } 2545 } 2546 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2547 i = size-nleftover-1; 2548 j = 0; 2549 while (j < nsubcomm - nleftover){ 2550 send_rank[nsends++] = i; 2551 i--; j++; 2552 } 2553 } 2554 2555 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2556 for (i=0; i<nleftover; i++){ 2557 recv_rank[nrecvs++] = size-nleftover+i; 2558 } 2559 } 2560 2561 /* allocate sbuf_j, sbuf_a */ 2562 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2563 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2564 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2565 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2566 2567 /* copy mat's local entries into the buffers */ 2568 if (reuse == MAT_INITIAL_MATRIX){ 2569 rownz_max = 0; 2570 rptr = sbuf_j; 2571 cols = sbuf_j + rend-rstart + 1; 2572 vals = sbuf_a; 2573 rptr[0] = 0; 2574 for (i=0; i<rend-rstart; i++){ 2575 row = i + rstart; 2576 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2577 ncols = nzA + nzB; 2578 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2579 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2580 /* load the column indices for this row into cols */ 2581 lwrite = 0; 2582 for (l=0; l<nzB; l++) { 2583 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2584 vals[lwrite] = aworkB[l]; 2585 cols[lwrite++] = ctmp; 2586 } 2587 } 2588 for (l=0; l<nzA; l++){ 2589 vals[lwrite] = aworkA[l]; 2590 cols[lwrite++] = cstart + cworkA[l]; 2591 } 2592 for (l=0; l<nzB; l++) { 2593 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2594 vals[lwrite] = aworkB[l]; 2595 cols[lwrite++] = ctmp; 2596 } 2597 } 2598 vals += ncols; 2599 cols += ncols; 2600 rptr[i+1] = rptr[i] + ncols; 2601 if (rownz_max < ncols) rownz_max = ncols; 2602 } 2603 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2604 } else { /* only copy matrix values into sbuf_a */ 2605 rptr = sbuf_j; 2606 vals = sbuf_a; 2607 rptr[0] = 0; 2608 for (i=0; i<rend-rstart; i++){ 2609 row = i + 
rstart; 2610 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2611 ncols = nzA + nzB; 2612 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2613 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2614 lwrite = 0; 2615 for (l=0; l<nzB; l++) { 2616 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2617 } 2618 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2619 for (l=0; l<nzB; l++) { 2620 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2621 } 2622 vals += ncols; 2623 rptr[i+1] = rptr[i] + ncols; 2624 } 2625 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2626 2627 /* send nzlocal to others, and recv other's nzlocal */ 2628 /*--------------------------------------------------*/ 2629 if (reuse == MAT_INITIAL_MATRIX){ 2630 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2631 s_waits2 = s_waits3 + nsends; 2632 s_waits1 = s_waits2 + nsends; 2633 r_waits1 = s_waits1 + nsends; 2634 r_waits2 = r_waits1 + nrecvs; 2635 r_waits3 = r_waits2 + nrecvs; 2636 } else { 2637 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2638 r_waits3 = s_waits3 + nsends; 2639 } 2640 2641 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2642 if (reuse == MAT_INITIAL_MATRIX){ 2643 /* get new tags to keep the communication clean */ 2644 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2645 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2646 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2647 2648 /* post receives of other's nzlocal */ 2649 for (i=0; i<nrecvs; i++){ 2650 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2651 } 2652 /* send nzlocal to others */ 2653 for (i=0; i<nsends; i++){ 2654 sbuf_nz[i] = nzlocal; 2655 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2656 } 2657 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2658 count = nrecvs; 2659 while (count) { 2660 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2661 recv_rank[imdex] = recv_status.MPI_SOURCE; 2662 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2663 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2664 2665 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2666 rbuf_nz[imdex] += i + 2; 2667 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2668 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2669 count--; 2670 } 2671 /* wait on sends of nzlocal */ 2672 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2673 /* send mat->i,j to others, and recv from other's */ 2674 /*------------------------------------------------*/ 2675 for (i=0; i<nsends; i++){ 2676 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2677 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2678 } 2679 /* wait on receives of mat->i,j */ 2680 /*------------------------------*/ 2681 count = nrecvs; 2682 while (count) { 2683 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2684 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank 
%d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2685 count--; 2686 } 2687 /* wait on sends of mat->i,j */ 2688 /*---------------------------*/ 2689 if (nsends) { 2690 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2691 } 2692 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2693 2694 /* post receives, send and receive mat->a */ 2695 /*----------------------------------------*/ 2696 for (imdex=0; imdex<nrecvs; imdex++) { 2697 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2698 } 2699 for (i=0; i<nsends; i++){ 2700 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2701 } 2702 count = nrecvs; 2703 while (count) { 2704 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2705 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2706 count--; 2707 } 2708 if (nsends) { 2709 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2710 } 2711 2712 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2713 2714 /* create redundant matrix */ 2715 /*-------------------------*/ 2716 if (reuse == MAT_INITIAL_MATRIX){ 2717 /* compute rownz_max for preallocation */ 2718 for (imdex=0; imdex<nrecvs; imdex++){ 2719 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2720 rptr = rbuf_j[imdex]; 2721 for (i=0; i<j; i++){ 2722 ncols = rptr[i+1] - rptr[i]; 2723 if (rownz_max < ncols) rownz_max = ncols; 2724 } 2725 } 2726 2727 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2728 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2729 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs); CHKERRQ(ierr); 2730 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2731 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2732 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2733 } else { 2734 C = *matredundant; 2735 } 2736 2737 /* insert local matrix entries */ 2738 rptr = sbuf_j; 2739 cols = sbuf_j + rend-rstart + 1; 2740 vals = sbuf_a; 2741 for (i=0; i<rend-rstart; i++){ 2742 row = i + rstart; 2743 ncols = rptr[i+1] - rptr[i]; 2744 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2745 vals += ncols; 2746 cols += ncols; 2747 } 2748 /* insert received matrix entries */ 2749 for (imdex=0; imdex<nrecvs; imdex++){ 2750 rstart = rowrange[recv_rank[imdex]]; 2751 rend = rowrange[recv_rank[imdex]+1]; 2752 rptr = rbuf_j[imdex]; 2753 cols = rbuf_j[imdex] + rend-rstart + 1; 2754 vals = rbuf_a[imdex]; 2755 for (i=0; i<rend-rstart; i++){ 2756 row = i + rstart; 2757 ncols = rptr[i+1] - rptr[i]; 2758 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2759 vals += ncols; 2760 cols += ncols; 2761 } 2762 } 2763 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2764 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2765 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2766 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2767 if (reuse == MAT_INITIAL_MATRIX) { 2768 PetscContainer container; 2769 *matredundant = C; 2770 /* create a supporting struct and attach it to C for reuse */ 2771 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2772 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 
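/*
   The Mat_Redundant struct filled in below is attached to C through this container
   (key "Mat_Redundant") so that a later call with MAT_REUSE_MATRIX can recover the
   send/receive ranks and the packed index/value buffers via PetscObjectQuery() and
   skip the setup communication, resending only the numerical values.  A caller-side
   sketch (illustrative only; the nonzero pattern of mat must be unchanged between calls):

     ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_INITIAL_MATRIX,&Cred);CHKERRQ(ierr);
     ... modify the entries of mat, keeping the same nonzero pattern ...
     ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_REUSE_MATRIX,&Cred);CHKERRQ(ierr);

   MatDestroy_MatRedundant(), installed below as C->ops->destroy, detaches the
   container (freeing these buffers through PetscContainerDestroy_MatRedundant())
   and then calls the original destroy routine.
*/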
2773 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2774 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2775 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2776 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2777 2778 redund->nzlocal = nzlocal; 2779 redund->nsends = nsends; 2780 redund->nrecvs = nrecvs; 2781 redund->send_rank = send_rank; 2782 redund->recv_rank = recv_rank; 2783 redund->sbuf_nz = sbuf_nz; 2784 redund->rbuf_nz = rbuf_nz; 2785 redund->sbuf_j = sbuf_j; 2786 redund->sbuf_a = sbuf_a; 2787 redund->rbuf_j = rbuf_j; 2788 redund->rbuf_a = rbuf_a; 2789 2790 redund->Destroy = C->ops->destroy; 2791 C->ops->destroy = MatDestroy_MatRedundant; 2792 } 2793 PetscFunctionReturn(0); 2794 } 2795 2796 #undef __FUNCT__ 2797 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2798 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2799 { 2800 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2801 PetscErrorCode ierr; 2802 PetscInt i,*idxb = 0; 2803 PetscScalar *va,*vb; 2804 Vec vtmp; 2805 2806 PetscFunctionBegin; 2807 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2808 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2809 if (idx) { 2810 for (i=0; i<A->rmap->n; i++) { 2811 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2812 } 2813 } 2814 2815 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2816 if (idx) { 2817 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2818 } 2819 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2820 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2821 2822 for (i=0; i<A->rmap->n; i++){ 2823 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2824 va[i] = vb[i]; 2825 if (idx) idx[i] = a->garray[idxb[i]]; 2826 } 2827 } 2828 2829 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2830 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2831 ierr = PetscFree(idxb);CHKERRQ(ierr); 2832 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2833 PetscFunctionReturn(0); 2834 } 2835 2836 #undef __FUNCT__ 2837 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2838 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2839 { 2840 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2841 PetscErrorCode ierr; 2842 PetscInt i,*idxb = 0; 2843 PetscScalar *va,*vb; 2844 Vec vtmp; 2845 2846 PetscFunctionBegin; 2847 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2848 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2849 if (idx) { 2850 for (i=0; i<A->cmap->n; i++) { 2851 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2852 } 2853 } 2854 2855 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2856 if (idx) { 2857 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2858 } 2859 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2860 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2861 2862 for (i=0; i<A->rmap->n; i++){ 2863 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2864 va[i] = vb[i]; 2865 if (idx) idx[i] = a->garray[idxb[i]]; 2866 } 2867 } 2868 2869 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2870 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2871 ierr = PetscFree(idxb);CHKERRQ(ierr); 2872 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2873 PetscFunctionReturn(0); 2874 } 2875 2876 #undef __FUNCT__ 2877 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2878 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2879 { 2880 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2881 PetscInt n = A->rmap->n; 2882 PetscInt cstart = 
A->cmap->rstart; 2883 PetscInt *cmap = mat->garray; 2884 PetscInt *diagIdx, *offdiagIdx; 2885 Vec diagV, offdiagV; 2886 PetscScalar *a, *diagA, *offdiagA; 2887 PetscInt r; 2888 PetscErrorCode ierr; 2889 2890 PetscFunctionBegin; 2891 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2892 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2893 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2894 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2895 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2896 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2897 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2898 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2899 for(r = 0; r < n; ++r) { 2900 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2901 a[r] = diagA[r]; 2902 idx[r] = cstart + diagIdx[r]; 2903 } else { 2904 a[r] = offdiagA[r]; 2905 idx[r] = cmap[offdiagIdx[r]]; 2906 } 2907 } 2908 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2909 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2910 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2911 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2912 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2913 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2914 PetscFunctionReturn(0); 2915 } 2916 2917 #undef __FUNCT__ 2918 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2919 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2920 { 2921 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2922 PetscInt n = A->rmap->n; 2923 PetscInt cstart = A->cmap->rstart; 2924 PetscInt *cmap = mat->garray; 2925 PetscInt *diagIdx, *offdiagIdx; 2926 Vec diagV, offdiagV; 2927 PetscScalar *a, *diagA, *offdiagA; 2928 PetscInt r; 2929 PetscErrorCode ierr; 2930 2931 PetscFunctionBegin; 2932 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2933 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2934 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2935 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2936 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2937 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2938 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2939 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2940 for(r = 0; r < n; ++r) { 2941 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2942 a[r] = diagA[r]; 2943 idx[r] = cstart + diagIdx[r]; 2944 } else { 2945 a[r] = offdiagA[r]; 2946 idx[r] = cmap[offdiagIdx[r]]; 2947 } 2948 } 2949 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2950 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2951 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2952 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2953 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2954 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2955 PetscFunctionReturn(0); 2956 } 2957 2958 #undef __FUNCT__ 2959 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2960 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2961 { 2962 PetscErrorCode ierr; 2963 Mat *dummy; 2964 2965 PetscFunctionBegin; 2966 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2967 *newmat = *dummy; 2968 ierr = PetscFree(dummy);CHKERRQ(ierr); 2969 PetscFunctionReturn(0); 2970 } 2971 2972 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2973 2974 #undef __FUNCT__ 
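/*
   Nearly every routine above relies on the same storage convention: a MATMPIAIJ
   matrix keeps its local rows in two sequential AIJ matrices, aij->A (the
   "diagonal" block, whose column indices are local and become global after adding
   mat->cmap->rstart) and aij->B (the "off-diagonal" block, whose compressed column
   indices are translated to global indices through aij->garray).  The helper below
   is not part of PETSc; it is only a sketch of that access pattern, returning one
   local row with global column indices in block order (diagonal part first,
   unsorted across the two blocks), unlike MatGetRow_MPIAIJ() which merges the two
   parts in increasing column order.  The caller must supply cols[] and vals[]
   large enough to hold the row.
*/
#define __FUNCT__ "MatMPIAIJGetRowGlobal_Example"
static PetscErrorCode MatMPIAIJGetRowGlobal_Example(Mat mat,PetscInt lrow,PetscInt *ncols,PetscInt cols[],PetscScalar vals[])
{
  Mat_MPIAIJ *aij    = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ *a      = (Mat_SeqAIJ*)aij->A->data;   /* diagonal block */
  Mat_SeqAIJ *b      = (Mat_SeqAIJ*)aij->B->data;   /* off-diagonal block */
  PetscInt    cstart = mat->cmap->rstart,j,cnt = 0;

  PetscFunctionBegin;
  for (j=a->i[lrow]; j<a->i[lrow+1]; j++) {
    cols[cnt]   = cstart + a->j[j];                 /* local -> global column index */
    vals[cnt++] = a->a[j];
  }
  for (j=b->i[lrow]; j<b->i[lrow+1]; j++) {
    cols[cnt]   = aij->garray[b->j[j]];             /* compressed -> global column index */
    vals[cnt++] = b->a[j];
  }
  *ncols = cnt;
  PetscFunctionReturn(0);
}
#undef __FUNCT__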
2975 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 2976 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2977 { 2978 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2979 PetscErrorCode ierr; 2980 2981 PetscFunctionBegin; 2982 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2983 PetscFunctionReturn(0); 2984 } 2985 2986 2987 /* -------------------------------------------------------------------*/ 2988 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2989 MatGetRow_MPIAIJ, 2990 MatRestoreRow_MPIAIJ, 2991 MatMult_MPIAIJ, 2992 /* 4*/ MatMultAdd_MPIAIJ, 2993 MatMultTranspose_MPIAIJ, 2994 MatMultTransposeAdd_MPIAIJ, 2995 #ifdef PETSC_HAVE_PBGL 2996 MatSolve_MPIAIJ, 2997 #else 2998 0, 2999 #endif 3000 0, 3001 0, 3002 /*10*/ 0, 3003 0, 3004 0, 3005 MatSOR_MPIAIJ, 3006 MatTranspose_MPIAIJ, 3007 /*15*/ MatGetInfo_MPIAIJ, 3008 MatEqual_MPIAIJ, 3009 MatGetDiagonal_MPIAIJ, 3010 MatDiagonalScale_MPIAIJ, 3011 MatNorm_MPIAIJ, 3012 /*20*/ MatAssemblyBegin_MPIAIJ, 3013 MatAssemblyEnd_MPIAIJ, 3014 MatSetOption_MPIAIJ, 3015 MatZeroEntries_MPIAIJ, 3016 /*24*/ MatZeroRows_MPIAIJ, 3017 0, 3018 #ifdef PETSC_HAVE_PBGL 3019 0, 3020 #else 3021 0, 3022 #endif 3023 0, 3024 0, 3025 /*29*/ MatSetUp_MPIAIJ, 3026 #ifdef PETSC_HAVE_PBGL 3027 0, 3028 #else 3029 0, 3030 #endif 3031 0, 3032 0, 3033 0, 3034 /*34*/ MatDuplicate_MPIAIJ, 3035 0, 3036 0, 3037 0, 3038 0, 3039 /*39*/ MatAXPY_MPIAIJ, 3040 MatGetSubMatrices_MPIAIJ, 3041 MatIncreaseOverlap_MPIAIJ, 3042 MatGetValues_MPIAIJ, 3043 MatCopy_MPIAIJ, 3044 /*44*/ MatGetRowMax_MPIAIJ, 3045 MatScale_MPIAIJ, 3046 0, 3047 0, 3048 MatZeroRowsColumns_MPIAIJ, 3049 /*49*/ 0, 3050 0, 3051 0, 3052 0, 3053 0, 3054 /*54*/ MatFDColoringCreate_MPIAIJ, 3055 0, 3056 MatSetUnfactored_MPIAIJ, 3057 0, /* MatPermute_MPIAIJ, impl currently broken */ 3058 0, 3059 /*59*/ MatGetSubMatrix_MPIAIJ, 3060 MatDestroy_MPIAIJ, 3061 MatView_MPIAIJ, 3062 0, 3063 0, 3064 /*64*/ 0, 3065 0, 3066 0, 3067 0, 3068 0, 3069 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3070 MatGetRowMinAbs_MPIAIJ, 3071 0, 3072 MatSetColoring_MPIAIJ, 3073 #if defined(PETSC_HAVE_ADIC) 3074 MatSetValuesAdic_MPIAIJ, 3075 #else 3076 0, 3077 #endif 3078 MatSetValuesAdifor_MPIAIJ, 3079 /*75*/ MatFDColoringApply_AIJ, 3080 0, 3081 0, 3082 0, 3083 0, 3084 /*80*/ 0, 3085 0, 3086 0, 3087 /*83*/ MatLoad_MPIAIJ, 3088 0, 3089 0, 3090 0, 3091 0, 3092 0, 3093 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3094 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3095 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3096 MatPtAP_Basic, 3097 MatPtAPSymbolic_MPIAIJ, 3098 /*94*/ MatPtAPNumeric_MPIAIJ, 3099 0, 3100 0, 3101 0, 3102 0, 3103 /*99*/ 0, 3104 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3105 MatPtAPNumeric_MPIAIJ_MPIAIJ, 3106 MatConjugate_MPIAIJ, 3107 0, 3108 /*104*/MatSetValuesRow_MPIAIJ, 3109 MatRealPart_MPIAIJ, 3110 MatImaginaryPart_MPIAIJ, 3111 0, 3112 0, 3113 /*109*/0, 3114 MatGetRedundantMatrix_MPIAIJ, 3115 MatGetRowMin_MPIAIJ, 3116 0, 3117 0, 3118 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3119 0, 3120 0, 3121 0, 3122 0, 3123 /*119*/0, 3124 0, 3125 0, 3126 0, 3127 MatGetMultiProcBlock_MPIAIJ, 3128 /*124*/MatFindNonZeroRows_MPIAIJ, 3129 MatGetColumnNorms_MPIAIJ, 3130 MatInvertBlockDiagonal_MPIAIJ, 3131 0, 3132 MatGetSubMatricesParallel_MPIAIJ, 3133 /*129*/0, 3134 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3135 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3136 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3137 0, 3138 /*134*/0, 3139 0, 3140 0, 3141 0, 3142 0 3143 }; 3144 3145 /* ----------------------------------------------------------------------------------------*/ 3146 3147 
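/*
   Illustrative sketch (kept inside a comment so it is not compiled into the library): how the
   kernels registered in MatOps_Values above are reached through the generic Mat interface.
   The sizes, the 5/2 preallocation guesses, and the vectors x,y are placeholders, and the usual
   ierr = ...;CHKERRQ(ierr); error checking is elided.

       Mat A;
       MatCreate(PETSC_COMM_WORLD,&A);
       MatSetType(A,MATMPIAIJ);                              installs MatOps_Values as A->ops
       MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);
       MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);
       ... MatSetValues() on locally owned rows ...
       MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);               dispatches to MatAssemblyBegin_MPIAIJ()
       MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);                 dispatches to MatAssemblyEnd_MPIAIJ()
       MatMult(A,x,y);                                       dispatches to MatMult_MPIAIJ()
       MatDestroy(&A);                                       dispatches to MatDestroy_MPIAIJ()
*/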
EXTERN_C_BEGIN 3148 #undef __FUNCT__ 3149 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3150 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3151 { 3152 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3153 PetscErrorCode ierr; 3154 3155 PetscFunctionBegin; 3156 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3157 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3158 PetscFunctionReturn(0); 3159 } 3160 EXTERN_C_END 3161 3162 EXTERN_C_BEGIN 3163 #undef __FUNCT__ 3164 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3165 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3166 { 3167 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3168 PetscErrorCode ierr; 3169 3170 PetscFunctionBegin; 3171 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3172 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3173 PetscFunctionReturn(0); 3174 } 3175 EXTERN_C_END 3176 3177 EXTERN_C_BEGIN 3178 #undef __FUNCT__ 3179 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3180 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3181 { 3182 Mat_MPIAIJ *b; 3183 PetscErrorCode ierr; 3184 PetscInt i; 3185 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3186 3187 PetscFunctionBegin; 3188 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3189 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3190 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3191 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3192 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3193 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3194 3195 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3196 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3197 if (d_nnz) { 3198 for (i=0; i<B->rmap->n; i++) { 3199 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3200 } 3201 } 3202 if (o_nnz) { 3203 for (i=0; i<B->rmap->n; i++) { 3204 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3205 } 3206 } 3207 b = (Mat_MPIAIJ*)B->data; 3208 3209 if (!B->preallocated) { 3210 /* Explicitly create 2 MATSEQAIJ matrices. */ 3211 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3212 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3213 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3214 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3215 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3216 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3217 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3218 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3219 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3220 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3221 } 3222 3223 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3224 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3225 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). 
*/ 3226 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3227 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3228 B->preallocated = PETSC_TRUE; 3229 PetscFunctionReturn(0); 3230 } 3231 EXTERN_C_END 3232 3233 #undef __FUNCT__ 3234 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3235 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3236 { 3237 Mat mat; 3238 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3239 PetscErrorCode ierr; 3240 3241 PetscFunctionBegin; 3242 *newmat = 0; 3243 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3244 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3245 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3246 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3247 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3248 a = (Mat_MPIAIJ*)mat->data; 3249 3250 mat->factortype = matin->factortype; 3251 mat->rmap->bs = matin->rmap->bs; 3252 mat->cmap->bs = matin->cmap->bs; 3253 mat->assembled = PETSC_TRUE; 3254 mat->insertmode = NOT_SET_VALUES; 3255 mat->preallocated = PETSC_TRUE; 3256 3257 a->size = oldmat->size; 3258 a->rank = oldmat->rank; 3259 a->donotstash = oldmat->donotstash; 3260 a->roworiented = oldmat->roworiented; 3261 a->rowindices = 0; 3262 a->rowvalues = 0; 3263 a->getrowactive = PETSC_FALSE; 3264 3265 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3266 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3267 3268 if (oldmat->colmap) { 3269 #if defined (PETSC_USE_CTABLE) 3270 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3271 #else 3272 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3273 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3274 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3275 #endif 3276 } else a->colmap = 0; 3277 if (oldmat->garray) { 3278 PetscInt len; 3279 len = oldmat->B->cmap->n; 3280 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3281 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3282 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3283 } else a->garray = 0; 3284 3285 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3286 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3287 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3288 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3289 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3290 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3291 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3292 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3293 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3294 *newmat = mat; 3295 PetscFunctionReturn(0); 3296 } 3297 3298 3299 3300 #undef __FUNCT__ 3301 #define __FUNCT__ "MatLoad_MPIAIJ" 3302 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3303 { 3304 PetscScalar *vals,*svals; 3305 MPI_Comm comm = ((PetscObject)viewer)->comm; 3306 PetscErrorCode ierr; 3307 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3308 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3309 PetscInt 
header[4],*rowlengths = 0,M,N,m,*cols; 3310 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 3311 PetscInt cend,cstart,n,*rowners,sizesset=1; 3312 int fd; 3313 3314 PetscFunctionBegin; 3315 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3316 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3317 if (!rank) { 3318 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3319 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3320 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3321 } 3322 3323 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3324 3325 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3326 M = header[1]; N = header[2]; 3327 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3328 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3329 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3330 3331 /* If global sizes are set, check if they are consistent with that given in the file */ 3332 if (sizesset) { 3333 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3334 } 3335 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3336 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3337 3338 /* determine ownership of all rows */ 3339 if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */ 3340 else m = newMat->rmap->n; /* Set by user */ 3341 3342 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3343 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3344 3345 /* First process needs enough room for process with most rows */ 3346 if (!rank) { 3347 mmax = rowners[1]; 3348 for (i=2; i<size; i++) { 3349 mmax = PetscMax(mmax,rowners[i]); 3350 } 3351 } else mmax = m; 3352 3353 rowners[0] = 0; 3354 for (i=2; i<=size; i++) { 3355 rowners[i] += rowners[i-1]; 3356 } 3357 rstart = rowners[rank]; 3358 rend = rowners[rank+1]; 3359 3360 /* distribute row lengths to all processors */ 3361 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3362 if (!rank) { 3363 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3364 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3365 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3366 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3367 for (j=0; j<m; j++) { 3368 procsnz[0] += ourlens[j]; 3369 } 3370 for (i=1; i<size; i++) { 3371 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3372 /* calculate the number of nonzeros on each processor */ 3373 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3374 procsnz[i] += rowlengths[j]; 3375 } 3376 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3377 } 3378 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3379 } else { 3380 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3381 } 3382 3383 if (!rank) { 3384 /* determine max buffer needed and allocate it */ 3385 maxnz = 0; 3386 for (i=0; i<size; i++) { 3387 maxnz = PetscMax(maxnz,procsnz[i]); 3388 } 3389 ierr = 
PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3390 3391 /* read in my part of the matrix column indices */ 3392 nz = procsnz[0]; 3393 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3394 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3395 3396 /* read in every one elses and ship off */ 3397 for (i=1; i<size; i++) { 3398 nz = procsnz[i]; 3399 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3400 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3401 } 3402 ierr = PetscFree(cols);CHKERRQ(ierr); 3403 } else { 3404 /* determine buffer space needed for message */ 3405 nz = 0; 3406 for (i=0; i<m; i++) { 3407 nz += ourlens[i]; 3408 } 3409 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3410 3411 /* receive message of column indices*/ 3412 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3413 } 3414 3415 /* determine column ownership if matrix is not square */ 3416 if (N != M) { 3417 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3418 else n = newMat->cmap->n; 3419 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3420 cstart = cend - n; 3421 } else { 3422 cstart = rstart; 3423 cend = rend; 3424 n = cend - cstart; 3425 } 3426 3427 /* loop over local rows, determining number of off diagonal entries */ 3428 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3429 jj = 0; 3430 for (i=0; i<m; i++) { 3431 for (j=0; j<ourlens[i]; j++) { 3432 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3433 jj++; 3434 } 3435 } 3436 3437 for (i=0; i<m; i++) { 3438 ourlens[i] -= offlens[i]; 3439 } 3440 if (!sizesset) { 3441 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3442 } 3443 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3444 3445 for (i=0; i<m; i++) { 3446 ourlens[i] += offlens[i]; 3447 } 3448 3449 if (!rank) { 3450 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3451 3452 /* read in my part of the matrix numerical values */ 3453 nz = procsnz[0]; 3454 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3455 3456 /* insert into matrix */ 3457 jj = rstart; 3458 smycols = mycols; 3459 svals = vals; 3460 for (i=0; i<m; i++) { 3461 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3462 smycols += ourlens[i]; 3463 svals += ourlens[i]; 3464 jj++; 3465 } 3466 3467 /* read in other processors and ship out */ 3468 for (i=1; i<size; i++) { 3469 nz = procsnz[i]; 3470 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3471 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3472 } 3473 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3474 } else { 3475 /* receive numeric values */ 3476 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3477 3478 /* receive message of values*/ 3479 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3480 3481 /* insert into matrix */ 3482 jj = rstart; 3483 smycols = mycols; 3484 svals = vals; 3485 for (i=0; i<m; i++) { 3486 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3487 smycols += ourlens[i]; 3488 svals += ourlens[i]; 3489 jj++; 3490 } 3491 } 3492 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3493 ierr = PetscFree(vals);CHKERRQ(ierr); 3494 ierr = PetscFree(mycols);CHKERRQ(ierr); 3495 ierr = PetscFree(rowners);CHKERRQ(ierr); 3496 3497 ierr = 
MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3498 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3499 PetscFunctionReturn(0); 3500 } 3501 3502 #undef __FUNCT__ 3503 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3504 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3505 { 3506 PetscErrorCode ierr; 3507 IS iscol_local; 3508 PetscInt csize; 3509 3510 PetscFunctionBegin;
3511 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3512 if (call == MAT_REUSE_MATRIX) { 3513 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3514 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3515 } else { 3516 PetscInt cbs; 3517 ierr = ISGetBlockSize(iscol,&cbs); CHKERRQ(ierr); 3518 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3519 ierr = ISSetBlockSize(iscol_local,cbs); CHKERRQ(ierr); 3520 }
3521 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3522 if (call == MAT_INITIAL_MATRIX) { 3523 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3524 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3525 } 3526 PetscFunctionReturn(0); 3527 } 3528
3529 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3530 #undef __FUNCT__ 3531 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3532 /* 3533 Not great since it makes two copies of the submatrix: each process first builds a sequential (SeqAIJ) copy of its piece, 3534 and the final parallel matrix is then assembled by concatenating those local matrices. 3535 Writing it directly would be much like MatGetSubMatrices_MPIAIJ(). 3536 3537 Note: This requires a sequential iscol with all indices. 
3538 */ 3539 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3540 { 3541 PetscErrorCode ierr; 3542 PetscMPIInt rank,size; 3543 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3544 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3545 PetscBool allcolumns, colflag; 3546 Mat M,Mreuse; 3547 MatScalar *vwork,*aa; 3548 MPI_Comm comm = ((PetscObject)mat)->comm; 3549 Mat_SeqAIJ *aij; 3550 3551 3552 PetscFunctionBegin; 3553 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3554 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3555 3556 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3557 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3558 if (colflag && ncol == mat->cmap->N){ 3559 allcolumns = PETSC_TRUE; 3560 } else { 3561 allcolumns = PETSC_FALSE; 3562 } 3563 if (call == MAT_REUSE_MATRIX) { 3564 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3565 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3566 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3567 } else { 3568 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3569 } 3570 3571 /* 3572 m - number of local rows 3573 n - number of columns (same on all processors) 3574 rstart - first row in new global matrix generated 3575 */ 3576 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3577 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3578 if (call == MAT_INITIAL_MATRIX) { 3579 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3580 ii = aij->i; 3581 jj = aij->j; 3582 3583 /* 3584 Determine the number of non-zeros in the diagonal and off-diagonal 3585 portions of the matrix in order to do correct preallocation 3586 */ 3587 3588 /* first get start and end of "diagonal" columns */ 3589 if (csize == PETSC_DECIDE) { 3590 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3591 if (mglobal == n) { /* square matrix */ 3592 nlocal = m; 3593 } else { 3594 nlocal = n/size + ((n % size) > rank); 3595 } 3596 } else { 3597 nlocal = csize; 3598 } 3599 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3600 rstart = rend - nlocal; 3601 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3602 3603 /* next, compute all the lengths */ 3604 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3605 olens = dlens + m; 3606 for (i=0; i<m; i++) { 3607 jend = ii[i+1] - ii[i]; 3608 olen = 0; 3609 dlen = 0; 3610 for (j=0; j<jend; j++) { 3611 if (*jj < rstart || *jj >= rend) olen++; 3612 else dlen++; 3613 jj++; 3614 } 3615 olens[i] = olen; 3616 dlens[i] = dlen; 3617 } 3618 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3619 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3620 ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); 3621 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3622 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3623 ierr = PetscFree(dlens);CHKERRQ(ierr); 3624 } else { 3625 PetscInt ml,nl; 3626 3627 M = *newmat; 3628 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3629 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3630 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3631 /* 3632 The next two lines are needed so 
we may call MatSetValues_MPIAIJ() below directly, 3633 rather than the slower MatSetValues(). 3634 */ 3635 M->was_assembled = PETSC_TRUE; 3636 M->assembled = PETSC_FALSE; 3637 } 3638 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3639 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3640 ii = aij->i; 3641 jj = aij->j; 3642 aa = aij->a; 3643 for (i=0; i<m; i++) { 3644 row = rstart + i; 3645 nz = ii[i+1] - ii[i]; 3646 cwork = jj; jj += nz; 3647 vwork = aa; aa += nz; 3648 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3649 } 3650 3651 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3652 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3653 *newmat = M; 3654 3655 /* save submatrix used in processor for next request */ 3656 if (call == MAT_INITIAL_MATRIX) { 3657 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3658 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3659 } 3660 3661 PetscFunctionReturn(0); 3662 } 3663 3664 EXTERN_C_BEGIN 3665 #undef __FUNCT__ 3666 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3667 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3668 { 3669 PetscInt m,cstart, cend,j,nnz,i,d; 3670 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3671 const PetscInt *JJ; 3672 PetscScalar *values; 3673 PetscErrorCode ierr; 3674 3675 PetscFunctionBegin; 3676 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3677 3678 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3679 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3680 m = B->rmap->n; 3681 cstart = B->cmap->rstart; 3682 cend = B->cmap->rend; 3683 rstart = B->rmap->rstart; 3684 3685 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3686 3687 #if defined(PETSC_USE_DEBUGGING) 3688 for (i=0; i<m; i++) { 3689 nnz = Ii[i+1]- Ii[i]; 3690 JJ = J + Ii[i]; 3691 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3692 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3693 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3694 } 3695 #endif 3696 3697 for (i=0; i<m; i++) { 3698 nnz = Ii[i+1]- Ii[i]; 3699 JJ = J + Ii[i]; 3700 nnz_max = PetscMax(nnz_max,nnz); 3701 d = 0; 3702 for (j=0; j<nnz; j++) { 3703 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3704 } 3705 d_nnz[i] = d; 3706 o_nnz[i] = nnz - d; 3707 } 3708 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3709 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3710 3711 if (v) values = (PetscScalar*)v; 3712 else { 3713 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3714 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3715 } 3716 3717 for (i=0; i<m; i++) { 3718 ii = i + rstart; 3719 nnz = Ii[i+1]- Ii[i]; 3720 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? 
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3721 } 3722 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3723 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3724 3725 if (!v) { 3726 ierr = PetscFree(values);CHKERRQ(ierr); 3727 } 3728 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3729 PetscFunctionReturn(0); 3730 } 3731 EXTERN_C_END 3732 3733 #undef __FUNCT__ 3734 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3735 /*@ 3736 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3737 (the default parallel PETSc format). 3738 3739 Collective on MPI_Comm 3740 3741 Input Parameters: 3742 + B - the matrix 3743 . i - the indices into j for the start of each local row (starts with zero) 3744 . j - the column indices for each local row (starts with zero) 3745 - v - optional values in the matrix 3746 3747 Level: developer 3748 3749 Notes: 3750 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3751 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3752 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3753 3754 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3755 3756 The format which is used for the sparse matrix input, is equivalent to a 3757 row-major ordering.. i.e for the following matrix, the input data expected is 3758 as shown: 3759 3760 1 0 0 3761 2 0 3 P0 3762 ------- 3763 4 5 6 P1 3764 3765 Process0 [P0]: rows_owned=[0,1] 3766 i = {0,1,3} [size = nrow+1 = 2+1] 3767 j = {0,0,2} [size = nz = 6] 3768 v = {1,2,3} [size = nz = 6] 3769 3770 Process1 [P1]: rows_owned=[2] 3771 i = {0,3} [size = nrow+1 = 1+1] 3772 j = {0,1,2} [size = nz = 6] 3773 v = {4,5,6} [size = nz = 6] 3774 3775 .keywords: matrix, aij, compressed row, sparse, parallel 3776 3777 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3778 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3779 @*/ 3780 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3781 { 3782 PetscErrorCode ierr; 3783 3784 PetscFunctionBegin; 3785 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3786 PetscFunctionReturn(0); 3787 } 3788 3789 #undef __FUNCT__ 3790 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3791 /*@C 3792 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3793 (the default parallel PETSc format). For good matrix assembly performance 3794 the user should preallocate the matrix storage by setting the parameters 3795 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3796 performance can be increased by more than a factor of 50. 3797 3798 Collective on MPI_Comm 3799 3800 Input Parameters: 3801 + A - the matrix 3802 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3803 (same value is used for all local rows) 3804 . d_nnz - array containing the number of nonzeros in the various rows of the 3805 DIAGONAL portion of the local submatrix (possibly different for each row) 3806 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3807 The size of this array is equal to the number of local rows, i.e 'm'. 
3808 For matrices that will be factored, you must leave room for (and set) 3809 the diagonal entry even if it is zero. 3810 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3811 submatrix (same value is used for all local rows). 3812 - o_nnz - array containing the number of nonzeros in the various rows of the 3813 OFF-DIAGONAL portion of the local submatrix (possibly different for 3814 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3815 structure. The size of this array is equal to the number 3816 of local rows, i.e 'm'. 3817 3818 If the *_nnz parameter is given then the *_nz parameter is ignored 3819 3820 The AIJ format (also called the Yale sparse matrix format or 3821 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3822 storage. The stored row and column indices begin with zero. 3823 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3824 3825 The parallel matrix is partitioned such that the first m0 rows belong to 3826 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3827 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3828 3829 The DIAGONAL portion of the local submatrix of a processor can be defined 3830 as the submatrix which is obtained by extraction the part corresponding to 3831 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3832 first row that belongs to the processor, r2 is the last row belonging to 3833 the this processor, and c1-c2 is range of indices of the local part of a 3834 vector suitable for applying the matrix to. This is an mxn matrix. In the 3835 common case of a square matrix, the row and column ranges are the same and 3836 the DIAGONAL part is also square. The remaining portion of the local 3837 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3838 3839 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3840 3841 You can call MatGetInfo() to get information on how effective the preallocation was; 3842 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3843 You can also run with the option -info and look for messages with the string 3844 malloc in them to see if additional memory allocation was needed. 3845 3846 Example usage: 3847 3848 Consider the following 8x8 matrix with 34 non-zero values, that is 3849 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3850 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3851 as follows: 3852 3853 .vb 3854 1 2 0 | 0 3 0 | 0 4 3855 Proc0 0 5 6 | 7 0 0 | 8 0 3856 9 0 10 | 11 0 0 | 12 0 3857 ------------------------------------- 3858 13 0 14 | 15 16 17 | 0 0 3859 Proc1 0 18 0 | 19 20 21 | 0 0 3860 0 0 0 | 22 23 0 | 24 0 3861 ------------------------------------- 3862 Proc2 25 26 27 | 0 0 28 | 29 0 3863 30 0 0 | 31 32 33 | 0 34 3864 .ve 3865 3866 This can be represented as a collection of submatrices as: 3867 3868 .vb 3869 A B C 3870 D E F 3871 G H I 3872 .ve 3873 3874 Where the submatrices A,B,C are owned by proc0, D,E,F are 3875 owned by proc1, G,H,I are owned by proc2. 3876 3877 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3878 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3879 The 'M','N' parameters are 8,8, and have the same values on all procs. 3880 3881 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3882 submatrices [A], [E], [I] respectively. 
The OFF-DIAGONAL submatrices 3883 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3884 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3885 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3886 matrix, ans [DF] as another SeqAIJ matrix. 3887 3888 When d_nz, o_nz parameters are specified, d_nz storage elements are 3889 allocated for every row of the local diagonal submatrix, and o_nz 3890 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3891 One way to choose d_nz and o_nz is to use the max nonzerors per local 3892 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3893 In this case, the values of d_nz,o_nz are: 3894 .vb 3895 proc0 : dnz = 2, o_nz = 2 3896 proc1 : dnz = 3, o_nz = 2 3897 proc2 : dnz = 1, o_nz = 4 3898 .ve 3899 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3900 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3901 for proc3. i.e we are using 12+15+10=37 storage locations to store 3902 34 values. 3903 3904 When d_nnz, o_nnz parameters are specified, the storage is specified 3905 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3906 In the above case the values for d_nnz,o_nnz are: 3907 .vb 3908 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3909 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3910 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3911 .ve 3912 Here the space allocated is sum of all the above values i.e 34, and 3913 hence pre-allocation is perfect. 3914 3915 Level: intermediate 3916 3917 .keywords: matrix, aij, compressed row, sparse, parallel 3918 3919 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3920 MPIAIJ, MatGetInfo() 3921 @*/ 3922 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3923 { 3924 PetscErrorCode ierr; 3925 3926 PetscFunctionBegin; 3927 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3928 PetscValidType(B,1); 3929 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3930 PetscFunctionReturn(0); 3931 } 3932 3933 #undef __FUNCT__ 3934 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3935 /*@ 3936 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3937 CSR format the local rows. 3938 3939 Collective on MPI_Comm 3940 3941 Input Parameters: 3942 + comm - MPI communicator 3943 . m - number of local rows (Cannot be PETSC_DECIDE) 3944 . n - This value should be the same as the local size used in creating the 3945 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3946 calculated if N is given) For square matrices n is almost always m. 3947 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3948 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3949 . i - row indices 3950 . j - column indices 3951 - a - matrix values 3952 3953 Output Parameter: 3954 . mat - the matrix 3955 3956 Level: intermediate 3957 3958 Notes: 3959 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3960 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3961 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 
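
       A minimal per-process calling sketch for the example layout shown below (here the matrix
       is 3 by 3, so M=N=3; process 0 passes m=2 and process 1 passes m=1, together with the
       local i, j, v arrays listed below for that process; the names m, i, j, v and the
       PETSC_COMM_WORLD communicator are placeholders, and n is left as PETSC_DECIDE):
.vb
       Mat A;
       ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,m,PETSC_DECIDE,3,3,i,j,v,&A);CHKERRQ(ierr);
.ve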
3962 3963 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3964 3965 The format which is used for the sparse matrix input, is equivalent to a 3966 row-major ordering.. i.e for the following matrix, the input data expected is 3967 as shown: 3968 3969 1 0 0 3970 2 0 3 P0 3971 ------- 3972 4 5 6 P1 3973 3974 Process0 [P0]: rows_owned=[0,1] 3975 i = {0,1,3} [size = nrow+1 = 2+1] 3976 j = {0,0,2} [size = nz = 6] 3977 v = {1,2,3} [size = nz = 6] 3978 3979 Process1 [P1]: rows_owned=[2] 3980 i = {0,3} [size = nrow+1 = 1+1] 3981 j = {0,1,2} [size = nz = 6] 3982 v = {4,5,6} [size = nz = 6] 3983 3984 .keywords: matrix, aij, compressed row, sparse, parallel 3985 3986 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3987 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3988 @*/ 3989 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3990 { 3991 PetscErrorCode ierr; 3992 3993 PetscFunctionBegin; 3994 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3995 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3996 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3997 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3998 /* ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); */ 3999 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4000 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4001 PetscFunctionReturn(0); 4002 } 4003 4004 #undef __FUNCT__ 4005 #define __FUNCT__ "MatCreateAIJ" 4006 /*@C 4007 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4008 (the default parallel PETSc format). For good matrix assembly performance 4009 the user should preallocate the matrix storage by setting the parameters 4010 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4011 performance can be increased by more than a factor of 50. 4012 4013 Collective on MPI_Comm 4014 4015 Input Parameters: 4016 + comm - MPI communicator 4017 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4018 This value should be the same as the local size used in creating the 4019 y vector for the matrix-vector product y = Ax. 4020 . n - This value should be the same as the local size used in creating the 4021 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4022 calculated if N is given) For square matrices n is almost always m. 4023 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4024 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4025 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4026 (same value is used for all local rows) 4027 . d_nnz - array containing the number of nonzeros in the various rows of the 4028 DIAGONAL portion of the local submatrix (possibly different for each row) 4029 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 4030 The size of this array is equal to the number of local rows, i.e 'm'. 4031 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4032 submatrix (same value is used for all local rows). 
4033 - o_nnz - array containing the number of nonzeros in the various rows of the 4034 OFF-DIAGONAL portion of the local submatrix (possibly different for 4035 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 4036 structure. The size of this array is equal to the number 4037 of local rows, i.e 'm'. 4038 4039 Output Parameter: 4040 . A - the matrix 4041 4042 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4043 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4044 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4045 4046 Notes: 4047 If the *_nnz parameter is given then the *_nz parameter is ignored 4048 4049 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4050 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4051 storage requirements for this matrix. 4052 4053 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4054 processor than it must be used on all processors that share the object for 4055 that argument. 4056 4057 The user MUST specify either the local or global matrix dimensions 4058 (possibly both). 4059 4060 The parallel matrix is partitioned across processors such that the 4061 first m0 rows belong to process 0, the next m1 rows belong to 4062 process 1, the next m2 rows belong to process 2 etc.. where 4063 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4064 values corresponding to [m x N] submatrix. 4065 4066 The columns are logically partitioned with the n0 columns belonging 4067 to 0th partition, the next n1 columns belonging to the next 4068 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4069 4070 The DIAGONAL portion of the local submatrix on any given processor 4071 is the submatrix corresponding to the rows and columns m,n 4072 corresponding to the given processor. i.e diagonal matrix on 4073 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4074 etc. The remaining portion of the local submatrix [m x (N-n)] 4075 constitute the OFF-DIAGONAL portion. The example below better 4076 illustrates this concept. 4077 4078 For a square global matrix we define each processor's diagonal portion 4079 to be its local rows and the corresponding columns (a square submatrix); 4080 each processor's off-diagonal portion encompasses the remainder of the 4081 local matrix (a rectangular submatrix). 4082 4083 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4084 4085 When calling this routine with a single process communicator, a matrix of 4086 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4087 type of communicator, use the construction mechanism: 4088 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4089 4090 By default, this format uses inodes (identical nodes) when possible. 4091 We search for consecutive rows with the same nonzero structure, thereby 4092 reusing matrix information to achieve increased efficiency. 4093 4094 Options Database Keys: 4095 + -mat_no_inode - Do not use inodes 4096 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4097 - -mat_aij_oneindex - Internally use indexing starting at 1 4098 rather than 0. Note that when calling MatSetValues(), 4099 the user still MUST index entries starting at 0! 4100 4101 4102 Example usage: 4103 4104 Consider the following 8x8 matrix with 34 non-zero values, that is 4105 assembled across 3 processors. 
Lets assume that proc0 owns 3 rows, 4106 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4107 as follows: 4108 4109 .vb 4110 1 2 0 | 0 3 0 | 0 4 4111 Proc0 0 5 6 | 7 0 0 | 8 0 4112 9 0 10 | 11 0 0 | 12 0 4113 ------------------------------------- 4114 13 0 14 | 15 16 17 | 0 0 4115 Proc1 0 18 0 | 19 20 21 | 0 0 4116 0 0 0 | 22 23 0 | 24 0 4117 ------------------------------------- 4118 Proc2 25 26 27 | 0 0 28 | 29 0 4119 30 0 0 | 31 32 33 | 0 34 4120 .ve 4121 4122 This can be represented as a collection of submatrices as: 4123 4124 .vb 4125 A B C 4126 D E F 4127 G H I 4128 .ve 4129 4130 Where the submatrices A,B,C are owned by proc0, D,E,F are 4131 owned by proc1, G,H,I are owned by proc2. 4132 4133 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4134 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4135 The 'M','N' parameters are 8,8, and have the same values on all procs. 4136 4137 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4138 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4139 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4140 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4141 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4142 matrix, ans [DF] as another SeqAIJ matrix. 4143 4144 When d_nz, o_nz parameters are specified, d_nz storage elements are 4145 allocated for every row of the local diagonal submatrix, and o_nz 4146 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4147 One way to choose d_nz and o_nz is to use the max nonzerors per local 4148 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4149 In this case, the values of d_nz,o_nz are: 4150 .vb 4151 proc0 : dnz = 2, o_nz = 2 4152 proc1 : dnz = 3, o_nz = 2 4153 proc2 : dnz = 1, o_nz = 4 4154 .ve 4155 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4156 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4157 for proc3. i.e we are using 12+15+10=37 storage locations to store 4158 34 values. 4159 4160 When d_nnz, o_nnz parameters are specified, the storage is specified 4161 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4162 In the above case the values for d_nnz,o_nnz are: 4163 .vb 4164 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4165 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4166 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4167 .ve 4168 Here the space allocated is sum of all the above values i.e 34, and 4169 hence pre-allocation is perfect. 
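
   For example, a call sketch for proc0 in the layout above (each process passes its own m, n,
   d_nnz and o_nnz; M=N=8 on all processes, comm is a placeholder for the shared communicator,
   and the 0 values passed for d_nz/o_nz are ignored because the _nnz arrays are given):
.vb
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     Mat      A;
     ierr = MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve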
4170 4171 Level: intermediate 4172 4173 .keywords: matrix, aij, compressed row, sparse, parallel 4174 4175 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4176 MPIAIJ, MatCreateMPIAIJWithArrays() 4177 @*/ 4178 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4179 { 4180 PetscErrorCode ierr; 4181 PetscMPIInt size; 4182 4183 PetscFunctionBegin; 4184 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4185 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4186 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4187 if (size > 1) { 4188 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4189 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4190 } else { 4191 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4192 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4193 } 4194 PetscFunctionReturn(0); 4195 } 4196 4197 #undef __FUNCT__ 4198 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4199 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 4200 { 4201 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4202 4203 PetscFunctionBegin; 4204 *Ad = a->A; 4205 *Ao = a->B; 4206 *colmap = a->garray; 4207 PetscFunctionReturn(0); 4208 } 4209 4210 #undef __FUNCT__ 4211 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4212 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4213 { 4214 PetscErrorCode ierr; 4215 PetscInt i; 4216 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4217 4218 PetscFunctionBegin; 4219 if (coloring->ctype == IS_COLORING_GLOBAL) { 4220 ISColoringValue *allcolors,*colors; 4221 ISColoring ocoloring; 4222 4223 /* set coloring for diagonal portion */ 4224 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4225 4226 /* set coloring for off-diagonal portion */ 4227 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4228 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4229 for (i=0; i<a->B->cmap->n; i++) { 4230 colors[i] = allcolors[a->garray[i]]; 4231 } 4232 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4233 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4234 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4235 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4236 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4237 ISColoringValue *colors; 4238 PetscInt *larray; 4239 ISColoring ocoloring; 4240 4241 /* set coloring for diagonal portion */ 4242 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4243 for (i=0; i<a->A->cmap->n; i++) { 4244 larray[i] = i + A->cmap->rstart; 4245 } 4246 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4247 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4248 for (i=0; i<a->A->cmap->n; i++) { 4249 colors[i] = coloring->colors[larray[i]]; 4250 } 4251 ierr = PetscFree(larray);CHKERRQ(ierr); 4252 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4253 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4254 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4255 4256 /* set coloring for off-diagonal portion */ 4257 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4258 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4259 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4260 for (i=0; i<a->B->cmap->n; i++) { 4261 colors[i] = coloring->colors[larray[i]]; 4262 } 4263 ierr = PetscFree(larray);CHKERRQ(ierr); 4264 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4265 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4266 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4267 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4268 4269 PetscFunctionReturn(0); 4270 } 4271 4272 #if defined(PETSC_HAVE_ADIC) 4273 #undef __FUNCT__ 4274 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 4275 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 4276 { 4277 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4278 PetscErrorCode ierr; 4279 4280 PetscFunctionBegin; 4281 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 4282 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 4283 PetscFunctionReturn(0); 4284 } 4285 #endif 4286 4287 #undef __FUNCT__ 4288 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4289 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4290 { 4291 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4292 PetscErrorCode ierr; 4293 4294 PetscFunctionBegin; 4295 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4296 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4297 PetscFunctionReturn(0); 4298 } 4299 4300 #undef __FUNCT__ 4301 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4302 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4303 { 4304 PetscErrorCode ierr; 4305 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4306 PetscInt *indx; 4307 4308 PetscFunctionBegin; 4309 /* This routine will ONLY return MPIAIJ type matrix */ 4310 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4311 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4312 if (n == PETSC_DECIDE){ 4313 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4314 } 4315 /* Check sum(n) = N */ 4316 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4317 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4318 4319 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4320 rstart -= m; 4321 4322 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4323 for (i=0;i<m;i++) { 4324 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4325 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4326 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4327 } 4328 4329 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4330 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE); CHKERRQ(ierr); 4331 ierr = MatSetBlockSizes(*outmat,bs,cbs); CHKERRQ(ierr); 4332 ierr = MatSetType(*outmat,MATMPIAIJ); CHKERRQ(ierr); 4333 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4334 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4335 PetscFunctionReturn(0); 4336 } 4337 4338 #undef __FUNCT__ 4339 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4340 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4341 { 4342 PetscErrorCode ierr; 4343 PetscInt 
m,N,i,rstart,nnz,Ii; 4344 PetscInt *indx; 4345 PetscScalar *values; 4346 4347 PetscFunctionBegin; 4348 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4349 ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4350 for (i=0;i<m;i++) { 4351 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4352 Ii = i + rstart; 4353 ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4354 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4355 } 4356 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4357 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4358 PetscFunctionReturn(0); 4359 } 4360 4361 #undef __FUNCT__ 4362 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4363 /*@ 4364 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4365 matrices from each processor 4366 4367 Collective on MPI_Comm 4368 4369 Input Parameters: 4370 + comm - the communicators the parallel matrix will live on 4371 . inmat - the input sequential matrices 4372 . n - number of local columns (or PETSC_DECIDE) 4373 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4374 4375 Output Parameter: 4376 . outmat - the parallel matrix generated 4377 4378 Level: advanced 4379 4380 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4381 4382 @*/ 4383 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4384 { 4385 PetscErrorCode ierr; 4386 4387 PetscFunctionBegin; 4388 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4389 if (scall == MAT_INITIAL_MATRIX){ 4390 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4391 } 4392 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4393 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4394 PetscFunctionReturn(0); 4395 } 4396 4397 #undef __FUNCT__ 4398 #define __FUNCT__ "MatFileSplit" 4399 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4400 { 4401 PetscErrorCode ierr; 4402 PetscMPIInt rank; 4403 PetscInt m,N,i,rstart,nnz; 4404 size_t len; 4405 const PetscInt *indx; 4406 PetscViewer out; 4407 char *name; 4408 Mat B; 4409 const PetscScalar *values; 4410 4411 PetscFunctionBegin; 4412 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4413 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4414 /* Should this be the type of the diagonal block of A? 
*/ 4415 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4416 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4417 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4418 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4419 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4420 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4421 for (i=0;i<m;i++) { 4422 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4423 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4424 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4425 } 4426 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4427 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4428 4429 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4430 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4431 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4432 sprintf(name,"%s.%d",outfile,rank); 4433 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4434 ierr = PetscFree(name); 4435 ierr = MatView(B,out);CHKERRQ(ierr); 4436 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4437 ierr = MatDestroy(&B);CHKERRQ(ierr); 4438 PetscFunctionReturn(0); 4439 } 4440 4441 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4442 #undef __FUNCT__ 4443 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4444 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4445 { 4446 PetscErrorCode ierr; 4447 Mat_Merge_SeqsToMPI *merge; 4448 PetscContainer container; 4449 4450 PetscFunctionBegin; 4451 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4452 if (container) { 4453 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4454 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4455 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4456 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4457 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4458 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4459 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4460 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4461 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4462 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4463 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4464 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4465 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4466 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4467 ierr = PetscFree(merge);CHKERRQ(ierr); 4468 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4469 } 4470 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4471 PetscFunctionReturn(0); 4472 } 4473 4474 #include <../src/mat/utils/freespace.h> 4475 #include <petscbt.h> 4476 4477 #undef __FUNCT__ 4478 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4479 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4480 { 4481 PetscErrorCode ierr; 4482 MPI_Comm comm=((PetscObject)mpimat)->comm; 4483 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4484 PetscMPIInt size,rank,taga,*len_s; 4485 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4486 PetscInt proc,m; 4487 PetscInt **buf_ri,**buf_rj; 4488 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4489 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4490 MPI_Request *s_waits,*r_waits; 4491 MPI_Status *status; 4492 MatScalar *aa=a->a; 4493 MatScalar **abuf_r,*ba_i; 4494 Mat_Merge_SeqsToMPI *merge; 4495 PetscContainer container; 4496 4497 PetscFunctionBegin; 4498 ierr = 
PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4499 4500 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4501 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4502 4503 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4504 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4505 4506 bi = merge->bi; 4507 bj = merge->bj; 4508 buf_ri = merge->buf_ri; 4509 buf_rj = merge->buf_rj; 4510 4511 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4512 owners = merge->rowmap->range; 4513 len_s = merge->len_s; 4514 4515 /* send and recv matrix values */ 4516 /*-----------------------------*/ 4517 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4518 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4519 4520 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4521 for (proc=0,k=0; proc<size; proc++){ 4522 if (!len_s[proc]) continue; 4523 i = owners[proc]; 4524 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4525 k++; 4526 } 4527 4528 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4529 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4530 ierr = PetscFree(status);CHKERRQ(ierr); 4531 4532 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4533 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4534 4535 /* insert mat values of mpimat */ 4536 /*----------------------------*/ 4537 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4538 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4539 4540 for (k=0; k<merge->nrecv; k++){ 4541 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4542 nrows = *(buf_ri_k[k]); 4543 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4544 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4545 } 4546 4547 /* set values of ba */ 4548 m = merge->rowmap->n; 4549 for (i=0; i<m; i++) { 4550 arow = owners[rank] + i; 4551 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4552 bnzi = bi[i+1] - bi[i]; 4553 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4554 4555 /* add local non-zero vals of this proc's seqmat into ba */ 4556 anzi = ai[arow+1] - ai[arow]; 4557 aj = a->j + ai[arow]; 4558 aa = a->a + ai[arow]; 4559 nextaj = 0; 4560 for (j=0; nextaj<anzi; j++){ 4561 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4562 ba_i[j] += aa[nextaj++]; 4563 } 4564 } 4565 4566 /* add received vals into ba */ 4567 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4568 /* i-th row */ 4569 if (i == *nextrow[k]) { 4570 anzi = *(nextai[k]+1) - *nextai[k]; 4571 aj = buf_rj[k] + *(nextai[k]); 4572 aa = abuf_r[k] + *(nextai[k]); 4573 nextaj = 0; 4574 for (j=0; nextaj<anzi; j++){ 4575 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4576 ba_i[j] += aa[nextaj++]; 4577 } 4578 } 4579 nextrow[k]++; nextai[k]++; 4580 } 4581 } 4582 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4583 } 4584 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4585 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4586 4587 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4588 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4589 ierr = 
PetscFree(ba_i);CHKERRQ(ierr); 4590 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4591 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4592 PetscFunctionReturn(0); 4593 } 4594 4595 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4596 4597 #undef __FUNCT__ 4598 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4599 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4600 { 4601 PetscErrorCode ierr; 4602 Mat B_mpi; 4603 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4604 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4605 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4606 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4607 PetscInt len,proc,*dnz,*onz,bs,cbs; 4608 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4609 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4610 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4611 MPI_Status *status; 4612 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4613 PetscBT lnkbt; 4614 Mat_Merge_SeqsToMPI *merge; 4615 PetscContainer container; 4616 4617 PetscFunctionBegin; 4618 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4619 4620 /* make sure it is a PETSc comm */ 4621 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4622 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4623 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4624 4625 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4626 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4627 4628 /* determine row ownership */ 4629 /*---------------------------------------------------------*/ 4630 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4631 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4632 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4633 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4634 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4635 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4636 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4637 4638 m = merge->rowmap->n; 4639 M = merge->rowmap->N; 4640 owners = merge->rowmap->range; 4641 4642 /* determine the number of messages to send, their lengths */ 4643 /*---------------------------------------------------------*/ 4644 len_s = merge->len_s; 4645 4646 len = 0; /* length of buf_si[] */ 4647 merge->nsend = 0; 4648 for (proc=0; proc<size; proc++){ 4649 len_si[proc] = 0; 4650 if (proc == rank){ 4651 len_s[proc] = 0; 4652 } else { 4653 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4654 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4655 } 4656 if (len_s[proc]) { 4657 merge->nsend++; 4658 nrows = 0; 4659 for (i=owners[proc]; i<owners[proc+1]; i++){ 4660 if (ai[i+1] > ai[i]) nrows++; 4661 } 4662 len_si[proc] = 2*(nrows+1); 4663 len += len_si[proc]; 4664 } 4665 } 4666 4667 /* determine the number and length of messages to receive for ij-structure */ 4668 /*-------------------------------------------------------------------------*/ 4669 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4670 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4671 4672 /* post the Irecv of j-structure */ 4673 /*-------------------------------*/ 4674 ierr = 
PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4675 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4676 4677 /* post the Isend of j-structure */ 4678 /*--------------------------------*/ 4679 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4680 4681 for (proc=0, k=0; proc<size; proc++){ 4682 if (!len_s[proc]) continue; 4683 i = owners[proc]; 4684 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4685 k++; 4686 } 4687 4688 /* receives and sends of j-structure are complete */ 4689 /*------------------------------------------------*/ 4690 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4691 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4692 4693 /* send and recv i-structure */ 4694 /*---------------------------*/ 4695 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4696 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4697 4698 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4699 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4700 for (proc=0,k=0; proc<size; proc++){ 4701 if (!len_s[proc]) continue; 4702 /* form outgoing message for i-structure: 4703 buf_si[0]: nrows to be sent 4704 [1:nrows]: row index (global) 4705 [nrows+1:2*nrows+1]: i-structure index 4706 */ 4707 /*-------------------------------------------*/ 4708 nrows = len_si[proc]/2 - 1; 4709 buf_si_i = buf_si + nrows+1; 4710 buf_si[0] = nrows; 4711 buf_si_i[0] = 0; 4712 nrows = 0; 4713 for (i=owners[proc]; i<owners[proc+1]; i++){ 4714 anzi = ai[i+1] - ai[i]; 4715 if (anzi) { 4716 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4717 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4718 nrows++; 4719 } 4720 } 4721 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4722 k++; 4723 buf_si += len_si[proc]; 4724 } 4725 4726 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4727 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4728 4729 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4730 for (i=0; i<merge->nrecv; i++){ 4731 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4732 } 4733 4734 ierr = PetscFree(len_si);CHKERRQ(ierr); 4735 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4736 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4737 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4738 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4739 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4740 ierr = PetscFree(status);CHKERRQ(ierr); 4741 4742 /* compute a local seq matrix in each processor */ 4743 /*----------------------------------------------*/ 4744 /* allocate bi array and free space for accumulating nonzero column info */ 4745 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4746 bi[0] = 0; 4747 4748 /* create and initialize a linked list */ 4749 nlnk = N+1; 4750 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4751 4752 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4753 len = 0; 4754 len = ai[owners[rank+1]] - ai[owners[rank]]; 4755 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4756 current_space = free_space; 4757 4758 /* determine 
symbolic info for each local row */
4759   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4760
4761   for (k=0; k<merge->nrecv; k++){
4762     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4763     nrows = *buf_ri_k[k];
4764     nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */
4765     nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4766   }
4767
4768   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4769   len = 0;
4770   for (i=0;i<m;i++) {
4771     bnzi = 0;
4772     /* add local non-zero cols of this proc's seqmat into lnk */
4773     arow = owners[rank] + i;
4774     anzi = ai[arow+1] - ai[arow];
4775     aj = a->j + ai[arow];
4776     ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4777     bnzi += nlnk;
4778     /* add received col data into lnk */
4779     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4780       if (i == *nextrow[k]) { /* i-th row */
4781         anzi = *(nextai[k]+1) - *nextai[k];
4782         aj = buf_rj[k] + *nextai[k];
4783         ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4784         bnzi += nlnk;
4785         nextrow[k]++; nextai[k]++;
4786       }
4787     }
4788     if (len < bnzi) len = bnzi; /* =max(bnzi) */
4789
4790     /* if free space is not available, make more free space */
4791     if (current_space->local_remaining<bnzi) {
4792       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4793       nspacedouble++;
4794     }
4795     /* copy data into free space, then initialize lnk */
4796     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4797     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4798
4799     current_space->array += bnzi;
4800     current_space->local_used += bnzi;
4801     current_space->local_remaining -= bnzi;
4802
4803     bi[i+1] = bi[i] + bnzi;
4804   }
4805
4806   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4807
4808   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4809   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4810   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4811
4812   /* create symbolic parallel matrix B_mpi */
4813   /*---------------------------------------*/
4814   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4815   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4816   if (n==PETSC_DECIDE) {
4817     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4818   } else {
4819     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4820   }
4821   ierr = MatSetBlockSizes(B_mpi,bs,cbs); CHKERRQ(ierr);
4822   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4823   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4824   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4825   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4826
4827   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4828   B_mpi->assembled = PETSC_FALSE;
4829   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4830   merge->bi = bi;
4831   merge->bj = bj;
4832   merge->buf_ri = buf_ri;
4833   merge->buf_rj = buf_rj;
4834   merge->coi = PETSC_NULL;
4835   merge->coj = PETSC_NULL;
4836   merge->owners_co = PETSC_NULL;
4837
4838   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4839
4840   /* attach the supporting struct to B_mpi for reuse */
4841   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4842   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4843   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4844   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4845   *mpimat = B_mpi;
4846
4847   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4848   PetscFunctionReturn(0);
4849 }
4850
4851 #undef __FUNCT__
4852 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4853 /*@C
4854       MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding sequential
4855                  matrices from each processor
4856
4857    Collective on MPI_Comm
4858
4859    Input Parameters:
4860 +    comm - the communicator the parallel matrix will live on
4861 .    seqmat - the input sequential matrix (one per process)
4862 .    m - number of local rows (or PETSC_DECIDE)
4863 .    n - number of local columns (or PETSC_DECIDE)
4864 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4865
4866    Output Parameter:
4867 .    mpimat - the parallel matrix generated
4868
4869     Level: advanced
4870
4871    Notes:
4872      The dimensions of the sequential matrix in each processor MUST be the same.
4873      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4874      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4875 @*/
4876 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4877 {
4878   PetscErrorCode ierr;
4879   PetscMPIInt size;
4880
4881   PetscFunctionBegin;
4882   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4883   if (size == 1){
4884     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4885     if (scall == MAT_INITIAL_MATRIX){
4886       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4887     } else {
4888       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4889     }
4890     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4891     PetscFunctionReturn(0);
4892   }
4893   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4894   if (scall == MAT_INITIAL_MATRIX){
4895     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4896   }
4897   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4898   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4899   PetscFunctionReturn(0);
4900 }
4901
4902 #undef __FUNCT__
4903 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4904 /*@
4905      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4906           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4907           with MatGetSize()
4908
4909     Not Collective
4910
4911    Input Parameters:
4912 +    A - the matrix
4913 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4914
4915    Output Parameter:
4916 .
A_loc - the local sequential matrix generated 4917 4918 Level: developer 4919 4920 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4921 4922 @*/ 4923 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4924 { 4925 PetscErrorCode ierr; 4926 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4927 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4928 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4929 MatScalar *aa=a->a,*ba=b->a,*cam; 4930 PetscScalar *ca; 4931 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4932 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4933 PetscBool match; 4934 4935 PetscFunctionBegin; 4936 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4937 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4938 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4939 if (scall == MAT_INITIAL_MATRIX){ 4940 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4941 ci[0] = 0; 4942 for (i=0; i<am; i++){ 4943 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4944 } 4945 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4946 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4947 k = 0; 4948 for (i=0; i<am; i++) { 4949 ncols_o = bi[i+1] - bi[i]; 4950 ncols_d = ai[i+1] - ai[i]; 4951 /* off-diagonal portion of A */ 4952 for (jo=0; jo<ncols_o; jo++) { 4953 col = cmap[*bj]; 4954 if (col >= cstart) break; 4955 cj[k] = col; bj++; 4956 ca[k++] = *ba++; 4957 } 4958 /* diagonal portion of A */ 4959 for (j=0; j<ncols_d; j++) { 4960 cj[k] = cstart + *aj++; 4961 ca[k++] = *aa++; 4962 } 4963 /* off-diagonal portion of A */ 4964 for (j=jo; j<ncols_o; j++) { 4965 cj[k] = cmap[*bj++]; 4966 ca[k++] = *ba++; 4967 } 4968 } 4969 /* put together the new matrix */ 4970 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4971 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4972 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4973 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4974 mat->free_a = PETSC_TRUE; 4975 mat->free_ij = PETSC_TRUE; 4976 mat->nonew = 0; 4977 } else if (scall == MAT_REUSE_MATRIX){ 4978 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4979 ci = mat->i; cj = mat->j; cam = mat->a; 4980 for (i=0; i<am; i++) { 4981 /* off-diagonal portion of A */ 4982 ncols_o = bi[i+1] - bi[i]; 4983 for (jo=0; jo<ncols_o; jo++) { 4984 col = cmap[*bj]; 4985 if (col >= cstart) break; 4986 *cam++ = *ba++; bj++; 4987 } 4988 /* diagonal portion of A */ 4989 ncols_d = ai[i+1] - ai[i]; 4990 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4991 /* off-diagonal portion of A */ 4992 for (j=jo; j<ncols_o; j++) { 4993 *cam++ = *ba++; bj++; 4994 } 4995 } 4996 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4997 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4998 PetscFunctionReturn(0); 4999 } 5000 5001 #undef __FUNCT__ 5002 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5003 /*@C 5004 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5005 5006 Not Collective 5007 5008 Input Parameters: 5009 + A - the matrix 5010 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5011 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 5012 5013 Output Parameter: 5014 . 
A_loc - the local sequential matrix generated 5015 5016 Level: developer 5017 5018 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5019 5020 @*/ 5021 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5022 { 5023 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5024 PetscErrorCode ierr; 5025 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5026 IS isrowa,iscola; 5027 Mat *aloc; 5028 PetscBool match; 5029 5030 PetscFunctionBegin; 5031 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5032 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5033 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5034 if (!row){ 5035 start = A->rmap->rstart; end = A->rmap->rend; 5036 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5037 } else { 5038 isrowa = *row; 5039 } 5040 if (!col){ 5041 start = A->cmap->rstart; 5042 cmap = a->garray; 5043 nzA = a->A->cmap->n; 5044 nzB = a->B->cmap->n; 5045 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5046 ncols = 0; 5047 for (i=0; i<nzB; i++) { 5048 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5049 else break; 5050 } 5051 imark = i; 5052 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5053 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5054 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5055 } else { 5056 iscola = *col; 5057 } 5058 if (scall != MAT_INITIAL_MATRIX){ 5059 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5060 aloc[0] = *A_loc; 5061 } 5062 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5063 *A_loc = aloc[0]; 5064 ierr = PetscFree(aloc);CHKERRQ(ierr); 5065 if (!row){ 5066 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5067 } 5068 if (!col){ 5069 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5070 } 5071 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5072 PetscFunctionReturn(0); 5073 } 5074 5075 #undef __FUNCT__ 5076 #define __FUNCT__ "MatGetBrowsOfAcols" 5077 /*@C 5078 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5079 5080 Collective on Mat 5081 5082 Input Parameters: 5083 + A,B - the matrices in mpiaij format 5084 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5085 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 5086 5087 Output Parameter: 5088 + rowb, colb - index sets of rows and columns of B to extract 5089 - B_seq - the sequential matrix generated 5090 5091 Level: developer 5092 5093 @*/ 5094 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5095 { 5096 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5097 PetscErrorCode ierr; 5098 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5099 IS isrowb,iscolb; 5100 Mat *bseq=PETSC_NULL; 5101 5102 PetscFunctionBegin; 5103 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5104 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5105 } 5106 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5107 5108 if (scall == MAT_INITIAL_MATRIX){ 5109 start = A->cmap->rstart; 5110 cmap = a->garray; 5111 nzA = a->A->cmap->n; 5112 nzB = a->B->cmap->n; 5113 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5114 ncols = 0; 5115 for (i=0; i<nzB; i++) { /* row < local row index */ 5116 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5117 else break; 5118 } 5119 imark = i; 5120 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5121 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5122 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5123 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5124 } else { 5125 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5126 isrowb = *rowb; iscolb = *colb; 5127 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5128 bseq[0] = *B_seq; 5129 } 5130 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5131 *B_seq = bseq[0]; 5132 ierr = PetscFree(bseq);CHKERRQ(ierr); 5133 if (!rowb){ 5134 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5135 } else { 5136 *rowb = isrowb; 5137 } 5138 if (!colb){ 5139 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5140 } else { 5141 *colb = iscolb; 5142 } 5143 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5144 PetscFunctionReturn(0); 5145 } 5146 5147 #undef __FUNCT__ 5148 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5149 /* 5150 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5151 of the OFF-DIAGONAL portion of local A 5152 5153 Collective on Mat 5154 5155 Input Parameters: 5156 + A,B - the matrices in mpiaij format 5157 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5158 5159 Output Parameter: 5160 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5161 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5162 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 5163 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5164 5165 Level: developer 5166 5167 */ 5168 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5169 { 5170 VecScatter_MPI_General *gen_to,*gen_from; 5171 PetscErrorCode ierr; 5172 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5173 Mat_SeqAIJ *b_oth; 5174 VecScatter ctx=a->Mvctx; 5175 MPI_Comm comm=((PetscObject)ctx)->comm; 5176 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5177 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5178 PetscScalar *rvalues,*svalues; 5179 MatScalar *b_otha,*bufa,*bufA; 5180 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5181 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 5182 MPI_Status *sstatus,rstatus; 5183 PetscMPIInt jj; 5184 PetscInt *cols,sbs,rbs; 5185 PetscScalar *vals; 5186 5187 PetscFunctionBegin; 5188 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5189 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5190 } 5191 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5192 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5193 5194 gen_to = (VecScatter_MPI_General*)ctx->todata; 5195 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5196 rvalues = gen_from->values; /* holds the length of receiving row */ 5197 svalues = gen_to->values; /* holds the length of sending row */ 5198 nrecvs = gen_from->n; 5199 nsends = gen_to->n; 5200 5201 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5202 srow = gen_to->indices; /* local row index to be sent */ 5203 sstarts = gen_to->starts; 5204 sprocs = gen_to->procs; 5205 sstatus = gen_to->sstatus; 5206 sbs = gen_to->bs; 5207 rstarts = gen_from->starts; 5208 rprocs = gen_from->procs; 5209 rbs = gen_from->bs; 5210 5211 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5212 if (scall == MAT_INITIAL_MATRIX){ 5213 /* i-array */ 5214 /*---------*/ 5215 /* post receives */ 5216 for (i=0; i<nrecvs; i++){ 5217 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5218 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5219 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5220 } 5221 5222 /* pack the outgoing message */ 5223 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5224 sstartsj[0] = 0; rstartsj[0] = 0; 5225 len = 0; /* total length of j or a array to be sent */ 5226 k = 0; 5227 for (i=0; i<nsends; i++){ 5228 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5229 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5230 for (j=0; j<nrows; j++) { 5231 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5232 for (l=0; l<sbs; l++){ 5233 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5234 rowlen[j*sbs+l] = ncols; 5235 len += ncols; 5236 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5237 } 5238 k++; 5239 } 5240 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5241 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5242 } 5243 /* 
recvs and sends of i-array are completed */
5244     i = nrecvs;
5245     while (i--) {
5246       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5247     }
5248     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5249
5250     /* allocate buffers for sending j and a arrays */
5251     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5252     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5253
5254     /* create i-array of B_oth */
5255     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5256     b_othi[0] = 0;
5257     len = 0; /* total length of j or a array to be received */
5258     k = 0;
5259     for (i=0; i<nrecvs; i++){
5260       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5261       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5262       for (j=0; j<nrows; j++) {
5263         b_othi[k+1] = b_othi[k] + rowlen[j];
5264         len += rowlen[j]; k++;
5265       }
5266       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5267     }
5268
5269     /* allocate space for j and a arrays of B_oth */
5270     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5271     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5272
5273     /* j-array */
5274     /*---------*/
5275     /* post receives of j-array */
5276     for (i=0; i<nrecvs; i++){
5277       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5278       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5279     }
5280
5281     /* pack the outgoing message j-array */
5282     k = 0;
5283     for (i=0; i<nsends; i++){
5284       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5285       bufJ = bufj+sstartsj[i];
5286       for (j=0; j<nrows; j++) {
5287         row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5288         for (ll=0; ll<sbs; ll++){
5289           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5290           for (l=0; l<ncols; l++){
5291             *bufJ++ = cols[l];
5292           }
5293           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5294         }
5295       }
5296       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5297     }
5298
5299     /* recvs and sends of j-array are completed */
5300     i = nrecvs;
5301     while (i--) {
5302       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5303     }
5304     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5305   } else if (scall == MAT_REUSE_MATRIX){
5306     sstartsj = *startsj_s;
5307     rstartsj = *startsj_r;
5308     bufa = *bufa_ptr;
5309     b_oth = (Mat_SeqAIJ*)(*B_oth)->data;
5310     b_otha = b_oth->a;
5311   } else {
5312     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Invalid MatReuse value");
5313   }
5314
5315   /* a-array */
5316   /*---------*/
5317   /* post receives of a-array */
5318   for (i=0; i<nrecvs; i++){
5319     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5320     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5321   }
5322
5323   /* pack the outgoing message a-array */
5324   k = 0;
5325   for (i=0; i<nsends; i++){
5326     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5327     bufA = bufa+sstartsj[i];
5328     for (j=0; j<nrows; j++) {
5329       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5330       for (ll=0; ll<sbs; ll++){
5331         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5332         for (l=0; l<ncols; l++){
5333           *bufA++ = vals[l];
5334         }
5335         ierr =
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5336 } 5337 } 5338 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5339 } 5340 /* recvs and sends of a-array are completed */ 5341 i = nrecvs; 5342 while (i--) { 5343 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5344 } 5345 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5346 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5347 5348 if (scall == MAT_INITIAL_MATRIX){ 5349 /* put together the new matrix */ 5350 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5351 5352 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5353 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5354 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5355 b_oth->free_a = PETSC_TRUE; 5356 b_oth->free_ij = PETSC_TRUE; 5357 b_oth->nonew = 0; 5358 5359 ierr = PetscFree(bufj);CHKERRQ(ierr); 5360 if (!startsj_s || !bufa_ptr){ 5361 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5362 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5363 } else { 5364 *startsj_s = sstartsj; 5365 *startsj_r = rstartsj; 5366 *bufa_ptr = bufa; 5367 } 5368 } 5369 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5370 PetscFunctionReturn(0); 5371 } 5372 5373 #undef __FUNCT__ 5374 #define __FUNCT__ "MatGetCommunicationStructs" 5375 /*@C 5376 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5377 5378 Not Collective 5379 5380 Input Parameters: 5381 . A - The matrix in mpiaij format 5382 5383 Output Parameter: 5384 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5385 . 
colmap - A map from global column index to local index into lvec 5386 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5387 5388 Level: developer 5389 5390 @*/ 5391 #if defined (PETSC_USE_CTABLE) 5392 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5393 #else 5394 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5395 #endif 5396 { 5397 Mat_MPIAIJ *a; 5398 5399 PetscFunctionBegin; 5400 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5401 PetscValidPointer(lvec, 2); 5402 PetscValidPointer(colmap, 3); 5403 PetscValidPointer(multScatter, 4); 5404 a = (Mat_MPIAIJ *) A->data; 5405 if (lvec) *lvec = a->lvec; 5406 if (colmap) *colmap = a->colmap; 5407 if (multScatter) *multScatter = a->Mvctx; 5408 PetscFunctionReturn(0); 5409 } 5410 5411 EXTERN_C_BEGIN 5412 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*); 5413 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*); 5414 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 5415 EXTERN_C_END 5416 5417 #undef __FUNCT__ 5418 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5419 /* 5420 Computes (B'*A')' since computing B*A directly is untenable 5421 5422 n p p 5423 ( ) ( ) ( ) 5424 m ( A ) * n ( B ) = m ( C ) 5425 ( ) ( ) ( ) 5426 5427 */ 5428 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5429 { 5430 PetscErrorCode ierr; 5431 Mat At,Bt,Ct; 5432 5433 PetscFunctionBegin; 5434 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5435 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5436 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5437 ierr = MatDestroy(&At);CHKERRQ(ierr); 5438 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5439 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5440 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5441 PetscFunctionReturn(0); 5442 } 5443 5444 #undef __FUNCT__ 5445 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5446 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5447 { 5448 PetscErrorCode ierr; 5449 PetscInt m=A->rmap->n,n=B->cmap->n; 5450 Mat Cmat; 5451 5452 PetscFunctionBegin; 5453 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5454 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5455 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5456 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5457 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5458 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5459 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5460 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5461 *C = Cmat; 5462 (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ; 5463 PetscFunctionReturn(0); 5464 } 5465 5466 /* ----------------------------------------------------------------*/ 5467 #undef __FUNCT__ 5468 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5469 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5470 { 5471 PetscErrorCode ierr; 5472 5473 PetscFunctionBegin; 5474 if (scall == MAT_INITIAL_MATRIX){ 5475 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5476 } 5477 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 
5478 PetscFunctionReturn(0); 5479 } 5480 5481 EXTERN_C_BEGIN 5482 #if defined(PETSC_HAVE_MUMPS) 5483 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5484 #endif 5485 #if defined(PETSC_HAVE_PASTIX) 5486 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5487 #endif 5488 #if defined(PETSC_HAVE_SUPERLU_DIST) 5489 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5490 #endif 5491 #if defined(PETSC_HAVE_SPOOLES) 5492 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5493 #endif 5494 EXTERN_C_END 5495 5496 /*MC 5497 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5498 5499 Options Database Keys: 5500 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5501 5502 Level: beginner 5503 5504 .seealso: MatCreateAIJ() 5505 M*/ 5506 5507 EXTERN_C_BEGIN 5508 #undef __FUNCT__ 5509 #define __FUNCT__ "MatCreate_MPIAIJ" 5510 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5511 { 5512 Mat_MPIAIJ *b; 5513 PetscErrorCode ierr; 5514 PetscMPIInt size; 5515 5516 PetscFunctionBegin; 5517 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5518 5519 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5520 B->data = (void*)b; 5521 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5522 B->assembled = PETSC_FALSE; 5523 5524 B->insertmode = NOT_SET_VALUES; 5525 b->size = size; 5526 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5527 5528 /* build cache for off array entries formed */ 5529 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5530 b->donotstash = PETSC_FALSE; 5531 b->colmap = 0; 5532 b->garray = 0; 5533 b->roworiented = PETSC_TRUE; 5534 5535 /* stuff used for matrix vector multiply */ 5536 b->lvec = PETSC_NULL; 5537 b->Mvctx = PETSC_NULL; 5538 5539 /* stuff for MatGetRow() */ 5540 b->rowindices = 0; 5541 b->rowvalues = 0; 5542 b->getrowactive = PETSC_FALSE; 5543 5544 #if defined(PETSC_HAVE_SPOOLES) 5545 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5546 "MatGetFactor_mpiaij_spooles", 5547 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5548 #endif 5549 #if defined(PETSC_HAVE_MUMPS) 5550 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5551 "MatGetFactor_aij_mumps", 5552 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5553 #endif 5554 #if defined(PETSC_HAVE_PASTIX) 5555 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5556 "MatGetFactor_mpiaij_pastix", 5557 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5558 #endif 5559 #if defined(PETSC_HAVE_SUPERLU_DIST) 5560 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5561 "MatGetFactor_mpiaij_superlu_dist", 5562 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5563 #endif 5564 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5565 "MatStoreValues_MPIAIJ", 5566 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5567 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5568 "MatRetrieveValues_MPIAIJ", 5569 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5570 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5571 "MatGetDiagonalBlock_MPIAIJ", 5572 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5573 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5574 "MatIsTranspose_MPIAIJ", 5575 
MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5576 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5577 "MatMPIAIJSetPreallocation_MPIAIJ", 5578 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5579 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5580 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5581 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5582 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5583 "MatDiagonalScaleLocal_MPIAIJ", 5584 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5585 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5586 "MatConvert_MPIAIJ_MPIAIJPERM", 5587 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5588 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5589 "MatConvert_MPIAIJ_MPIAIJCRL", 5590 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5591 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5592 "MatConvert_MPIAIJ_MPISBAIJ", 5593 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5594 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5595 "MatMatMult_MPIDense_MPIAIJ", 5596 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5597 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5598 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5599 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5600 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5601 "MatMatMultNumeric_MPIDense_MPIAIJ", 5602 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5603 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5604 PetscFunctionReturn(0); 5605 } 5606 EXTERN_C_END 5607 5608 #undef __FUNCT__ 5609 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5610 /*@ 5611 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5612 and "off-diagonal" part of the matrix in CSR format. 5613 5614 Collective on MPI_Comm 5615 5616 Input Parameters: 5617 + comm - MPI communicator 5618 . m - number of local rows (Cannot be PETSC_DECIDE) 5619 . n - This value should be the same as the local size used in creating the 5620 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5621 calculated if N is given) For square matrices n is almost always m. 5622 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5623 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5624 . i - row indices for "diagonal" portion of matrix 5625 . j - column indices 5626 . a - matrix values 5627 . oi - row indices for "off-diagonal" portion of matrix 5628 . oj - column indices 5629 - oa - matrix values 5630 5631 Output Parameter: 5632 . mat - the matrix 5633 5634 Level: advanced 5635 5636 Notes: 5637 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5638 must free the arrays once the matrix has been destroyed and not before. 5639 5640 The i and j indices are 0 based 5641 5642 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5643 5644 This sets local rows and cannot be used to set off-processor values. 5645 5646 You cannot later use MatSetValues() to change values in this matrix. 
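
   For illustration, a sketch assuming exactly two MPI processes, each owning one row and one column of
   the 2x2 matrix with 2 on the diagonal and -1 off the diagonal (rank is the calling process's rank in
   PETSC_COMM_WORLD, and error checking is omitted):

      Mat         A;
      PetscInt    i[2] = {0,1},j[1] = {0},oi[2] = {0,1},oj[1];
      PetscScalar a[1] = {2.0},oa[1] = {-1.0};

      oj[0] = rank ? 0 : 1;
      MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);

   Each process contributes one diagonal-block entry (j holds column indices local to the diagonal block)
   and one off-diagonal entry (oj holds global column indices).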
5647 5648 .keywords: matrix, aij, compressed row, sparse, parallel 5649 5650 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5651 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5652 @*/ 5653 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5654 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5655 { 5656 PetscErrorCode ierr; 5657 Mat_MPIAIJ *maij; 5658 5659 PetscFunctionBegin; 5660 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5661 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5662 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5663 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5664 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5665 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5666 maij = (Mat_MPIAIJ*) (*mat)->data; 5667 maij->donotstash = PETSC_TRUE; 5668 (*mat)->preallocated = PETSC_TRUE; 5669 5670 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5671 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5672 5673 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5674 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5675 5676 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5677 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5678 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5679 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5680 5681 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5682 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5683 PetscFunctionReturn(0); 5684 } 5685 5686 /* 5687 Special version for direct calls from Fortran 5688 */ 5689 #include <petsc-private/fortranimpl.h> 5690 5691 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5692 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5693 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5694 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5695 #endif 5696 5697 /* Change these macros so can be used in void function */ 5698 #undef CHKERRQ 5699 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5700 #undef SETERRQ2 5701 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5702 #undef SETERRQ3 5703 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5704 #undef SETERRQ 5705 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5706 5707 EXTERN_C_BEGIN 5708 #undef __FUNCT__ 5709 #define __FUNCT__ "matsetvaluesmpiaij_" 5710 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5711 { 5712 Mat mat = *mmat; 5713 PetscInt m = *mm, n = *mn; 5714 InsertMode addv = *maddv; 5715 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5716 PetscScalar value; 5717 PetscErrorCode ierr; 5718 5719 MatCheckPreallocated(mat,1); 5720 if (mat->insertmode == NOT_SET_VALUES) { 5721 mat->insertmode = addv; 5722 } 5723 #if defined(PETSC_USE_DEBUG) 5724 else if (mat->insertmode != addv) { 5725 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5726 } 5727 #endif 5728 { 5729 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5730 PetscInt cstart = 
mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5731 PetscBool roworiented = aij->roworiented; 5732 5733 /* Some Variables required in the macro */ 5734 Mat A = aij->A; 5735 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5736 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5737 MatScalar *aa = a->a; 5738 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5739 Mat B = aij->B; 5740 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5741 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5742 MatScalar *ba = b->a; 5743 5744 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5745 PetscInt nonew = a->nonew; 5746 MatScalar *ap1,*ap2; 5747 5748 PetscFunctionBegin; 5749 for (i=0; i<m; i++) { 5750 if (im[i] < 0) continue; 5751 #if defined(PETSC_USE_DEBUG) 5752 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5753 #endif 5754 if (im[i] >= rstart && im[i] < rend) { 5755 row = im[i] - rstart; 5756 lastcol1 = -1; 5757 rp1 = aj + ai[row]; 5758 ap1 = aa + ai[row]; 5759 rmax1 = aimax[row]; 5760 nrow1 = ailen[row]; 5761 low1 = 0; 5762 high1 = nrow1; 5763 lastcol2 = -1; 5764 rp2 = bj + bi[row]; 5765 ap2 = ba + bi[row]; 5766 rmax2 = bimax[row]; 5767 nrow2 = bilen[row]; 5768 low2 = 0; 5769 high2 = nrow2; 5770 5771 for (j=0; j<n; j++) { 5772 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5773 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5774 if (in[j] >= cstart && in[j] < cend){ 5775 col = in[j] - cstart; 5776 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5777 } else if (in[j] < 0) continue; 5778 #if defined(PETSC_USE_DEBUG) 5779 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5780 #endif 5781 else { 5782 if (mat->was_assembled) { 5783 if (!aij->colmap) { 5784 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5785 } 5786 #if defined (PETSC_USE_CTABLE) 5787 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5788 col--; 5789 #else 5790 col = aij->colmap[in[j]] - 1; 5791 #endif 5792 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5793 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5794 col = in[j]; 5795 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5796 B = aij->B; 5797 b = (Mat_SeqAIJ*)B->data; 5798 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5799 rp2 = bj + bi[row]; 5800 ap2 = ba + bi[row]; 5801 rmax2 = bimax[row]; 5802 nrow2 = bilen[row]; 5803 low2 = 0; 5804 high2 = nrow2; 5805 bm = aij->B->rmap->n; 5806 ba = b->a; 5807 } 5808 } else col = in[j]; 5809 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5810 } 5811 } 5812 } else { 5813 if (!aij->donotstash) { 5814 if (roworiented) { 5815 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5816 } else { 5817 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5818 } 5819 } 5820 } 5821 }} 5822 PetscFunctionReturnVoid(); 5823 } 5824 EXTERN_C_END 5825 5826
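
/*
   Illustrative usage sketch for MatCreateMPIAIJSumSeqAIJ() (not compiled here; CHKERRQ() error checking
   omitted).  Every process assembles a SeqAIJ matrix of the same global size, here 4x4 with a single
   contribution to entry (0,0); MatCreateMPIAIJSumSeqAIJ() then sums the per-process matrices into one
   parallel MPIAIJ matrix, so the (0,0) entry of mpimat ends up equal to the number of processes.

      Mat         seqmat,mpimat;
      PetscInt    row = 0,col = 0;
      PetscScalar one = 1.0;

      MatCreateSeqAIJ(PETSC_COMM_SELF,4,4,1,PETSC_NULL,&seqmat);
      MatSetValues(seqmat,1,&row,1,&col,&one,ADD_VALUES);
      MatAssemblyBegin(seqmat,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(seqmat,MAT_FINAL_ASSEMBLY);
      MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);

   See the Notes in the MatCreateMPIAIJSumSeqAIJ() man page above regarding the lifetime of seqmat.
*/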