1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petscblaslapack.h> 4 5 /*MC 6 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 7 8 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 9 and MATMPIAIJ otherwise. As a result, for single process communicators, 10 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 11 for communicators controlling multiple processes. It is recommended that you call both of 12 the above preallocation routines for simplicity. 13 14 Options Database Keys: 15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 16 17 Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when 18 enough exist. 19 20 Level: beginner 21 22 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ 23 M*/ 24 25 /*MC 26 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 27 28 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 29 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 30 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 31 for communicators controlling multiple processes. It is recommended that you call both of 32 the above preallocation routines for simplicity. 33 34 Options Database Keys: 35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 36 37 Level: beginner 38 39 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 40 M*/ 41 42 #undef __FUNCT__ 43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ" 44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows) 45 { 46 PetscErrorCode ierr; 47 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 48 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 49 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 50 const PetscInt *ia,*ib; 51 const MatScalar *aa,*bb; 52 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 53 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 54 55 PetscFunctionBegin; 56 *keptrows = 0; 57 ia = a->i; 58 ib = b->i; 59 for (i=0; i<m; i++) { 60 na = ia[i+1] - ia[i]; 61 nb = ib[i+1] - ib[i]; 62 if (!na && !nb) { 63 cnt++; 64 goto ok1; 65 } 66 aa = a->a + ia[i]; 67 for (j=0; j<na; j++) { 68 if (aa[j] != 0.0) goto ok1; 69 } 70 bb = b->a + ib[i]; 71 for (j=0; j <nb; j++) { 72 if (bb[j] != 0.0) goto ok1; 73 } 74 cnt++; 75 ok1:; 76 } 77 ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr); 78 if (!n0rows) PetscFunctionReturn(0); 79 ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 80 cnt = 0; 81 for (i=0; i<m; i++) { 82 na = ia[i+1] - ia[i]; 83 nb = ib[i+1] - ib[i]; 84 if (!na && !nb) continue; 85 aa = a->a + ia[i]; 86 for(j=0; j<na;j++) { 87 if (aa[j] != 0.0) { 88 rows[cnt++] = rstart + i; 89 goto ok2; 90 } 91 } 92 bb = b->a + ib[i]; 93 for (j=0; j<nb; j++) { 94 if (bb[j] != 0.0) { 95 rows[cnt++] = rstart + i; 96 goto ok2; 97 } 98 } 99 ok2:; 100 } 101 ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 102 PetscFunctionReturn(0); 103 } 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 108 { 109 PetscErrorCode ierr; 110 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 111 PetscInt 
i,n,*garray = aij->garray; 112 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 113 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 114 PetscReal *work; 115 116 PetscFunctionBegin; 117 ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr); 118 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 119 ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr); 120 if (type == NORM_2) { 121 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 122 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 123 } 124 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 125 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 126 } 127 } else if (type == NORM_1) { 128 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 129 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 130 } 131 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 132 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 133 } 134 } else if (type == NORM_INFINITY) { 135 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 136 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 137 } 138 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 139 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 140 } 141 142 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType"); 143 if (type == NORM_INFINITY) { 144 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 145 } else { 146 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 147 } 148 ierr = PetscFree(work);CHKERRQ(ierr); 149 if (type == NORM_2) { 150 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 151 } 152 PetscFunctionReturn(0); 153 } 154 155 #undef __FUNCT__ 156 #define __FUNCT__ "MatDistribute_MPIAIJ" 157 /* 158 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 159 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 
160 161 Only for square matrices 162 */ 163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 164 { 165 PetscMPIInt rank,size; 166 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 167 PetscErrorCode ierr; 168 Mat mat; 169 Mat_SeqAIJ *gmata; 170 PetscMPIInt tag; 171 MPI_Status status; 172 PetscBool aij; 173 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 174 175 PetscFunctionBegin; 176 CHKMEMQ; 177 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 178 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 179 if (!rank) { 180 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 181 if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 182 } 183 if (reuse == MAT_INITIAL_MATRIX) { 184 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 185 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 186 ierr = MatSetBlockSizes(mat,gmat->rmap->bs,gmat->cmap->bs);CHKERRQ(ierr); 187 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 188 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 189 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 190 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 191 rowners[0] = 0; 192 for (i=2; i<=size; i++) { 193 rowners[i] += rowners[i-1]; 194 } 195 rstart = rowners[rank]; 196 rend = rowners[rank+1]; 197 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 198 if (!rank) { 199 gmata = (Mat_SeqAIJ*) gmat->data; 200 /* send row lengths to all processors */ 201 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 202 for (i=1; i<size; i++) { 203 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 204 } 205 /* determine number diagonal and off-diagonal counts */ 206 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 207 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 208 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 209 jj = 0; 210 for (i=0; i<m; i++) { 211 for (j=0; j<dlens[i]; j++) { 212 if (gmata->j[jj] < rstart) ld[i]++; 213 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 214 jj++; 215 } 216 } 217 /* send column indices to other processes */ 218 for (i=1; i<size; i++) { 219 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 220 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 221 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 222 } 223 224 /* send numerical values to other processes */ 225 for (i=1; i<size; i++) { 226 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 227 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 228 } 229 gmataa = gmata->a; 230 gmataj = gmata->j; 231 232 } else { 233 /* receive row lengths */ 234 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 235 /* receive column indices */ 236 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 237 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 238 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 239 /* determine number diagonal and off-diagonal counts */ 240 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 241 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 242 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 243 jj = 0; 244 for (i=0; i<m; i++) { 245 for 
(j=0; j<dlens[i]; j++) { 246 if (gmataj[jj] < rstart) ld[i]++; 247 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 248 jj++; 249 } 250 } 251 /* receive numerical values */ 252 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 253 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 254 } 255 /* set preallocation */ 256 for (i=0; i<m; i++) { 257 dlens[i] -= olens[i]; 258 } 259 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 260 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 261 262 for (i=0; i<m; i++) { 263 dlens[i] += olens[i]; 264 } 265 cnt = 0; 266 for (i=0; i<m; i++) { 267 row = rstart + i; 268 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 269 cnt += dlens[i]; 270 } 271 if (rank) { 272 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 273 } 274 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 275 ierr = PetscFree(rowners);CHKERRQ(ierr); 276 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 277 *inmat = mat; 278 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 279 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 280 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 281 mat = *inmat; 282 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 283 if (!rank) { 284 /* send numerical values to other processes */ 285 gmata = (Mat_SeqAIJ*) gmat->data; 286 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 287 gmataa = gmata->a; 288 for (i=1; i<size; i++) { 289 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 290 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 291 } 292 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 293 } else { 294 /* receive numerical values from process 0*/ 295 nz = Ad->nz + Ao->nz; 296 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 297 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 298 } 299 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 300 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 301 ad = Ad->a; 302 ao = Ao->a; 303 if (mat->rmap->n) { 304 i = 0; 305 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 306 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 307 } 308 for (i=1; i<mat->rmap->n; i++) { 309 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 310 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 311 } 312 i--; 313 if (mat->rmap->n) { 314 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 315 } 316 if (rank) { 317 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 318 } 319 } 320 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 321 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 322 CHKMEMQ; 323 PetscFunctionReturn(0); 324 } 325 326 /* 327 Local utility routine that creates a mapping from the global column 328 number to the local number in the off-diagonal part of the local 329 storage of the matrix. 
When PETSC_USE_CTABLE is used this is scalable at 330 a slightly higher hash table cost; without it it is not scalable (each processor 331 has an order N integer array but is fast to acess. 332 */ 333 #undef __FUNCT__ 334 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 335 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 336 { 337 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 338 PetscErrorCode ierr; 339 PetscInt n = aij->B->cmap->n,i; 340 341 PetscFunctionBegin; 342 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 343 #if defined (PETSC_USE_CTABLE) 344 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 345 for (i=0; i<n; i++){ 346 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 347 } 348 #else 349 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 350 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 351 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 352 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 353 #endif 354 PetscFunctionReturn(0); 355 } 356 357 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 358 { \ 359 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 360 lastcol1 = col;\ 361 while (high1-low1 > 5) { \ 362 t = (low1+high1)/2; \ 363 if (rp1[t] > col) high1 = t; \ 364 else low1 = t; \ 365 } \ 366 for (_i=low1; _i<high1; _i++) { \ 367 if (rp1[_i] > col) break; \ 368 if (rp1[_i] == col) { \ 369 if (addv == ADD_VALUES) ap1[_i] += value; \ 370 else ap1[_i] = value; \ 371 goto a_noinsert; \ 372 } \ 373 } \ 374 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 375 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 376 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 377 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 378 N = nrow1++ - 1; a->nz++; high1++; \ 379 /* shift up all the later entries in this row */ \ 380 for (ii=N; ii>=_i; ii--) { \ 381 rp1[ii+1] = rp1[ii]; \ 382 ap1[ii+1] = ap1[ii]; \ 383 } \ 384 rp1[_i] = col; \ 385 ap1[_i] = value; \ 386 a_noinsert: ; \ 387 ailen[row] = nrow1; \ 388 } 389 390 391 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 392 { \ 393 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 394 lastcol2 = col;\ 395 while (high2-low2 > 5) { \ 396 t = (low2+high2)/2; \ 397 if (rp2[t] > col) high2 = t; \ 398 else low2 = t; \ 399 } \ 400 for (_i=low2; _i<high2; _i++) { \ 401 if (rp2[_i] > col) break; \ 402 if (rp2[_i] == col) { \ 403 if (addv == ADD_VALUES) ap2[_i] += value; \ 404 else ap2[_i] = value; \ 405 goto b_noinsert; \ 406 } \ 407 } \ 408 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 409 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 410 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 411 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 412 N = nrow2++ - 1; b->nz++; high2++; \ 413 /* shift up all the later entries in this row */ \ 414 for (ii=N; ii>=_i; ii--) { \ 415 rp2[ii+1] = rp2[ii]; \ 416 ap2[ii+1] = ap2[ii]; \ 417 } \ 418 rp2[_i] = col; \ 419 ap2[_i] = value; \ 420 b_noinsert: ; \ 421 bilen[row] = nrow2; \ 422 } 423 424 #undef __FUNCT__ 425 #define __FUNCT__ 
"MatSetValuesRow_MPIAIJ" 426 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 427 { 428 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 429 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 430 PetscErrorCode ierr; 431 PetscInt l,*garray = mat->garray,diag; 432 433 PetscFunctionBegin; 434 /* code only works for square matrices A */ 435 436 /* find size of row to the left of the diagonal part */ 437 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 438 row = row - diag; 439 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 440 if (garray[b->j[b->i[row]+l]] > diag) break; 441 } 442 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 443 444 /* diagonal part */ 445 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 446 447 /* right of diagonal part */ 448 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 449 PetscFunctionReturn(0); 450 } 451 452 #undef __FUNCT__ 453 #define __FUNCT__ "MatSetValues_MPIAIJ" 454 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 455 { 456 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 457 PetscScalar value; 458 PetscErrorCode ierr; 459 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 460 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 461 PetscBool roworiented = aij->roworiented; 462 463 /* Some Variables required in the macro */ 464 Mat A = aij->A; 465 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 466 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 467 MatScalar *aa = a->a; 468 PetscBool ignorezeroentries = a->ignorezeroentries; 469 Mat B = aij->B; 470 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 471 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 472 MatScalar *ba = b->a; 473 474 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 475 PetscInt nonew; 476 MatScalar *ap1,*ap2; 477 478 PetscFunctionBegin; 479 if (v) PetscValidScalarPointer(v,6); 480 for (i=0; i<m; i++) { 481 if (im[i] < 0) continue; 482 #if defined(PETSC_USE_DEBUG) 483 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 484 #endif 485 if (im[i] >= rstart && im[i] < rend) { 486 row = im[i] - rstart; 487 lastcol1 = -1; 488 rp1 = aj + ai[row]; 489 ap1 = aa + ai[row]; 490 rmax1 = aimax[row]; 491 nrow1 = ailen[row]; 492 low1 = 0; 493 high1 = nrow1; 494 lastcol2 = -1; 495 rp2 = bj + bi[row]; 496 ap2 = ba + bi[row]; 497 rmax2 = bimax[row]; 498 nrow2 = bilen[row]; 499 low2 = 0; 500 high2 = nrow2; 501 502 for (j=0; j<n; j++) { 503 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 504 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 505 if (in[j] >= cstart && in[j] < cend){ 506 col = in[j] - cstart; 507 nonew = a->nonew; 508 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 509 } else if (in[j] < 0) continue; 510 #if defined(PETSC_USE_DEBUG) 511 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 512 #endif 513 else { 514 if (mat->was_assembled) { 515 if (!aij->colmap) { 516 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 517 } 518 #if defined (PETSC_USE_CTABLE) 519 
ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 520 col--; 521 #else 522 col = aij->colmap[in[j]] - 1; 523 #endif 524 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 525 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 526 col = in[j]; 527 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 528 B = aij->B; 529 b = (Mat_SeqAIJ*)B->data; 530 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 531 rp2 = bj + bi[row]; 532 ap2 = ba + bi[row]; 533 rmax2 = bimax[row]; 534 nrow2 = bilen[row]; 535 low2 = 0; 536 high2 = nrow2; 537 bm = aij->B->rmap->n; 538 ba = b->a; 539 } 540 } else col = in[j]; 541 nonew = b->nonew; 542 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 543 } 544 } 545 } else { 546 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 547 if (!aij->donotstash) { 548 mat->assembled = PETSC_FALSE; 549 if (roworiented) { 550 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 551 } else { 552 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 553 } 554 } 555 } 556 } 557 PetscFunctionReturn(0); 558 } 559 560 #undef __FUNCT__ 561 #define __FUNCT__ "MatGetValues_MPIAIJ" 562 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 563 { 564 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 565 PetscErrorCode ierr; 566 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 567 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 568 569 PetscFunctionBegin; 570 for (i=0; i<m; i++) { 571 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 572 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 573 if (idxm[i] >= rstart && idxm[i] < rend) { 574 row = idxm[i] - rstart; 575 for (j=0; j<n; j++) { 576 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 577 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 578 if (idxn[j] >= cstart && idxn[j] < cend){ 579 col = idxn[j] - cstart; 580 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 581 } else { 582 if (!aij->colmap) { 583 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 584 } 585 #if defined (PETSC_USE_CTABLE) 586 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 587 col --; 588 #else 589 col = aij->colmap[idxn[j]] - 1; 590 #endif 591 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 592 else { 593 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 594 } 595 } 596 } 597 } else { 598 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 599 } 600 } 601 PetscFunctionReturn(0); 602 } 603 604 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 605 606 #undef __FUNCT__ 607 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 608 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 609 { 610 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 611 PetscErrorCode ierr; 612 PetscInt nstash,reallocs; 613 InsertMode addv; 614 615 
PetscFunctionBegin; 616 if (aij->donotstash || mat->nooffprocentries) { 617 PetscFunctionReturn(0); 618 } 619 620 /* make sure all processors are either in INSERTMODE or ADDMODE */ 621 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 622 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 623 mat->insertmode = addv; /* in case this processor had no cache */ 624 625 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 626 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 627 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 628 PetscFunctionReturn(0); 629 } 630 631 #undef __FUNCT__ 632 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 633 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 634 { 635 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 636 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 637 PetscErrorCode ierr; 638 PetscMPIInt n; 639 PetscInt i,j,rstart,ncols,flg; 640 PetscInt *row,*col; 641 PetscBool other_disassembled; 642 PetscScalar *val; 643 InsertMode addv = mat->insertmode; 644 645 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 646 PetscFunctionBegin; 647 if (!aij->donotstash && !mat->nooffprocentries) { 648 while (1) { 649 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 650 if (!flg) break; 651 652 for (i=0; i<n;) { 653 /* Now identify the consecutive vals belonging to the same row */ 654 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 655 if (j < n) ncols = j-i; 656 else ncols = n-i; 657 /* Now assemble all these values with a single function call */ 658 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 659 i = j; 660 } 661 } 662 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 663 } 664 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 665 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 666 667 /* determine if any processor has disassembled, if so we must 668 also disassemble ourselfs, in order that we may reassemble. 
*/ 669 /* 670 if nonzero structure of submatrix B cannot change then we know that 671 no processor disassembled thus we can skip this stuff 672 */ 673 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 674 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 675 if (mat->was_assembled && !other_disassembled) { 676 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 677 } 678 } 679 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 680 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 681 } 682 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 683 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 684 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 685 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 686 687 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 688 aij->rowvalues = 0; 689 690 /* used by MatAXPY() */ 691 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 692 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 693 694 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 695 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 696 PetscFunctionReturn(0); 697 } 698 699 #undef __FUNCT__ 700 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 701 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 702 { 703 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 704 PetscErrorCode ierr; 705 706 PetscFunctionBegin; 707 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 708 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 709 PetscFunctionReturn(0); 710 } 711 712 #undef __FUNCT__ 713 #define __FUNCT__ "MatZeroRows_MPIAIJ" 714 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 715 { 716 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 717 PetscErrorCode ierr; 718 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 719 PetscInt i,*owners = A->rmap->range; 720 PetscInt *nprocs,j,idx,nsends,row; 721 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 722 PetscInt *rvalues,count,base,slen,*source; 723 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 724 MPI_Comm comm = ((PetscObject)A)->comm; 725 MPI_Request *send_waits,*recv_waits; 726 MPI_Status recv_status,*send_status; 727 const PetscScalar *xx; 728 PetscScalar *bb; 729 #if defined(PETSC_DEBUG) 730 PetscBool found = PETSC_FALSE; 731 #endif 732 733 PetscFunctionBegin; 734 /* first count number of contributors to each processor */ 735 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 736 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 737 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 738 j = 0; 739 for (i=0; i<N; i++) { 740 if (lastidx > (idx = rows[i])) j = 0; 741 lastidx = idx; 742 for (; j<size; j++) { 743 if (idx >= owners[j] && idx < owners[j+1]) { 744 nprocs[2*j]++; 745 nprocs[2*j+1] = 1; 746 owner[i] = j; 747 #if defined(PETSC_DEBUG) 748 found = PETSC_TRUE; 749 #endif 750 break; 751 } 752 } 753 #if defined(PETSC_DEBUG) 754 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 755 found = PETSC_FALSE; 756 #endif 757 } 758 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 759 760 if (A->nooffproczerorows) { 761 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 762 nrecvs = nsends; 763 nmax = N; 764 } 
else { 765 /* inform other processors of number of messages and max length*/ 766 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 767 } 768 769 /* post receives: */ 770 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 771 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 772 for (i=0; i<nrecvs; i++) { 773 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 774 } 775 776 /* do sends: 777 1) starts[i] gives the starting index in svalues for stuff going to 778 the ith processor 779 */ 780 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 781 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 782 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 783 starts[0] = 0; 784 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 785 for (i=0; i<N; i++) { 786 svalues[starts[owner[i]]++] = rows[i]; 787 } 788 789 starts[0] = 0; 790 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 791 count = 0; 792 for (i=0; i<size; i++) { 793 if (nprocs[2*i+1]) { 794 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 795 } 796 } 797 ierr = PetscFree(starts);CHKERRQ(ierr); 798 799 base = owners[rank]; 800 801 /* wait on receives */ 802 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 803 count = nrecvs; slen = 0; 804 while (count) { 805 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 806 /* unpack receives into our local space */ 807 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 808 source[imdex] = recv_status.MPI_SOURCE; 809 lens[imdex] = n; 810 slen += n; 811 count--; 812 } 813 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 814 815 /* move the data into the send scatter */ 816 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 817 count = 0; 818 for (i=0; i<nrecvs; i++) { 819 values = rvalues + i*nmax; 820 for (j=0; j<lens[i]; j++) { 821 lrows[count++] = values[j] - base; 822 } 823 } 824 ierr = PetscFree(rvalues);CHKERRQ(ierr); 825 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 826 ierr = PetscFree(owner);CHKERRQ(ierr); 827 ierr = PetscFree(nprocs);CHKERRQ(ierr); 828 829 /* fix right hand side if needed */ 830 if (x && b) { 831 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 832 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 833 for (i=0; i<slen; i++) { 834 bb[lrows[i]] = diag*xx[lrows[i]]; 835 } 836 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 837 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 838 } 839 /* 840 Zero the required rows. If the "diagonal block" of the matrix 841 is square and the user wishes to set the diagonal we use separate 842 code so that MatSetValues() is not called for each diagonal allocating 843 new memory, thus calling lots of mallocs and slowing things down. 
844 845 */ 846 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 847 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 848 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 849 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 850 } else if (diag != 0.0) { 851 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 852 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 853 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 854 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 855 } 856 for (i = 0; i < slen; i++) { 857 row = lrows[i] + rstart; 858 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 859 } 860 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 861 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 862 } else { 863 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 864 } 865 ierr = PetscFree(lrows);CHKERRQ(ierr); 866 867 /* wait on sends */ 868 if (nsends) { 869 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 870 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 871 ierr = PetscFree(send_status);CHKERRQ(ierr); 872 } 873 ierr = PetscFree(send_waits);CHKERRQ(ierr); 874 ierr = PetscFree(svalues);CHKERRQ(ierr); 875 PetscFunctionReturn(0); 876 } 877 878 #undef __FUNCT__ 879 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 880 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 881 { 882 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 883 PetscErrorCode ierr; 884 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 885 PetscInt i,*owners = A->rmap->range; 886 PetscInt *nprocs,j,idx,nsends; 887 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 888 PetscInt *rvalues,count,base,slen,*source; 889 PetscInt *lens,*lrows,*values,m; 890 MPI_Comm comm = ((PetscObject)A)->comm; 891 MPI_Request *send_waits,*recv_waits; 892 MPI_Status recv_status,*send_status; 893 const PetscScalar *xx; 894 PetscScalar *bb,*mask; 895 Vec xmask,lmask; 896 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 897 const PetscInt *aj, *ii,*ridx; 898 PetscScalar *aa; 899 #if defined(PETSC_DEBUG) 900 PetscBool found = PETSC_FALSE; 901 #endif 902 903 PetscFunctionBegin; 904 /* first count number of contributors to each processor */ 905 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 906 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 907 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 908 j = 0; 909 for (i=0; i<N; i++) { 910 if (lastidx > (idx = rows[i])) j = 0; 911 lastidx = idx; 912 for (; j<size; j++) { 913 if (idx >= owners[j] && idx < owners[j+1]) { 914 nprocs[2*j]++; 915 nprocs[2*j+1] = 1; 916 owner[i] = j; 917 #if defined(PETSC_DEBUG) 918 found = PETSC_TRUE; 919 #endif 920 break; 921 } 922 } 923 #if defined(PETSC_DEBUG) 924 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 925 found = PETSC_FALSE; 926 #endif 927 } 928 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 929 930 /* inform other processors of number of messages and max length*/ 931 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 932 933 /* post receives: */ 934 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 935 ierr = 
PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 936 for (i=0; i<nrecvs; i++) { 937 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 938 } 939 940 /* do sends: 941 1) starts[i] gives the starting index in svalues for stuff going to 942 the ith processor 943 */ 944 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 945 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 946 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 947 starts[0] = 0; 948 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 949 for (i=0; i<N; i++) { 950 svalues[starts[owner[i]]++] = rows[i]; 951 } 952 953 starts[0] = 0; 954 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 955 count = 0; 956 for (i=0; i<size; i++) { 957 if (nprocs[2*i+1]) { 958 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 959 } 960 } 961 ierr = PetscFree(starts);CHKERRQ(ierr); 962 963 base = owners[rank]; 964 965 /* wait on receives */ 966 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 967 count = nrecvs; slen = 0; 968 while (count) { 969 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 970 /* unpack receives into our local space */ 971 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 972 source[imdex] = recv_status.MPI_SOURCE; 973 lens[imdex] = n; 974 slen += n; 975 count--; 976 } 977 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 978 979 /* move the data into the send scatter */ 980 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 981 count = 0; 982 for (i=0; i<nrecvs; i++) { 983 values = rvalues + i*nmax; 984 for (j=0; j<lens[i]; j++) { 985 lrows[count++] = values[j] - base; 986 } 987 } 988 ierr = PetscFree(rvalues);CHKERRQ(ierr); 989 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 990 ierr = PetscFree(owner);CHKERRQ(ierr); 991 ierr = PetscFree(nprocs);CHKERRQ(ierr); 992 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 993 994 /* zero diagonal part of matrix */ 995 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 996 997 /* handle off diagonal part of matrix */ 998 ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr); 999 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1000 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1001 for (i=0; i<slen; i++) { 1002 bb[lrows[i]] = 1; 1003 } 1004 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1005 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1006 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1007 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1008 if (x) { 1009 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1010 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1011 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1012 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1013 } 1014 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1015 1016 /* remove zeroed rows of off diagonal matrix */ 1017 ii = aij->i; 1018 for (i=0; i<slen; i++) { 1019 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1020 } 1021 1022 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1023 if (aij->compressedrow.use){ 1024 m = aij->compressedrow.nrows; 1025 ii = 
aij->compressedrow.i; 1026 ridx = aij->compressedrow.rindex; 1027 for (i=0; i<m; i++){ 1028 n = ii[i+1] - ii[i]; 1029 aj = aij->j + ii[i]; 1030 aa = aij->a + ii[i]; 1031 1032 for (j=0; j<n; j++) { 1033 if (PetscAbsScalar(mask[*aj])) { 1034 if (b) bb[*ridx] -= *aa*xx[*aj]; 1035 *aa = 0.0; 1036 } 1037 aa++; 1038 aj++; 1039 } 1040 ridx++; 1041 } 1042 } else { /* do not use compressed row format */ 1043 m = l->B->rmap->n; 1044 for (i=0; i<m; i++) { 1045 n = ii[i+1] - ii[i]; 1046 aj = aij->j + ii[i]; 1047 aa = aij->a + ii[i]; 1048 for (j=0; j<n; j++) { 1049 if (PetscAbsScalar(mask[*aj])) { 1050 if (b) bb[i] -= *aa*xx[*aj]; 1051 *aa = 0.0; 1052 } 1053 aa++; 1054 aj++; 1055 } 1056 } 1057 } 1058 if (x) { 1059 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1060 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1061 } 1062 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1063 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1064 ierr = PetscFree(lrows);CHKERRQ(ierr); 1065 1066 /* wait on sends */ 1067 if (nsends) { 1068 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1069 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1070 ierr = PetscFree(send_status);CHKERRQ(ierr); 1071 } 1072 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1073 ierr = PetscFree(svalues);CHKERRQ(ierr); 1074 1075 PetscFunctionReturn(0); 1076 } 1077 1078 #undef __FUNCT__ 1079 #define __FUNCT__ "MatMult_MPIAIJ" 1080 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1081 { 1082 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1083 PetscErrorCode ierr; 1084 PetscInt nt; 1085 1086 PetscFunctionBegin; 1087 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1088 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1089 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1090 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1091 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1092 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1093 PetscFunctionReturn(0); 1094 } 1095 1096 #undef __FUNCT__ 1097 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1098 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1099 { 1100 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1101 PetscErrorCode ierr; 1102 1103 PetscFunctionBegin; 1104 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1105 PetscFunctionReturn(0); 1106 } 1107 1108 #undef __FUNCT__ 1109 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1110 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1111 { 1112 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1113 PetscErrorCode ierr; 1114 1115 PetscFunctionBegin; 1116 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1117 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1118 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1119 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1120 PetscFunctionReturn(0); 1121 } 1122 1123 #undef __FUNCT__ 1124 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1125 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1126 { 1127 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1128 PetscErrorCode ierr; 1129 PetscBool merged; 1130 1131 PetscFunctionBegin; 1132 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1133 /* do nondiagonal part */ 1134 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1135 if (!merged) { 1136 /* 
send it on its way */ 1137 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1138 /* do local part */ 1139 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1140 /* receive remote parts: note this assumes the values are not actually */ 1141 /* added in yy until the next line, */ 1142 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1143 } else { 1144 /* do local part */ 1145 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1146 /* send it on its way */ 1147 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1148 /* values actually were received in the Begin() but we need to call this nop */ 1149 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1150 } 1151 PetscFunctionReturn(0); 1152 } 1153 1154 EXTERN_C_BEGIN 1155 #undef __FUNCT__ 1156 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1157 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1158 { 1159 MPI_Comm comm; 1160 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 1161 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1162 IS Me,Notme; 1163 PetscErrorCode ierr; 1164 PetscInt M,N,first,last,*notme,i; 1165 PetscMPIInt size; 1166 1167 PetscFunctionBegin; 1168 1169 /* Easy test: symmetric diagonal block */ 1170 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 1171 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1172 if (!*f) PetscFunctionReturn(0); 1173 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1174 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1175 if (size == 1) PetscFunctionReturn(0); 1176 1177 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1178 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1179 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1180 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1181 for (i=0; i<first; i++) notme[i] = i; 1182 for (i=last; i<M; i++) notme[i-last+first] = i; 1183 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1184 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1185 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1186 Aoff = Aoffs[0]; 1187 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1188 Boff = Boffs[0]; 1189 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1190 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1191 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1192 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1193 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1194 ierr = PetscFree(notme);CHKERRQ(ierr); 1195 PetscFunctionReturn(0); 1196 } 1197 EXTERN_C_END 1198 1199 #undef __FUNCT__ 1200 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1201 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1202 { 1203 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1204 PetscErrorCode ierr; 1205 1206 PetscFunctionBegin; 1207 /* do nondiagonal part */ 1208 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1209 /* send it on its way */ 1210 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1211 /* do local part */ 1212 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1213 /* receive remote parts */ 1214 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1215 PetscFunctionReturn(0); 1216 } 
1217 1218 /* 1219 This only works correctly for square matrices where the subblock A->A is the 1220 diagonal block 1221 */ 1222 #undef __FUNCT__ 1223 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1224 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1225 { 1226 PetscErrorCode ierr; 1227 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1228 1229 PetscFunctionBegin; 1230 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1231 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1232 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1233 PetscFunctionReturn(0); 1234 } 1235 1236 #undef __FUNCT__ 1237 #define __FUNCT__ "MatScale_MPIAIJ" 1238 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1239 { 1240 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1241 PetscErrorCode ierr; 1242 1243 PetscFunctionBegin; 1244 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1245 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1246 PetscFunctionReturn(0); 1247 } 1248 1249 #undef __FUNCT__ 1250 #define __FUNCT__ "MatDestroy_MPIAIJ" 1251 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1252 { 1253 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1254 PetscErrorCode ierr; 1255 1256 PetscFunctionBegin; 1257 #if defined(PETSC_USE_LOG) 1258 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1259 #endif 1260 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1261 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1262 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1263 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1264 #if defined (PETSC_USE_CTABLE) 1265 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1266 #else 1267 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1268 #endif 1269 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1270 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1271 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1272 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1273 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1274 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1275 1276 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1277 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1278 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1279 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1280 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1281 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1282 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1283 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1284 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1285 PetscFunctionReturn(0); 1286 } 1287 1288 #undef __FUNCT__ 1289 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1290 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1291 { 1292 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1293 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 1294 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 1295 PetscErrorCode ierr; 1296 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1297 int 
fd; 1298 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1299 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 1300 PetscScalar *column_values; 1301 PetscInt message_count,flowcontrolcount; 1302 FILE *file; 1303 1304 PetscFunctionBegin; 1305 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1306 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1307 nz = A->nz + B->nz; 1308 if (!rank) { 1309 header[0] = MAT_FILE_CLASSID; 1310 header[1] = mat->rmap->N; 1311 header[2] = mat->cmap->N; 1312 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1313 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1314 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1315 /* get largest number of rows any processor has */ 1316 rlen = mat->rmap->n; 1317 range = mat->rmap->range; 1318 for (i=1; i<size; i++) { 1319 rlen = PetscMax(rlen,range[i+1] - range[i]); 1320 } 1321 } else { 1322 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1323 rlen = mat->rmap->n; 1324 } 1325 1326 /* load up the local row counts */ 1327 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 1328 for (i=0; i<mat->rmap->n; i++) { 1329 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1330 } 1331 1332 /* store the row lengths to the file */ 1333 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1334 if (!rank) { 1335 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1336 for (i=1; i<size; i++) { 1337 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1338 rlen = range[i+1] - range[i]; 1339 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1340 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1341 } 1342 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1343 } else { 1344 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1345 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1346 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1347 } 1348 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1349 1350 /* load up the local column indices */ 1351 nzmax = nz; /* )th processor needs space a largest processor needs */ 1352 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1353 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1354 cnt = 0; 1355 for (i=0; i<mat->rmap->n; i++) { 1356 for (j=B->i[i]; j<B->i[i+1]; j++) { 1357 if ( (col = garray[B->j[j]]) > cstart) break; 1358 column_indices[cnt++] = col; 1359 } 1360 for (k=A->i[i]; k<A->i[i+1]; k++) { 1361 column_indices[cnt++] = A->j[k] + cstart; 1362 } 1363 for (; j<B->i[i+1]; j++) { 1364 column_indices[cnt++] = garray[B->j[j]]; 1365 } 1366 } 1367 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1368 1369 /* store the column indices to the file */ 1370 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1371 if (!rank) { 1372 MPI_Status status; 1373 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1374 for (i=1; i<size; i++) { 1375 
ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1376 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1377 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1378 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1379 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1380 } 1381 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1382 } else { 1383 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1384 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1385 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1386 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1387 } 1388 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1389 1390 /* load up the local column values */ 1391 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1392 cnt = 0; 1393 for (i=0; i<mat->rmap->n; i++) { 1394 for (j=B->i[i]; j<B->i[i+1]; j++) { 1395 if ( garray[B->j[j]] > cstart) break; 1396 column_values[cnt++] = B->a[j]; 1397 } 1398 for (k=A->i[i]; k<A->i[i+1]; k++) { 1399 column_values[cnt++] = A->a[k]; 1400 } 1401 for (; j<B->i[i+1]; j++) { 1402 column_values[cnt++] = B->a[j]; 1403 } 1404 } 1405 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1406 1407 /* store the column values to the file */ 1408 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1409 if (!rank) { 1410 MPI_Status status; 1411 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1412 for (i=1; i<size; i++) { 1413 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1414 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1415 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1416 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1417 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1418 } 1419 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1420 } else { 1421 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1422 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1423 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1424 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1425 } 1426 ierr = PetscFree(column_values);CHKERRQ(ierr); 1427 1428 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1429 if (file) { 1430 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1431 } 1432 1433 PetscFunctionReturn(0); 1434 } 1435 1436 #undef __FUNCT__ 1437 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1438 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1439 { 1440 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1441 PetscErrorCode ierr; 1442 PetscMPIInt rank = aij->rank,size = aij->size; 1443 PetscBool isdraw,iascii,isbinary; 1444 PetscViewer sviewer; 1445 PetscViewerFormat format; 1446 
1447 PetscFunctionBegin; 1448 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1449 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1450 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1451 if (iascii) { 1452 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1453 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1454 MatInfo info; 1455 PetscBool inodes; 1456 1457 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1458 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1459 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1460 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1461 if (!inodes) { 1462 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1463 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1464 } else { 1465 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1466 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1467 } 1468 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1469 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1470 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1471 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1472 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1473 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1474 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1475 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1476 PetscFunctionReturn(0); 1477 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1478 PetscInt inodecount,inodelimit,*inodes; 1479 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1480 if (inodes) { 1481 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1482 } else { 1483 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1484 } 1485 PetscFunctionReturn(0); 1486 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1487 PetscFunctionReturn(0); 1488 } 1489 } else if (isbinary) { 1490 if (size == 1) { 1491 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1492 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1493 } else { 1494 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1495 } 1496 PetscFunctionReturn(0); 1497 } else if (isdraw) { 1498 PetscDraw draw; 1499 PetscBool isnull; 1500 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1501 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1502 } 1503 1504 if (size == 1) { 1505 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1506 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1507 } else { 1508 /* assemble the entire matrix onto first processor. 
*/ 1509 Mat A; 1510 Mat_SeqAIJ *Aloc; 1511 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1512 MatScalar *a; 1513 1514 if (mat->rmap->N > 1024) { 1515 PetscBool flg = PETSC_FALSE; 1516 1517 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1518 if (!flg) { 1519 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1520 } 1521 } 1522 1523 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1524 if (!rank) { 1525 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1526 } else { 1527 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1528 } 1529 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1530 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1531 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1532 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1533 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1534 1535 /* copy over the A part */ 1536 Aloc = (Mat_SeqAIJ*)aij->A->data; 1537 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1538 row = mat->rmap->rstart; 1539 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1540 for (i=0; i<m; i++) { 1541 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1542 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1543 } 1544 aj = Aloc->j; 1545 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1546 1547 /* copy over the B part */ 1548 Aloc = (Mat_SeqAIJ*)aij->B->data; 1549 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1550 row = mat->rmap->rstart; 1551 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1552 ct = cols; 1553 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1554 for (i=0; i<m; i++) { 1555 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1556 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1557 } 1558 ierr = PetscFree(ct);CHKERRQ(ierr); 1559 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1560 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1561 /* 1562 Everyone has to call to draw the matrix since the graphics waits are 1563 synchronized across all processors that share the PetscDraw object 1564 */ 1565 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1566 if (!rank) { 1567 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1568 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1569 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1570 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1571 } 1572 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1573 ierr = MatDestroy(&A);CHKERRQ(ierr); 1574 } 1575 PetscFunctionReturn(0); 1576 } 1577 1578 #undef __FUNCT__ 1579 #define __FUNCT__ "MatView_MPIAIJ" 1580 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1581 { 1582 PetscErrorCode ierr; 1583 PetscBool iascii,isdraw,issocket,isbinary; 1584 1585 PetscFunctionBegin; 1586 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1587 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1588 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1589 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1590 if (iascii || isdraw || isbinary || issocket) { 1591 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1592 } else { 1593 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1594 } 1595 PetscFunctionReturn(0); 1596 } 1597 1598 #undef __FUNCT__ 1599 #define __FUNCT__ "MatSOR_MPIAIJ" 1600 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1601 { 1602 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1603 PetscErrorCode ierr; 1604 Vec bb1 = 0; 1605 PetscBool hasop; 1606 1607 PetscFunctionBegin; 1608 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1609 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1610 } 1611 1612 if (flag == SOR_APPLY_UPPER) { 1613 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1614 PetscFunctionReturn(0); 1615 } 1616 1617 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1618 if (flag & SOR_ZERO_INITIAL_GUESS) { 1619 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1620 its--; 1621 } 1622 1623 while (its--) { 1624 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1625 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1626 1627 /* update rhs: bb1 = bb - B*x */ 1628 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1629 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1630 1631 /* local sweep */ 1632 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1633 } 1634 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1635 if (flag & SOR_ZERO_INITIAL_GUESS) { 1636 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1637 its--; 1638 } 1639 while (its--) { 1640 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1641 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1642 1643 /* update rhs: bb1 = bb - B*x */ 1644 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1645 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1646 1647 /* local sweep */ 1648 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1649 } 1650 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1651 if (flag & SOR_ZERO_INITIAL_GUESS) { 1652 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1653 its--; 1654 } 1655 while (its--) { 1656 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1657 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1658 1659 /* update rhs: bb1 = bb - B*x */ 1660 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1661 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1662 1663 /* local sweep */ 1664 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1665 } 1666 } else if (flag & SOR_EISENSTAT) { 1667 Vec xx1; 1668 1669 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1670 ierr = 
(*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1671 1672 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1673 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1674 if (!mat->diag) { 1675 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1676 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1677 } 1678 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1679 if (hasop) { 1680 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1681 } else { 1682 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1683 } 1684 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1685 1686 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1687 1688 /* local sweep */ 1689 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1690 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1691 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1692 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1693 1694 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1695 PetscFunctionReturn(0); 1696 } 1697 1698 #undef __FUNCT__ 1699 #define __FUNCT__ "MatPermute_MPIAIJ" 1700 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1701 { 1702 MPI_Comm comm; 1703 PetscInt first,local_rowsize,local_colsize; 1704 const PetscInt *rows; 1705 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1706 PetscErrorCode ierr; 1707 1708 PetscFunctionBegin; 1709 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1710 /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */ 1711 ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr); 1712 /* collect the global row permutation and invert it */ 1713 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1714 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1715 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 1716 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1717 ierr = ISDestroy(&growp);CHKERRQ(ierr); 1718 /* get the local target indices */ 1719 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1720 ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr); 1721 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1722 ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 1723 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1724 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 1725 /* the column permutation is so much easier; 1726 make a local version of 'colp' and invert it */ 1727 ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr); 1728 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1729 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1730 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 1731 /* now we just get the submatrix */ 1732 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1733 /* clean up */ 1734 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 1735 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 1736 PetscFunctionReturn(0); 1737 } 1738 1739 #undef __FUNCT__ 1740 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1741 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1742 { 1743 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1744 Mat A = mat->A,B = 
mat->B; 1745 PetscErrorCode ierr; 1746 PetscReal isend[5],irecv[5]; 1747 1748 PetscFunctionBegin; 1749 info->block_size = 1.0; 1750 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1751 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1752 isend[3] = info->memory; isend[4] = info->mallocs; 1753 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1754 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1755 isend[3] += info->memory; isend[4] += info->mallocs; 1756 if (flag == MAT_LOCAL) { 1757 info->nz_used = isend[0]; 1758 info->nz_allocated = isend[1]; 1759 info->nz_unneeded = isend[2]; 1760 info->memory = isend[3]; 1761 info->mallocs = isend[4]; 1762 } else if (flag == MAT_GLOBAL_MAX) { 1763 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1764 info->nz_used = irecv[0]; 1765 info->nz_allocated = irecv[1]; 1766 info->nz_unneeded = irecv[2]; 1767 info->memory = irecv[3]; 1768 info->mallocs = irecv[4]; 1769 } else if (flag == MAT_GLOBAL_SUM) { 1770 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1771 info->nz_used = irecv[0]; 1772 info->nz_allocated = irecv[1]; 1773 info->nz_unneeded = irecv[2]; 1774 info->memory = irecv[3]; 1775 info->mallocs = irecv[4]; 1776 } 1777 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1778 info->fill_ratio_needed = 0; 1779 info->factor_mallocs = 0; 1780 1781 PetscFunctionReturn(0); 1782 } 1783 1784 #undef __FUNCT__ 1785 #define __FUNCT__ "MatSetOption_MPIAIJ" 1786 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1787 { 1788 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1789 PetscErrorCode ierr; 1790 1791 PetscFunctionBegin; 1792 switch (op) { 1793 case MAT_NEW_NONZERO_LOCATIONS: 1794 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1795 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1796 case MAT_KEEP_NONZERO_PATTERN: 1797 case MAT_NEW_NONZERO_LOCATION_ERR: 1798 case MAT_USE_INODES: 1799 case MAT_IGNORE_ZERO_ENTRIES: 1800 MatCheckPreallocated(A,1); 1801 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1802 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1803 break; 1804 case MAT_ROW_ORIENTED: 1805 a->roworiented = flg; 1806 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1807 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1808 break; 1809 case MAT_NEW_DIAGONALS: 1810 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1811 break; 1812 case MAT_IGNORE_OFF_PROC_ENTRIES: 1813 a->donotstash = flg; 1814 break; 1815 case MAT_SPD: 1816 A->spd_set = PETSC_TRUE; 1817 A->spd = flg; 1818 if (flg) { 1819 A->symmetric = PETSC_TRUE; 1820 A->structurally_symmetric = PETSC_TRUE; 1821 A->symmetric_set = PETSC_TRUE; 1822 A->structurally_symmetric_set = PETSC_TRUE; 1823 } 1824 break; 1825 case MAT_SYMMETRIC: 1826 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1827 break; 1828 case MAT_STRUCTURALLY_SYMMETRIC: 1829 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1830 break; 1831 case MAT_HERMITIAN: 1832 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1833 break; 1834 case MAT_SYMMETRY_ETERNAL: 1835 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1836 break; 1837 default: 1838 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1839 } 1840 PetscFunctionReturn(0); 1841 } 1842 1843 #undef __FUNCT__ 1844 #define __FUNCT__ "MatGetRow_MPIAIJ" 1845 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1846 { 1847 Mat_MPIAIJ *mat = 
(Mat_MPIAIJ*)matin->data; 1848 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1849 PetscErrorCode ierr; 1850 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1851 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1852 PetscInt *cmap,*idx_p; 1853 1854 PetscFunctionBegin; 1855 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1856 mat->getrowactive = PETSC_TRUE; 1857 1858 if (!mat->rowvalues && (idx || v)) { 1859 /* 1860 allocate enough space to hold information from the longest row. 1861 */ 1862 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1863 PetscInt max = 1,tmp; 1864 for (i=0; i<matin->rmap->n; i++) { 1865 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1866 if (max < tmp) { max = tmp; } 1867 } 1868 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1869 } 1870 1871 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1872 lrow = row - rstart; 1873 1874 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1875 if (!v) {pvA = 0; pvB = 0;} 1876 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1877 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1878 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1879 nztot = nzA + nzB; 1880 1881 cmap = mat->garray; 1882 if (v || idx) { 1883 if (nztot) { 1884 /* Sort by increasing column numbers, assuming A and B already sorted */ 1885 PetscInt imark = -1; 1886 if (v) { 1887 *v = v_p = mat->rowvalues; 1888 for (i=0; i<nzB; i++) { 1889 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1890 else break; 1891 } 1892 imark = i; 1893 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1894 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1895 } 1896 if (idx) { 1897 *idx = idx_p = mat->rowindices; 1898 if (imark > -1) { 1899 for (i=0; i<imark; i++) { 1900 idx_p[i] = cmap[cworkB[i]]; 1901 } 1902 } else { 1903 for (i=0; i<nzB; i++) { 1904 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1905 else break; 1906 } 1907 imark = i; 1908 } 1909 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1910 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1911 } 1912 } else { 1913 if (idx) *idx = 0; 1914 if (v) *v = 0; 1915 } 1916 } 1917 *nz = nztot; 1918 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1919 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1920 PetscFunctionReturn(0); 1921 } 1922 1923 #undef __FUNCT__ 1924 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1925 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1926 { 1927 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1928 1929 PetscFunctionBegin; 1930 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1931 aij->getrowactive = PETSC_FALSE; 1932 PetscFunctionReturn(0); 1933 } 1934 1935 #undef __FUNCT__ 1936 #define __FUNCT__ "MatNorm_MPIAIJ" 1937 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1938 { 1939 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1940 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1941 PetscErrorCode ierr; 1942 PetscInt i,j,cstart = mat->cmap->rstart; 1943 PetscReal sum = 0.0; 1944 MatScalar *v; 1945 1946 PetscFunctionBegin; 1947 if (aij->size == 1) { 1948 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1949 } 
else { 1950 if (type == NORM_FROBENIUS) { 1951 v = amat->a; 1952 for (i=0; i<amat->nz; i++) { 1953 #if defined(PETSC_USE_COMPLEX) 1954 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1955 #else 1956 sum += (*v)*(*v); v++; 1957 #endif 1958 } 1959 v = bmat->a; 1960 for (i=0; i<bmat->nz; i++) { 1961 #if defined(PETSC_USE_COMPLEX) 1962 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1963 #else 1964 sum += (*v)*(*v); v++; 1965 #endif 1966 } 1967 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1968 *norm = PetscSqrtReal(*norm); 1969 } else if (type == NORM_1) { /* max column norm */ 1970 PetscReal *tmp,*tmp2; 1971 PetscInt *jj,*garray = aij->garray; 1972 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1973 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1974 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1975 *norm = 0.0; 1976 v = amat->a; jj = amat->j; 1977 for (j=0; j<amat->nz; j++) { 1978 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1979 } 1980 v = bmat->a; jj = bmat->j; 1981 for (j=0; j<bmat->nz; j++) { 1982 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1983 } 1984 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1985 for (j=0; j<mat->cmap->N; j++) { 1986 if (tmp2[j] > *norm) *norm = tmp2[j]; 1987 } 1988 ierr = PetscFree(tmp);CHKERRQ(ierr); 1989 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1990 } else if (type == NORM_INFINITY) { /* max row norm */ 1991 PetscReal ntemp = 0.0; 1992 for (j=0; j<aij->A->rmap->n; j++) { 1993 v = amat->a + amat->i[j]; 1994 sum = 0.0; 1995 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1996 sum += PetscAbsScalar(*v); v++; 1997 } 1998 v = bmat->a + bmat->i[j]; 1999 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2000 sum += PetscAbsScalar(*v); v++; 2001 } 2002 if (sum > ntemp) ntemp = sum; 2003 } 2004 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 2005 } else { 2006 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 2007 } 2008 } 2009 PetscFunctionReturn(0); 2010 } 2011 2012 #undef __FUNCT__ 2013 #define __FUNCT__ "MatTranspose_MPIAIJ" 2014 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2015 { 2016 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2017 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2018 PetscErrorCode ierr; 2019 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 2020 PetscInt cstart=A->cmap->rstart,ncol; 2021 Mat B; 2022 MatScalar *array; 2023 2024 PetscFunctionBegin; 2025 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2026 2027 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 2028 ai = Aloc->i; aj = Aloc->j; 2029 bi = Bloc->i; bj = Bloc->j; 2030 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2031 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 2032 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2033 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 2034 for (i=0; i<ai[ma]; i++){ 2035 d_nnz[aj[i]] ++; 2036 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2037 } 2038 2039 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2040 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2041 ierr = 
MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs); CHKERRQ(ierr); 2042 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2043 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 2044 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2045 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2046 } else { 2047 B = *matout; 2048 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2049 for (i=0; i<ai[ma]; i++){ 2050 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2051 } 2052 } 2053 2054 /* copy over the A part */ 2055 array = Aloc->a; 2056 row = A->rmap->rstart; 2057 for (i=0; i<ma; i++) { 2058 ncol = ai[i+1]-ai[i]; 2059 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2060 row++; array += ncol; aj += ncol; 2061 } 2062 aj = Aloc->j; 2063 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2064 2065 /* copy over the B part */ 2066 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2067 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2068 array = Bloc->a; 2069 row = A->rmap->rstart; 2070 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 2071 cols_tmp = cols; 2072 for (i=0; i<mb; i++) { 2073 ncol = bi[i+1]-bi[i]; 2074 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2075 row++; array += ncol; cols_tmp += ncol; 2076 } 2077 ierr = PetscFree(cols);CHKERRQ(ierr); 2078 2079 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2080 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2081 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2082 *matout = B; 2083 } else { 2084 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2085 } 2086 PetscFunctionReturn(0); 2087 } 2088 2089 #undef __FUNCT__ 2090 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2091 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2092 { 2093 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2094 Mat a = aij->A,b = aij->B; 2095 PetscErrorCode ierr; 2096 PetscInt s1,s2,s3; 2097 2098 PetscFunctionBegin; 2099 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2100 if (rr) { 2101 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2102 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2103 /* Overlap communication with computation. 
*/ 2104 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2105 } 2106 if (ll) { 2107 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2108 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2109 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2110 } 2111 /* scale the diagonal block */ 2112 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2113 2114 if (rr) { 2115 /* Do a scatter end and then right scale the off-diagonal block */ 2116 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2117 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2118 } 2119 2120 PetscFunctionReturn(0); 2121 } 2122 2123 #undef __FUNCT__ 2124 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2125 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2126 { 2127 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2128 PetscErrorCode ierr; 2129 2130 PetscFunctionBegin; 2131 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2132 PetscFunctionReturn(0); 2133 } 2134 2135 #undef __FUNCT__ 2136 #define __FUNCT__ "MatEqual_MPIAIJ" 2137 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2138 { 2139 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2140 Mat a,b,c,d; 2141 PetscBool flg; 2142 PetscErrorCode ierr; 2143 2144 PetscFunctionBegin; 2145 a = matA->A; b = matA->B; 2146 c = matB->A; d = matB->B; 2147 2148 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2149 if (flg) { 2150 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2151 } 2152 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2153 PetscFunctionReturn(0); 2154 } 2155 2156 #undef __FUNCT__ 2157 #define __FUNCT__ "MatCopy_MPIAIJ" 2158 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2159 { 2160 PetscErrorCode ierr; 2161 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2162 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2163 2164 PetscFunctionBegin; 2165 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2166 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2167 /* because of the column compression in the off-processor part of the matrix a->B, 2168 the number of columns in a->B and b->B may be different, hence we cannot call 2169 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2170 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2171 then copying the submatrices */ 2172 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2173 } else { 2174 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2175 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2176 } 2177 PetscFunctionReturn(0); 2178 } 2179 2180 #undef __FUNCT__ 2181 #define __FUNCT__ "MatSetUp_MPIAIJ" 2182 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2183 { 2184 PetscErrorCode ierr; 2185 2186 PetscFunctionBegin; 2187 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2188 PetscFunctionReturn(0); 2189 } 2190 2191 #undef __FUNCT__ 2192 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2193 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2194 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz) 2195 { 2196 PetscInt i,m=Y->rmap->N; 2197 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2198 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2199 const PetscInt *xi = x->i,*yi = y->i; 2200 2201 PetscFunctionBegin; 2202 /* Set the number of nonzeros in the new matrix */ 2203 for(i=0; i<m; i++) { 2204 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2205 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2206 nnz[i] = 0; 2207 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2208 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2209 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2210 nnz[i]++; 2211 } 2212 for (; k<nzy; k++) nnz[i]++; 2213 } 2214 PetscFunctionReturn(0); 2215 } 2216 2217 #undef __FUNCT__ 2218 #define __FUNCT__ "MatAXPY_MPIAIJ" 2219 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2220 { 2221 PetscErrorCode ierr; 2222 PetscInt i; 2223 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2224 PetscBLASInt bnz,one=1; 2225 Mat_SeqAIJ *x,*y; 2226 2227 PetscFunctionBegin; 2228 if (str == SAME_NONZERO_PATTERN) { 2229 PetscScalar alpha = a; 2230 x = (Mat_SeqAIJ *)xx->A->data; 2231 y = (Mat_SeqAIJ *)yy->A->data; 2232 bnz = PetscBLASIntCast(x->nz); 2233 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2234 x = (Mat_SeqAIJ *)xx->B->data; 2235 y = (Mat_SeqAIJ *)yy->B->data; 2236 bnz = PetscBLASIntCast(x->nz); 2237 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2238 } else if (str == SUBSET_NONZERO_PATTERN) { 2239 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2240 2241 x = (Mat_SeqAIJ *)xx->B->data; 2242 y = (Mat_SeqAIJ *)yy->B->data; 2243 if (y->xtoy && y->XtoY != xx->B) { 2244 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2245 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2246 } 2247 if (!y->xtoy) { /* get xtoy */ 2248 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2249 y->XtoY = xx->B; 2250 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2251 } 2252 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2253 } else { 2254 Mat B; 2255 PetscInt *nnz_d,*nnz_o; 2256 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2257 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2258 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2259 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2260 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2261 ierr = 
MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2262 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2263 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2264 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2265 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2266 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2267 ierr = MatHeaderReplace(Y,B); 2268 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2269 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2270 } 2271 PetscFunctionReturn(0); 2272 } 2273 2274 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2275 2276 #undef __FUNCT__ 2277 #define __FUNCT__ "MatConjugate_MPIAIJ" 2278 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2279 { 2280 #if defined(PETSC_USE_COMPLEX) 2281 PetscErrorCode ierr; 2282 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2283 2284 PetscFunctionBegin; 2285 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2286 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2287 #else 2288 PetscFunctionBegin; 2289 #endif 2290 PetscFunctionReturn(0); 2291 } 2292 2293 #undef __FUNCT__ 2294 #define __FUNCT__ "MatRealPart_MPIAIJ" 2295 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2296 { 2297 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2298 PetscErrorCode ierr; 2299 2300 PetscFunctionBegin; 2301 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2302 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2303 PetscFunctionReturn(0); 2304 } 2305 2306 #undef __FUNCT__ 2307 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2308 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2309 { 2310 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2311 PetscErrorCode ierr; 2312 2313 PetscFunctionBegin; 2314 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2315 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2316 PetscFunctionReturn(0); 2317 } 2318 2319 #ifdef PETSC_HAVE_PBGL 2320 2321 #include <boost/parallel/mpi/bsp_process_group.hpp> 2322 #include <boost/graph/distributed/ilu_default_graph.hpp> 2323 #include <boost/graph/distributed/ilu_0_block.hpp> 2324 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2325 #include <boost/graph/distributed/petsc/interface.hpp> 2326 #include <boost/multi_array.hpp> 2327 #include <boost/parallel/distributed_property_map->hpp> 2328 2329 #undef __FUNCT__ 2330 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2331 /* 2332 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2333 */ 2334 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2335 { 2336 namespace petsc = boost::distributed::petsc; 2337 2338 namespace graph_dist = boost::graph::distributed; 2339 using boost::graph::distributed::ilu_default::process_group_type; 2340 using boost::graph::ilu_permuted; 2341 2342 PetscBool row_identity, col_identity; 2343 PetscContainer c; 2344 PetscInt m, n, M, N; 2345 PetscErrorCode ierr; 2346 2347 PetscFunctionBegin; 2348 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2349 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2350 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2351 if (!row_identity || !col_identity) { 2352 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2353 } 2354 2355 process_group_type pg; 2356 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2357 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, 
petsc::matrix_distribution(A, pg)); 2358 lgraph_type& level_graph = *lgraph_p; 2359 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2360 2361 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2362 ilu_permuted(level_graph); 2363 2364 /* put together the new matrix */ 2365 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2366 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2367 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2368 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2369 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs); CHKERRQ(ierr); 2370 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2371 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2372 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2373 2374 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 2375 ierr = PetscContainerSetPointer(c, lgraph_p); 2376 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2377 ierr = PetscContainerDestroy(&c); 2378 PetscFunctionReturn(0); 2379 } 2380 2381 #undef __FUNCT__ 2382 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2383 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2384 { 2385 PetscFunctionBegin; 2386 PetscFunctionReturn(0); 2387 } 2388 2389 #undef __FUNCT__ 2390 #define __FUNCT__ "MatSolve_MPIAIJ" 2391 /* 2392 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2393 */ 2394 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2395 { 2396 namespace graph_dist = boost::graph::distributed; 2397 2398 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2399 lgraph_type* lgraph_p; 2400 PetscContainer c; 2401 PetscErrorCode ierr; 2402 2403 PetscFunctionBegin; 2404 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2405 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2406 ierr = VecCopy(b, x);CHKERRQ(ierr); 2407 2408 PetscScalar* array_x; 2409 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2410 PetscInt sx; 2411 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2412 2413 PetscScalar* array_b; 2414 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2415 PetscInt sb; 2416 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2417 2418 lgraph_type& level_graph = *lgraph_p; 2419 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2420 2421 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2422 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2423 ref_x(array_x, boost::extents[num_vertices(graph)]); 2424 2425 typedef boost::iterator_property_map<array_ref_type::iterator, 2426 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2427 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2428 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2429 2430 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2431 2432 PetscFunctionReturn(0); 2433 } 2434 #endif 2435 2436 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2437 PetscInt nzlocal,nsends,nrecvs; 2438 PetscMPIInt *send_rank,*recv_rank; 2439 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2440 PetscScalar *sbuf_a,**rbuf_a; 2441 PetscErrorCode (*Destroy)(Mat); 2442 } Mat_Redundant; 2443 2444 #undef __FUNCT__ 2445 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2446 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2447 { 2448 PetscErrorCode ierr; 2449 Mat_Redundant 
*redund=(Mat_Redundant*)ptr;
2450 PetscInt i;
2451
2452 PetscFunctionBegin;
2453 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2454 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2455 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2456 for (i=0; i<redund->nrecvs; i++){
2457 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2458 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2459 }
2460 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2461 ierr = PetscFree(redund);CHKERRQ(ierr);
2462 PetscFunctionReturn(0);
2463 }
2464
2465 #undef __FUNCT__
2466 #define __FUNCT__ "MatDestroy_MatRedundant"
2467 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2468 {
2469 PetscErrorCode ierr;
2470 PetscContainer container;
2471 Mat_Redundant *redund=PETSC_NULL;
2472
2473 PetscFunctionBegin;
2474 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2475 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2476 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2477 A->ops->destroy = redund->Destroy;
2478 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2479 if (A->ops->destroy) {
2480 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2481 }
2482 PetscFunctionReturn(0);
2483 }
2484
2485 #undef __FUNCT__
2486 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2487 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2488 {
2489 PetscMPIInt rank,size;
2490 MPI_Comm comm=((PetscObject)mat)->comm;
2491 PetscErrorCode ierr;
2492 PetscInt nsends=0,nrecvs=0,i,rownz_max=0;
2493 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2494 PetscInt *rowrange=mat->rmap->range;
2495 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2496 Mat A=aij->A,B=aij->B,C=*matredundant;
2497 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2498 PetscScalar *sbuf_a;
2499 PetscInt nzlocal=a->nz+b->nz;
2500 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2501 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2502 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2503 MatScalar *aworkA,*aworkB;
2504 PetscScalar *vals;
2505 PetscMPIInt tag1,tag2,tag3,imdex;
2506 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2507 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2508 MPI_Status recv_status,*send_status;
2509 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2510 PetscInt **rbuf_j=PETSC_NULL;
2511 PetscScalar **rbuf_a=PETSC_NULL;
2512 Mat_Redundant *redund=PETSC_NULL;
2513 PetscContainer container;
2514
2515 PetscFunctionBegin;
2516 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2517 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2518
2519 if (reuse == MAT_REUSE_MATRIX) {
2520 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2521 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2522 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2523 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2524 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2525 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2526 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2527 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2528
2529 nsends = redund->nsends;
2530 nrecvs = redund->nrecvs;
2531 send_rank = redund->send_rank;
2532 recv_rank = redund->recv_rank;
2533 sbuf_nz = redund->sbuf_nz;
2534 rbuf_nz = redund->rbuf_nz;
2535 sbuf_j = redund->sbuf_j;
2536 sbuf_a = redund->sbuf_a;
2537 rbuf_j = redund->rbuf_j;
2538 rbuf_a = redund->rbuf_a;
2539 }
2540
2541 if (reuse == MAT_INITIAL_MATRIX){
2542 PetscMPIInt subrank,subsize;
2543 PetscInt nleftover,np_subcomm;
2544 /* get the destination processors' id send_rank, nsends and nrecvs */
2545 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2546 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2547 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2548 np_subcomm = size/nsubcomm;
2549 nleftover = size - nsubcomm*np_subcomm;
2550 nsends = 0; nrecvs = 0;
2551 for (i=0; i<size; i++){ /* i=rank*/
2552 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2553 send_rank[nsends] = i; nsends++;
2554 recv_rank[nrecvs++] = i;
2555 }
2556 }
2557 if (rank >= size - nleftover){/* this proc is a leftover processor */
2558 i = size-nleftover-1;
2559 j = 0;
2560 while (j < nsubcomm - nleftover){
2561 send_rank[nsends++] = i;
2562 i--; j++;
2563 }
2564 }
2565
2566 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2567 for (i=0; i<nleftover; i++){
2568 recv_rank[nrecvs++] = size-nleftover+i;
2569 }
2570 }
2571
2572 /* allocate sbuf_j, sbuf_a */
2573 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2574 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2575 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2576 } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2577
2578 /* copy mat's local entries into the buffers */
2579 if (reuse == MAT_INITIAL_MATRIX){
2580 rownz_max = 0;
2581 rptr = sbuf_j;
2582 cols = sbuf_j + rend-rstart + 1;
2583 vals = sbuf_a;
2584 rptr[0] = 0;
2585 for (i=0; i<rend-rstart; i++){
2586 row = i + rstart;
2587 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2588 ncols = nzA + nzB;
2589 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2590 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2591 /* load the column indices for this row into cols */
2592 lwrite = 0;
2593 for (l=0; l<nzB; l++) {
2594 if ((ctmp = bmap[cworkB[l]]) < cstart){
2595 vals[lwrite] = aworkB[l];
2596 cols[lwrite++] = ctmp;
2597 }
2598 }
2599 for (l=0; l<nzA; l++){
2600 vals[lwrite] = aworkA[l];
2601 cols[lwrite++] = cstart + cworkA[l];
2602 }
2603 for (l=0; l<nzB; l++) {
2604 if ((ctmp = bmap[cworkB[l]]) >= cend){
2605 vals[lwrite] = aworkB[l];
2606 cols[lwrite++] = ctmp;
2607 }
2608 }
2609 vals += ncols;
2610 cols += ncols;
2611 rptr[i+1] = rptr[i] + ncols;
2612 if (rownz_max < ncols) rownz_max = ncols;
2613 }
2614 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2615 } else { /* only copy matrix values into sbuf_a */
2616 rptr = sbuf_j;
2617 vals = sbuf_a;
2618 rptr[0] = 0;
2619 for (i=0; i<rend-rstart; i++){
2620 row = i +
rstart; 2621 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2622 ncols = nzA + nzB; 2623 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2624 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2625 lwrite = 0; 2626 for (l=0; l<nzB; l++) { 2627 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2628 } 2629 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2630 for (l=0; l<nzB; l++) { 2631 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2632 } 2633 vals += ncols; 2634 rptr[i+1] = rptr[i] + ncols; 2635 } 2636 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2637 2638 /* send nzlocal to others, and recv other's nzlocal */ 2639 /*--------------------------------------------------*/ 2640 if (reuse == MAT_INITIAL_MATRIX){ 2641 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2642 s_waits2 = s_waits3 + nsends; 2643 s_waits1 = s_waits2 + nsends; 2644 r_waits1 = s_waits1 + nsends; 2645 r_waits2 = r_waits1 + nrecvs; 2646 r_waits3 = r_waits2 + nrecvs; 2647 } else { 2648 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2649 r_waits3 = s_waits3 + nsends; 2650 } 2651 2652 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2653 if (reuse == MAT_INITIAL_MATRIX){ 2654 /* get new tags to keep the communication clean */ 2655 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2656 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2657 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2658 2659 /* post receives of other's nzlocal */ 2660 for (i=0; i<nrecvs; i++){ 2661 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2662 } 2663 /* send nzlocal to others */ 2664 for (i=0; i<nsends; i++){ 2665 sbuf_nz[i] = nzlocal; 2666 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2667 } 2668 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2669 count = nrecvs; 2670 while (count) { 2671 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2672 recv_rank[imdex] = recv_status.MPI_SOURCE; 2673 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2674 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2675 2676 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2677 rbuf_nz[imdex] += i + 2; 2678 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2679 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2680 count--; 2681 } 2682 /* wait on sends of nzlocal */ 2683 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2684 /* send mat->i,j to others, and recv from other's */ 2685 /*------------------------------------------------*/ 2686 for (i=0; i<nsends; i++){ 2687 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2688 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2689 } 2690 /* wait on receives of mat->i,j */ 2691 /*------------------------------*/ 2692 count = nrecvs; 2693 while (count) { 2694 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2695 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank 
%d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2696 count--; 2697 } 2698 /* wait on sends of mat->i,j */ 2699 /*---------------------------*/ 2700 if (nsends) { 2701 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2702 } 2703 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2704 2705 /* post receives, send and receive mat->a */ 2706 /*----------------------------------------*/ 2707 for (imdex=0; imdex<nrecvs; imdex++) { 2708 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2709 } 2710 for (i=0; i<nsends; i++){ 2711 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2712 } 2713 count = nrecvs; 2714 while (count) { 2715 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2716 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2717 count--; 2718 } 2719 if (nsends) { 2720 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2721 } 2722 2723 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2724 2725 /* create redundant matrix */ 2726 /*-------------------------*/ 2727 if (reuse == MAT_INITIAL_MATRIX){ 2728 /* compute rownz_max for preallocation */ 2729 for (imdex=0; imdex<nrecvs; imdex++){ 2730 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2731 rptr = rbuf_j[imdex]; 2732 for (i=0; i<j; i++){ 2733 ncols = rptr[i+1] - rptr[i]; 2734 if (rownz_max < ncols) rownz_max = ncols; 2735 } 2736 } 2737 2738 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2739 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2740 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs); CHKERRQ(ierr); 2741 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2742 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2743 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2744 } else { 2745 C = *matredundant; 2746 } 2747 2748 /* insert local matrix entries */ 2749 rptr = sbuf_j; 2750 cols = sbuf_j + rend-rstart + 1; 2751 vals = sbuf_a; 2752 for (i=0; i<rend-rstart; i++){ 2753 row = i + rstart; 2754 ncols = rptr[i+1] - rptr[i]; 2755 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2756 vals += ncols; 2757 cols += ncols; 2758 } 2759 /* insert received matrix entries */ 2760 for (imdex=0; imdex<nrecvs; imdex++){ 2761 rstart = rowrange[recv_rank[imdex]]; 2762 rend = rowrange[recv_rank[imdex]+1]; 2763 rptr = rbuf_j[imdex]; 2764 cols = rbuf_j[imdex] + rend-rstart + 1; 2765 vals = rbuf_a[imdex]; 2766 for (i=0; i<rend-rstart; i++){ 2767 row = i + rstart; 2768 ncols = rptr[i+1] - rptr[i]; 2769 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2770 vals += ncols; 2771 cols += ncols; 2772 } 2773 } 2774 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2775 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2776 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2777 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2778 if (reuse == MAT_INITIAL_MATRIX) { 2779 PetscContainer container; 2780 *matredundant = C; 2781 /* create a supporting struct and attach it to C for reuse */ 2782 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2783 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 
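/* the container's user-destroy routine (PetscContainerDestroy_MatRedundant) frees these buffers when C is finally destroyed; C->ops->destroy is redirected to MatDestroy_MatRedundant below so that C's original destroy routine still runs afterwards */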
2784 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2785 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2786 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2787 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2788
2789 redund->nzlocal = nzlocal;
2790 redund->nsends = nsends;
2791 redund->nrecvs = nrecvs;
2792 redund->send_rank = send_rank;
2793 redund->recv_rank = recv_rank;
2794 redund->sbuf_nz = sbuf_nz;
2795 redund->rbuf_nz = rbuf_nz;
2796 redund->sbuf_j = sbuf_j;
2797 redund->sbuf_a = sbuf_a;
2798 redund->rbuf_j = rbuf_j;
2799 redund->rbuf_a = rbuf_a;
2800
2801 redund->Destroy = C->ops->destroy;
2802 C->ops->destroy = MatDestroy_MatRedundant;
2803 }
2804 PetscFunctionReturn(0);
2805 }
2806
2807 #undef __FUNCT__
2808 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2809 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2810 {
2811 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2812 PetscErrorCode ierr;
2813 PetscInt i,*idxb = 0;
2814 PetscScalar *va,*vb;
2815 Vec vtmp;
2816
2817 PetscFunctionBegin;
2818 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2819 ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2820 if (idx) {
2821 for (i=0; i<A->rmap->n; i++) {
2822 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2823 }
2824 }
2825
2826 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2827 if (idx) {
2828 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2829 }
2830 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2831 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2832
2833 for (i=0; i<A->rmap->n; i++){
2834 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2835 va[i] = vb[i];
2836 if (idx) idx[i] = a->garray[idxb[i]];
2837 }
2838 }
2839
2840 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2841 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2842 ierr = PetscFree(idxb);CHKERRQ(ierr);
2843 ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2844 PetscFunctionReturn(0);
2845 }
2846
2847 #undef __FUNCT__
2848 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2849 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2850 {
2851 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2852 PetscErrorCode ierr;
2853 PetscInt i,*idxb = 0;
2854 PetscScalar *va,*vb;
2855 Vec vtmp;
2856
2857 PetscFunctionBegin;
2858 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2859 ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2860 if (idx) {
2861 for (i=0; i<A->rmap->n; i++) {
2862 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2863 }
2864 }
2865
2866 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2867 if (idx) {
2868 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2869 }
2870 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2871 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2872
2873 for (i=0; i<A->rmap->n; i++){
2874 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2875 va[i] = vb[i];
2876 if (idx) idx[i] = a->garray[idxb[i]];
2877 }
2878 }
2879
2880 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2881 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2882 ierr = PetscFree(idxb);CHKERRQ(ierr);
2883 ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2884 PetscFunctionReturn(0);
2885 }
2886
2887 #undef __FUNCT__
2888 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2889 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2890 {
2891 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data;
2892 PetscInt n = A->rmap->n;
2893 PetscInt cstart =
A->cmap->rstart; 2894 PetscInt *cmap = mat->garray; 2895 PetscInt *diagIdx, *offdiagIdx; 2896 Vec diagV, offdiagV; 2897 PetscScalar *a, *diagA, *offdiagA; 2898 PetscInt r; 2899 PetscErrorCode ierr; 2900 2901 PetscFunctionBegin; 2902 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2903 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2904 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2905 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2906 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2907 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2908 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2909 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2910 for(r = 0; r < n; ++r) { 2911 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2912 a[r] = diagA[r]; 2913 idx[r] = cstart + diagIdx[r]; 2914 } else { 2915 a[r] = offdiagA[r]; 2916 idx[r] = cmap[offdiagIdx[r]]; 2917 } 2918 } 2919 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2920 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2921 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2922 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2923 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2924 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2925 PetscFunctionReturn(0); 2926 } 2927 2928 #undef __FUNCT__ 2929 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2930 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2931 { 2932 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2933 PetscInt n = A->rmap->n; 2934 PetscInt cstart = A->cmap->rstart; 2935 PetscInt *cmap = mat->garray; 2936 PetscInt *diagIdx, *offdiagIdx; 2937 Vec diagV, offdiagV; 2938 PetscScalar *a, *diagA, *offdiagA; 2939 PetscInt r; 2940 PetscErrorCode ierr; 2941 2942 PetscFunctionBegin; 2943 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2944 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2945 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2946 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2947 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2948 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2949 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2950 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2951 for(r = 0; r < n; ++r) { 2952 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2953 a[r] = diagA[r]; 2954 idx[r] = cstart + diagIdx[r]; 2955 } else { 2956 a[r] = offdiagA[r]; 2957 idx[r] = cmap[offdiagIdx[r]]; 2958 } 2959 } 2960 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2961 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2962 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2963 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2964 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2965 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2966 PetscFunctionReturn(0); 2967 } 2968 2969 #undef __FUNCT__ 2970 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2971 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2972 { 2973 PetscErrorCode ierr; 2974 Mat *dummy; 2975 2976 PetscFunctionBegin; 2977 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2978 *newmat = *dummy; 2979 ierr = PetscFree(dummy);CHKERRQ(ierr); 2980 PetscFunctionReturn(0); 2981 } 2982 2983 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2984 2985 #undef __FUNCT__ 
2986 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 2987 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2988 { 2989 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2990 PetscErrorCode ierr; 2991 2992 PetscFunctionBegin; 2993 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2994 PetscFunctionReturn(0); 2995 } 2996 2997 2998 /* -------------------------------------------------------------------*/ 2999 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3000 MatGetRow_MPIAIJ, 3001 MatRestoreRow_MPIAIJ, 3002 MatMult_MPIAIJ, 3003 /* 4*/ MatMultAdd_MPIAIJ, 3004 MatMultTranspose_MPIAIJ, 3005 MatMultTransposeAdd_MPIAIJ, 3006 #ifdef PETSC_HAVE_PBGL 3007 MatSolve_MPIAIJ, 3008 #else 3009 0, 3010 #endif 3011 0, 3012 0, 3013 /*10*/ 0, 3014 0, 3015 0, 3016 MatSOR_MPIAIJ, 3017 MatTranspose_MPIAIJ, 3018 /*15*/ MatGetInfo_MPIAIJ, 3019 MatEqual_MPIAIJ, 3020 MatGetDiagonal_MPIAIJ, 3021 MatDiagonalScale_MPIAIJ, 3022 MatNorm_MPIAIJ, 3023 /*20*/ MatAssemblyBegin_MPIAIJ, 3024 MatAssemblyEnd_MPIAIJ, 3025 MatSetOption_MPIAIJ, 3026 MatZeroEntries_MPIAIJ, 3027 /*24*/ MatZeroRows_MPIAIJ, 3028 0, 3029 #ifdef PETSC_HAVE_PBGL 3030 0, 3031 #else 3032 0, 3033 #endif 3034 0, 3035 0, 3036 /*29*/ MatSetUp_MPIAIJ, 3037 #ifdef PETSC_HAVE_PBGL 3038 0, 3039 #else 3040 0, 3041 #endif 3042 0, 3043 0, 3044 0, 3045 /*34*/ MatDuplicate_MPIAIJ, 3046 0, 3047 0, 3048 0, 3049 0, 3050 /*39*/ MatAXPY_MPIAIJ, 3051 MatGetSubMatrices_MPIAIJ, 3052 MatIncreaseOverlap_MPIAIJ, 3053 MatGetValues_MPIAIJ, 3054 MatCopy_MPIAIJ, 3055 /*44*/ MatGetRowMax_MPIAIJ, 3056 MatScale_MPIAIJ, 3057 0, 3058 0, 3059 MatZeroRowsColumns_MPIAIJ, 3060 /*49*/ 0, 3061 0, 3062 0, 3063 0, 3064 0, 3065 /*54*/ MatFDColoringCreate_MPIAIJ, 3066 0, 3067 MatSetUnfactored_MPIAIJ, 3068 0, /* MatPermute_MPIAIJ, impl currently broken */ 3069 0, 3070 /*59*/ MatGetSubMatrix_MPIAIJ, 3071 MatDestroy_MPIAIJ, 3072 MatView_MPIAIJ, 3073 0, 3074 0, 3075 /*64*/ 0, 3076 0, 3077 0, 3078 0, 3079 0, 3080 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3081 MatGetRowMinAbs_MPIAIJ, 3082 0, 3083 MatSetColoring_MPIAIJ, 3084 #if defined(PETSC_HAVE_ADIC) 3085 MatSetValuesAdic_MPIAIJ, 3086 #else 3087 0, 3088 #endif 3089 MatSetValuesAdifor_MPIAIJ, 3090 /*75*/ MatFDColoringApply_AIJ, 3091 0, 3092 0, 3093 0, 3094 0, 3095 /*80*/ 0, 3096 0, 3097 0, 3098 /*83*/ MatLoad_MPIAIJ, 3099 0, 3100 0, 3101 0, 3102 0, 3103 0, 3104 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3105 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3106 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3107 MatPtAP_Basic, 3108 MatPtAPSymbolic_MPIAIJ, 3109 /*94*/ MatPtAPNumeric_MPIAIJ, 3110 0, 3111 0, 3112 0, 3113 0, 3114 /*99*/ 0, 3115 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3116 MatPtAPNumeric_MPIAIJ_MPIAIJ, 3117 MatConjugate_MPIAIJ, 3118 0, 3119 /*104*/MatSetValuesRow_MPIAIJ, 3120 MatRealPart_MPIAIJ, 3121 MatImaginaryPart_MPIAIJ, 3122 0, 3123 0, 3124 /*109*/0, 3125 MatGetRedundantMatrix_MPIAIJ, 3126 MatGetRowMin_MPIAIJ, 3127 0, 3128 0, 3129 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3130 0, 3131 0, 3132 0, 3133 0, 3134 /*119*/0, 3135 0, 3136 0, 3137 0, 3138 MatGetMultiProcBlock_MPIAIJ, 3139 /*124*/MatFindNonZeroRows_MPIAIJ, 3140 MatGetColumnNorms_MPIAIJ, 3141 MatInvertBlockDiagonal_MPIAIJ, 3142 0, 3143 MatGetSubMatricesParallel_MPIAIJ, 3144 /*129*/0, 3145 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3146 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3147 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3148 0, 3149 /*134*/0, 3150 0, 3151 0, 3152 0, 3153 0 3154 }; 3155 3156 /* ----------------------------------------------------------------------------------------*/ 3157 3158 
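/*
   The table above is the method table that MatSetType(A,MATMPIAIJ) installs; generic Mat calls
   dispatch through it to the _MPIAIJ implementations, so application code never calls these
   routines directly. A rough, illustrative sketch (slot numbers refer to the comments embedded
   in the table above):

      MatMult(A,x,y)            ends up in MatMult_MPIAIJ          (slot 3)
      MatGetRowMaxAbs(A,v,idx)  ends up in MatGetRowMaxAbs_MPIAIJ  (slot 69)
*/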
EXTERN_C_BEGIN 3159 #undef __FUNCT__ 3160 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3161 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3162 { 3163 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3164 PetscErrorCode ierr; 3165 3166 PetscFunctionBegin; 3167 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3168 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3169 PetscFunctionReturn(0); 3170 } 3171 EXTERN_C_END 3172 3173 EXTERN_C_BEGIN 3174 #undef __FUNCT__ 3175 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3176 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3177 { 3178 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3179 PetscErrorCode ierr; 3180 3181 PetscFunctionBegin; 3182 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3183 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3184 PetscFunctionReturn(0); 3185 } 3186 EXTERN_C_END 3187 3188 EXTERN_C_BEGIN 3189 #undef __FUNCT__ 3190 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3191 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3192 { 3193 Mat_MPIAIJ *b; 3194 PetscErrorCode ierr; 3195 PetscInt i; 3196 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3197 3198 PetscFunctionBegin; 3199 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3200 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3201 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3202 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3203 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3204 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3205 3206 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3207 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3208 if (d_nnz) { 3209 for (i=0; i<B->rmap->n; i++) { 3210 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3211 } 3212 } 3213 if (o_nnz) { 3214 for (i=0; i<B->rmap->n; i++) { 3215 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3216 } 3217 } 3218 b = (Mat_MPIAIJ*)B->data; 3219 3220 if (!B->preallocated) { 3221 /* Explicitly create 2 MATSEQAIJ matrices. */ 3222 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3223 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3224 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3225 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3226 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3227 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3228 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3229 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3230 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3231 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3232 } 3233 3234 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3235 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3236 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). 
*/ 3237 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3238 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3239 B->preallocated = PETSC_TRUE; 3240 PetscFunctionReturn(0); 3241 } 3242 EXTERN_C_END 3243 3244 #undef __FUNCT__ 3245 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3246 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3247 { 3248 Mat mat; 3249 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3250 PetscErrorCode ierr; 3251 3252 PetscFunctionBegin; 3253 *newmat = 0; 3254 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3255 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3256 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3257 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3258 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3259 a = (Mat_MPIAIJ*)mat->data; 3260 3261 mat->factortype = matin->factortype; 3262 mat->rmap->bs = matin->rmap->bs; 3263 mat->cmap->bs = matin->cmap->bs; 3264 mat->assembled = PETSC_TRUE; 3265 mat->insertmode = NOT_SET_VALUES; 3266 mat->preallocated = PETSC_TRUE; 3267 3268 a->size = oldmat->size; 3269 a->rank = oldmat->rank; 3270 a->donotstash = oldmat->donotstash; 3271 a->roworiented = oldmat->roworiented; 3272 a->rowindices = 0; 3273 a->rowvalues = 0; 3274 a->getrowactive = PETSC_FALSE; 3275 3276 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3277 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3278 3279 if (oldmat->colmap) { 3280 #if defined (PETSC_USE_CTABLE) 3281 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3282 #else 3283 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3284 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3285 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3286 #endif 3287 } else a->colmap = 0; 3288 if (oldmat->garray) { 3289 PetscInt len; 3290 len = oldmat->B->cmap->n; 3291 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3292 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3293 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3294 } else a->garray = 0; 3295 3296 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3297 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3298 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3299 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3300 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3301 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3302 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3303 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3304 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3305 *newmat = mat; 3306 PetscFunctionReturn(0); 3307 } 3308 3309 3310 3311 #undef __FUNCT__ 3312 #define __FUNCT__ "MatLoad_MPIAIJ" 3313 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3314 { 3315 PetscScalar *vals,*svals; 3316 MPI_Comm comm = ((PetscObject)viewer)->comm; 3317 PetscErrorCode ierr; 3318 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3319 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3320 PetscInt 
header[4],*rowlengths = 0,M,N,m,*cols; 3321 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 3322 PetscInt cend,cstart,n,*rowners,sizesset=1; 3323 int fd; 3324 PetscInt bs = 1; 3325 3326 PetscFunctionBegin; 3327 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3328 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3329 if (!rank) { 3330 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3331 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3332 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3333 } 3334 3335 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3336 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3337 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3338 3339 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3340 3341 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3342 M = header[1]; N = header[2]; 3343 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3344 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3345 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3346 3347 /* If global sizes are set, check if they are consistent with that given in the file */ 3348 if (sizesset) { 3349 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3350 } 3351 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3352 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3353 3354 /* determine ownership of all (block) rows */ 3355 if( M%bs ) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3356 if (newMat->rmap->n < 0 ) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3357 else m = newMat->rmap->n; /* Set by user */ 3358 3359 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3360 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3361 3362 /* First process needs enough room for process with most rows */ 3363 if (!rank) { 3364 mmax = rowners[1]; 3365 for (i=2; i<size; i++) { 3366 mmax = PetscMax(mmax,rowners[i]); 3367 } 3368 } else mmax = m; 3369 3370 rowners[0] = 0; 3371 for (i=2; i<=size; i++) { 3372 rowners[i] += rowners[i-1]; 3373 } 3374 rstart = rowners[rank]; 3375 rend = rowners[rank+1]; 3376 3377 /* distribute row lengths to all processors */ 3378 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3379 if (!rank) { 3380 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3381 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3382 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3383 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3384 for (j=0; j<m; j++) { 3385 procsnz[0] += ourlens[j]; 3386 } 3387 for (i=1; i<size; i++) { 3388 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3389 /* calculate the number of nonzeros on each processor */ 3390 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3391 procsnz[i] += 
rowlengths[j]; 3392 } 3393 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3394 } 3395 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3396 } else { 3397 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3398 } 3399 3400 if (!rank) { 3401 /* determine max buffer needed and allocate it */ 3402 maxnz = 0; 3403 for (i=0; i<size; i++) { 3404 maxnz = PetscMax(maxnz,procsnz[i]); 3405 } 3406 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3407 3408 /* read in my part of the matrix column indices */ 3409 nz = procsnz[0]; 3410 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3411 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3412 3413 /* read in every one elses and ship off */ 3414 for (i=1; i<size; i++) { 3415 nz = procsnz[i]; 3416 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3417 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3418 } 3419 ierr = PetscFree(cols);CHKERRQ(ierr); 3420 } else { 3421 /* determine buffer space needed for message */ 3422 nz = 0; 3423 for (i=0; i<m; i++) { 3424 nz += ourlens[i]; 3425 } 3426 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3427 3428 /* receive message of column indices*/ 3429 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3430 } 3431 3432 /* determine column ownership if matrix is not square */ 3433 if (N != M) { 3434 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3435 else n = newMat->cmap->n; 3436 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3437 cstart = cend - n; 3438 } else { 3439 cstart = rstart; 3440 cend = rend; 3441 n = cend - cstart; 3442 } 3443 3444 /* loop over local rows, determining number of off diagonal entries */ 3445 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3446 jj = 0; 3447 for (i=0; i<m; i++) { 3448 for (j=0; j<ourlens[i]; j++) { 3449 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3450 jj++; 3451 } 3452 } 3453 3454 for (i=0; i<m; i++) { 3455 ourlens[i] -= offlens[i]; 3456 } 3457 if (!sizesset) { 3458 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3459 } 3460 3461 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3462 3463 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3464 3465 for (i=0; i<m; i++) { 3466 ourlens[i] += offlens[i]; 3467 } 3468 3469 if (!rank) { 3470 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3471 3472 /* read in my part of the matrix numerical values */ 3473 nz = procsnz[0]; 3474 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3475 3476 /* insert into matrix */ 3477 jj = rstart; 3478 smycols = mycols; 3479 svals = vals; 3480 for (i=0; i<m; i++) { 3481 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3482 smycols += ourlens[i]; 3483 svals += ourlens[i]; 3484 jj++; 3485 } 3486 3487 /* read in other processors and ship out */ 3488 for (i=1; i<size; i++) { 3489 nz = procsnz[i]; 3490 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3491 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3492 } 3493 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3494 } else { 3495 /* receive numeric values */ 3496 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3497 3498 /* receive message of values*/ 3499 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3500 3501 /* 
insert into matrix */ 3502 jj = rstart; 3503 smycols = mycols; 3504 svals = vals; 3505 for (i=0; i<m; i++) { 3506 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3507 smycols += ourlens[i]; 3508 svals += ourlens[i]; 3509 jj++; 3510 } 3511 } 3512 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3513 ierr = PetscFree(vals);CHKERRQ(ierr); 3514 ierr = PetscFree(mycols);CHKERRQ(ierr); 3515 ierr = PetscFree(rowners);CHKERRQ(ierr); 3516 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3517 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3518 3519 PetscFunctionReturn(0); 3520 } 3521 3522 #undef __FUNCT__ 3523 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3524 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3525 { 3526 PetscErrorCode ierr; 3527 IS iscol_local; 3528 PetscInt csize; 3529 3530 PetscFunctionBegin; 3531 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3532 if (call == MAT_REUSE_MATRIX) { 3533 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3534 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3535 } else { 3536 PetscInt cbs; 3537 ierr = ISGetBlockSize(iscol,&cbs); CHKERRQ(ierr); 3538 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3539 ierr = ISSetBlockSize(iscol_local,cbs); CHKERRQ(ierr); 3540 } 3541 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3542 if (call == MAT_INITIAL_MATRIX) { 3543 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3544 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3545 } 3546 PetscFunctionReturn(0); 3547 } 3548 3549 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3550 #undef __FUNCT__ 3551 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3552 /* 3553 Not great since it makes two copies of the submatrix, first an SeqAIJ 3554 in local and then by concatenating the local matrices the end result. 3555 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3556 3557 Note: This requires a sequential iscol with all indices. 
3558 */ 3559 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3560 { 3561 PetscErrorCode ierr; 3562 PetscMPIInt rank,size; 3563 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3564 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3565 PetscBool allcolumns, colflag; 3566 Mat M,Mreuse; 3567 MatScalar *vwork,*aa; 3568 MPI_Comm comm = ((PetscObject)mat)->comm; 3569 Mat_SeqAIJ *aij; 3570 3571 3572 PetscFunctionBegin; 3573 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3574 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3575 3576 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3577 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3578 if (colflag && ncol == mat->cmap->N){ 3579 allcolumns = PETSC_TRUE; 3580 } else { 3581 allcolumns = PETSC_FALSE; 3582 } 3583 if (call == MAT_REUSE_MATRIX) { 3584 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3585 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3586 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3587 } else { 3588 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3589 } 3590 3591 /* 3592 m - number of local rows 3593 n - number of columns (same on all processors) 3594 rstart - first row in new global matrix generated 3595 */ 3596 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3597 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3598 if (call == MAT_INITIAL_MATRIX) { 3599 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3600 ii = aij->i; 3601 jj = aij->j; 3602 3603 /* 3604 Determine the number of non-zeros in the diagonal and off-diagonal 3605 portions of the matrix in order to do correct preallocation 3606 */ 3607 3608 /* first get start and end of "diagonal" columns */ 3609 if (csize == PETSC_DECIDE) { 3610 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3611 if (mglobal == n) { /* square matrix */ 3612 nlocal = m; 3613 } else { 3614 nlocal = n/size + ((n % size) > rank); 3615 } 3616 } else { 3617 nlocal = csize; 3618 } 3619 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3620 rstart = rend - nlocal; 3621 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3622 3623 /* next, compute all the lengths */ 3624 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3625 olens = dlens + m; 3626 for (i=0; i<m; i++) { 3627 jend = ii[i+1] - ii[i]; 3628 olen = 0; 3629 dlen = 0; 3630 for (j=0; j<jend; j++) { 3631 if (*jj < rstart || *jj >= rend) olen++; 3632 else dlen++; 3633 jj++; 3634 } 3635 olens[i] = olen; 3636 dlens[i] = dlen; 3637 } 3638 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3639 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3640 ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); 3641 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3642 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3643 ierr = PetscFree(dlens);CHKERRQ(ierr); 3644 } else { 3645 PetscInt ml,nl; 3646 3647 M = *newmat; 3648 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3649 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3650 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3651 /* 3652 The next two lines are needed so 
we may call MatSetValues_MPIAIJ() below directly, 3653 rather than the slower MatSetValues(). 3654 */
3655 M->was_assembled = PETSC_TRUE;
3656 M->assembled = PETSC_FALSE;
3657 }
3658 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3659 aij = (Mat_SeqAIJ*)(Mreuse)->data;
3660 ii = aij->i;
3661 jj = aij->j;
3662 aa = aij->a;
3663 for (i=0; i<m; i++) {
3664 row = rstart + i;
3665 nz = ii[i+1] - ii[i];
3666 cwork = jj; jj += nz;
3667 vwork = aa; aa += nz;
3668 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3669 }
3670
3671 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3672 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3673 *newmat = M;
3674
3675 /* save submatrix used in processor for next request */
3676 if (call == MAT_INITIAL_MATRIX) {
3677 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3678 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3679 }
3680
3681 PetscFunctionReturn(0);
3682 }
3683
3684 EXTERN_C_BEGIN
3685 #undef __FUNCT__
3686 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3687 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3688 {
3689 PetscInt m,cstart, cend,j,nnz,i,d;
3690 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3691 const PetscInt *JJ;
3692 PetscScalar *values;
3693 PetscErrorCode ierr;
3694
3695 PetscFunctionBegin;
3696 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3697
3698 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3699 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3700 m = B->rmap->n;
3701 cstart = B->cmap->rstart;
3702 cend = B->cmap->rend;
3703 rstart = B->rmap->rstart;
3704
3705 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3706
3707 #if defined(PETSC_USE_DEBUG)
3708 for (i=0; i<m; i++) {
3709 nnz = Ii[i+1]- Ii[i];
3710 JJ = J + Ii[i];
3711 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3712 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3713 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3714 }
3715 #endif
3716
3717 for (i=0; i<m; i++) {
3718 nnz = Ii[i+1]- Ii[i];
3719 JJ = J + Ii[i];
3720 nnz_max = PetscMax(nnz_max,nnz);
3721 d = 0;
3722 for (j=0; j<nnz; j++) {
3723 if (cstart <= JJ[j] && JJ[j] < cend) d++;
3724 }
3725 d_nnz[i] = d;
3726 o_nnz[i] = nnz - d;
3727 }
3728 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3729 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3730
3731 if (v) values = (PetscScalar*)v;
3732 else {
3733 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3734 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3735 }
3736
3737 for (i=0; i<m; i++) {
3738 ii = i + rstart;
3739 nnz = Ii[i+1]- Ii[i];
3740 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3741 } 3742 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3743 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3744 3745 if (!v) { 3746 ierr = PetscFree(values);CHKERRQ(ierr); 3747 } 3748 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3749 PetscFunctionReturn(0); 3750 } 3751 EXTERN_C_END 3752 3753 #undef __FUNCT__ 3754 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3755 /*@ 3756 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3757 (the default parallel PETSc format). 3758 3759 Collective on MPI_Comm 3760 3761 Input Parameters: 3762 + B - the matrix 3763 . i - the indices into j for the start of each local row (starts with zero) 3764 . j - the column indices for each local row (starts with zero) 3765 - v - optional values in the matrix 3766 3767 Level: developer 3768 3769 Notes: 3770 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3771 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3772 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3773 3774 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3775 3776 The format which is used for the sparse matrix input, is equivalent to a 3777 row-major ordering.. i.e for the following matrix, the input data expected is 3778 as shown: 3779 3780 1 0 0 3781 2 0 3 P0 3782 ------- 3783 4 5 6 P1 3784 3785 Process0 [P0]: rows_owned=[0,1] 3786 i = {0,1,3} [size = nrow+1 = 2+1] 3787 j = {0,0,2} [size = nz = 6] 3788 v = {1,2,3} [size = nz = 6] 3789 3790 Process1 [P1]: rows_owned=[2] 3791 i = {0,3} [size = nrow+1 = 1+1] 3792 j = {0,1,2} [size = nz = 6] 3793 v = {4,5,6} [size = nz = 6] 3794 3795 .keywords: matrix, aij, compressed row, sparse, parallel 3796 3797 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3798 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3799 @*/ 3800 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3801 { 3802 PetscErrorCode ierr; 3803 3804 PetscFunctionBegin; 3805 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3806 PetscFunctionReturn(0); 3807 } 3808 3809 #undef __FUNCT__ 3810 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3811 /*@C 3812 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3813 (the default parallel PETSc format). For good matrix assembly performance 3814 the user should preallocate the matrix storage by setting the parameters 3815 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3816 performance can be increased by more than a factor of 50. 3817 3818 Collective on MPI_Comm 3819 3820 Input Parameters: 3821 + A - the matrix 3822 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3823 (same value is used for all local rows) 3824 . d_nnz - array containing the number of nonzeros in the various rows of the 3825 DIAGONAL portion of the local submatrix (possibly different for each row) 3826 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3827 The size of this array is equal to the number of local rows, i.e 'm'. 
3828 For matrices that will be factored, you must leave room for (and set)
3829 the diagonal entry even if it is zero.
3830 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
3831 submatrix (same value is used for all local rows).
3832 - o_nnz - array containing the number of nonzeros in the various rows of the
3833 OFF-DIAGONAL portion of the local submatrix (possibly different for
3834 each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3835 structure. The size of this array is equal to the number
3836 of local rows, i.e. 'm'.
3837
3838 If the *_nnz parameter is given then the *_nz parameter is ignored.
3839
3840 The AIJ format (also called the Yale sparse matrix format or
3841 compressed row storage (CSR)) is fully compatible with standard Fortran 77
3842 storage. The stored row and column indices begin with zero.
3843 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3844
3845 The parallel matrix is partitioned such that the first m0 rows belong to
3846 process 0, the next m1 rows belong to process 1, the next m2 rows belong
3847 to process 2 etc., where m0,m1,m2,... are the input parameter 'm'.
3848
3849 The DIAGONAL portion of the local submatrix of a processor can be defined
3850 as the submatrix which is obtained by extracting the part corresponding to
3851 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3852 first row that belongs to the processor, r2 is the last row belonging to
3853 this processor, and c1-c2 is the range of indices of the local part of a
3854 vector suitable for applying the matrix to. This is an m x n matrix. In the
3855 common case of a square matrix, the row and column ranges are the same and
3856 the DIAGONAL part is also square. The remaining portion of the local
3857 submatrix (m x N) constitutes the OFF-DIAGONAL portion.
3858
3859 If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
3860
3861 You can call MatGetInfo() to get information on how effective the preallocation was;
3862 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3863 you can also run with the option -info and look for messages with the string
3864 malloc in them to see if additional memory allocation was needed.
3865
3866 Example usage:
3867
3868 Consider the following 8x8 matrix with 34 nonzero values that is
3869 assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3870 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3871 as follows:
3872
3873 .vb
3874 1 2 0 | 0 3 0 | 0 4
3875 Proc0 0 5 6 | 7 0 0 | 8 0
3876 9 0 10 | 11 0 0 | 12 0
3877 -------------------------------------
3878 13 0 14 | 15 16 17 | 0 0
3879 Proc1 0 18 0 | 19 20 21 | 0 0
3880 0 0 0 | 22 23 0 | 24 0
3881 -------------------------------------
3882 Proc2 25 26 27 | 0 0 28 | 29 0
3883 30 0 0 | 31 32 33 | 0 34
3884 .ve
3885
3886 This can be represented as a collection of submatrices as:
3887
3888 .vb
3889 A B C
3890 D E F
3891 G H I
3892 .ve
3893
3894 The submatrices A,B,C are owned by proc0, D,E,F are
3895 owned by proc1, and G,H,I are owned by proc2.
3896
3897 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3898 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3899 The 'M','N' parameters are 8,8, and have the same values on all procs.
3900
3901 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3902 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3903 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3904 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
3905 part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ
3906 matrix and [DF] as another SeqAIJ matrix.
3907
3908 When the d_nz, o_nz parameters are specified, d_nz storage elements are
3909 allocated for every row of the local diagonal submatrix, and o_nz
3910 storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3911 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3912 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3913 In this case, the values of d_nz,o_nz are:
3914 .vb
3915 proc0 : d_nz = 2, o_nz = 2
3916 proc1 : d_nz = 3, o_nz = 2
3917 proc2 : d_nz = 1, o_nz = 4
3918 .ve
3919 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3920 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3921 for proc2, i.e. we are using 12+15+10=37 storage locations to store
3922 34 values.
3923
3924 When the d_nnz, o_nnz parameters are specified, the storage is specified
3925 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3926 In the above case the values for d_nnz,o_nnz are:
3927 .vb
3928 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3929 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3930 proc2: d_nnz = [1,1] and o_nnz = [4,4]
3931 .ve
3932 Here the space allocated is the sum of all the above values, i.e. 34, and
3933 hence the preallocation is perfect.
3934
3935 Level: intermediate
3936
3937 .keywords: matrix, aij, compressed row, sparse, parallel
3938
3939 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3940 MPIAIJ, MatGetInfo()
3941 @*/
3942 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3943 {
3944 PetscErrorCode ierr;
3945
3946 PetscFunctionBegin;
3947 PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3948 PetscValidType(B,1);
3949 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3950 PetscFunctionReturn(0);
3951 }
3952
3953 #undef __FUNCT__
3954 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3955 /*@
3956 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
3957 in standard CSR format.
3958
3959 Collective on MPI_Comm
3960
3961 Input Parameters:
3962 + comm - MPI communicator
3963 . m - number of local rows (cannot be PETSC_DECIDE)
3964 . n - This value should be the same as the local size used in creating the
3965 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3966 calculated if N is given) For square matrices n is almost always m.
3967 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3968 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3969 . i - row indices
3970 . j - column indices
3971 - a - matrix values
3972
3973 Output Parameter:
3974 . mat - the matrix
3975
3976 Level: intermediate
3977
3978 Notes:
3979 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3980 thus you CANNOT change the matrix entries by changing the values of a[] after you have
3981 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
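   A minimal call sketch, using the two-process layout from the example below (variable names
   here are illustrative only; each process passes its own local CSR arrays):

.vb
   Mat A;
   ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,m,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,a,&A);CHKERRQ(ierr);
.ve

   with m = 2 and the P0 arrays on process 0, and m = 1 and the P1 arrays on process 1.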
3982 3983 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3984 3985 The format which is used for the sparse matrix input, is equivalent to a 3986 row-major ordering.. i.e for the following matrix, the input data expected is 3987 as shown: 3988 3989 1 0 0 3990 2 0 3 P0 3991 ------- 3992 4 5 6 P1 3993 3994 Process0 [P0]: rows_owned=[0,1] 3995 i = {0,1,3} [size = nrow+1 = 2+1] 3996 j = {0,0,2} [size = nz = 6] 3997 v = {1,2,3} [size = nz = 6] 3998 3999 Process1 [P1]: rows_owned=[2] 4000 i = {0,3} [size = nrow+1 = 1+1] 4001 j = {0,1,2} [size = nz = 6] 4002 v = {4,5,6} [size = nz = 6] 4003 4004 .keywords: matrix, aij, compressed row, sparse, parallel 4005 4006 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4007 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4008 @*/ 4009 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4010 { 4011 PetscErrorCode ierr; 4012 4013 PetscFunctionBegin; 4014 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4015 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4016 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4017 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4018 /* ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); */ 4019 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4020 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4021 PetscFunctionReturn(0); 4022 } 4023 4024 #undef __FUNCT__ 4025 #define __FUNCT__ "MatCreateAIJ" 4026 /*@C 4027 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4028 (the default parallel PETSc format). For good matrix assembly performance 4029 the user should preallocate the matrix storage by setting the parameters 4030 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4031 performance can be increased by more than a factor of 50. 4032 4033 Collective on MPI_Comm 4034 4035 Input Parameters: 4036 + comm - MPI communicator 4037 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4038 This value should be the same as the local size used in creating the 4039 y vector for the matrix-vector product y = Ax. 4040 . n - This value should be the same as the local size used in creating the 4041 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4042 calculated if N is given) For square matrices n is almost always m. 4043 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4044 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4045 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4046 (same value is used for all local rows) 4047 . d_nnz - array containing the number of nonzeros in the various rows of the 4048 DIAGONAL portion of the local submatrix (possibly different for each row) 4049 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 4050 The size of this array is equal to the number of local rows, i.e 'm'. 4051 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4052 submatrix (same value is used for all local rows). 
4053 - o_nnz - array containing the number of nonzeros in the various rows of the
4054 OFF-DIAGONAL portion of the local submatrix (possibly different for
4055 each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4056 structure. The size of this array is equal to the number
4057 of local rows, i.e. 'm'.
4058
4059 Output Parameter:
4060 . A - the matrix
4061
4062 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4063 MatXXXXSetPreallocation() paradigm instead of this routine directly.
4064 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4065
4066 Notes:
4067 If the *_nnz parameter is given then the *_nz parameter is ignored.
4068
4069 The m,n,M,N parameters specify the size of the matrix, and its partitioning across
4070 processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4071 storage requirements for this matrix.
4072
4073 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4074 processor then it must be used on all processors that share the object for
4075 that argument.
4076
4077 The user MUST specify either the local or global matrix dimensions
4078 (possibly both).
4079
4080 The parallel matrix is partitioned across processors such that the
4081 first m0 rows belong to process 0, the next m1 rows belong to
4082 process 1, the next m2 rows belong to process 2 etc., where
4083 m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
4084 values corresponding to an [m x N] submatrix.
4085
4086 The columns are logically partitioned with the n0 columns belonging
4087 to the 0th partition, the next n1 columns belonging to the next
4088 partition etc., where n0,n1,n2,... are the input parameter 'n'.
4089
4090 The DIAGONAL portion of the local submatrix on any given processor
4091 is the submatrix corresponding to the rows and columns m,n
4092 corresponding to the given processor, i.e. the diagonal matrix on
4093 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1]
4094 etc. The remaining portion of the local submatrix [m x (N-n)]
4095 constitutes the OFF-DIAGONAL portion. The example below better
4096 illustrates this concept.
4097
4098 For a square global matrix we define each processor's diagonal portion
4099 to be its local rows and the corresponding columns (a square submatrix);
4100 each processor's off-diagonal portion encompasses the remainder of the
4101 local matrix (a rectangular submatrix).
4102
4103 If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
4104
4105 When calling this routine with a single process communicator, a matrix of
4106 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
4107 type of communicator, use the construction mechanism:
4108 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4109
4110 By default, this format uses inodes (identical nodes) when possible.
4111 We search for consecutive rows with the same nonzero structure, thereby
4112 reusing matrix information to achieve increased efficiency.
4113
4114 Options Database Keys:
4115 + -mat_no_inode - Do not use inodes
4116 . -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4117 - -mat_aij_oneindex - Internally use indexing starting at 1
4118 rather than 0. Note that when calling MatSetValues(),
4119 the user still MUST index entries starting at 0!
4120
4121
4122 Example usage:
4123
4124 Consider the following 8x8 matrix with 34 nonzero values that is
4125 assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4126 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4127 as follows:
4128
4129 .vb
4130 1 2 0 | 0 3 0 | 0 4
4131 Proc0 0 5 6 | 7 0 0 | 8 0
4132 9 0 10 | 11 0 0 | 12 0
4133 -------------------------------------
4134 13 0 14 | 15 16 17 | 0 0
4135 Proc1 0 18 0 | 19 20 21 | 0 0
4136 0 0 0 | 22 23 0 | 24 0
4137 -------------------------------------
4138 Proc2 25 26 27 | 0 0 28 | 29 0
4139 30 0 0 | 31 32 33 | 0 34
4140 .ve
4141
4142 This can be represented as a collection of submatrices as:
4143
4144 .vb
4145 A B C
4146 D E F
4147 G H I
4148 .ve
4149
4150 The submatrices A,B,C are owned by proc0, D,E,F are
4151 owned by proc1, and G,H,I are owned by proc2.
4152
4153 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4154 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4155 The 'M','N' parameters are 8,8, and have the same values on all procs.
4156
4157 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4158 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4159 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4160 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4161 part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ
4162 matrix and [DF] as another SeqAIJ matrix.
4163
4164 When the d_nz, o_nz parameters are specified, d_nz storage elements are
4165 allocated for every row of the local diagonal submatrix, and o_nz
4166 storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4167 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4168 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4169 In this case, the values of d_nz,o_nz are:
4170 .vb
4171 proc0 : d_nz = 2, o_nz = 2
4172 proc1 : d_nz = 3, o_nz = 2
4173 proc2 : d_nz = 1, o_nz = 4
4174 .ve
4175 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4176 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4177 for proc2, i.e. we are using 12+15+10=37 storage locations to store
4178 34 values.
4179
4180 When the d_nnz, o_nnz parameters are specified, the storage is specified
4181 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4182 In the above case the values for d_nnz,o_nnz are:
4183 .vb
4184 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4185 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4186 proc2: d_nnz = [1,1] and o_nnz = [4,4]
4187 .ve
4188 Here the space allocated is the sum of all the above values, i.e. 34, and
4189 hence the preallocation is perfect.
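   With those counts, proc0 in the example above could create its share of the matrix as follows
   (a sketch only; the other processes make the matching call with their own sizes and arrays):

.vb
   PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
   ierr = MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve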
4190 4191 Level: intermediate 4192 4193 .keywords: matrix, aij, compressed row, sparse, parallel 4194 4195 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4196 MPIAIJ, MatCreateMPIAIJWithArrays() 4197 @*/ 4198 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4199 { 4200 PetscErrorCode ierr; 4201 PetscMPIInt size; 4202 4203 PetscFunctionBegin; 4204 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4205 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4206 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4207 if (size > 1) { 4208 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4209 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4210 } else { 4211 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4212 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4213 } 4214 PetscFunctionReturn(0); 4215 } 4216 4217 #undef __FUNCT__ 4218 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4219 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 4220 { 4221 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4222 4223 PetscFunctionBegin; 4224 *Ad = a->A; 4225 *Ao = a->B; 4226 *colmap = a->garray; 4227 PetscFunctionReturn(0); 4228 } 4229 4230 #undef __FUNCT__ 4231 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4232 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4233 { 4234 PetscErrorCode ierr; 4235 PetscInt i; 4236 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4237 4238 PetscFunctionBegin; 4239 if (coloring->ctype == IS_COLORING_GLOBAL) { 4240 ISColoringValue *allcolors,*colors; 4241 ISColoring ocoloring; 4242 4243 /* set coloring for diagonal portion */ 4244 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4245 4246 /* set coloring for off-diagonal portion */ 4247 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4248 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4249 for (i=0; i<a->B->cmap->n; i++) { 4250 colors[i] = allcolors[a->garray[i]]; 4251 } 4252 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4253 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4254 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4255 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4256 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4257 ISColoringValue *colors; 4258 PetscInt *larray; 4259 ISColoring ocoloring; 4260 4261 /* set coloring for diagonal portion */ 4262 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4263 for (i=0; i<a->A->cmap->n; i++) { 4264 larray[i] = i + A->cmap->rstart; 4265 } 4266 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4267 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4268 for (i=0; i<a->A->cmap->n; i++) { 4269 colors[i] = coloring->colors[larray[i]]; 4270 } 4271 ierr = PetscFree(larray);CHKERRQ(ierr); 4272 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4273 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4274 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4275 4276 /* set coloring for off-diagonal portion */ 4277 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4278 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4279 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4280 for (i=0; i<a->B->cmap->n; i++) { 4281 colors[i] = coloring->colors[larray[i]]; 4282 } 4283 ierr = PetscFree(larray);CHKERRQ(ierr); 4284 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4285 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4286 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4287 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4288 4289 PetscFunctionReturn(0); 4290 } 4291 4292 #if defined(PETSC_HAVE_ADIC) 4293 #undef __FUNCT__ 4294 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 4295 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 4296 { 4297 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4298 PetscErrorCode ierr; 4299 4300 PetscFunctionBegin; 4301 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 4302 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 4303 PetscFunctionReturn(0); 4304 } 4305 #endif 4306 4307 #undef __FUNCT__ 4308 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4309 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4310 { 4311 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4312 PetscErrorCode ierr; 4313 4314 PetscFunctionBegin; 4315 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4316 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4317 PetscFunctionReturn(0); 4318 } 4319 4320 #undef __FUNCT__ 4321 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4322 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4323 { 4324 PetscErrorCode ierr; 4325 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4326 PetscInt *indx; 4327 4328 PetscFunctionBegin; 4329 /* This routine will ONLY return MPIAIJ type matrix */ 4330 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4331 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4332 if (n == PETSC_DECIDE){ 4333 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4334 } 4335 /* Check sum(n) = N */ 4336 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4337 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4338 4339 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4340 rstart -= m; 4341 4342 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4343 for (i=0;i<m;i++) { 4344 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4345 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4346 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4347 } 4348 4349 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4350 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE); CHKERRQ(ierr); 4351 ierr = MatSetBlockSizes(*outmat,bs,cbs); CHKERRQ(ierr); 4352 ierr = MatSetType(*outmat,MATMPIAIJ); CHKERRQ(ierr); 4353 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4354 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4355 PetscFunctionReturn(0); 4356 } 4357 4358 #undef __FUNCT__ 4359 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4360 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4361 { 4362 PetscErrorCode ierr; 4363 PetscInt 
m,N,i,rstart,nnz,Ii; 4364 PetscInt *indx; 4365 PetscScalar *values; 4366 4367 PetscFunctionBegin; 4368 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4369 ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4370 for (i=0;i<m;i++) { 4371 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4372 Ii = i + rstart; 4373 ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4374 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4375 } 4376 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4377 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4378 PetscFunctionReturn(0); 4379 } 4380 4381 #undef __FUNCT__ 4382 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4383 /*@ 4384 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4385 matrices from each processor 4386 4387 Collective on MPI_Comm 4388 4389 Input Parameters: 4390 + comm - the communicators the parallel matrix will live on 4391 . inmat - the input sequential matrices 4392 . n - number of local columns (or PETSC_DECIDE) 4393 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4394 4395 Output Parameter: 4396 . outmat - the parallel matrix generated 4397 4398 Level: advanced 4399 4400 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4401 4402 @*/ 4403 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4404 { 4405 PetscErrorCode ierr; 4406 4407 PetscFunctionBegin; 4408 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4409 if (scall == MAT_INITIAL_MATRIX){ 4410 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4411 } 4412 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4413 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4414 PetscFunctionReturn(0); 4415 } 4416 4417 #undef __FUNCT__ 4418 #define __FUNCT__ "MatFileSplit" 4419 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4420 { 4421 PetscErrorCode ierr; 4422 PetscMPIInt rank; 4423 PetscInt m,N,i,rstart,nnz; 4424 size_t len; 4425 const PetscInt *indx; 4426 PetscViewer out; 4427 char *name; 4428 Mat B; 4429 const PetscScalar *values; 4430 4431 PetscFunctionBegin; 4432 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4433 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4434 /* Should this be the type of the diagonal block of A? 
*/ 4435 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4436 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4437 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4438 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4439 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4440 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4441 for (i=0;i<m;i++) { 4442 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4443 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4444 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4445 } 4446 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4447 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4448 4449 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4450 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4451 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4452 sprintf(name,"%s.%d",outfile,rank); 4453 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4454 ierr = PetscFree(name); 4455 ierr = MatView(B,out);CHKERRQ(ierr); 4456 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4457 ierr = MatDestroy(&B);CHKERRQ(ierr); 4458 PetscFunctionReturn(0); 4459 } 4460 4461 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4462 #undef __FUNCT__ 4463 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4464 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4465 { 4466 PetscErrorCode ierr; 4467 Mat_Merge_SeqsToMPI *merge; 4468 PetscContainer container; 4469 4470 PetscFunctionBegin; 4471 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4472 if (container) { 4473 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4474 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4475 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4476 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4477 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4478 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4479 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4480 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4481 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4482 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4483 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4484 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4485 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4486 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4487 ierr = PetscFree(merge);CHKERRQ(ierr); 4488 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4489 } 4490 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4491 PetscFunctionReturn(0); 4492 } 4493 4494 #include <../src/mat/utils/freespace.h> 4495 #include <petscbt.h> 4496 4497 #undef __FUNCT__ 4498 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4499 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4500 { 4501 PetscErrorCode ierr; 4502 MPI_Comm comm=((PetscObject)mpimat)->comm; 4503 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4504 PetscMPIInt size,rank,taga,*len_s; 4505 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4506 PetscInt proc,m; 4507 PetscInt **buf_ri,**buf_rj; 4508 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4509 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4510 MPI_Request *s_waits,*r_waits; 4511 MPI_Status *status; 4512 MatScalar *aa=a->a; 4513 MatScalar **abuf_r,*ba_i; 4514 Mat_Merge_SeqsToMPI *merge; 4515 PetscContainer container; 4516 4517 PetscFunctionBegin; 4518 ierr = 
PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4519 4520 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4521 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4522 4523 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4524 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4525 4526 bi = merge->bi; 4527 bj = merge->bj; 4528 buf_ri = merge->buf_ri; 4529 buf_rj = merge->buf_rj; 4530 4531 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4532 owners = merge->rowmap->range; 4533 len_s = merge->len_s; 4534 4535 /* send and recv matrix values */ 4536 /*-----------------------------*/ 4537 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4538 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4539 4540 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4541 for (proc=0,k=0; proc<size; proc++){ 4542 if (!len_s[proc]) continue; 4543 i = owners[proc]; 4544 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4545 k++; 4546 } 4547 4548 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4549 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4550 ierr = PetscFree(status);CHKERRQ(ierr); 4551 4552 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4553 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4554 4555 /* insert mat values of mpimat */ 4556 /*----------------------------*/ 4557 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4558 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4559 4560 for (k=0; k<merge->nrecv; k++){ 4561 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4562 nrows = *(buf_ri_k[k]); 4563 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4564 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4565 } 4566 4567 /* set values of ba */ 4568 m = merge->rowmap->n; 4569 for (i=0; i<m; i++) { 4570 arow = owners[rank] + i; 4571 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4572 bnzi = bi[i+1] - bi[i]; 4573 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4574 4575 /* add local non-zero vals of this proc's seqmat into ba */ 4576 anzi = ai[arow+1] - ai[arow]; 4577 aj = a->j + ai[arow]; 4578 aa = a->a + ai[arow]; 4579 nextaj = 0; 4580 for (j=0; nextaj<anzi; j++){ 4581 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4582 ba_i[j] += aa[nextaj++]; 4583 } 4584 } 4585 4586 /* add received vals into ba */ 4587 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4588 /* i-th row */ 4589 if (i == *nextrow[k]) { 4590 anzi = *(nextai[k]+1) - *nextai[k]; 4591 aj = buf_rj[k] + *(nextai[k]); 4592 aa = abuf_r[k] + *(nextai[k]); 4593 nextaj = 0; 4594 for (j=0; nextaj<anzi; j++){ 4595 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4596 ba_i[j] += aa[nextaj++]; 4597 } 4598 } 4599 nextrow[k]++; nextai[k]++; 4600 } 4601 } 4602 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4603 } 4604 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4605 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4606 4607 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4608 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4609 ierr = 
PetscFree(ba_i);CHKERRQ(ierr); 4610 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4611 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4612 PetscFunctionReturn(0); 4613 } 4614 4615 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4616 4617 #undef __FUNCT__ 4618 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4619 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4620 { 4621 PetscErrorCode ierr; 4622 Mat B_mpi; 4623 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4624 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4625 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4626 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4627 PetscInt len,proc,*dnz,*onz,bs,cbs; 4628 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4629 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4630 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4631 MPI_Status *status; 4632 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4633 PetscBT lnkbt; 4634 Mat_Merge_SeqsToMPI *merge; 4635 PetscContainer container; 4636 4637 PetscFunctionBegin; 4638 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4639 4640 /* make sure it is a PETSc comm */ 4641 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4642 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4643 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4644 4645 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4646 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4647 4648 /* determine row ownership */ 4649 /*---------------------------------------------------------*/ 4650 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4651 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4652 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4653 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4654 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4655 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4656 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4657 4658 m = merge->rowmap->n; 4659 M = merge->rowmap->N; 4660 owners = merge->rowmap->range; 4661 4662 /* determine the number of messages to send, their lengths */ 4663 /*---------------------------------------------------------*/ 4664 len_s = merge->len_s; 4665 4666 len = 0; /* length of buf_si[] */ 4667 merge->nsend = 0; 4668 for (proc=0; proc<size; proc++){ 4669 len_si[proc] = 0; 4670 if (proc == rank){ 4671 len_s[proc] = 0; 4672 } else { 4673 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4674 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4675 } 4676 if (len_s[proc]) { 4677 merge->nsend++; 4678 nrows = 0; 4679 for (i=owners[proc]; i<owners[proc+1]; i++){ 4680 if (ai[i+1] > ai[i]) nrows++; 4681 } 4682 len_si[proc] = 2*(nrows+1); 4683 len += len_si[proc]; 4684 } 4685 } 4686 4687 /* determine the number and length of messages to receive for ij-structure */ 4688 /*-------------------------------------------------------------------------*/ 4689 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4690 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4691 4692 /* post the Irecv of j-structure */ 4693 /*-------------------------------*/ 4694 ierr = 
PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4695 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4696 4697 /* post the Isend of j-structure */ 4698 /*--------------------------------*/ 4699 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4700 4701 for (proc=0, k=0; proc<size; proc++){ 4702 if (!len_s[proc]) continue; 4703 i = owners[proc]; 4704 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4705 k++; 4706 } 4707 4708 /* receives and sends of j-structure are complete */ 4709 /*------------------------------------------------*/ 4710 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4711 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4712 4713 /* send and recv i-structure */ 4714 /*---------------------------*/ 4715 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4716 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4717 4718 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4719 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4720 for (proc=0,k=0; proc<size; proc++){ 4721 if (!len_s[proc]) continue; 4722 /* form outgoing message for i-structure: 4723 buf_si[0]: nrows to be sent 4724 [1:nrows]: row index (global) 4725 [nrows+1:2*nrows+1]: i-structure index 4726 */ 4727 /*-------------------------------------------*/ 4728 nrows = len_si[proc]/2 - 1; 4729 buf_si_i = buf_si + nrows+1; 4730 buf_si[0] = nrows; 4731 buf_si_i[0] = 0; 4732 nrows = 0; 4733 for (i=owners[proc]; i<owners[proc+1]; i++){ 4734 anzi = ai[i+1] - ai[i]; 4735 if (anzi) { 4736 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4737 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4738 nrows++; 4739 } 4740 } 4741 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4742 k++; 4743 buf_si += len_si[proc]; 4744 } 4745 4746 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4747 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4748 4749 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4750 for (i=0; i<merge->nrecv; i++){ 4751 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4752 } 4753 4754 ierr = PetscFree(len_si);CHKERRQ(ierr); 4755 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4756 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4757 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4758 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4759 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4760 ierr = PetscFree(status);CHKERRQ(ierr); 4761 4762 /* compute a local seq matrix in each processor */ 4763 /*----------------------------------------------*/ 4764 /* allocate bi array and free space for accumulating nonzero column info */ 4765 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4766 bi[0] = 0; 4767 4768 /* create and initialize a linked list */ 4769 nlnk = N+1; 4770 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4771 4772 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4773 len = 0; 4774 len = ai[owners[rank+1]] - ai[owners[rank]]; 4775 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4776 current_space = free_space; 4777 4778 /* determine 
symbolic info for each local row */
4779 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4780
4781 for (k=0; k<merge->nrecv; k++){
4782 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4783 nrows = *buf_ri_k[k];
4784 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */
4785 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure */
4786 }
4787
4788 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4789 len = 0;
4790 for (i=0;i<m;i++) {
4791 bnzi = 0;
4792 /* add local non-zero cols of this proc's seqmat into lnk */
4793 arow = owners[rank] + i;
4794 anzi = ai[arow+1] - ai[arow];
4795 aj = a->j + ai[arow];
4796 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4797 bnzi += nlnk;
4798 /* add received col data into lnk */
4799 for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4800 if (i == *nextrow[k]) { /* i-th row */
4801 anzi = *(nextai[k]+1) - *nextai[k];
4802 aj = buf_rj[k] + *nextai[k];
4803 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4804 bnzi += nlnk;
4805 nextrow[k]++; nextai[k]++;
4806 }
4807 }
4808 if (len < bnzi) len = bnzi; /* =max(bnzi) */
4809
4810 /* if free space is not available, make more free space */
4811 if (current_space->local_remaining<bnzi) {
4812 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4813 nspacedouble++;
4814 }
4815 /* copy data into free space, then initialize lnk */
4816 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4817 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4818
4819 current_space->array += bnzi;
4820 current_space->local_used += bnzi;
4821 current_space->local_remaining -= bnzi;
4822
4823 bi[i+1] = bi[i] + bnzi;
4824 }
4825
4826 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4827
4828 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4829 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4830 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4831
4832 /* create symbolic parallel matrix B_mpi */
4833 /*---------------------------------------*/
4834 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4835 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4836 if (n==PETSC_DECIDE) {
4837 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4838 } else {
4839 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4840 }
4841 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4842 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4843 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4844 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4845 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4846
4847 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4848 B_mpi->assembled = PETSC_FALSE;
4849 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4850 merge->bi = bi;
4851 merge->bj = bj;
4852 merge->buf_ri = buf_ri;
4853 merge->buf_rj = buf_rj;
4854 merge->coi = PETSC_NULL;
4855 merge->coj = PETSC_NULL;
4856 merge->owners_co = PETSC_NULL;
4857
4858 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4859
4860 /* attach the supporting struct to B_mpi for reuse */
4861 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4862 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4863 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4864 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4865 *mpimat = B_mpi;
4866
4867 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4868 PetscFunctionReturn(0);
4869 }
4870
4871 #undef __FUNCT__
4872 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4873 /*@C
4874 MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding the sequential
4875 matrices from each processor
4876
4877 Collective on MPI_Comm
4878
4879 Input Parameters:
4880 + comm - the communicator the parallel matrix will live on
4881 . seqmat - the input sequential matrix
4882 . m - number of local rows (or PETSC_DECIDE)
4883 . n - number of local columns (or PETSC_DECIDE)
4884 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4885
4886 Output Parameter:
4887 . mpimat - the parallel matrix generated
4888
4889 Level: advanced
4890
4891 Notes:
4892 The dimensions of the sequential matrix in each processor MUST be the same.
4893 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4894 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4895 @*/
4896 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4897 {
4898 PetscErrorCode ierr;
4899 PetscMPIInt size;
4900
4901 PetscFunctionBegin;
4902 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4903 if (size == 1){
4904 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4905 if (scall == MAT_INITIAL_MATRIX){
4906 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4907 } else {
4908 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4909 }
4910 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4911 PetscFunctionReturn(0);
4912 }
4913 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4914 if (scall == MAT_INITIAL_MATRIX){
4915 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4916 }
4917 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4918 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4919 PetscFunctionReturn(0);
4920 }
4921
4922 #undef __FUNCT__
4923 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4924 /*@
4925 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4926 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4927 with MatGetSize().
4928
4929 Not Collective
4930
4931 Input Parameters:
4932 + A - the matrix
4933 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4934
4935 Output Parameter:
4936 .
A_loc - the local sequential matrix generated 4937 4938 Level: developer 4939 4940 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4941 4942 @*/ 4943 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4944 { 4945 PetscErrorCode ierr; 4946 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4947 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4948 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4949 MatScalar *aa=a->a,*ba=b->a,*cam; 4950 PetscScalar *ca; 4951 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4952 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4953 PetscBool match; 4954 4955 PetscFunctionBegin; 4956 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4957 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4958 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4959 if (scall == MAT_INITIAL_MATRIX){ 4960 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4961 ci[0] = 0; 4962 for (i=0; i<am; i++){ 4963 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4964 } 4965 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4966 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4967 k = 0; 4968 for (i=0; i<am; i++) { 4969 ncols_o = bi[i+1] - bi[i]; 4970 ncols_d = ai[i+1] - ai[i]; 4971 /* off-diagonal portion of A */ 4972 for (jo=0; jo<ncols_o; jo++) { 4973 col = cmap[*bj]; 4974 if (col >= cstart) break; 4975 cj[k] = col; bj++; 4976 ca[k++] = *ba++; 4977 } 4978 /* diagonal portion of A */ 4979 for (j=0; j<ncols_d; j++) { 4980 cj[k] = cstart + *aj++; 4981 ca[k++] = *aa++; 4982 } 4983 /* off-diagonal portion of A */ 4984 for (j=jo; j<ncols_o; j++) { 4985 cj[k] = cmap[*bj++]; 4986 ca[k++] = *ba++; 4987 } 4988 } 4989 /* put together the new matrix */ 4990 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4991 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4992 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4993 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4994 mat->free_a = PETSC_TRUE; 4995 mat->free_ij = PETSC_TRUE; 4996 mat->nonew = 0; 4997 } else if (scall == MAT_REUSE_MATRIX){ 4998 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4999 ci = mat->i; cj = mat->j; cam = mat->a; 5000 for (i=0; i<am; i++) { 5001 /* off-diagonal portion of A */ 5002 ncols_o = bi[i+1] - bi[i]; 5003 for (jo=0; jo<ncols_o; jo++) { 5004 col = cmap[*bj]; 5005 if (col >= cstart) break; 5006 *cam++ = *ba++; bj++; 5007 } 5008 /* diagonal portion of A */ 5009 ncols_d = ai[i+1] - ai[i]; 5010 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5011 /* off-diagonal portion of A */ 5012 for (j=jo; j<ncols_o; j++) { 5013 *cam++ = *ba++; bj++; 5014 } 5015 } 5016 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5017 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5018 PetscFunctionReturn(0); 5019 } 5020 5021 #undef __FUNCT__ 5022 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5023 /*@C 5024 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5025 5026 Not Collective 5027 5028 Input Parameters: 5029 + A - the matrix 5030 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5031 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 5032 5033 Output Parameter: 5034 . 
A_loc - the local sequential matrix generated 5035 5036 Level: developer 5037 5038 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5039 5040 @*/ 5041 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5042 { 5043 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5044 PetscErrorCode ierr; 5045 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5046 IS isrowa,iscola; 5047 Mat *aloc; 5048 PetscBool match; 5049 5050 PetscFunctionBegin; 5051 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5052 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5053 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5054 if (!row){ 5055 start = A->rmap->rstart; end = A->rmap->rend; 5056 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5057 } else { 5058 isrowa = *row; 5059 } 5060 if (!col){ 5061 start = A->cmap->rstart; 5062 cmap = a->garray; 5063 nzA = a->A->cmap->n; 5064 nzB = a->B->cmap->n; 5065 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5066 ncols = 0; 5067 for (i=0; i<nzB; i++) { 5068 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5069 else break; 5070 } 5071 imark = i; 5072 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5073 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5074 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5075 } else { 5076 iscola = *col; 5077 } 5078 if (scall != MAT_INITIAL_MATRIX){ 5079 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5080 aloc[0] = *A_loc; 5081 } 5082 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5083 *A_loc = aloc[0]; 5084 ierr = PetscFree(aloc);CHKERRQ(ierr); 5085 if (!row){ 5086 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5087 } 5088 if (!col){ 5089 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5090 } 5091 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5092 PetscFunctionReturn(0); 5093 } 5094 5095 #undef __FUNCT__ 5096 #define __FUNCT__ "MatGetBrowsOfAcols" 5097 /*@C 5098 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5099 5100 Collective on Mat 5101 5102 Input Parameters: 5103 + A,B - the matrices in mpiaij format 5104 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5105 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 5106 5107 Output Parameter: 5108 + rowb, colb - index sets of rows and columns of B to extract 5109 - B_seq - the sequential matrix generated 5110 5111 Level: developer 5112 5113 @*/ 5114 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5115 { 5116 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5117 PetscErrorCode ierr; 5118 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5119 IS isrowb,iscolb; 5120 Mat *bseq=PETSC_NULL; 5121 5122 PetscFunctionBegin; 5123 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5124 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5125 } 5126 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5127 5128 if (scall == MAT_INITIAL_MATRIX){ 5129 start = A->cmap->rstart; 5130 cmap = a->garray; 5131 nzA = a->A->cmap->n; 5132 nzB = a->B->cmap->n; 5133 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5134 ncols = 0; 5135 for (i=0; i<nzB; i++) { /* row < local row index */ 5136 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5137 else break; 5138 } 5139 imark = i; 5140 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5141 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5142 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5143 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5144 } else { 5145 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5146 isrowb = *rowb; iscolb = *colb; 5147 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5148 bseq[0] = *B_seq; 5149 } 5150 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5151 *B_seq = bseq[0]; 5152 ierr = PetscFree(bseq);CHKERRQ(ierr); 5153 if (!rowb){ 5154 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5155 } else { 5156 *rowb = isrowb; 5157 } 5158 if (!colb){ 5159 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5160 } else { 5161 *colb = iscolb; 5162 } 5163 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5164 PetscFunctionReturn(0); 5165 } 5166 5167 #undef __FUNCT__ 5168 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5169 /* 5170 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5171 of the OFF-DIAGONAL portion of local A 5172 5173 Collective on Mat 5174 5175 Input Parameters: 5176 + A,B - the matrices in mpiaij format 5177 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5178 5179 Output Parameter: 5180 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5181 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5182 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 5183 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5184 5185 Level: developer 5186 5187 */ 5188 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5189 { 5190 VecScatter_MPI_General *gen_to,*gen_from; 5191 PetscErrorCode ierr; 5192 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5193 Mat_SeqAIJ *b_oth; 5194 VecScatter ctx=a->Mvctx; 5195 MPI_Comm comm=((PetscObject)ctx)->comm; 5196 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5197 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5198 PetscScalar *rvalues,*svalues; 5199 MatScalar *b_otha,*bufa,*bufA; 5200 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5201 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 5202 MPI_Status *sstatus,rstatus; 5203 PetscMPIInt jj; 5204 PetscInt *cols,sbs,rbs; 5205 PetscScalar *vals; 5206 5207 PetscFunctionBegin; 5208 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5209 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5210 } 5211 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5212 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5213 5214 gen_to = (VecScatter_MPI_General*)ctx->todata; 5215 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5216 rvalues = gen_from->values; /* holds the length of receiving row */ 5217 svalues = gen_to->values; /* holds the length of sending row */ 5218 nrecvs = gen_from->n; 5219 nsends = gen_to->n; 5220 5221 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5222 srow = gen_to->indices; /* local row index to be sent */ 5223 sstarts = gen_to->starts; 5224 sprocs = gen_to->procs; 5225 sstatus = gen_to->sstatus; 5226 sbs = gen_to->bs; 5227 rstarts = gen_from->starts; 5228 rprocs = gen_from->procs; 5229 rbs = gen_from->bs; 5230 5231 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5232 if (scall == MAT_INITIAL_MATRIX){ 5233 /* i-array */ 5234 /*---------*/ 5235 /* post receives */ 5236 for (i=0; i<nrecvs; i++){ 5237 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5238 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5239 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5240 } 5241 5242 /* pack the outgoing message */ 5243 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5244 sstartsj[0] = 0; rstartsj[0] = 0; 5245 len = 0; /* total length of j or a array to be sent */ 5246 k = 0; 5247 for (i=0; i<nsends; i++){ 5248 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5249 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5250 for (j=0; j<nrows; j++) { 5251 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5252 for (l=0; l<sbs; l++){ 5253 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5254 rowlen[j*sbs+l] = ncols; 5255 len += ncols; 5256 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5257 } 5258 k++; 5259 } 5260 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5261 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5262 } 5263 /* 
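The i/j/a data of the required rows of B is exchanged in three phases: the i-structure (per-row lengths),
       the j-array (column indices), and the a-array (numerical values). In each phase the MPI_Irecv's are posted
       before the matching MPI_Isend's, and for MAT_REUSE_MATRIX only the a-array phase is repeated. Below, the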
recvs and sends of i-array are completed */
5264 i = nrecvs;
5265 while (i--) {
5266 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5267 }
5268 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5269
5270 /* allocate buffers for sending j and a arrays */
5271 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5272 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5273
5274 /* create i-array of B_oth */
5275 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5276 b_othi[0] = 0;
5277 len = 0; /* total length of j or a array to be received */
5278 k = 0;
5279 for (i=0; i<nrecvs; i++){
5280 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5281 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5282 for (j=0; j<nrows; j++) {
5283 b_othi[k+1] = b_othi[k] + rowlen[j];
5284 len += rowlen[j]; k++;
5285 }
5286 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5287 }
5288
5289 /* allocate space for j and a arrays of B_oth */
5290 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5291 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5292
5293 /* j-array */
5294 /*---------*/
5295 /* post receives of j-array */
5296 for (i=0; i<nrecvs; i++){
5297 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5298 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5299 }
5300
5301 /* pack the outgoing message j-array */
5302 k = 0;
5303 for (i=0; i<nsends; i++){
5304 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5305 bufJ = bufj+sstartsj[i];
5306 for (j=0; j<nrows; j++) {
5307 row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5308 for (ll=0; ll<sbs; ll++){
5309 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5310 for (l=0; l<ncols; l++){
5311 *bufJ++ = cols[l];
5312 }
5313 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5314 }
5315 }
5316 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5317 }
5318
5319 /* recvs and sends of j-array are completed */
5320 i = nrecvs;
5321 while (i--) {
5322 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5323 }
5324 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5325 } else if (scall == MAT_REUSE_MATRIX){
5326 sstartsj = *startsj_s;
5327 rstartsj = *startsj_r;
5328 bufa = *bufa_ptr;
5329 b_oth = (Mat_SeqAIJ*)(*B_oth)->data;
5330 b_otha = b_oth->a;
5331 } else {
5332 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5333 }
5334
5335 /* a-array */
5336 /*---------*/
5337 /* post receives of a-array */
5338 for (i=0; i<nrecvs; i++){
5339 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5340 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5341 }
5342
5343 /* pack the outgoing message a-array */
5344 k = 0;
5345 for (i=0; i<nsends; i++){
5346 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5347 bufA = bufa+sstartsj[i];
5348 for (j=0; j<nrows; j++) {
5349 row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5350 for (ll=0; ll<sbs; ll++){
5351 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5352 for (l=0; l<ncols; l++){
5353 *bufA++ = vals[l];
5354 }
5355 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5356 }
5357 }
5358 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5359 }
5360 /* recvs and sends of a-array are completed */
5361 i = nrecvs;
5362 while (i--) {
5363 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5364 }
5365 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5366 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5367
5368 if (scall == MAT_INITIAL_MATRIX){
5369 /* put together the new matrix */
5370 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5371
5372 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5373 /* Since these are PETSc arrays, change flags to free them as necessary. */
5374 b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
5375 b_oth->free_a = PETSC_TRUE;
5376 b_oth->free_ij = PETSC_TRUE;
5377 b_oth->nonew = 0;
5378
5379 ierr = PetscFree(bufj);CHKERRQ(ierr);
5380 if (!startsj_s || !bufa_ptr){ /* nothing is saved for reuse, so free the send buffer and offsets here */
5381 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5382 ierr = PetscFree(bufa);CHKERRQ(ierr);
5383 } else {
5384 *startsj_s = sstartsj;
5385 *startsj_r = rstartsj;
5386 *bufa_ptr = bufa;
5387 }
5388 }
5389 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5390 PetscFunctionReturn(0);
5391 }
5392
5393 #undef __FUNCT__
5394 #define __FUNCT__ "MatGetCommunicationStructs"
5395 /*@C
5396 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5397
5398 Not Collective
5399
5400 Input Parameter:
5401 . A - The matrix in mpiaij format
5402
5403 Output Parameters:
5404 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5405 . colmap - A map from global column index to local index into lvec
5406 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5407
5408 Level: developer
5409
5410 @*/
5411 #if defined (PETSC_USE_CTABLE)
5412 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5413 #else
5414 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5415 #endif
5416 {
5417 Mat_MPIAIJ *a;
5418
5419 PetscFunctionBegin;
5420 PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5421 PetscValidPointer(lvec, 2);
5422 PetscValidPointer(colmap, 3);
5423 PetscValidPointer(multScatter, 4);
5424 a = (Mat_MPIAIJ *) A->data;
5425 if (lvec) *lvec = a->lvec;
5426 if (colmap) *colmap = a->colmap;
5427 if (multScatter) *multScatter = a->Mvctx;
5428 PetscFunctionReturn(0);
5429 }
5430
5431 EXTERN_C_BEGIN
5432 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5433 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5434 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5435 EXTERN_C_END
5436
5437 #undef __FUNCT__
5438 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5439 /*
5440 Computes C = (B'*A')' since computing the dense-times-sparse product A*B directly is untenable
5441
5442          n                 p               p
5443   (           )     (           )   (           )
5444 m (     A     )  * n(     B     ) = m(     C    )
5445   (           )     (           )   (           )
5446
5447 */
5448 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5449 {
5450 PetscErrorCode ierr;
5451 Mat At,Bt,Ct;
5452
5453 PetscFunctionBegin;
5454 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5455 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5456 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5457 ierr = MatDestroy(&At);CHKERRQ(ierr);
5458 ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5459 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5460 ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5461 PetscFunctionReturn(0);
5462 }
5463
5464 #undef __FUNCT__
5465 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5466 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5467 {
5468 PetscErrorCode ierr;
5469 PetscInt m=A->rmap->n,n=B->cmap->n;
5470 Mat Cmat;
5471
5472 PetscFunctionBegin;
5473 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d",A->cmap->n,B->rmap->n);
5474 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5475 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5476 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5477 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5478 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5479 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5480 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5481 *C = Cmat;
5482 (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ;
5483 PetscFunctionReturn(0);
5484 }
5485
5486 /* ----------------------------------------------------------------*/
5487 #undef __FUNCT__
5488 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5489 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5490 {
5491 PetscErrorCode ierr;
5492
5493 PetscFunctionBegin;
5494 if (scall == MAT_INITIAL_MATRIX){
5495 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5496 }
5497 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
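/* Note: for MAT_REUSE_MATRIX the symbolic step above is skipped and only the numeric product is
   recomputed into the existing C; the numeric kernel forms C = (B'*A')' via explicit transposes. */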
5498 PetscFunctionReturn(0); 5499 } 5500 5501 EXTERN_C_BEGIN 5502 #if defined(PETSC_HAVE_MUMPS) 5503 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5504 #endif 5505 #if defined(PETSC_HAVE_PASTIX) 5506 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5507 #endif 5508 #if defined(PETSC_HAVE_SUPERLU_DIST) 5509 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5510 #endif 5511 #if defined(PETSC_HAVE_SPOOLES) 5512 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5513 #endif 5514 EXTERN_C_END 5515 5516 /*MC 5517 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5518 5519 Options Database Keys: 5520 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5521 5522 Level: beginner 5523 5524 .seealso: MatCreateAIJ() 5525 M*/ 5526 5527 EXTERN_C_BEGIN 5528 #undef __FUNCT__ 5529 #define __FUNCT__ "MatCreate_MPIAIJ" 5530 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5531 { 5532 Mat_MPIAIJ *b; 5533 PetscErrorCode ierr; 5534 PetscMPIInt size; 5535 5536 PetscFunctionBegin; 5537 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5538 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5539 B->data = (void*)b; 5540 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5541 B->assembled = PETSC_FALSE; 5542 B->insertmode = NOT_SET_VALUES; 5543 b->size = size; 5544 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5545 5546 /* build cache for off array entries formed */ 5547 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5548 b->donotstash = PETSC_FALSE; 5549 b->colmap = 0; 5550 b->garray = 0; 5551 b->roworiented = PETSC_TRUE; 5552 5553 /* stuff used for matrix vector multiply */ 5554 b->lvec = PETSC_NULL; 5555 b->Mvctx = PETSC_NULL; 5556 5557 /* stuff for MatGetRow() */ 5558 b->rowindices = 0; 5559 b->rowvalues = 0; 5560 b->getrowactive = PETSC_FALSE; 5561 5562 /* flexible pointer used in CUSP/CUSPARSE classes */ 5563 b->spptr = PETSC_NULL; 5564 #if defined(PETSC_HAVE_SPOOLES) 5565 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5566 "MatGetFactor_mpiaij_spooles", 5567 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5568 #endif 5569 #if defined(PETSC_HAVE_MUMPS) 5570 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5571 "MatGetFactor_aij_mumps", 5572 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5573 #endif 5574 #if defined(PETSC_HAVE_PASTIX) 5575 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5576 "MatGetFactor_mpiaij_pastix", 5577 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5578 #endif 5579 #if defined(PETSC_HAVE_SUPERLU_DIST) 5580 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5581 "MatGetFactor_mpiaij_superlu_dist", 5582 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5583 #endif 5584 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5585 "MatStoreValues_MPIAIJ", 5586 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5587 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5588 "MatRetrieveValues_MPIAIJ", 5589 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5590 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5591 "MatGetDiagonalBlock_MPIAIJ", 5592 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5593 ierr = 
PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5594 "MatIsTranspose_MPIAIJ", 5595 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5596 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5597 "MatMPIAIJSetPreallocation_MPIAIJ", 5598 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5599 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5600 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5601 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5602 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5603 "MatDiagonalScaleLocal_MPIAIJ", 5604 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5605 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5606 "MatConvert_MPIAIJ_MPIAIJPERM", 5607 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5608 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5609 "MatConvert_MPIAIJ_MPIAIJCRL", 5610 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5611 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5612 "MatConvert_MPIAIJ_MPISBAIJ", 5613 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5614 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5615 "MatMatMult_MPIDense_MPIAIJ", 5616 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5617 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5618 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5619 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5620 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5621 "MatMatMultNumeric_MPIDense_MPIAIJ", 5622 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5623 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5624 PetscFunctionReturn(0); 5625 } 5626 EXTERN_C_END 5627 5628 #undef __FUNCT__ 5629 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5630 /*@ 5631 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5632 and "off-diagonal" part of the matrix in CSR format. 5633 5634 Collective on MPI_Comm 5635 5636 Input Parameters: 5637 + comm - MPI communicator 5638 . m - number of local rows (Cannot be PETSC_DECIDE) 5639 . n - This value should be the same as the local size used in creating the 5640 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5641 calculated if N is given) For square matrices n is almost always m. 5642 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5643 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5644 . i - row indices for "diagonal" portion of matrix 5645 . j - column indices 5646 . a - matrix values 5647 . oi - row indices for "off-diagonal" portion of matrix 5648 . oj - column indices 5649 - oa - matrix values 5650 5651 Output Parameter: 5652 . mat - the matrix 5653 5654 Level: advanced 5655 5656 Notes: 5657 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5658 must free the arrays once the matrix has been destroyed and not before. 5659 5660 The i and j indices are 0 based 5661 5662 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5663 5664 This sets local rows and cannot be used to set off-processor values. 
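   Example usage (a minimal sketch, not taken from the PETSc examples; d_i,d_j,d_a and o_i,o_j,o_a are
   placeholder names for split CSR arrays the caller has already built as described above):
.vb
      Mat A;
      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                            d_i,d_j,d_a,o_i,o_j,o_a,&A);CHKERRQ(ierr);
      /* ... use A ...; the six arrays must remain valid until after MatDestroy(&A) */
.ve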
5665 5666 You cannot later use MatSetValues() to change values in this matrix. 5667 5668 .keywords: matrix, aij, compressed row, sparse, parallel 5669 5670 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5671 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5672 @*/ 5673 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5674 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5675 { 5676 PetscErrorCode ierr; 5677 Mat_MPIAIJ *maij; 5678 5679 PetscFunctionBegin; 5680 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5681 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5682 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5683 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5684 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5685 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5686 maij = (Mat_MPIAIJ*) (*mat)->data; 5687 maij->donotstash = PETSC_TRUE; 5688 (*mat)->preallocated = PETSC_TRUE; 5689 5690 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5691 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5692 5693 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5694 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5695 5696 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5697 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5698 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5699 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5700 5701 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5702 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5703 PetscFunctionReturn(0); 5704 } 5705 5706 /* 5707 Special version for direct calls from Fortran 5708 */ 5709 #include <petsc-private/fortranimpl.h> 5710 5711 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5712 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5713 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5714 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5715 #endif 5716 5717 /* Change these macros so can be used in void function */ 5718 #undef CHKERRQ 5719 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5720 #undef SETERRQ2 5721 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5722 #undef SETERRQ3 5723 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5724 #undef SETERRQ 5725 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5726 5727 EXTERN_C_BEGIN 5728 #undef __FUNCT__ 5729 #define __FUNCT__ "matsetvaluesmpiaij_" 5730 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5731 { 5732 Mat mat = *mmat; 5733 PetscInt m = *mm, n = *mn; 5734 InsertMode addv = *maddv; 5735 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5736 PetscScalar value; 5737 PetscErrorCode ierr; 5738 5739 MatCheckPreallocated(mat,1); 5740 if (mat->insertmode == NOT_SET_VALUES) { 5741 mat->insertmode = addv; 5742 } 5743 #if defined(PETSC_USE_DEBUG) 5744 else if (mat->insertmode != addv) { 5745 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5746 } 5747 #endif 5748 { 5749 PetscInt 
i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5750 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5751 PetscBool roworiented = aij->roworiented; 5752 5753 /* Some Variables required in the macro */ 5754 Mat A = aij->A; 5755 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5756 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5757 MatScalar *aa = a->a; 5758 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5759 Mat B = aij->B; 5760 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5761 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5762 MatScalar *ba = b->a; 5763 5764 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5765 PetscInt nonew = a->nonew; 5766 MatScalar *ap1,*ap2; 5767 5768 PetscFunctionBegin; 5769 for (i=0; i<m; i++) { 5770 if (im[i] < 0) continue; 5771 #if defined(PETSC_USE_DEBUG) 5772 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5773 #endif 5774 if (im[i] >= rstart && im[i] < rend) { 5775 row = im[i] - rstart; 5776 lastcol1 = -1; 5777 rp1 = aj + ai[row]; 5778 ap1 = aa + ai[row]; 5779 rmax1 = aimax[row]; 5780 nrow1 = ailen[row]; 5781 low1 = 0; 5782 high1 = nrow1; 5783 lastcol2 = -1; 5784 rp2 = bj + bi[row]; 5785 ap2 = ba + bi[row]; 5786 rmax2 = bimax[row]; 5787 nrow2 = bilen[row]; 5788 low2 = 0; 5789 high2 = nrow2; 5790 5791 for (j=0; j<n; j++) { 5792 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5793 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5794 if (in[j] >= cstart && in[j] < cend){ 5795 col = in[j] - cstart; 5796 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5797 } else if (in[j] < 0) continue; 5798 #if defined(PETSC_USE_DEBUG) 5799 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5800 #endif 5801 else { 5802 if (mat->was_assembled) { 5803 if (!aij->colmap) { 5804 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5805 } 5806 #if defined (PETSC_USE_CTABLE) 5807 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5808 col--; 5809 #else 5810 col = aij->colmap[in[j]] - 1; 5811 #endif 5812 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5813 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5814 col = in[j]; 5815 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5816 B = aij->B; 5817 b = (Mat_SeqAIJ*)B->data; 5818 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5819 rp2 = bj + bi[row]; 5820 ap2 = ba + bi[row]; 5821 rmax2 = bimax[row]; 5822 nrow2 = bilen[row]; 5823 low2 = 0; 5824 high2 = nrow2; 5825 bm = aij->B->rmap->n; 5826 ba = b->a; 5827 } 5828 } else col = in[j]; 5829 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5830 } 5831 } 5832 } else { 5833 if (!aij->donotstash) { 5834 if (roworiented) { 5835 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5836 } else { 5837 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5838 } 5839 } 5840 } 5841 }} 5842 PetscFunctionReturnVoid(); 5843 } 5844 EXTERN_C_END 5845 5846
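/*
   Note: matsetvaluesmpiaij_() above mirrors the MPIAIJ implementation of MatSetValues() but returns void,
   so Fortran code can call it directly without the usual error-return handling; with CHKERRQ and SETERRQ
   redefined to CHKERRABORT above, any failure aborts instead of returning an error code.
*/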