1 /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 #include "petscsys.h" /*I "petscmat.h" I*/ 11 12 /* 13 Special version for Fun3d sequential benchmark 14 */ 15 #if defined(PETSC_HAVE_FORTRAN_CAPS) 16 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 17 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 18 #define matsetvaluesblocked4_ matsetvaluesblocked4 19 #endif 20 21 EXTERN_C_BEGIN 22 #undef __FUNCT__ 23 #define __FUNCT__ "matsetvaluesblocked4_" 24 void matsetvaluesblocked4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v) 25 { 26 Mat A = *AA; 27 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 28 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 29 int *ai=a->i,*ailen=a->ilen; 30 int *aj=a->j,stepval; 31 PetscScalar *value = v; 32 MatScalar *ap,*aa = a->a,*bap; 33 34 PetscFunctionBegin; 35 stepval = (n-1)*4; 36 for (k=0; k<m; k++) { /* loop over added rows */ 37 row = im[k]; 38 rp = aj + ai[row]; 39 ap = aa + 16*ai[row]; 40 nrow = ailen[row]; 41 low = 0; 42 for (l=0; l<n; l++) { /* loop over added columns */ 43 col = in[l]; 44 value = v + k*(stepval+4)*4 + l*4; 45 low = 0; high = nrow; 46 while (high-low > 7) { 47 t = (low+high)/2; 48 if (rp[t] > col) high = t; 49 else low = t; 50 } 51 for (i=low; i<high; i++) { 52 if (rp[i] > col) break; 53 if (rp[i] == col) { 54 bap = ap + 16*i; 55 for (ii=0; ii<4; ii++,value+=stepval) { 56 for (jj=ii; jj<16; jj+=4) { 57 bap[jj] += *value++; 58 } 59 } 60 goto noinsert2; 61 } 62 } 63 N = nrow++ - 1; 64 /* shift up all the later entries in this row */ 65 for (ii=N; ii>=i; ii--) { 66 rp[ii+1] = rp[ii]; 67 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 68 } 69 if (N >= i) { 70 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 71 } 72 rp[i] = col; 73 bap = ap + 16*i; 74 for (ii=0; ii<4; ii++,value+=stepval) { 75 for (jj=ii; jj<16; jj+=4) { 76 bap[jj] = *value++; 77 } 78 } 79 noinsert2:; 80 low = i; 81 } 82 ailen[row] = nrow; 83 } 84 } 85 EXTERN_C_END 86 87 #if defined(PETSC_HAVE_FORTRAN_CAPS) 88 #define matsetvalues4_ MATSETVALUES4 89 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 90 #define matsetvalues4_ matsetvalues4 91 #endif 92 93 EXTERN_C_BEGIN 94 #undef __FUNCT__ 95 #define __FUNCT__ "MatSetValues4_" 96 void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v) 97 { 98 Mat A = *AA; 99 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 100 int *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 101 int *ai=a->i,*ailen=a->ilen; 102 int *aj=a->j,brow,bcol; 103 int ridx,cidx; 104 MatScalar *ap,value,*aa=a->a,*bap; 105 106 PetscFunctionBegin; 107 for (k=0; k<m; k++) { /* loop over added rows */ 108 row = im[k]; brow = row/4; 109 rp = aj + ai[brow]; 110 ap = aa + 16*ai[brow]; 111 nrow = ailen[brow]; 112 low = 0; 113 for (l=0; l<n; l++) { /* loop over added columns */ 114 col = in[l]; bcol = col/4; 115 ridx = row % 4; cidx = col % 4; 116 value = v[l + k*n]; 117 low = 0; high = nrow; 118 while (high-low > 7) { 119 t = (low+high)/2; 120 if (rp[t] > bcol) high = t; 121 else low = t; 122 } 123 for (i=low; i<high; i++) { 124 if (rp[i] > bcol) break; 125 if (rp[i] == bcol) { 126 bap = ap + 16*i + 4*cidx + ridx; 127 *bap += value; 128 goto noinsert1; 129 } 130 } 131 N = nrow++ - 1; 132 /* shift up all the later entries in this row */ 133 for (ii=N; ii>=i; ii--) { 134 rp[ii+1] = rp[ii]; 135 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 136 } 137 if (N>=i) { 138 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 139 } 140 rp[i] = bcol; 141 ap[16*i + 4*cidx + ridx] = value; 142 noinsert1:; 143 low = i; 144 } 145 ailen[brow] = nrow; 146 } 147 } 148 EXTERN_C_END 149 150 /* UGLY, ugly, ugly 151 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 152 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 153 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 154 converts the entries into single precision and then calls ..._MatScalar() to put them 155 into the single precision data structures. 156 */ 157 #if defined(PETSC_USE_MAT_SINGLE) 158 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 159 #else 160 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 161 #endif 162 #if defined(PETSC_HAVE_DSCPACK) 163 EXTERN int MatUseDSCPACK_MPIBAIJ(Mat); 164 #endif 165 166 #define CHUNKSIZE 10 167 168 /* 169 Checks for missing diagonals 170 */ 171 #undef __FUNCT__ 172 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 173 int MatMissingDiagonal_SeqBAIJ(Mat A) 174 { 175 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 176 int *diag,*jj = a->j,i,ierr; 177 178 PetscFunctionBegin; 179 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 180 diag = a->diag; 181 for (i=0; i<a->mbs; i++) { 182 if (jj[diag[i]] != i) { 183 SETERRQ1(1,"Matrix is missing diagonal number %d",i); 184 } 185 } 186 PetscFunctionReturn(0); 187 } 188 189 #undef __FUNCT__ 190 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 191 int MatMarkDiagonal_SeqBAIJ(Mat A) 192 { 193 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 194 int i,j,*diag,m = a->mbs,ierr; 195 196 PetscFunctionBegin; 197 if (a->diag) PetscFunctionReturn(0); 198 199 ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr); 200 PetscLogObjectMemory(A,(m+1)*sizeof(int)); 201 for (i=0; i<m; i++) { 202 diag[i] = a->i[i+1]; 203 for (j=a->i[i]; j<a->i[i+1]; j++) { 204 if (a->j[j] == i) { 205 diag[i] = j; 206 break; 207 } 208 } 209 } 210 a->diag = diag; 211 PetscFunctionReturn(0); 212 } 213 214 215 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 216 217 #undef __FUNCT__ 218 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 219 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 220 { 221 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 222 int ierr,n = a->mbs,i; 223 224 PetscFunctionBegin; 225 *nn = n; 226 if (!ia) PetscFunctionReturn(0); 227 if (symmetric) { 228 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 229 } else if (oshift == 1) { 230 /* temporarily add 1 to i and j indices */ 231 int nz = a->i[n]; 232 for (i=0; i<nz; i++) a->j[i]++; 233 for (i=0; i<n+1; i++) a->i[i]++; 234 *ia = a->i; *ja = a->j; 235 } else { 236 *ia = a->i; *ja = a->j; 237 } 238 239 PetscFunctionReturn(0); 240 } 241 242 #undef __FUNCT__ 243 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 244 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 245 { 246 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 247 int i,n = a->mbs,ierr; 248 249 PetscFunctionBegin; 250 if (!ia) PetscFunctionReturn(0); 251 if (symmetric) { 252 ierr = PetscFree(*ia);CHKERRQ(ierr); 253 ierr = PetscFree(*ja);CHKERRQ(ierr); 254 } else if (oshift == 1) { 255 int nz = a->i[n]-1; 256 for (i=0; i<nz; i++) a->j[i]--; 257 for (i=0; i<n+1; i++) a->i[i]--; 258 } 259 PetscFunctionReturn(0); 260 } 261 262 #undef __FUNCT__ 263 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ" 264 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs) 265 { 266 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 267 268 PetscFunctionBegin; 269 *bs = baij->bs; 270 PetscFunctionReturn(0); 271 } 272 273 274 #undef __FUNCT__ 275 #define __FUNCT__ "MatDestroy_SeqBAIJ" 276 int MatDestroy_SeqBAIJ(Mat A) 277 { 278 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 279 int ierr; 280 281 PetscFunctionBegin; 282 #if defined(PETSC_USE_LOG) 283 PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz); 284 #endif 285 ierr = PetscFree(a->a);CHKERRQ(ierr); 286 if (!a->singlemalloc) { 287 ierr = PetscFree(a->i);CHKERRQ(ierr); 288 ierr = PetscFree(a->j);CHKERRQ(ierr); 289 } 290 if (a->row) { 291 ierr = ISDestroy(a->row);CHKERRQ(ierr); 292 } 293 if (a->col) { 294 ierr = ISDestroy(a->col);CHKERRQ(ierr); 295 } 296 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 297 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 298 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 299 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 300 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 301 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 302 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 303 #if defined(PETSC_USE_MAT_SINGLE) 304 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 305 #endif 306 ierr = PetscFree(a);CHKERRQ(ierr); 307 PetscFunctionReturn(0); 308 } 309 310 #undef __FUNCT__ 311 #define __FUNCT__ "MatSetOption_SeqBAIJ" 312 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 313 { 314 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 315 316 PetscFunctionBegin; 317 switch (op) { 318 case MAT_ROW_ORIENTED: 319 a->roworiented = PETSC_TRUE; 320 break; 321 case MAT_COLUMN_ORIENTED: 322 a->roworiented = PETSC_FALSE; 323 break; 324 case MAT_COLUMNS_SORTED: 325 a->sorted = PETSC_TRUE; 326 break; 327 case MAT_COLUMNS_UNSORTED: 328 a->sorted = PETSC_FALSE; 329 break; 330 case MAT_KEEP_ZEROED_ROWS: 331 a->keepzeroedrows = PETSC_TRUE; 332 break; 333 case MAT_NO_NEW_NONZERO_LOCATIONS: 334 a->nonew = 1; 335 break; 336 case MAT_NEW_NONZERO_LOCATION_ERR: 337 a->nonew = -1; 338 break; 339 case MAT_NEW_NONZERO_ALLOCATION_ERR: 340 a->nonew = -2; 341 break; 342 case MAT_YES_NEW_NONZERO_LOCATIONS: 343 a->nonew = 0; 344 break; 345 case MAT_ROWS_SORTED: 346 case MAT_ROWS_UNSORTED: 347 case MAT_YES_NEW_DIAGONALS: 348 case MAT_IGNORE_OFF_PROC_ENTRIES: 349 case MAT_USE_HASH_TABLE: 350 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 351 break; 352 case MAT_NO_NEW_DIAGONALS: 353 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 354 default: 355 SETERRQ(PETSC_ERR_SUP,"unknown option"); 356 } 357 PetscFunctionReturn(0); 358 } 359 360 #undef __FUNCT__ 361 #define __FUNCT__ "MatGetRow_SeqBAIJ" 362 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 363 { 364 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 365 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr; 366 MatScalar *aa,*aa_i; 367 PetscScalar *v_i; 368 369 PetscFunctionBegin; 370 bs = a->bs; 371 ai = a->i; 372 aj = a->j; 373 aa = a->a; 374 bs2 = a->bs2; 375 376 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 377 378 bn = row/bs; /* Block number */ 379 bp = row % bs; /* Block Position */ 380 M = ai[bn+1] - ai[bn]; 381 *nz = bs*M; 382 383 if (v) { 384 *v = 0; 385 if (*nz) { 386 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 387 for (i=0; i<M; i++) { /* for each block in the block row */ 388 v_i = *v + i*bs; 389 aa_i = aa + bs2*(ai[bn] + i); 390 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 391 } 392 } 393 } 394 395 if (idx) { 396 *idx = 0; 397 if (*nz) { 398 ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr); 399 for (i=0; i<M; i++) { /* for each block in the block row */ 400 idx_i = *idx + i*bs; 401 itmp = bs*aj[ai[bn] + i]; 402 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 403 } 404 } 405 } 406 PetscFunctionReturn(0); 407 } 408 409 #undef __FUNCT__ 410 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 411 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 412 { 413 int ierr; 414 415 PetscFunctionBegin; 416 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 417 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 418 PetscFunctionReturn(0); 419 } 420 421 #undef __FUNCT__ 422 #define __FUNCT__ "MatTranspose_SeqBAIJ" 423 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 424 { 425 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 426 Mat C; 427 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 428 int *rows,*cols,bs2=a->bs2; 429 PetscScalar *array; 430 431 PetscFunctionBegin; 432 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 433 ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr); 434 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 435 436 #if defined(PETSC_USE_MAT_SINGLE) 437 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 438 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 439 #else 440 array = a->a; 441 #endif 442 443 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 444 ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 445 ierr = PetscFree(col);CHKERRQ(ierr); 446 ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr); 447 cols = rows + bs; 448 for (i=0; i<mbs; i++) { 449 cols[0] = i*bs; 450 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 451 len = ai[i+1] - ai[i]; 452 for (j=0; j<len; j++) { 453 rows[0] = (*aj++)*bs; 454 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 455 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 456 array += bs2; 457 } 458 } 459 ierr = PetscFree(rows);CHKERRQ(ierr); 460 #if defined(PETSC_USE_MAT_SINGLE) 461 ierr = PetscFree(array); 462 #endif 463 464 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 465 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 466 467 if (B) { 468 *B = C; 469 } else { 470 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 471 } 472 PetscFunctionReturn(0); 473 } 474 475 #undef __FUNCT__ 476 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 477 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 478 { 479 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 480 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 481 PetscScalar *aa; 482 FILE *file; 483 484 PetscFunctionBegin; 485 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 486 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 487 col_lens[0] = MAT_FILE_COOKIE; 488 489 col_lens[1] = A->m; 490 col_lens[2] = A->n; 491 col_lens[3] = a->nz*bs2; 492 493 /* store lengths of each row and write (including header) to file */ 494 count = 0; 495 for (i=0; i<a->mbs; i++) { 496 for (j=0; j<bs; j++) { 497 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 498 } 499 } 500 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 501 ierr = PetscFree(col_lens);CHKERRQ(ierr); 502 503 /* store column indices (zero start index) */ 504 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 505 count = 0; 506 for (i=0; i<a->mbs; i++) { 507 for (j=0; j<bs; j++) { 508 for (k=a->i[i]; k<a->i[i+1]; k++) { 509 for (l=0; l<bs; l++) { 510 jj[count++] = bs*a->j[k] + l; 511 } 512 } 513 } 514 } 515 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 516 ierr = PetscFree(jj);CHKERRQ(ierr); 517 518 /* store nonzero values */ 519 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 520 count = 0; 521 for (i=0; i<a->mbs; i++) { 522 for (j=0; j<bs; j++) { 523 for (k=a->i[i]; k<a->i[i+1]; k++) { 524 for (l=0; l<bs; l++) { 525 aa[count++] = a->a[bs2*k + l*bs + j]; 526 } 527 } 528 } 529 } 530 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 531 ierr = PetscFree(aa);CHKERRQ(ierr); 532 533 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 534 if (file) { 535 fprintf(file,"-matload_block_size %d\n",a->bs); 536 } 537 PetscFunctionReturn(0); 538 } 539 540 extern int MatMPIBAIJFactorInfo_DSCPACK(Mat,PetscViewer); 541 542 #undef __FUNCT__ 543 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 544 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 545 { 546 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 547 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 548 PetscViewerFormat format; 549 550 PetscFunctionBegin; 551 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 552 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 553 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 554 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 555 Mat aij; 556 ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr); 557 ierr = MatView(aij,viewer);CHKERRQ(ierr); 558 ierr = MatDestroy(aij);CHKERRQ(ierr); 559 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 560 #if defined(PETSC_HAVE_DSCPACK) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX) 561 ierr = MatMPIBAIJFactorInfo_DSCPACK(A,viewer);CHKERRQ(ierr); 562 #endif 563 PetscFunctionReturn(0); 564 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 565 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 566 for (i=0; i<a->mbs; i++) { 567 for (j=0; j<bs; j++) { 568 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 569 for (k=a->i[i]; k<a->i[i+1]; k++) { 570 for (l=0; l<bs; l++) { 571 #if defined(PETSC_USE_COMPLEX) 572 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 573 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l, 574 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 575 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 576 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l, 577 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 578 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 579 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 580 } 581 #else 582 if (a->a[bs2*k + l*bs + j] != 0.0) { 583 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 584 } 585 #endif 586 } 587 } 588 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 589 } 590 } 591 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 592 } else { 593 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 594 for (i=0; i<a->mbs; i++) { 595 for (j=0; j<bs; j++) { 596 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 597 for (k=a->i[i]; k<a->i[i+1]; k++) { 598 for (l=0; l<bs; l++) { 599 #if defined(PETSC_USE_COMPLEX) 600 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 601 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 602 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 603 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 604 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 605 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 606 } else { 607 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 608 } 609 #else 610 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 611 #endif 612 } 613 } 614 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 615 } 616 } 617 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 618 } 619 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 620 PetscFunctionReturn(0); 621 } 622 623 #undef __FUNCT__ 624 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 625 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 626 { 627 Mat A = (Mat) Aa; 628 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 629 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 630 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 631 MatScalar *aa; 632 PetscViewer viewer; 633 634 PetscFunctionBegin; 635 636 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 637 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 638 639 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 640 641 /* loop over matrix elements drawing boxes */ 642 color = PETSC_DRAW_BLUE; 643 for (i=0,row=0; i<mbs; i++,row+=bs) { 644 for (j=a->i[i]; j<a->i[i+1]; j++) { 645 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 646 x_l = a->j[j]*bs; x_r = x_l + 1.0; 647 aa = a->a + j*bs2; 648 for (k=0; k<bs; k++) { 649 for (l=0; l<bs; l++) { 650 if (PetscRealPart(*aa++) >= 0.) continue; 651 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 652 } 653 } 654 } 655 } 656 color = PETSC_DRAW_CYAN; 657 for (i=0,row=0; i<mbs; i++,row+=bs) { 658 for (j=a->i[i]; j<a->i[i+1]; j++) { 659 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 660 x_l = a->j[j]*bs; x_r = x_l + 1.0; 661 aa = a->a + j*bs2; 662 for (k=0; k<bs; k++) { 663 for (l=0; l<bs; l++) { 664 if (PetscRealPart(*aa++) != 0.) continue; 665 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 666 } 667 } 668 } 669 } 670 671 color = PETSC_DRAW_RED; 672 for (i=0,row=0; i<mbs; i++,row+=bs) { 673 for (j=a->i[i]; j<a->i[i+1]; j++) { 674 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 675 x_l = a->j[j]*bs; x_r = x_l + 1.0; 676 aa = a->a + j*bs2; 677 for (k=0; k<bs; k++) { 678 for (l=0; l<bs; l++) { 679 if (PetscRealPart(*aa++) <= 0.) continue; 680 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 681 } 682 } 683 } 684 } 685 PetscFunctionReturn(0); 686 } 687 688 #undef __FUNCT__ 689 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 690 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 691 { 692 int ierr; 693 PetscReal xl,yl,xr,yr,w,h; 694 PetscDraw draw; 695 PetscTruth isnull; 696 697 PetscFunctionBegin; 698 699 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 700 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 701 702 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 703 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 704 xr += w; yr += h; xl = -w; yl = -h; 705 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 706 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 707 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 708 PetscFunctionReturn(0); 709 } 710 711 #undef __FUNCT__ 712 #define __FUNCT__ "MatView_SeqBAIJ" 713 int MatView_SeqBAIJ(Mat A,PetscViewer viewer) 714 { 715 int ierr; 716 PetscTruth issocket,isascii,isbinary,isdraw; 717 718 PetscFunctionBegin; 719 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 720 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 721 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 722 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 723 if (issocket) { 724 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 725 } else if (isascii){ 726 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 727 } else if (isbinary) { 728 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 729 } else if (isdraw) { 730 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 731 } else { 732 SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 733 } 734 PetscFunctionReturn(0); 735 } 736 737 738 #undef __FUNCT__ 739 #define __FUNCT__ "MatGetValues_SeqBAIJ" 740 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v) 741 { 742 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 743 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 744 int *ai = a->i,*ailen = a->ilen; 745 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 746 MatScalar *ap,*aa = a->a,zero = 0.0; 747 748 PetscFunctionBegin; 749 for (k=0; k<m; k++) { /* loop over rows */ 750 row = im[k]; brow = row/bs; 751 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 752 if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 753 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 754 nrow = ailen[brow]; 755 for (l=0; l<n; l++) { /* loop over columns */ 756 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 757 if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 758 col = in[l] ; 759 bcol = col/bs; 760 cidx = col%bs; 761 ridx = row%bs; 762 high = nrow; 763 low = 0; /* assume unsorted */ 764 while (high-low > 5) { 765 t = (low+high)/2; 766 if (rp[t] > bcol) high = t; 767 else low = t; 768 } 769 for (i=low; i<high; i++) { 770 if (rp[i] > bcol) break; 771 if (rp[i] == bcol) { 772 *v++ = ap[bs2*i+bs*cidx+ridx]; 773 goto finished; 774 } 775 } 776 *v++ = zero; 777 finished:; 778 } 779 } 780 PetscFunctionReturn(0); 781 } 782 783 #if defined(PETSC_USE_MAT_SINGLE) 784 #undef __FUNCT__ 785 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 786 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 787 { 788 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 789 int ierr,i,N = m*n*b->bs2; 790 MatScalar *vsingle; 791 792 PetscFunctionBegin; 793 if (N > b->setvalueslen) { 794 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 795 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 796 b->setvalueslen = N; 797 } 798 vsingle = b->setvaluescopy; 799 for (i=0; i<N; i++) { 800 vsingle[i] = v[i]; 801 } 802 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 803 PetscFunctionReturn(0); 804 } 805 #endif 806 807 808 #undef __FUNCT__ 809 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 810 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is) 811 { 812 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 813 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 814 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 815 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 816 PetscTruth roworiented=a->roworiented; 817 MatScalar *value = v,*ap,*aa = a->a,*bap; 818 819 PetscFunctionBegin; 820 if (roworiented) { 821 stepval = (n-1)*bs; 822 } else { 823 stepval = (m-1)*bs; 824 } 825 for (k=0; k<m; k++) { /* loop over added rows */ 826 row = im[k]; 827 if (row < 0) continue; 828 #if defined(PETSC_USE_BOPT_g) 829 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 830 #endif 831 rp = aj + ai[row]; 832 ap = aa + bs2*ai[row]; 833 rmax = imax[row]; 834 nrow = ailen[row]; 835 low = 0; 836 for (l=0; l<n; l++) { /* loop over added columns */ 837 if (in[l] < 0) continue; 838 #if defined(PETSC_USE_BOPT_g) 839 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 840 #endif 841 col = in[l]; 842 if (roworiented) { 843 value = v + k*(stepval+bs)*bs + l*bs; 844 } else { 845 value = v + l*(stepval+bs)*bs + k*bs; 846 } 847 if (!sorted) low = 0; high = nrow; 848 while (high-low > 7) { 849 t = (low+high)/2; 850 if (rp[t] > col) high = t; 851 else low = t; 852 } 853 for (i=low; i<high; i++) { 854 if (rp[i] > col) break; 855 if (rp[i] == col) { 856 bap = ap + bs2*i; 857 if (roworiented) { 858 if (is == ADD_VALUES) { 859 for (ii=0; ii<bs; ii++,value+=stepval) { 860 for (jj=ii; jj<bs2; jj+=bs) { 861 bap[jj] += *value++; 862 } 863 } 864 } else { 865 for (ii=0; ii<bs; ii++,value+=stepval) { 866 for (jj=ii; jj<bs2; jj+=bs) { 867 bap[jj] = *value++; 868 } 869 } 870 } 871 } else { 872 if (is == ADD_VALUES) { 873 for (ii=0; ii<bs; ii++,value+=stepval) { 874 for (jj=0; jj<bs; jj++) { 875 *bap++ += *value++; 876 } 877 } 878 } else { 879 for (ii=0; ii<bs; ii++,value+=stepval) { 880 for (jj=0; jj<bs; jj++) { 881 *bap++ = *value++; 882 } 883 } 884 } 885 } 886 goto noinsert2; 887 } 888 } 889 if (nonew == 1) goto noinsert2; 890 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 891 if (nrow >= rmax) { 892 /* there is no extra room in row, therefore enlarge */ 893 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 894 MatScalar *new_a; 895 896 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 897 898 /* malloc new storage space */ 899 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 900 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 901 new_j = (int*)(new_a + bs2*new_nz); 902 new_i = new_j + new_nz; 903 904 /* copy over old data into new slots */ 905 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 906 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 907 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 908 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 909 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 910 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 911 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 912 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 913 /* free up old matrix storage */ 914 ierr = PetscFree(a->a);CHKERRQ(ierr); 915 if (!a->singlemalloc) { 916 ierr = PetscFree(a->i);CHKERRQ(ierr); 917 ierr = PetscFree(a->j);CHKERRQ(ierr); 918 } 919 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 920 a->singlemalloc = PETSC_TRUE; 921 922 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 923 rmax = imax[row] = imax[row] + CHUNKSIZE; 924 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 925 a->maxnz += bs2*CHUNKSIZE; 926 a->reallocs++; 927 a->nz++; 928 } 929 N = nrow++ - 1; 930 /* shift up all the later entries in this row */ 931 for (ii=N; ii>=i; ii--) { 932 rp[ii+1] = rp[ii]; 933 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 934 } 935 if (N >= i) { 936 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 937 } 938 rp[i] = col; 939 bap = ap + bs2*i; 940 if (roworiented) { 941 for (ii=0; ii<bs; ii++,value+=stepval) { 942 for (jj=ii; jj<bs2; jj+=bs) { 943 bap[jj] = *value++; 944 } 945 } 946 } else { 947 for (ii=0; ii<bs; ii++,value+=stepval) { 948 for (jj=0; jj<bs; jj++) { 949 *bap++ = *value++; 950 } 951 } 952 } 953 noinsert2:; 954 low = i; 955 } 956 ailen[row] = nrow; 957 } 958 PetscFunctionReturn(0); 959 } 960 961 #undef __FUNCT__ 962 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 963 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 964 { 965 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 966 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 967 int m = A->m,*ip,N,*ailen = a->ilen; 968 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 969 MatScalar *aa = a->a,*ap; 970 #if defined(PETSC_HAVE_DSCPACK) 971 PetscTruth flag; 972 #endif 973 974 PetscFunctionBegin; 975 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 976 977 if (m) rmax = ailen[0]; 978 for (i=1; i<mbs; i++) { 979 /* move each row back by the amount of empty slots (fshift) before it*/ 980 fshift += imax[i-1] - ailen[i-1]; 981 rmax = PetscMax(rmax,ailen[i]); 982 if (fshift) { 983 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 984 N = ailen[i]; 985 for (j=0; j<N; j++) { 986 ip[j-fshift] = ip[j]; 987 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 988 } 989 } 990 ai[i] = ai[i-1] + ailen[i-1]; 991 } 992 if (mbs) { 993 fshift += imax[mbs-1] - ailen[mbs-1]; 994 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 995 } 996 /* reset ilen and imax for each row */ 997 for (i=0; i<mbs; i++) { 998 ailen[i] = imax[i] = ai[i+1] - ai[i]; 999 } 1000 a->nz = ai[mbs]; 1001 1002 /* diagonals may have moved, so kill the diagonal pointers */ 1003 if (fshift && a->diag) { 1004 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1005 PetscLogObjectMemory(A,-(mbs+1)*sizeof(int)); 1006 a->diag = 0; 1007 } 1008 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2); 1009 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs); 1010 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 1011 a->reallocs = 0; 1012 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1013 1014 #if defined(PETSC_HAVE_DSCPACK) 1015 ierr = PetscOptionsHasName(A->prefix,"-mat_baij_dscpack",&flag);CHKERRQ(ierr); 1016 if (flag) { ierr = MatUseDSCPACK_MPIBAIJ(A);CHKERRQ(ierr); } 1017 #endif 1018 1019 PetscFunctionReturn(0); 1020 } 1021 1022 1023 1024 /* 1025 This function returns an array of flags which indicate the locations of contiguous 1026 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1027 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1028 Assume: sizes should be long enough to hold all the values. 1029 */ 1030 #undef __FUNCT__ 1031 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1032 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 1033 { 1034 int i,j,k,row; 1035 PetscTruth flg; 1036 1037 PetscFunctionBegin; 1038 for (i=0,j=0; i<n; j++) { 1039 row = idx[i]; 1040 if (row%bs!=0) { /* Not the begining of a block */ 1041 sizes[j] = 1; 1042 i++; 1043 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1044 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1045 i++; 1046 } else { /* Begining of the block, so check if the complete block exists */ 1047 flg = PETSC_TRUE; 1048 for (k=1; k<bs; k++) { 1049 if (row+k != idx[i+k]) { /* break in the block */ 1050 flg = PETSC_FALSE; 1051 break; 1052 } 1053 } 1054 if (flg == PETSC_TRUE) { /* No break in the bs */ 1055 sizes[j] = bs; 1056 i+= bs; 1057 } else { 1058 sizes[j] = 1; 1059 i++; 1060 } 1061 } 1062 } 1063 *bs_max = j; 1064 PetscFunctionReturn(0); 1065 } 1066 1067 #undef __FUNCT__ 1068 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1069 int MatZeroRows_SeqBAIJ(Mat A,IS is,PetscScalar *diag) 1070 { 1071 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1072 int ierr,i,j,k,count,is_n,*is_idx,*rows; 1073 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 1074 PetscScalar zero = 0.0; 1075 MatScalar *aa; 1076 1077 PetscFunctionBegin; 1078 /* Make a copy of the IS and sort it */ 1079 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 1080 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1081 1082 /* allocate memory for rows,sizes */ 1083 ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 1084 sizes = rows + is_n; 1085 1086 /* copy IS values to rows, and sort them */ 1087 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1088 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1089 if (baij->keepzeroedrows) { 1090 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1091 bs_max = is_n; 1092 } else { 1093 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1094 } 1095 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 1096 1097 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1098 row = rows[j]; 1099 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 1100 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1101 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1102 if (sizes[i] == bs && !baij->keepzeroedrows) { 1103 if (diag) { 1104 if (baij->ilen[row/bs] > 0) { 1105 baij->ilen[row/bs] = 1; 1106 baij->j[baij->i[row/bs]] = row/bs; 1107 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1108 } 1109 /* Now insert all the diagonal values for this bs */ 1110 for (k=0; k<bs; k++) { 1111 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 1112 } 1113 } else { /* (!diag) */ 1114 baij->ilen[row/bs] = 0; 1115 } /* end (!diag) */ 1116 } else { /* (sizes[i] != bs) */ 1117 #if defined (PETSC_USE_DEBUG) 1118 if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 1119 #endif 1120 for (k=0; k<count; k++) { 1121 aa[0] = zero; 1122 aa += bs; 1123 } 1124 if (diag) { 1125 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1126 } 1127 } 1128 } 1129 1130 ierr = PetscFree(rows);CHKERRQ(ierr); 1131 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1132 PetscFunctionReturn(0); 1133 } 1134 1135 #undef __FUNCT__ 1136 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1137 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 1138 { 1139 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1140 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1141 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1142 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1143 int ridx,cidx,bs2=a->bs2,ierr; 1144 PetscTruth roworiented=a->roworiented; 1145 MatScalar *ap,value,*aa=a->a,*bap; 1146 1147 PetscFunctionBegin; 1148 for (k=0; k<m; k++) { /* loop over added rows */ 1149 row = im[k]; brow = row/bs; 1150 if (row < 0) continue; 1151 #if defined(PETSC_USE_BOPT_g) 1152 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m); 1153 #endif 1154 rp = aj + ai[brow]; 1155 ap = aa + bs2*ai[brow]; 1156 rmax = imax[brow]; 1157 nrow = ailen[brow]; 1158 low = 0; 1159 for (l=0; l<n; l++) { /* loop over added columns */ 1160 if (in[l] < 0) continue; 1161 #if defined(PETSC_USE_BOPT_g) 1162 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n); 1163 #endif 1164 col = in[l]; bcol = col/bs; 1165 ridx = row % bs; cidx = col % bs; 1166 if (roworiented) { 1167 value = v[l + k*n]; 1168 } else { 1169 value = v[k + l*m]; 1170 } 1171 if (!sorted) low = 0; high = nrow; 1172 while (high-low > 7) { 1173 t = (low+high)/2; 1174 if (rp[t] > bcol) high = t; 1175 else low = t; 1176 } 1177 for (i=low; i<high; i++) { 1178 if (rp[i] > bcol) break; 1179 if (rp[i] == bcol) { 1180 bap = ap + bs2*i + bs*cidx + ridx; 1181 if (is == ADD_VALUES) *bap += value; 1182 else *bap = value; 1183 goto noinsert1; 1184 } 1185 } 1186 if (nonew == 1) goto noinsert1; 1187 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1188 if (nrow >= rmax) { 1189 /* there is no extra room in row, therefore enlarge */ 1190 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1191 MatScalar *new_a; 1192 1193 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1194 1195 /* Malloc new storage space */ 1196 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1197 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1198 new_j = (int*)(new_a + bs2*new_nz); 1199 new_i = new_j + new_nz; 1200 1201 /* copy over old data into new slots */ 1202 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1203 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1204 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1205 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1206 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1207 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1208 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1209 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1210 /* free up old matrix storage */ 1211 ierr = PetscFree(a->a);CHKERRQ(ierr); 1212 if (!a->singlemalloc) { 1213 ierr = PetscFree(a->i);CHKERRQ(ierr); 1214 ierr = PetscFree(a->j);CHKERRQ(ierr); 1215 } 1216 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1217 a->singlemalloc = PETSC_TRUE; 1218 1219 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1220 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1221 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1222 a->maxnz += bs2*CHUNKSIZE; 1223 a->reallocs++; 1224 a->nz++; 1225 } 1226 N = nrow++ - 1; 1227 /* shift up all the later entries in this row */ 1228 for (ii=N; ii>=i; ii--) { 1229 rp[ii+1] = rp[ii]; 1230 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1231 } 1232 if (N>=i) { 1233 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1234 } 1235 rp[i] = bcol; 1236 ap[bs2*i + bs*cidx + ridx] = value; 1237 noinsert1:; 1238 low = i; 1239 } 1240 ailen[brow] = nrow; 1241 } 1242 PetscFunctionReturn(0); 1243 } 1244 1245 1246 #undef __FUNCT__ 1247 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1248 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 1249 { 1250 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1251 Mat outA; 1252 int ierr; 1253 PetscTruth row_identity,col_identity; 1254 1255 PetscFunctionBegin; 1256 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1257 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1258 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1259 if (!row_identity || !col_identity) { 1260 SETERRQ(1,"Row and column permutations must be identity for in-place ILU"); 1261 } 1262 1263 outA = inA; 1264 inA->factor = FACTOR_LU; 1265 1266 if (!a->diag) { 1267 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1268 } 1269 1270 a->row = row; 1271 a->col = col; 1272 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1273 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1274 1275 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1276 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1277 PetscLogObjectParent(inA,a->icol); 1278 1279 /* 1280 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1281 for ILU(0) factorization with natural ordering 1282 */ 1283 if (a->bs < 8) { 1284 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1285 } else { 1286 if (!a->solve_work) { 1287 ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1288 PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar)); 1289 } 1290 } 1291 1292 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1293 1294 PetscFunctionReturn(0); 1295 } 1296 #undef __FUNCT__ 1297 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1298 int MatPrintHelp_SeqBAIJ(Mat A) 1299 { 1300 static PetscTruth called = PETSC_FALSE; 1301 MPI_Comm comm = A->comm; 1302 int ierr; 1303 1304 PetscFunctionBegin; 1305 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1306 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1307 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1308 PetscFunctionReturn(0); 1309 } 1310 1311 EXTERN_C_BEGIN 1312 #undef __FUNCT__ 1313 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1314 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1315 { 1316 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1317 int i,nz,nbs; 1318 1319 PetscFunctionBegin; 1320 nz = baij->maxnz/baij->bs2; 1321 nbs = baij->nbs; 1322 for (i=0; i<nz; i++) { 1323 baij->j[i] = indices[i]; 1324 } 1325 baij->nz = nz; 1326 for (i=0; i<nbs; i++) { 1327 baij->ilen[i] = baij->imax[i]; 1328 } 1329 1330 PetscFunctionReturn(0); 1331 } 1332 EXTERN_C_END 1333 1334 #undef __FUNCT__ 1335 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1336 /*@ 1337 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1338 in the matrix. 1339 1340 Input Parameters: 1341 + mat - the SeqBAIJ matrix 1342 - indices - the column indices 1343 1344 Level: advanced 1345 1346 Notes: 1347 This can be called if you have precomputed the nonzero structure of the 1348 matrix and want to provide it to the matrix object to improve the performance 1349 of the MatSetValues() operation. 1350 1351 You MUST have set the correct numbers of nonzeros per row in the call to 1352 MatCreateSeqBAIJ(). 1353 1354 MUST be called before any calls to MatSetValues(); 1355 1356 @*/ 1357 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1358 { 1359 int ierr,(*f)(Mat,int *); 1360 1361 PetscFunctionBegin; 1362 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1363 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1364 if (f) { 1365 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1366 } else { 1367 SETERRQ(1,"Wrong type of matrix to set column indices"); 1368 } 1369 PetscFunctionReturn(0); 1370 } 1371 1372 #undef __FUNCT__ 1373 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1374 int MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1375 { 1376 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1377 int ierr,i,j,n,row,bs,*ai,*aj,mbs; 1378 PetscReal atmp; 1379 PetscScalar *x,zero = 0.0; 1380 MatScalar *aa; 1381 int ncols,brow,krow,kcol; 1382 1383 PetscFunctionBegin; 1384 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1385 bs = a->bs; 1386 aa = a->a; 1387 ai = a->i; 1388 aj = a->j; 1389 mbs = a->mbs; 1390 1391 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1392 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1393 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1394 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1395 for (i=0; i<mbs; i++) { 1396 ncols = ai[1] - ai[0]; ai++; 1397 brow = bs*i; 1398 for (j=0; j<ncols; j++){ 1399 /* bcol = bs*(*aj); */ 1400 for (kcol=0; kcol<bs; kcol++){ 1401 for (krow=0; krow<bs; krow++){ 1402 atmp = PetscAbsScalar(*aa); aa++; 1403 row = brow + krow; /* row index */ 1404 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1405 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1406 } 1407 } 1408 aj++; 1409 } 1410 } 1411 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1412 PetscFunctionReturn(0); 1413 } 1414 1415 #undef __FUNCT__ 1416 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1417 int MatSetUpPreallocation_SeqBAIJ(Mat A) 1418 { 1419 int ierr; 1420 1421 PetscFunctionBegin; 1422 ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1423 PetscFunctionReturn(0); 1424 } 1425 1426 #undef __FUNCT__ 1427 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1428 int MatGetArray_SeqBAIJ(Mat A,PetscScalar **array) 1429 { 1430 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1431 PetscFunctionBegin; 1432 *array = a->a; 1433 PetscFunctionReturn(0); 1434 } 1435 1436 #undef __FUNCT__ 1437 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1438 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar **array) 1439 { 1440 PetscFunctionBegin; 1441 PetscFunctionReturn(0); 1442 } 1443 1444 #include "petscblaslapack.h" 1445 #undef __FUNCT__ 1446 #define __FUNCT__ "MatAXPY_SeqBAIJ" 1447 int MatAXPY_SeqBAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str) 1448 { 1449 int ierr,one=1,i; 1450 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 1451 int *xtoy,nz,row,xcol,ycol,jx,jy,*xi=x->i,*xj=x->j,*yi=y->i,*yj=y->j; 1452 1453 PetscFunctionBegin; 1454 if (str == SAME_NONZERO_PATTERN) { 1455 BLaxpy_(&x->nz,a,x->a,&one,y->a,&one); 1456 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1457 if (x->bs > 1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not support for bs>1"); 1458 ierr = PetscMalloc(x->nz*sizeof(int),&xtoy);CHKERRQ(ierr); 1459 i = 0; 1460 for (row=0; row<x->mbs; row++){ 1461 nz = xi[1] - xi[0]; 1462 jy = 0; 1463 for (jx=0; jx<nz; jx++,jy++){ 1464 xcol = xj[*xi + jx]; 1465 ycol = yj[*yi + jy]; /* col index for y */ 1466 while ( ycol < xcol ) { 1467 jy++; 1468 ycol = yj[*yi + jy]; 1469 } 1470 if (xcol != ycol) SETERRQ2(PETSC_ERR_ARG_WRONG,"X matrix entry (%d,%d) is not in Y matrix",row,ycol); 1471 xtoy[i++] = *yi + jy; 1472 } 1473 xi++; yi++; 1474 } 1475 1476 for (i=0; i<x->nz; i++) { 1477 y->a[xtoy[i]] += (*a)*(x->a[i]); 1478 } 1479 ierr = PetscFree(xtoy);CHKERRQ(ierr); 1480 } else { 1481 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1482 } 1483 PetscFunctionReturn(0); 1484 } 1485 1486 /* -------------------------------------------------------------------*/ 1487 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1488 MatGetRow_SeqBAIJ, 1489 MatRestoreRow_SeqBAIJ, 1490 MatMult_SeqBAIJ_N, 1491 MatMultAdd_SeqBAIJ_N, 1492 MatMultTranspose_SeqBAIJ, 1493 MatMultTransposeAdd_SeqBAIJ, 1494 MatSolve_SeqBAIJ_N, 1495 0, 1496 0, 1497 0, 1498 MatLUFactor_SeqBAIJ, 1499 0, 1500 0, 1501 MatTranspose_SeqBAIJ, 1502 MatGetInfo_SeqBAIJ, 1503 MatEqual_SeqBAIJ, 1504 MatGetDiagonal_SeqBAIJ, 1505 MatDiagonalScale_SeqBAIJ, 1506 MatNorm_SeqBAIJ, 1507 0, 1508 MatAssemblyEnd_SeqBAIJ, 1509 0, 1510 MatSetOption_SeqBAIJ, 1511 MatZeroEntries_SeqBAIJ, 1512 MatZeroRows_SeqBAIJ, 1513 MatLUFactorSymbolic_SeqBAIJ, 1514 MatLUFactorNumeric_SeqBAIJ_N, 1515 0, 1516 0, 1517 MatSetUpPreallocation_SeqBAIJ, 1518 MatILUFactorSymbolic_SeqBAIJ, 1519 0, 1520 MatGetArray_SeqBAIJ, 1521 MatRestoreArray_SeqBAIJ, 1522 MatDuplicate_SeqBAIJ, 1523 0, 1524 0, 1525 MatILUFactor_SeqBAIJ, 1526 0, 1527 MatAXPY_SeqBAIJ, 1528 MatGetSubMatrices_SeqBAIJ, 1529 MatIncreaseOverlap_SeqBAIJ, 1530 MatGetValues_SeqBAIJ, 1531 0, 1532 MatPrintHelp_SeqBAIJ, 1533 MatScale_SeqBAIJ, 1534 0, 1535 0, 1536 0, 1537 MatGetBlockSize_SeqBAIJ, 1538 MatGetRowIJ_SeqBAIJ, 1539 MatRestoreRowIJ_SeqBAIJ, 1540 0, 1541 0, 1542 0, 1543 0, 1544 0, 1545 0, 1546 MatSetValuesBlocked_SeqBAIJ, 1547 MatGetSubMatrix_SeqBAIJ, 1548 MatDestroy_SeqBAIJ, 1549 MatView_SeqBAIJ, 1550 MatGetPetscMaps_Petsc, 1551 0, 1552 0, 1553 0, 1554 0, 1555 0, 1556 0, 1557 MatGetRowMax_SeqBAIJ, 1558 MatConvert_Basic}; 1559 1560 EXTERN_C_BEGIN 1561 #undef __FUNCT__ 1562 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 1563 int MatStoreValues_SeqBAIJ(Mat mat) 1564 { 1565 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1566 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1567 int ierr; 1568 1569 PetscFunctionBegin; 1570 if (aij->nonew != 1) { 1571 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1572 } 1573 1574 /* allocate space for values if not already there */ 1575 if (!aij->saved_values) { 1576 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1577 } 1578 1579 /* copy values over */ 1580 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1581 PetscFunctionReturn(0); 1582 } 1583 EXTERN_C_END 1584 1585 EXTERN_C_BEGIN 1586 #undef __FUNCT__ 1587 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 1588 int MatRetrieveValues_SeqBAIJ(Mat mat) 1589 { 1590 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1591 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1592 1593 PetscFunctionBegin; 1594 if (aij->nonew != 1) { 1595 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1596 } 1597 if (!aij->saved_values) { 1598 SETERRQ(1,"Must call MatStoreValues(A);first"); 1599 } 1600 1601 /* copy values over */ 1602 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1603 PetscFunctionReturn(0); 1604 } 1605 EXTERN_C_END 1606 1607 EXTERN_C_BEGIN 1608 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*); 1609 EXTERN_C_END 1610 1611 EXTERN_C_BEGIN 1612 #undef __FUNCT__ 1613 #define __FUNCT__ "MatCreate_SeqBAIJ" 1614 int MatCreate_SeqBAIJ(Mat B) 1615 { 1616 int ierr,size; 1617 Mat_SeqBAIJ *b; 1618 1619 PetscFunctionBegin; 1620 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1621 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1622 1623 B->m = B->M = PetscMax(B->m,B->M); 1624 B->n = B->N = PetscMax(B->n,B->N); 1625 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 1626 B->data = (void*)b; 1627 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1628 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1629 B->factor = 0; 1630 B->lupivotthreshold = 1.0; 1631 B->mapping = 0; 1632 b->row = 0; 1633 b->col = 0; 1634 b->icol = 0; 1635 b->reallocs = 0; 1636 b->saved_values = 0; 1637 #if defined(PETSC_USE_MAT_SINGLE) 1638 b->setvalueslen = 0; 1639 b->setvaluescopy = PETSC_NULL; 1640 #endif 1641 1642 ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 1643 ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 1644 1645 b->sorted = PETSC_FALSE; 1646 b->roworiented = PETSC_TRUE; 1647 b->nonew = 0; 1648 b->diag = 0; 1649 b->solve_work = 0; 1650 b->mult_work = 0; 1651 B->spptr = 0; 1652 B->info.nz_unneeded = (PetscReal)b->maxnz; 1653 b->keepzeroedrows = PETSC_FALSE; 1654 1655 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1656 "MatStoreValues_SeqBAIJ", 1657 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1658 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1659 "MatRetrieveValues_SeqBAIJ", 1660 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1661 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 1662 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1663 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1664 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 1665 "MatConvert_SeqBAIJ_SeqAIJ", 1666 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 1667 PetscFunctionReturn(0); 1668 } 1669 EXTERN_C_END 1670 1671 #undef __FUNCT__ 1672 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 1673 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1674 { 1675 Mat C; 1676 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 1677 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1678 1679 PetscFunctionBegin; 1680 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1681 1682 *B = 0; 1683 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1684 ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr); 1685 c = (Mat_SeqBAIJ*)C->data; 1686 1687 c->bs = a->bs; 1688 c->bs2 = a->bs2; 1689 c->mbs = a->mbs; 1690 c->nbs = a->nbs; 1691 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1692 1693 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1694 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1695 for (i=0; i<mbs; i++) { 1696 c->imax[i] = a->imax[i]; 1697 c->ilen[i] = a->ilen[i]; 1698 } 1699 1700 /* allocate the matrix space */ 1701 c->singlemalloc = PETSC_TRUE; 1702 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1703 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1704 c->j = (int*)(c->a + nz*bs2); 1705 c->i = c->j + nz; 1706 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1707 if (mbs > 0) { 1708 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1709 if (cpvalues == MAT_COPY_VALUES) { 1710 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1711 } else { 1712 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1713 } 1714 } 1715 1716 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1717 c->sorted = a->sorted; 1718 c->roworiented = a->roworiented; 1719 c->nonew = a->nonew; 1720 1721 if (a->diag) { 1722 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1723 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1724 for (i=0; i<mbs; i++) { 1725 c->diag[i] = a->diag[i]; 1726 } 1727 } else c->diag = 0; 1728 c->nz = a->nz; 1729 c->maxnz = a->maxnz; 1730 c->solve_work = 0; 1731 C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1732 c->mult_work = 0; 1733 C->preallocated = PETSC_TRUE; 1734 C->assembled = PETSC_TRUE; 1735 *B = C; 1736 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1737 PetscFunctionReturn(0); 1738 } 1739 1740 EXTERN_C_BEGIN 1741 #undef __FUNCT__ 1742 #define __FUNCT__ "MatLoad_SeqBAIJ" 1743 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A) 1744 { 1745 Mat_SeqBAIJ *a; 1746 Mat B; 1747 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1748 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1749 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1750 int *masked,nmask,tmp,bs2,ishift; 1751 PetscScalar *aa; 1752 MPI_Comm comm = ((PetscObject)viewer)->comm; 1753 #if defined(PETSC_HAVE_DSCPACK) 1754 PetscTruth flag; 1755 #endif 1756 1757 PetscFunctionBegin; 1758 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1759 bs2 = bs*bs; 1760 1761 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1762 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1763 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1764 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1765 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1766 M = header[1]; N = header[2]; nz = header[3]; 1767 1768 if (header[3] < 0) { 1769 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 1770 } 1771 1772 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1773 1774 /* 1775 This code adds extra rows to make sure the number of rows is 1776 divisible by the blocksize 1777 */ 1778 mbs = M/bs; 1779 extra_rows = bs - M + bs*(mbs); 1780 if (extra_rows == bs) extra_rows = 0; 1781 else mbs++; 1782 if (extra_rows) { 1783 PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1784 } 1785 1786 /* read in row lengths */ 1787 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1788 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1789 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1790 1791 /* read in column indices */ 1792 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1793 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1794 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1795 1796 /* loop over row lengths determining block row lengths */ 1797 ierr = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr); 1798 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1799 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1800 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1801 masked = mask + mbs; 1802 rowcount = 0; nzcount = 0; 1803 for (i=0; i<mbs; i++) { 1804 nmask = 0; 1805 for (j=0; j<bs; j++) { 1806 kmax = rowlengths[rowcount]; 1807 for (k=0; k<kmax; k++) { 1808 tmp = jj[nzcount++]/bs; 1809 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1810 } 1811 rowcount++; 1812 } 1813 browlengths[i] += nmask; 1814 /* zero out the mask elements we set */ 1815 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1816 } 1817 1818 /* create our matrix */ 1819 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 1820 B = *A; 1821 a = (Mat_SeqBAIJ*)B->data; 1822 1823 /* set matrix "i" values */ 1824 a->i[0] = 0; 1825 for (i=1; i<= mbs; i++) { 1826 a->i[i] = a->i[i-1] + browlengths[i-1]; 1827 a->ilen[i-1] = browlengths[i-1]; 1828 } 1829 a->nz = 0; 1830 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 1831 1832 /* read in nonzero values */ 1833 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1834 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1835 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1836 1837 /* set "a" and "j" values into matrix */ 1838 nzcount = 0; jcount = 0; 1839 for (i=0; i<mbs; i++) { 1840 nzcountb = nzcount; 1841 nmask = 0; 1842 for (j=0; j<bs; j++) { 1843 kmax = rowlengths[i*bs+j]; 1844 for (k=0; k<kmax; k++) { 1845 tmp = jj[nzcount++]/bs; 1846 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1847 } 1848 } 1849 /* sort the masked values */ 1850 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1851 1852 /* set "j" values into matrix */ 1853 maskcount = 1; 1854 for (j=0; j<nmask; j++) { 1855 a->j[jcount++] = masked[j]; 1856 mask[masked[j]] = maskcount++; 1857 } 1858 /* set "a" values into matrix */ 1859 ishift = bs2*a->i[i]; 1860 for (j=0; j<bs; j++) { 1861 kmax = rowlengths[i*bs+j]; 1862 for (k=0; k<kmax; k++) { 1863 tmp = jj[nzcountb]/bs ; 1864 block = mask[tmp] - 1; 1865 point = jj[nzcountb] - bs*tmp; 1866 idx = ishift + bs2*block + j + bs*point; 1867 a->a[idx] = (MatScalar)aa[nzcountb++]; 1868 } 1869 } 1870 /* zero out the mask elements we set */ 1871 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1872 } 1873 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1874 1875 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1876 ierr = PetscFree(browlengths);CHKERRQ(ierr); 1877 ierr = PetscFree(aa);CHKERRQ(ierr); 1878 ierr = PetscFree(jj);CHKERRQ(ierr); 1879 ierr = PetscFree(mask);CHKERRQ(ierr); 1880 1881 B->assembled = PETSC_TRUE; 1882 1883 #if defined(PETSC_HAVE_DSCPACK) 1884 ierr = PetscOptionsHasName(B->prefix,"-mat_baij_dscpack",&flag);CHKERRQ(ierr); 1885 if (flag) { ierr = MatUseDSCPACK_MPIBAIJ(B);CHKERRQ(ierr); } 1886 #endif 1887 1888 ierr = MatView_Private(B);CHKERRQ(ierr); 1889 PetscFunctionReturn(0); 1890 } 1891 EXTERN_C_END 1892 1893 #undef __FUNCT__ 1894 #define __FUNCT__ "MatCreateSeqBAIJ" 1895 /*@C 1896 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1897 compressed row) format. For good matrix assembly performance the 1898 user should preallocate the matrix storage by setting the parameter nz 1899 (or the array nnz). By setting these parameters accurately, performance 1900 during matrix assembly can be increased by more than a factor of 50. 1901 1902 Collective on MPI_Comm 1903 1904 Input Parameters: 1905 + comm - MPI communicator, set to PETSC_COMM_SELF 1906 . bs - size of block 1907 . m - number of rows 1908 . n - number of columns 1909 . nz - number of nonzero blocks per block row (same for all rows) 1910 - nnz - array containing the number of nonzero blocks in the various block rows 1911 (possibly different for each block row) or PETSC_NULL 1912 1913 Output Parameter: 1914 . A - the matrix 1915 1916 Options Database Keys: 1917 . -mat_no_unroll - uses code that does not unroll the loops in the 1918 block calculations (much slower) 1919 . -mat_block_size - size of the blocks to use 1920 1921 Level: intermediate 1922 1923 Notes: 1924 A nonzero block is any block that as 1 or more nonzeros in it 1925 1926 The block AIJ format is fully compatible with standard Fortran 77 1927 storage. That is, the stored row and column indices can begin at 1928 either one (as in Fortran) or zero. See the users' manual for details. 1929 1930 Specify the preallocated storage with either nz or nnz (not both). 1931 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1932 allocation. For additional details, see the users manual chapter on 1933 matrices. 1934 1935 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1936 @*/ 1937 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1938 { 1939 int ierr; 1940 1941 PetscFunctionBegin; 1942 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1943 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 1944 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1945 PetscFunctionReturn(0); 1946 } 1947 1948 #undef __FUNCT__ 1949 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 1950 /*@C 1951 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 1952 per row in the matrix. For good matrix assembly performance the 1953 user should preallocate the matrix storage by setting the parameter nz 1954 (or the array nnz). By setting these parameters accurately, performance 1955 during matrix assembly can be increased by more than a factor of 50. 1956 1957 Collective on MPI_Comm 1958 1959 Input Parameters: 1960 + A - the matrix 1961 . bs - size of block 1962 . nz - number of block nonzeros per block row (same for all rows) 1963 - nnz - array containing the number of block nonzeros in the various block rows 1964 (possibly different for each block row) or PETSC_NULL 1965 1966 Options Database Keys: 1967 . -mat_no_unroll - uses code that does not unroll the loops in the 1968 block calculations (much slower) 1969 . -mat_block_size - size of the blocks to use 1970 1971 Level: intermediate 1972 1973 Notes: 1974 The block AIJ format is fully compatible with standard Fortran 77 1975 storage. That is, the stored row and column indices can begin at 1976 either one (as in Fortran) or zero. See the users' manual for details. 1977 1978 Specify the preallocated storage with either nz or nnz (not both). 1979 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1980 allocation. For additional details, see the users manual chapter on 1981 matrices. 1982 1983 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1984 @*/ 1985 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz) 1986 { 1987 Mat_SeqBAIJ *b; 1988 int i,len,ierr,mbs,nbs,bs2,newbs = bs; 1989 PetscTruth flg; 1990 1991 PetscFunctionBegin; 1992 ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);CHKERRQ(ierr); 1993 if (!flg) PetscFunctionReturn(0); 1994 1995 B->preallocated = PETSC_TRUE; 1996 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 1997 if (nnz && newbs != bs) { 1998 SETERRQ(1,"Cannot change blocksize from command line if setting nnz"); 1999 } 2000 bs = newbs; 2001 2002 mbs = B->m/bs; 2003 nbs = B->n/bs; 2004 bs2 = bs*bs; 2005 2006 if (mbs*bs!=B->m || nbs*bs!=B->n) { 2007 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs); 2008 } 2009 2010 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2011 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 2012 if (nnz) { 2013 for (i=0; i<mbs; i++) { 2014 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 2015 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs); 2016 } 2017 } 2018 2019 b = (Mat_SeqBAIJ*)B->data; 2020 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 2021 B->ops->solve = MatSolve_SeqBAIJ_Update; 2022 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 2023 if (!flg) { 2024 switch (bs) { 2025 case 1: 2026 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2027 B->ops->mult = MatMult_SeqBAIJ_1; 2028 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2029 break; 2030 case 2: 2031 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2032 B->ops->mult = MatMult_SeqBAIJ_2; 2033 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2034 break; 2035 case 3: 2036 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2037 B->ops->mult = MatMult_SeqBAIJ_3; 2038 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2039 break; 2040 case 4: 2041 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2042 B->ops->mult = MatMult_SeqBAIJ_4; 2043 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2044 break; 2045 case 5: 2046 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2047 B->ops->mult = MatMult_SeqBAIJ_5; 2048 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2049 break; 2050 case 6: 2051 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2052 B->ops->mult = MatMult_SeqBAIJ_6; 2053 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2054 break; 2055 case 7: 2056 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2057 B->ops->mult = MatMult_SeqBAIJ_7; 2058 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2059 break; 2060 default: 2061 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2062 B->ops->mult = MatMult_SeqBAIJ_N; 2063 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2064 break; 2065 } 2066 } 2067 b->bs = bs; 2068 b->mbs = mbs; 2069 b->nbs = nbs; 2070 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 2071 if (!nnz) { 2072 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2073 else if (nz <= 0) nz = 1; 2074 for (i=0; i<mbs; i++) b->imax[i] = nz; 2075 nz = nz*mbs; 2076 } else { 2077 nz = 0; 2078 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2079 } 2080 2081 /* allocate the matrix space */ 2082 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 2083 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 2084 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2085 b->j = (int*)(b->a + nz*bs2); 2086 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 2087 b->i = b->j + nz; 2088 b->singlemalloc = PETSC_TRUE; 2089 2090 b->i[0] = 0; 2091 for (i=1; i<mbs+1; i++) { 2092 b->i[i] = b->i[i-1] + b->imax[i-1]; 2093 } 2094 2095 /* b->ilen will count nonzeros in each block row so far. */ 2096 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 2097 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2098 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2099 2100 b->bs = bs; 2101 b->bs2 = bs2; 2102 b->mbs = mbs; 2103 b->nz = 0; 2104 b->maxnz = nz*bs2; 2105 B->info.nz_unneeded = (PetscReal)b->maxnz; 2106 PetscFunctionReturn(0); 2107 } 2108 2109