1 /*$Id: sbaij.c,v 1.62 2001/08/07 03:03:01 balay Exp $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "petscsys.h" /*I "petscmat.h" I*/ 8 #include "src/mat/impls/baij/seq/baij.h" 9 #include "src/vec/vecimpl.h" 10 #include "src/inline/spops.h" 11 #include "src/mat/impls/sbaij/seq/sbaij.h" 12 13 #define CHUNKSIZE 10 14 15 /* 16 Checks for missing diagonals 17 */ 18 #undef __FUNCT__ 19 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 20 int MatMissingDiagonal_SeqSBAIJ(Mat A) 21 { 22 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 23 int *diag,*jj = a->j,i,ierr; 24 25 PetscFunctionBegin; 26 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 27 diag = a->diag; 28 for (i=0; i<a->mbs; i++) { 29 if (jj[diag[i]] != i) { 30 SETERRQ1(1,"Matrix is missing diagonal number %d",i); 31 } 32 } 33 PetscFunctionReturn(0); 34 } 35 36 #undef __FUNCT__ 37 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 38 int MatMarkDiagonal_SeqSBAIJ(Mat A) 39 { 40 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 41 int i,j,*diag,m = a->mbs,ierr; 42 43 PetscFunctionBegin; 44 if (a->diag) PetscFunctionReturn(0); 45 46 ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr); 47 PetscLogObjectMemory(A,(m+1)*sizeof(int)); 48 for (i=0; i<m; i++) { 49 diag[i] = a->i[i+1]; 50 for (j=a->i[i]; j<a->i[i+1]; j++) { 51 if (a->j[j] == i) { 52 diag[i] = j; 53 break; 54 } 55 } 56 } 57 a->diag = diag; 58 PetscFunctionReturn(0); 59 } 60 61 62 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 63 64 #undef __FUNCT__ 65 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 66 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 67 { 68 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 69 70 PetscFunctionBegin; 71 if (ia) { 72 SETERRQ(1,"Function not yet written for SBAIJ format, only supports natural ordering"); 73 } 74 *nn = a->mbs; 75 PetscFunctionReturn(0); 76 } 77 78 #undef __FUNCT__ 79 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 80 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 81 { 82 PetscFunctionBegin; 83 if (!ia) PetscFunctionReturn(0); 84 SETERRQ(1,"Function not yet written for SBAIJ format, only supports natural ordering"); 85 /* PetscFunctionReturn(0); */ 86 } 87 88 #undef __FUNCT__ 89 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ" 90 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs) 91 { 92 Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data; 93 94 PetscFunctionBegin; 95 *bs = sbaij->bs; 96 PetscFunctionReturn(0); 97 } 98 99 #undef __FUNCT__ 100 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 101 int MatDestroy_SeqSBAIJ(Mat A) 102 { 103 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 104 int ierr; 105 106 PetscFunctionBegin; 107 #if defined(PETSC_USE_LOG) 108 PetscLogObjectState((PetscObject)A,"Rows=%d, s_NZ=%d",A->m,a->s_nz); 109 #endif 110 ierr = PetscFree(a->a);CHKERRQ(ierr); 111 if (!a->singlemalloc) { 112 ierr = PetscFree(a->i);CHKERRQ(ierr); 113 ierr = PetscFree(a->j);CHKERRQ(ierr); 114 } 115 if (a->row) { 116 ierr = ISDestroy(a->row);CHKERRQ(ierr); 117 } 118 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 119 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 120 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 121 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 122 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 123 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 124 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 125 126 if (a->inew){ 127 ierr = PetscFree(a->inew);CHKERRQ(ierr); 128 a->inew = 0; 129 } 130 ierr = PetscFree(a);CHKERRQ(ierr); 131 PetscFunctionReturn(0); 132 } 133 134 #undef __FUNCT__ 135 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 136 int MatSetOption_SeqSBAIJ(Mat A,MatOption op) 137 { 138 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 139 140 PetscFunctionBegin; 141 switch (op) { 142 case MAT_ROW_ORIENTED: 143 a->roworiented = PETSC_TRUE; 144 break; 145 case MAT_COLUMN_ORIENTED: 146 a->roworiented = PETSC_FALSE; 147 break; 148 case MAT_COLUMNS_SORTED: 149 a->sorted = PETSC_TRUE; 150 break; 151 case MAT_COLUMNS_UNSORTED: 152 a->sorted = PETSC_FALSE; 153 break; 154 case MAT_KEEP_ZEROED_ROWS: 155 a->keepzeroedrows = PETSC_TRUE; 156 break; 157 case MAT_NO_NEW_NONZERO_LOCATIONS: 158 a->nonew = 1; 159 break; 160 case MAT_NEW_NONZERO_LOCATION_ERR: 161 a->nonew = -1; 162 break; 163 case MAT_NEW_NONZERO_ALLOCATION_ERR: 164 a->nonew = -2; 165 break; 166 case MAT_YES_NEW_NONZERO_LOCATIONS: 167 a->nonew = 0; 168 break; 169 case MAT_ROWS_SORTED: 170 case MAT_ROWS_UNSORTED: 171 case MAT_YES_NEW_DIAGONALS: 172 case MAT_IGNORE_OFF_PROC_ENTRIES: 173 case MAT_USE_HASH_TABLE: 174 case MAT_USE_SINGLE_PRECISION_SOLVES: 175 PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n"); 176 break; 177 case MAT_NO_NEW_DIAGONALS: 178 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 179 break; 180 default: 181 SETERRQ(PETSC_ERR_SUP,"unknown option"); 182 break; 183 } 184 PetscFunctionReturn(0); 185 } 186 187 #undef __FUNCT__ 188 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 189 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v) 190 { 191 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 192 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr; 193 MatScalar *aa,*aa_i; 194 PetscScalar *v_i; 195 196 PetscFunctionBegin; 197 bs = a->bs; 198 ai = a->i; 199 aj = a->j; 200 aa = a->a; 201 bs2 = a->bs2; 202 203 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 204 205 bn = row/bs; /* Block number */ 206 bp = row % bs; /* Block position */ 207 M = ai[bn+1] - ai[bn]; 208 *ncols = bs*M; 209 210 if (v) { 211 *v = 0; 212 if (*ncols) { 213 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 214 for (i=0; i<M; i++) { /* for each block in the block row */ 215 v_i = *v + i*bs; 216 aa_i = aa + bs2*(ai[bn] + i); 217 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 218 } 219 } 220 } 221 222 if (cols) { 223 *cols = 0; 224 if (*ncols) { 225 ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr); 226 for (i=0; i<M; i++) { /* for each block in the block row */ 227 cols_i = *cols + i*bs; 228 itmp = bs*aj[ai[bn] + i]; 229 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 230 } 231 } 232 } 233 234 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 235 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 236 #ifdef column_search 237 v_i = *v + M*bs; 238 cols_i = *cols + M*bs; 239 for (i=0; i<bn; i++){ /* for each block row */ 240 M = ai[i+1] - ai[i]; 241 for (j=0; j<M; j++){ 242 itmp = aj[ai[i] + j]; /* block column value */ 243 if (itmp == bn){ 244 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 245 for (k=0; k<bs; k++) { 246 *cols_i++ = i*bs+k; 247 *v_i++ = aa_i[k]; 248 } 249 *ncols += bs; 250 break; 251 } 252 } 253 } 254 #endif 255 256 PetscFunctionReturn(0); 257 } 258 259 #undef __FUNCT__ 260 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 261 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 262 { 263 int ierr; 264 265 PetscFunctionBegin; 266 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 267 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 268 PetscFunctionReturn(0); 269 } 270 271 #undef __FUNCT__ 272 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 273 int MatTranspose_SeqSBAIJ(Mat A,Mat *B) 274 { 275 PetscFunctionBegin; 276 SETERRQ(1,"Matrix is symmetric. MatTranspose() should not be called"); 277 /* PetscFunctionReturn(0); */ 278 } 279 280 #undef __FUNCT__ 281 #define __FUNCT__ "MatView_SeqSBAIJ_Binary" 282 static int MatView_SeqSBAIJ_Binary(Mat A,PetscViewer viewer) 283 { 284 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 285 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 286 PetscScalar *aa; 287 FILE *file; 288 289 PetscFunctionBegin; 290 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 291 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 292 col_lens[0] = MAT_COOKIE; 293 294 col_lens[1] = A->m; 295 col_lens[2] = A->m; 296 col_lens[3] = a->s_nz*bs2; 297 298 /* store lengths of each row and write (including header) to file */ 299 count = 0; 300 for (i=0; i<a->mbs; i++) { 301 for (j=0; j<bs; j++) { 302 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 303 } 304 } 305 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 306 ierr = PetscFree(col_lens);CHKERRQ(ierr); 307 308 /* store column indices (zero start index) */ 309 ierr = PetscMalloc((a->s_nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 310 count = 0; 311 for (i=0; i<a->mbs; i++) { 312 for (j=0; j<bs; j++) { 313 for (k=a->i[i]; k<a->i[i+1]; k++) { 314 for (l=0; l<bs; l++) { 315 jj[count++] = bs*a->j[k] + l; 316 } 317 } 318 } 319 } 320 ierr = PetscBinaryWrite(fd,jj,bs2*a->s_nz,PETSC_INT,0);CHKERRQ(ierr); 321 ierr = PetscFree(jj);CHKERRQ(ierr); 322 323 /* store nonzero values */ 324 ierr = PetscMalloc((a->s_nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 325 count = 0; 326 for (i=0; i<a->mbs; i++) { 327 for (j=0; j<bs; j++) { 328 for (k=a->i[i]; k<a->i[i+1]; k++) { 329 for (l=0; l<bs; l++) { 330 aa[count++] = a->a[bs2*k + l*bs + j]; 331 } 332 } 333 } 334 } 335 ierr = PetscBinaryWrite(fd,aa,bs2*a->s_nz,PETSC_SCALAR,0);CHKERRQ(ierr); 336 ierr = PetscFree(aa);CHKERRQ(ierr); 337 338 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 339 if (file) { 340 fprintf(file,"-matload_block_size %d\n",a->bs); 341 } 342 PetscFunctionReturn(0); 343 } 344 345 #undef __FUNCT__ 346 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 347 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 348 { 349 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 350 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 351 char *name; 352 PetscViewerFormat format; 353 354 PetscFunctionBegin; 355 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 356 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 357 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) { 358 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 359 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 360 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 361 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 362 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 363 for (i=0; i<a->mbs; i++) { 364 for (j=0; j<bs; j++) { 365 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 366 for (k=a->i[i]; k<a->i[i+1]; k++) { 367 for (l=0; l<bs; l++) { 368 #if defined(PETSC_USE_COMPLEX) 369 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 370 ierr = PetscViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l, 371 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 372 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 373 ierr = PetscViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l, 374 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 375 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 376 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 377 } 378 #else 379 if (a->a[bs2*k + l*bs + j] != 0.0) { 380 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 381 } 382 #endif 383 } 384 } 385 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 386 } 387 } 388 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 389 } else { 390 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 391 for (i=0; i<a->mbs; i++) { 392 for (j=0; j<bs; j++) { 393 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 394 for (k=a->i[i]; k<a->i[i+1]; k++) { 395 for (l=0; l<bs; l++) { 396 #if defined(PETSC_USE_COMPLEX) 397 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 398 ierr = PetscViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l, 399 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 400 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 401 ierr = PetscViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l, 402 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 403 } else { 404 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 405 } 406 #else 407 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 408 #endif 409 } 410 } 411 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 412 } 413 } 414 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 415 } 416 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 417 PetscFunctionReturn(0); 418 } 419 420 #undef __FUNCT__ 421 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 422 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 423 { 424 Mat A = (Mat) Aa; 425 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 426 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank; 427 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 428 MatScalar *aa; 429 MPI_Comm comm; 430 PetscViewer viewer; 431 432 PetscFunctionBegin; 433 /* 434 This is nasty. If this is called from an originally parallel matrix 435 then all processes call this,but only the first has the matrix so the 436 rest should return immediately. 437 */ 438 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 439 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 440 if (rank) PetscFunctionReturn(0); 441 442 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 443 444 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 445 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 446 447 /* loop over matrix elements drawing boxes */ 448 color = PETSC_DRAW_BLUE; 449 for (i=0,row=0; i<mbs; i++,row+=bs) { 450 for (j=a->i[i]; j<a->i[i+1]; j++) { 451 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 452 x_l = a->j[j]*bs; x_r = x_l + 1.0; 453 aa = a->a + j*bs2; 454 for (k=0; k<bs; k++) { 455 for (l=0; l<bs; l++) { 456 if (PetscRealPart(*aa++) >= 0.) continue; 457 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 458 } 459 } 460 } 461 } 462 color = PETSC_DRAW_CYAN; 463 for (i=0,row=0; i<mbs; i++,row+=bs) { 464 for (j=a->i[i]; j<a->i[i+1]; j++) { 465 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 466 x_l = a->j[j]*bs; x_r = x_l + 1.0; 467 aa = a->a + j*bs2; 468 for (k=0; k<bs; k++) { 469 for (l=0; l<bs; l++) { 470 if (PetscRealPart(*aa++) != 0.) continue; 471 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 472 } 473 } 474 } 475 } 476 477 color = PETSC_DRAW_RED; 478 for (i=0,row=0; i<mbs; i++,row+=bs) { 479 for (j=a->i[i]; j<a->i[i+1]; j++) { 480 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 481 x_l = a->j[j]*bs; x_r = x_l + 1.0; 482 aa = a->a + j*bs2; 483 for (k=0; k<bs; k++) { 484 for (l=0; l<bs; l++) { 485 if (PetscRealPart(*aa++) <= 0.) continue; 486 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 487 } 488 } 489 } 490 } 491 PetscFunctionReturn(0); 492 } 493 494 #undef __FUNCT__ 495 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 496 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 497 { 498 int ierr; 499 PetscReal xl,yl,xr,yr,w,h; 500 PetscDraw draw; 501 PetscTruth isnull; 502 503 PetscFunctionBegin; 504 505 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 506 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 507 508 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 509 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 510 xr += w; yr += h; xl = -w; yl = -h; 511 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 512 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 513 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 514 PetscFunctionReturn(0); 515 } 516 517 #undef __FUNCT__ 518 #define __FUNCT__ "MatView_SeqSBAIJ" 519 int MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 520 { 521 int ierr; 522 PetscTruth issocket,isascii,isbinary,isdraw; 523 524 PetscFunctionBegin; 525 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 526 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 527 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 528 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 529 if (issocket) { 530 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 531 } else if (isascii){ 532 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 533 } else if (isbinary) { 534 ierr = MatView_SeqSBAIJ_Binary(A,viewer);CHKERRQ(ierr); 535 } else if (isdraw) { 536 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 537 } else { 538 SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 539 } 540 PetscFunctionReturn(0); 541 } 542 543 544 #undef __FUNCT__ 545 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 546 int MatGetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v) 547 { 548 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 549 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 550 int *ai = a->i,*ailen = a->ilen; 551 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 552 MatScalar *ap,*aa = a->a,zero = 0.0; 553 554 PetscFunctionBegin; 555 for (k=0; k<m; k++) { /* loop over rows */ 556 row = im[k]; brow = row/bs; 557 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 558 if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 559 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 560 nrow = ailen[brow]; 561 for (l=0; l<n; l++) { /* loop over columns */ 562 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 563 if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 564 col = in[l] ; 565 bcol = col/bs; 566 cidx = col%bs; 567 ridx = row%bs; 568 high = nrow; 569 low = 0; /* assume unsorted */ 570 while (high-low > 5) { 571 t = (low+high)/2; 572 if (rp[t] > bcol) high = t; 573 else low = t; 574 } 575 for (i=low; i<high; i++) { 576 if (rp[i] > bcol) break; 577 if (rp[i] == bcol) { 578 *v++ = ap[bs2*i+bs*cidx+ridx]; 579 goto finished; 580 } 581 } 582 *v++ = zero; 583 finished:; 584 } 585 } 586 PetscFunctionReturn(0); 587 } 588 589 590 #undef __FUNCT__ 591 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 592 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 593 { 594 PetscFunctionBegin; 595 SETERRQ(1,"Function not yet written for SBAIJ format"); 596 /* PetscFunctionReturn(0); */ 597 } 598 599 #undef __FUNCT__ 600 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 601 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 602 { 603 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 604 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 605 int m = A->m,*ip,N,*ailen = a->ilen; 606 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 607 MatScalar *aa = a->a,*ap; 608 609 PetscFunctionBegin; 610 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 611 612 if (m) rmax = ailen[0]; 613 for (i=1; i<mbs; i++) { 614 /* move each row back by the amount of empty slots (fshift) before it*/ 615 fshift += imax[i-1] - ailen[i-1]; 616 rmax = PetscMax(rmax,ailen[i]); 617 if (fshift) { 618 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 619 N = ailen[i]; 620 for (j=0; j<N; j++) { 621 ip[j-fshift] = ip[j]; 622 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 623 } 624 } 625 ai[i] = ai[i-1] + ailen[i-1]; 626 } 627 if (mbs) { 628 fshift += imax[mbs-1] - ailen[mbs-1]; 629 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 630 } 631 /* reset ilen and imax for each row */ 632 for (i=0; i<mbs; i++) { 633 ailen[i] = imax[i] = ai[i+1] - ai[i]; 634 } 635 a->s_nz = ai[mbs]; 636 637 /* diagonals may have moved, so kill the diagonal pointers */ 638 if (fshift && a->diag) { 639 ierr = PetscFree(a->diag);CHKERRQ(ierr); 640 PetscLogObjectMemory(A,-(m+1)*sizeof(int)); 641 a->diag = 0; 642 } 643 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 644 m,A->m,a->bs,fshift*bs2,a->s_nz*bs2); 645 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n", 646 a->reallocs); 647 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 648 a->reallocs = 0; 649 A->info.nz_unneeded = (PetscReal)fshift*bs2; 650 651 PetscFunctionReturn(0); 652 } 653 654 /* 655 This function returns an array of flags which indicate the locations of contiguous 656 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 657 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 658 Assume: sizes should be long enough to hold all the values. 659 */ 660 #undef __FUNCT__ 661 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 662 static int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 663 { 664 int i,j,k,row; 665 PetscTruth flg; 666 667 PetscFunctionBegin; 668 for (i=0,j=0; i<n; j++) { 669 row = idx[i]; 670 if (row%bs!=0) { /* Not the begining of a block */ 671 sizes[j] = 1; 672 i++; 673 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 674 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 675 i++; 676 } else { /* Begining of the block, so check if the complete block exists */ 677 flg = PETSC_TRUE; 678 for (k=1; k<bs; k++) { 679 if (row+k != idx[i+k]) { /* break in the block */ 680 flg = PETSC_FALSE; 681 break; 682 } 683 } 684 if (flg == PETSC_TRUE) { /* No break in the bs */ 685 sizes[j] = bs; 686 i+= bs; 687 } else { 688 sizes[j] = 1; 689 i++; 690 } 691 } 692 } 693 *bs_max = j; 694 PetscFunctionReturn(0); 695 } 696 697 #undef __FUNCT__ 698 #define __FUNCT__ "MatZeroRows_SeqSBAIJ" 699 int MatZeroRows_SeqSBAIJ(Mat A,IS is,PetscScalar *diag) 700 { 701 Mat_SeqSBAIJ *sbaij=(Mat_SeqSBAIJ*)A->data; 702 int ierr,i,j,k,count,is_n,*is_idx,*rows; 703 int bs=sbaij->bs,bs2=sbaij->bs2,*sizes,row,bs_max; 704 PetscScalar zero = 0.0; 705 MatScalar *aa; 706 707 PetscFunctionBegin; 708 /* Make a copy of the IS and sort it */ 709 ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 710 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 711 712 /* allocate memory for rows,sizes */ 713 ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 714 sizes = rows + is_n; 715 716 /* initialize copy IS values to rows, and sort them */ 717 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 718 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 719 if (sbaij->keepzeroedrows) { /* do not change nonzero structure */ 720 for (i=0; i<is_n; i++) { sizes[i] = 1; } /* sizes: size of blocks, = 1 or bs */ 721 bs_max = is_n; /* bs_max: num. of contiguous block row in the row */ 722 } else { 723 ierr = MatZeroRows_SeqSBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 724 } 725 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 726 727 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 728 row = rows[j]; /* row to be zeroed */ 729 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 730 count = (sbaij->i[row/bs +1] - sbaij->i[row/bs])*bs; /* num. of elements in the row */ 731 aa = sbaij->a + sbaij->i[row/bs]*bs2 + (row%bs); 732 if (sizes[i] == bs && !sbaij->keepzeroedrows) { 733 if (diag) { 734 if (sbaij->ilen[row/bs] > 0) { 735 sbaij->ilen[row/bs] = 1; 736 sbaij->j[sbaij->i[row/bs]] = row/bs; 737 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 738 } 739 /* Now insert all the diagoanl values for this bs */ 740 for (k=0; k<bs; k++) { 741 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 742 } 743 } else { /* (!diag) */ 744 sbaij->ilen[row/bs] = 0; 745 } /* end (!diag) */ 746 } else { /* (sizes[i] != bs), broken block */ 747 #if defined (PETSC_USE_DEBUG) 748 if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 749 #endif 750 for (k=0; k<count; k++) { 751 aa[0] = zero; 752 aa+=bs; 753 } 754 if (diag) { 755 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 756 } 757 } 758 } 759 760 ierr = PetscFree(rows);CHKERRQ(ierr); 761 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 762 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 763 PetscFunctionReturn(0); 764 } 765 766 /* Only add/insert a(i,j) with i<=j (blocks). 767 Any a(i,j) with i>j input by user is ingored. 768 */ 769 770 #undef __FUNCT__ 771 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 772 int MatSetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 773 { 774 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 775 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 776 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 777 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 778 int ridx,cidx,bs2=a->bs2,ierr; 779 MatScalar *ap,value,*aa=a->a,*bap; 780 781 PetscFunctionBegin; 782 783 for (k=0; k<m; k++) { /* loop over added rows */ 784 row = im[k]; /* row number */ 785 brow = row/bs; /* block row number */ 786 if (row < 0) continue; 787 #if defined(PETSC_USE_BOPT_g) 788 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m); 789 #endif 790 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 791 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 792 rmax = imax[brow]; /* maximum space allocated for this row */ 793 nrow = ailen[brow]; /* actual length of this row */ 794 low = 0; 795 796 for (l=0; l<n; l++) { /* loop over added columns */ 797 if (in[l] < 0) continue; 798 #if defined(PETSC_USE_BOPT_g) 799 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m); 800 #endif 801 col = in[l]; 802 bcol = col/bs; /* block col number */ 803 804 if (brow <= bcol){ 805 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 806 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 807 /* element value a(k,l) */ 808 if (roworiented) { 809 value = v[l + k*n]; 810 } else { 811 value = v[k + l*m]; 812 } 813 814 /* move pointer bap to a(k,l) quickly and add/insert value */ 815 if (!sorted) low = 0; high = nrow; 816 while (high-low > 7) { 817 t = (low+high)/2; 818 if (rp[t] > bcol) high = t; 819 else low = t; 820 } 821 for (i=low; i<high; i++) { 822 /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */ 823 if (rp[i] > bcol) break; 824 if (rp[i] == bcol) { 825 bap = ap + bs2*i + bs*cidx + ridx; 826 if (is == ADD_VALUES) *bap += value; 827 else *bap = value; 828 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 829 if (brow == bcol && ridx < cidx){ 830 bap = ap + bs2*i + bs*ridx + cidx; 831 if (is == ADD_VALUES) *bap += value; 832 else *bap = value; 833 } 834 goto noinsert1; 835 } 836 } 837 838 if (nonew == 1) goto noinsert1; 839 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 840 if (nrow >= rmax) { 841 /* there is no extra room in row, therefore enlarge */ 842 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 843 MatScalar *new_a; 844 845 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 846 847 /* Malloc new storage space */ 848 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 849 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 850 new_j = (int*)(new_a + bs2*new_nz); 851 new_i = new_j + new_nz; 852 853 /* copy over old data into new slots */ 854 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 855 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 856 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 857 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 858 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 859 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 860 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 861 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 862 /* free up old matrix storage */ 863 ierr = PetscFree(a->a);CHKERRQ(ierr); 864 if (!a->singlemalloc) { 865 ierr = PetscFree(a->i);CHKERRQ(ierr); 866 ierr = PetscFree(a->j);CHKERRQ(ierr); 867 } 868 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 869 a->singlemalloc = PETSC_TRUE; 870 871 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 872 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 873 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 874 a->s_maxnz += bs2*CHUNKSIZE; 875 a->reallocs++; 876 a->s_nz++; 877 } 878 879 N = nrow++ - 1; 880 /* shift up all the later entries in this row */ 881 for (ii=N; ii>=i; ii--) { 882 rp[ii+1] = rp[ii]; 883 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 884 } 885 if (N>=i) { 886 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 887 } 888 rp[i] = bcol; 889 ap[bs2*i + bs*cidx + ridx] = value; 890 noinsert1:; 891 low = i; 892 /* } */ 893 } 894 } /* end of if .. if.. */ 895 } /* end of loop over added columns */ 896 ailen[brow] = nrow; 897 } /* end of loop over added rows */ 898 899 PetscFunctionReturn(0); 900 } 901 902 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,PetscReal,Mat*); 903 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,PetscReal); 904 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS*,int); 905 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 906 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**); 907 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec); 908 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec); 909 extern int MatScale_SeqSBAIJ(PetscScalar*,Mat); 910 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 911 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 912 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec); 913 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 914 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 915 extern int MatZeroEntries_SeqSBAIJ(Mat); 916 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec); 917 918 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 919 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 920 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 921 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 922 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 923 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 924 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 925 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 926 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec); 927 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec); 928 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec); 929 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec); 930 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec); 931 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec); 932 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec); 933 934 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 935 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 936 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 937 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 938 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 939 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 940 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 941 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 942 943 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*); 944 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*); 945 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*); 946 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*); 947 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*); 948 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*); 949 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*); 950 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*); 951 952 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 953 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 954 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 955 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 956 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 957 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 958 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 959 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 960 961 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 962 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 963 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 964 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 965 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 966 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 967 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 968 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 969 970 #ifdef HAVE_MatICCFactor 971 /* modefied from MatILUFactor_SeqSBAIJ, needs further work! */ 972 #undef __FUNCT__ 973 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 974 int MatICCFactor_SeqSBAIJ(Mat inA,IS row,PetscReal fill,int level) 975 { 976 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 977 Mat outA; 978 int ierr; 979 PetscTruth row_identity,col_identity; 980 981 PetscFunctionBegin; 982 /* 983 if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 984 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 985 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 986 if (!row_identity || !col_identity) { 987 SETERRQ(1,"Row and column permutations must be identity for in-place ICC"); 988 } 989 */ 990 991 outA = inA; 992 inA->factor = FACTOR_CHOLESKY; 993 994 if (!a->diag) { 995 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 996 } 997 /* 998 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 999 for ILU(0) factorization with natural ordering 1000 */ 1001 switch (a->bs) { 1002 case 1: 1003 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1004 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1005 PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"); 1006 case 2: 1007 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1008 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1009 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1010 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1011 break; 1012 case 3: 1013 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1014 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1015 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1016 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1017 break; 1018 case 4: 1019 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1020 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1021 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1022 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1023 break; 1024 case 5: 1025 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1026 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1027 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1028 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1029 break; 1030 case 6: 1031 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1032 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1033 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1034 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1035 break; 1036 case 7: 1037 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1038 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1039 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1040 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1041 break; 1042 default: 1043 a->row = row; 1044 a->icol = col; 1045 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1046 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1047 1048 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1049 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 1050 PetscLogObjectParent(inA,a->icol); 1051 1052 if (!a->solve_work) { 1053 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1054 PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar)); 1055 } 1056 } 1057 1058 ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr); 1059 1060 PetscFunctionReturn(0); 1061 } 1062 #endif 1063 1064 #undef __FUNCT__ 1065 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1066 int MatPrintHelp_SeqSBAIJ(Mat A) 1067 { 1068 static PetscTruth called = PETSC_FALSE; 1069 MPI_Comm comm = A->comm; 1070 int ierr; 1071 1072 PetscFunctionBegin; 1073 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1074 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1075 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1076 PetscFunctionReturn(0); 1077 } 1078 1079 EXTERN_C_BEGIN 1080 #undef __FUNCT__ 1081 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1082 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices) 1083 { 1084 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1085 int i,nz,n; 1086 1087 PetscFunctionBegin; 1088 nz = baij->s_maxnz; 1089 n = mat->n; 1090 for (i=0; i<nz; i++) { 1091 baij->j[i] = indices[i]; 1092 } 1093 baij->s_nz = nz; 1094 for (i=0; i<n; i++) { 1095 baij->ilen[i] = baij->imax[i]; 1096 } 1097 1098 PetscFunctionReturn(0); 1099 } 1100 EXTERN_C_END 1101 1102 #undef __FUNCT__ 1103 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1104 /*@ 1105 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1106 in the matrix. 1107 1108 Input Parameters: 1109 + mat - the SeqSBAIJ matrix 1110 - indices - the column indices 1111 1112 Level: advanced 1113 1114 Notes: 1115 This can be called if you have precomputed the nonzero structure of the 1116 matrix and want to provide it to the matrix object to improve the performance 1117 of the MatSetValues() operation. 1118 1119 You MUST have set the correct numbers of nonzeros per row in the call to 1120 MatCreateSeqSBAIJ(). 1121 1122 MUST be called before any calls to MatSetValues() 1123 1124 .seealso: MatCreateSeqSBAIJ 1125 @*/ 1126 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices) 1127 { 1128 int ierr,(*f)(Mat,int *); 1129 1130 PetscFunctionBegin; 1131 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1132 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)())&f);CHKERRQ(ierr); 1133 if (f) { 1134 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1135 } else { 1136 SETERRQ(1,"Wrong type of matrix to set column indices"); 1137 } 1138 PetscFunctionReturn(0); 1139 } 1140 1141 #undef __FUNCT__ 1142 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1143 int MatSetUpPreallocation_SeqSBAIJ(Mat A) 1144 { 1145 int ierr; 1146 1147 PetscFunctionBegin; 1148 ierr = MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1149 PetscFunctionReturn(0); 1150 } 1151 1152 /* -------------------------------------------------------------------*/ 1153 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1154 MatGetRow_SeqSBAIJ, 1155 MatRestoreRow_SeqSBAIJ, 1156 MatMult_SeqSBAIJ_N, 1157 MatMultAdd_SeqSBAIJ_N, 1158 MatMultTranspose_SeqSBAIJ, 1159 MatMultTransposeAdd_SeqSBAIJ, 1160 MatSolve_SeqSBAIJ_N, 1161 0, 1162 0, 1163 0, 1164 0, 1165 MatCholeskyFactor_SeqSBAIJ, 1166 0, 1167 MatTranspose_SeqSBAIJ, 1168 MatGetInfo_SeqSBAIJ, 1169 MatEqual_SeqSBAIJ, 1170 MatGetDiagonal_SeqSBAIJ, 1171 MatDiagonalScale_SeqSBAIJ, 1172 MatNorm_SeqSBAIJ, 1173 0, 1174 MatAssemblyEnd_SeqSBAIJ, 1175 0, 1176 MatSetOption_SeqSBAIJ, 1177 MatZeroEntries_SeqSBAIJ, 1178 MatZeroRows_SeqSBAIJ, 1179 0, 1180 0, 1181 MatCholeskyFactorSymbolic_SeqSBAIJ, 1182 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1183 MatSetUpPreallocation_SeqSBAIJ, 1184 0, 1185 MatICCFactorSymbolic_SeqSBAIJ, 1186 0, 1187 0, 1188 MatDuplicate_SeqSBAIJ, 1189 0, 1190 0, 1191 0, 1192 0, 1193 0, 1194 MatGetSubMatrices_SeqSBAIJ, 1195 MatIncreaseOverlap_SeqSBAIJ, 1196 MatGetValues_SeqSBAIJ, 1197 0, 1198 MatPrintHelp_SeqSBAIJ, 1199 MatScale_SeqSBAIJ, 1200 0, 1201 0, 1202 0, 1203 MatGetBlockSize_SeqSBAIJ, 1204 MatGetRowIJ_SeqSBAIJ, 1205 MatRestoreRowIJ_SeqSBAIJ, 1206 0, 1207 0, 1208 0, 1209 0, 1210 0, 1211 0, 1212 MatSetValuesBlocked_SeqSBAIJ, 1213 MatGetSubMatrix_SeqSBAIJ, 1214 0, 1215 0, 1216 MatGetPetscMaps_Petsc, 1217 0, 1218 0, 1219 0, 1220 0, 1221 0, 1222 0, 1223 MatGetRowMax_SeqSBAIJ}; 1224 1225 EXTERN_C_BEGIN 1226 #undef __FUNCT__ 1227 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1228 int MatStoreValues_SeqSBAIJ(Mat mat) 1229 { 1230 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1231 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1232 int ierr; 1233 1234 PetscFunctionBegin; 1235 if (aij->nonew != 1) { 1236 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1237 } 1238 1239 /* allocate space for values if not already there */ 1240 if (!aij->saved_values) { 1241 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1242 } 1243 1244 /* copy values over */ 1245 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1246 PetscFunctionReturn(0); 1247 } 1248 EXTERN_C_END 1249 1250 EXTERN_C_BEGIN 1251 #undef __FUNCT__ 1252 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1253 int MatRetrieveValues_SeqSBAIJ(Mat mat) 1254 { 1255 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1256 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1257 1258 PetscFunctionBegin; 1259 if (aij->nonew != 1) { 1260 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1261 } 1262 if (!aij->saved_values) { 1263 SETERRQ(1,"Must call MatStoreValues(A);first"); 1264 } 1265 1266 /* copy values over */ 1267 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1268 PetscFunctionReturn(0); 1269 } 1270 EXTERN_C_END 1271 1272 EXTERN_C_BEGIN 1273 #undef __FUNCT__ 1274 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1275 int MatCreate_SeqSBAIJ(Mat B) 1276 { 1277 Mat_SeqSBAIJ *b; 1278 int ierr,size; 1279 1280 PetscFunctionBegin; 1281 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1282 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1283 B->m = B->M = PetscMax(B->m,B->M); 1284 B->n = B->N = PetscMax(B->n,B->N); 1285 1286 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1287 B->data = (void*)b; 1288 ierr = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1289 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1290 B->ops->destroy = MatDestroy_SeqSBAIJ; 1291 B->ops->view = MatView_SeqSBAIJ; 1292 B->factor = 0; 1293 B->lupivotthreshold = 1.0; 1294 B->mapping = 0; 1295 b->row = 0; 1296 b->icol = 0; 1297 b->reallocs = 0; 1298 b->saved_values = 0; 1299 1300 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1301 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1302 1303 b->sorted = PETSC_FALSE; 1304 b->roworiented = PETSC_TRUE; 1305 b->nonew = 0; 1306 b->diag = 0; 1307 b->solve_work = 0; 1308 b->mult_work = 0; 1309 b->spptr = 0; 1310 b->keepzeroedrows = PETSC_FALSE; 1311 1312 b->inew = 0; 1313 b->jnew = 0; 1314 b->anew = 0; 1315 b->a2anew = 0; 1316 b->permute = PETSC_FALSE; 1317 1318 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1319 "MatStoreValues_SeqSBAIJ", 1320 (void*)MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1321 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1322 "MatRetrieveValues_SeqSBAIJ", 1323 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1324 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1325 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1326 (void*)MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1327 PetscFunctionReturn(0); 1328 } 1329 EXTERN_C_END 1330 1331 #undef __FUNCT__ 1332 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1333 /*@C 1334 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1335 compressed row) format. For good matrix assembly performance the 1336 user should preallocate the matrix storage by setting the parameter nz 1337 (or the array nnz). By setting these parameters accurately, performance 1338 during matrix assembly can be increased by more than a factor of 50. 1339 1340 Collective on Mat 1341 1342 Input Parameters: 1343 + A - the symmetric matrix 1344 . bs - size of block 1345 . nz - number of block nonzeros per block row (same for all rows) 1346 - nnz - array containing the number of block nonzeros in the various block rows 1347 (possibly different for each block row) or PETSC_NULL 1348 1349 Options Database Keys: 1350 . -mat_no_unroll - uses code that does not unroll the loops in the 1351 block calculations (much slower) 1352 . -mat_block_size - size of the blocks to use 1353 1354 Level: intermediate 1355 1356 Notes: 1357 The block AIJ format is compatible with standard Fortran 77 1358 storage. That is, the stored row and column indices can begin at 1359 either one (as in Fortran) or zero. See the users' manual for details. 1360 1361 Specify the preallocated storage with either nz or nnz (not both). 1362 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1363 allocation. For additional details, see the users manual chapter on 1364 matrices. 1365 1366 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1367 @*/ 1368 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz) 1369 { 1370 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1371 int i,len,ierr,mbs,bs2; 1372 PetscTruth flg; 1373 int s_nz; 1374 1375 PetscFunctionBegin; 1376 B->preallocated = PETSC_TRUE; 1377 ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1378 mbs = B->m/bs; 1379 bs2 = bs*bs; 1380 1381 if (mbs*bs != B->m) { 1382 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1383 } 1384 1385 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1386 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1387 if (nnz) { 1388 for (i=0; i<mbs; i++) { 1389 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1390 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs); 1391 } 1392 } 1393 1394 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1395 if (!flg) { 1396 switch (bs) { 1397 case 1: 1398 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1399 B->ops->solve = MatSolve_SeqSBAIJ_1; 1400 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_1; 1401 B->ops->mult = MatMult_SeqSBAIJ_1; 1402 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1403 break; 1404 case 2: 1405 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1406 B->ops->solve = MatSolve_SeqSBAIJ_2; 1407 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_2; 1408 B->ops->mult = MatMult_SeqSBAIJ_2; 1409 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1410 break; 1411 case 3: 1412 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1413 B->ops->solve = MatSolve_SeqSBAIJ_3; 1414 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_3; 1415 B->ops->mult = MatMult_SeqSBAIJ_3; 1416 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1417 break; 1418 case 4: 1419 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1420 B->ops->solve = MatSolve_SeqSBAIJ_4; 1421 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_4; 1422 B->ops->mult = MatMult_SeqSBAIJ_4; 1423 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1424 break; 1425 case 5: 1426 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1427 B->ops->solve = MatSolve_SeqSBAIJ_5; 1428 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_5; 1429 B->ops->mult = MatMult_SeqSBAIJ_5; 1430 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1431 break; 1432 case 6: 1433 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1434 B->ops->solve = MatSolve_SeqSBAIJ_6; 1435 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_6; 1436 B->ops->mult = MatMult_SeqSBAIJ_6; 1437 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1438 break; 1439 case 7: 1440 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1441 B->ops->solve = MatSolve_SeqSBAIJ_7; 1442 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_7; 1443 B->ops->mult = MatMult_SeqSBAIJ_7; 1444 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1445 break; 1446 } 1447 } 1448 1449 b->mbs = mbs; 1450 b->nbs = mbs; 1451 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1452 if (!nnz) { 1453 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1454 else if (nz <= 0) nz = 1; 1455 for (i=0; i<mbs; i++) { 1456 b->imax[i] = nz; 1457 } 1458 nz = nz*mbs; /* total nz */ 1459 } else { 1460 nz = 0; 1461 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1462 } 1463 /* s_nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1464 s_nz = nz; 1465 1466 /* allocate the matrix space */ 1467 len = s_nz*sizeof(int) + s_nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1468 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1469 ierr = PetscMemzero(b->a,s_nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1470 b->j = (int*)(b->a + s_nz*bs2); 1471 ierr = PetscMemzero(b->j,s_nz*sizeof(int));CHKERRQ(ierr); 1472 b->i = b->j + s_nz; 1473 b->singlemalloc = PETSC_TRUE; 1474 1475 /* pointer to beginning of each row */ 1476 b->i[0] = 0; 1477 for (i=1; i<mbs+1; i++) { 1478 b->i[i] = b->i[i-1] + b->imax[i-1]; 1479 } 1480 1481 /* b->ilen will count nonzeros in each block row so far. */ 1482 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1483 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1484 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1485 1486 b->bs = bs; 1487 b->bs2 = bs2; 1488 b->s_nz = 0; 1489 b->s_maxnz = s_nz*bs2; 1490 1491 b->inew = 0; 1492 b->jnew = 0; 1493 b->anew = 0; 1494 b->a2anew = 0; 1495 b->permute = PETSC_FALSE; 1496 PetscFunctionReturn(0); 1497 } 1498 1499 1500 #undef __FUNCT__ 1501 #define __FUNCT__ "MatCreateSeqSBAIJ" 1502 /*@C 1503 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1504 compressed row) format. For good matrix assembly performance the 1505 user should preallocate the matrix storage by setting the parameter nz 1506 (or the array nnz). By setting these parameters accurately, performance 1507 during matrix assembly can be increased by more than a factor of 50. 1508 1509 Collective on MPI_Comm 1510 1511 Input Parameters: 1512 + comm - MPI communicator, set to PETSC_COMM_SELF 1513 . bs - size of block 1514 . m - number of rows, or number of columns 1515 . nz - number of block nonzeros per block row (same for all rows) 1516 - nnz - array containing the number of block nonzeros in the various block rows 1517 (possibly different for each block row) or PETSC_NULL 1518 1519 Output Parameter: 1520 . A - the symmetric matrix 1521 1522 Options Database Keys: 1523 . -mat_no_unroll - uses code that does not unroll the loops in the 1524 block calculations (much slower) 1525 . -mat_block_size - size of the blocks to use 1526 1527 Level: intermediate 1528 1529 Notes: 1530 The block AIJ format is compatible with standard Fortran 77 1531 storage. That is, the stored row and column indices can begin at 1532 either one (as in Fortran) or zero. See the users' manual for details. 1533 1534 Specify the preallocated storage with either nz or nnz (not both). 1535 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1536 allocation. For additional details, see the users manual chapter on 1537 matrices. 1538 1539 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1540 @*/ 1541 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1542 { 1543 int ierr; 1544 1545 PetscFunctionBegin; 1546 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1547 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1548 ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1549 PetscFunctionReturn(0); 1550 } 1551 1552 #undef __FUNCT__ 1553 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1554 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1555 { 1556 Mat C; 1557 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1558 int i,len,mbs = a->mbs,nz = a->s_nz,bs2 =a->bs2,ierr; 1559 1560 PetscFunctionBegin; 1561 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1562 1563 *B = 0; 1564 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1565 ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr); 1566 c = (Mat_SeqSBAIJ*)C->data; 1567 1568 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1569 C->preallocated = PETSC_TRUE; 1570 C->factor = A->factor; 1571 c->row = 0; 1572 c->icol = 0; 1573 c->saved_values = 0; 1574 c->keepzeroedrows = a->keepzeroedrows; 1575 C->assembled = PETSC_TRUE; 1576 1577 c->bs = a->bs; 1578 c->bs2 = a->bs2; 1579 c->mbs = a->mbs; 1580 c->nbs = a->nbs; 1581 1582 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1583 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1584 for (i=0; i<mbs; i++) { 1585 c->imax[i] = a->imax[i]; 1586 c->ilen[i] = a->ilen[i]; 1587 } 1588 1589 /* allocate the matrix space */ 1590 c->singlemalloc = PETSC_TRUE; 1591 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1592 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1593 c->j = (int*)(c->a + nz*bs2); 1594 c->i = c->j + nz; 1595 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1596 if (mbs > 0) { 1597 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1598 if (cpvalues == MAT_COPY_VALUES) { 1599 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1600 } else { 1601 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1602 } 1603 } 1604 1605 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1606 c->sorted = a->sorted; 1607 c->roworiented = a->roworiented; 1608 c->nonew = a->nonew; 1609 1610 if (a->diag) { 1611 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1612 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1613 for (i=0; i<mbs; i++) { 1614 c->diag[i] = a->diag[i]; 1615 } 1616 } else c->diag = 0; 1617 c->s_nz = a->s_nz; 1618 c->s_maxnz = a->s_maxnz; 1619 c->solve_work = 0; 1620 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1621 c->mult_work = 0; 1622 *B = C; 1623 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1624 PetscFunctionReturn(0); 1625 } 1626 1627 EXTERN_C_BEGIN 1628 #undef __FUNCT__ 1629 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1630 int MatLoad_SeqSBAIJ(PetscViewer viewer,MatType type,Mat *A) 1631 { 1632 Mat_SeqSBAIJ *a; 1633 Mat B; 1634 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1635 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,*s_browlengths,maskcount; 1636 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1637 int *masked,nmask,tmp,bs2,ishift; 1638 PetscScalar *aa; 1639 MPI_Comm comm = ((PetscObject)viewer)->comm; 1640 1641 PetscFunctionBegin; 1642 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1643 bs2 = bs*bs; 1644 1645 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1646 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1647 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1648 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1649 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1650 M = header[1]; N = header[2]; nz = header[3]; 1651 1652 if (header[3] < 0) { 1653 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1654 } 1655 1656 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1657 1658 /* 1659 This code adds extra rows to make sure the number of rows is 1660 divisible by the blocksize 1661 */ 1662 mbs = M/bs; 1663 extra_rows = bs - M + bs*(mbs); 1664 if (extra_rows == bs) extra_rows = 0; 1665 else mbs++; 1666 if (extra_rows) { 1667 PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"); 1668 } 1669 1670 /* read in row lengths */ 1671 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1672 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1673 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1674 1675 /* read in column indices */ 1676 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1677 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1678 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1679 1680 /* loop over row lengths determining block row lengths */ 1681 ierr = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr); 1682 ierr = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr); 1683 ierr = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1684 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1685 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1686 masked = mask + mbs; 1687 rowcount = 0; nzcount = 0; 1688 for (i=0; i<mbs; i++) { 1689 nmask = 0; 1690 for (j=0; j<bs; j++) { 1691 kmax = rowlengths[rowcount]; 1692 for (k=0; k<kmax; k++) { 1693 tmp = jj[nzcount++]/bs; /* block col. index */ 1694 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1695 } 1696 rowcount++; 1697 } 1698 s_browlengths[i] += nmask; 1699 browlengths[i] = 2*s_browlengths[i]; 1700 1701 /* zero out the mask elements we set */ 1702 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1703 } 1704 1705 /* create our matrix */ 1706 ierr = MatCreateSeqSBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 1707 B = *A; 1708 a = (Mat_SeqSBAIJ*)B->data; 1709 1710 /* set matrix "i" values */ 1711 a->i[0] = 0; 1712 for (i=1; i<= mbs; i++) { 1713 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1714 a->ilen[i-1] = s_browlengths[i-1]; 1715 } 1716 a->s_nz = 0; 1717 for (i=0; i<mbs; i++) a->s_nz += s_browlengths[i]; 1718 1719 /* read in nonzero values */ 1720 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1721 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1722 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1723 1724 /* set "a" and "j" values into matrix */ 1725 nzcount = 0; jcount = 0; 1726 for (i=0; i<mbs; i++) { 1727 nzcountb = nzcount; 1728 nmask = 0; 1729 for (j=0; j<bs; j++) { 1730 kmax = rowlengths[i*bs+j]; 1731 for (k=0; k<kmax; k++) { 1732 tmp = jj[nzcount++]/bs; /* block col. index */ 1733 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1734 } 1735 } 1736 /* sort the masked values */ 1737 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1738 1739 /* set "j" values into matrix */ 1740 maskcount = 1; 1741 for (j=0; j<nmask; j++) { 1742 a->j[jcount++] = masked[j]; 1743 mask[masked[j]] = maskcount++; 1744 } 1745 1746 /* set "a" values into matrix */ 1747 ishift = bs2*a->i[i]; 1748 for (j=0; j<bs; j++) { 1749 kmax = rowlengths[i*bs+j]; 1750 for (k=0; k<kmax; k++) { 1751 tmp = jj[nzcountb]/bs ; /* block col. index */ 1752 if (tmp >= i){ 1753 block = mask[tmp] - 1; 1754 point = jj[nzcountb] - bs*tmp; 1755 idx = ishift + bs2*block + j + bs*point; 1756 a->a[idx] = aa[nzcountb]; 1757 } 1758 nzcountb++; 1759 } 1760 } 1761 /* zero out the mask elements we set */ 1762 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1763 } 1764 if (jcount != a->s_nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1765 1766 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1767 ierr = PetscFree(browlengths);CHKERRQ(ierr); 1768 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1769 ierr = PetscFree(aa);CHKERRQ(ierr); 1770 ierr = PetscFree(jj);CHKERRQ(ierr); 1771 ierr = PetscFree(mask);CHKERRQ(ierr); 1772 1773 B->assembled = PETSC_TRUE; 1774 ierr = MatView_Private(B);CHKERRQ(ierr); 1775 PetscFunctionReturn(0); 1776 } 1777 EXTERN_C_END 1778 1779 1780 1781