1 /*$Id: sbaij.c,v 1.62 2001/08/07 03:03:01 balay Exp $*/ 2 3 /* 4 Defines the basic matrix operations for the SBAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 #include "src/mat/impls/sbaij/seq/sbaij.h" 11 12 #define CHUNKSIZE 10 13 14 /* 15 Checks for missing diagonals 16 */ 17 #undef __FUNCT__ 18 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 19 int MatMissingDiagonal_SeqSBAIJ(Mat A) 20 { 21 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 22 int *diag,*jj = a->j,i,ierr; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 26 diag = a->diag; 27 for (i=0; i<a->mbs; i++) { 28 if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i); 29 } 30 PetscFunctionReturn(0); 31 } 32 33 #undef __FUNCT__ 34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 35 int MatMarkDiagonal_SeqSBAIJ(Mat A) 36 { 37 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 38 int i,mbs = a->mbs,ierr; 39 40 PetscFunctionBegin; 41 if (a->diag) PetscFunctionReturn(0); 42 43 ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr); 44 PetscLogObjectMemory(A,(mbs+1)*sizeof(int)); 45 for (i=0; i<mbs; i++) a->diag[i] = a->i[i]; 46 PetscFunctionReturn(0); 47 } 48 49 #undef __FUNCT__ 50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 51 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 52 { 53 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 54 int n = a->mbs,i; 55 56 PetscFunctionBegin; 57 *nn = n; 58 if (!ia) PetscFunctionReturn(0); 59 60 if (oshift == 1) { 61 /* temporarily add 1 to i and j indices */ 62 int s_nz = a->i[n]; 63 for (i=0; i<s_nz; i++) a->j[i]++; 64 for (i=0; i<n+1; i++) a->i[i]++; 65 *ia = a->i; *ja = a->j; 66 } else { 67 *ia = a->i; *ja = a->j; 68 } 69 PetscFunctionReturn(0); 70 } 71 72 #undef __FUNCT__ 73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 74 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 75 { 76 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 77 int i,n = a->mbs; 78 79 PetscFunctionBegin; 80 if (!ia) PetscFunctionReturn(0); 81 82 if (oshift == 1) { 83 int s_nz = a->i[n]-1; 84 for (i=0; i<s_nz; i++) a->j[i]--; 85 for (i=0; i<n+1; i++) a->i[i]--; 86 } 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ" 92 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs) 93 { 94 Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data; 95 96 PetscFunctionBegin; 97 *bs = sbaij->bs; 98 PetscFunctionReturn(0); 99 } 100 101 #undef __FUNCT__ 102 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 103 int MatDestroy_SeqSBAIJ(Mat A) 104 { 105 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 106 int ierr; 107 108 PetscFunctionBegin; 109 #if defined(PETSC_USE_LOG) 110 PetscLogObjectState((PetscObject)A,"Rows=%d, s_NZ=%d",A->m,a->s_nz); 111 #endif 112 ierr = PetscFree(a->a);CHKERRQ(ierr); 113 if (!a->singlemalloc) { 114 ierr = PetscFree(a->i);CHKERRQ(ierr); 115 ierr = PetscFree(a->j);CHKERRQ(ierr); 116 } 117 if (a->row) { 118 ierr = ISDestroy(a->row);CHKERRQ(ierr); 119 } 120 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 121 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 122 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 123 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 124 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 125 if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);} 126 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 127 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 128 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 129 130 if (a->inew){ 131 ierr = PetscFree(a->inew);CHKERRQ(ierr); 132 a->inew = 0; 133 } 134 ierr = PetscFree(a);CHKERRQ(ierr); 135 PetscFunctionReturn(0); 136 } 137 138 #undef __FUNCT__ 139 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 140 int MatSetOption_SeqSBAIJ(Mat A,MatOption op) 141 { 142 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 143 144 PetscFunctionBegin; 145 switch (op) { 146 case MAT_ROW_ORIENTED: 147 a->roworiented = PETSC_TRUE; 148 break; 149 case MAT_COLUMN_ORIENTED: 150 a->roworiented = PETSC_FALSE; 151 break; 152 case MAT_COLUMNS_SORTED: 153 a->sorted = PETSC_TRUE; 154 break; 155 case MAT_COLUMNS_UNSORTED: 156 a->sorted = PETSC_FALSE; 157 break; 158 case MAT_KEEP_ZEROED_ROWS: 159 a->keepzeroedrows = PETSC_TRUE; 160 break; 161 case MAT_NO_NEW_NONZERO_LOCATIONS: 162 a->nonew = 1; 163 break; 164 case MAT_NEW_NONZERO_LOCATION_ERR: 165 a->nonew = -1; 166 break; 167 case MAT_NEW_NONZERO_ALLOCATION_ERR: 168 a->nonew = -2; 169 break; 170 case MAT_YES_NEW_NONZERO_LOCATIONS: 171 a->nonew = 0; 172 break; 173 case MAT_ROWS_SORTED: 174 case MAT_ROWS_UNSORTED: 175 case MAT_YES_NEW_DIAGONALS: 176 case MAT_IGNORE_OFF_PROC_ENTRIES: 177 case MAT_USE_HASH_TABLE: 178 PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n"); 179 break; 180 case MAT_NO_NEW_DIAGONALS: 181 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 182 default: 183 SETERRQ(PETSC_ERR_SUP,"unknown option"); 184 } 185 PetscFunctionReturn(0); 186 } 187 188 #undef __FUNCT__ 189 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 190 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v) 191 { 192 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 193 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr; 194 MatScalar *aa,*aa_i; 195 PetscScalar *v_i; 196 197 PetscFunctionBegin; 198 bs = a->bs; 199 ai = a->i; 200 aj = a->j; 201 aa = a->a; 202 bs2 = a->bs2; 203 204 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 205 206 bn = row/bs; /* Block number */ 207 bp = row % bs; /* Block position */ 208 M = ai[bn+1] - ai[bn]; 209 *ncols = bs*M; 210 211 if (v) { 212 *v = 0; 213 if (*ncols) { 214 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 215 for (i=0; i<M; i++) { /* for each block in the block row */ 216 v_i = *v + i*bs; 217 aa_i = aa + bs2*(ai[bn] + i); 218 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 219 } 220 } 221 } 222 223 if (cols) { 224 *cols = 0; 225 if (*ncols) { 226 ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr); 227 for (i=0; i<M; i++) { /* for each block in the block row */ 228 cols_i = *cols + i*bs; 229 itmp = bs*aj[ai[bn] + i]; 230 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 231 } 232 } 233 } 234 235 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 236 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 237 #ifdef column_search 238 v_i = *v + M*bs; 239 cols_i = *cols + M*bs; 240 for (i=0; i<bn; i++){ /* for each block row */ 241 M = ai[i+1] - ai[i]; 242 for (j=0; j<M; j++){ 243 itmp = aj[ai[i] + j]; /* block column value */ 244 if (itmp == bn){ 245 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 246 for (k=0; k<bs; k++) { 247 *cols_i++ = i*bs+k; 248 *v_i++ = aa_i[k]; 249 } 250 *ncols += bs; 251 break; 252 } 253 } 254 } 255 #endif 256 257 PetscFunctionReturn(0); 258 } 259 260 #undef __FUNCT__ 261 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 262 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 263 { 264 int ierr; 265 266 PetscFunctionBegin; 267 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 268 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 269 PetscFunctionReturn(0); 270 } 271 272 #undef __FUNCT__ 273 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 274 int MatTranspose_SeqSBAIJ(Mat A,Mat *B) 275 { 276 int ierr; 277 PetscFunctionBegin; 278 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 279 PetscFunctionReturn(0); 280 } 281 282 #undef __FUNCT__ 283 #define __FUNCT__ "MatView_SeqSBAIJ_Binary" 284 static int MatView_SeqSBAIJ_Binary(Mat A,PetscViewer viewer) 285 { 286 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 287 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 288 PetscScalar *aa; 289 FILE *file; 290 291 PetscFunctionBegin; 292 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 293 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 294 col_lens[0] = MAT_FILE_COOKIE; 295 296 col_lens[1] = A->m; 297 col_lens[2] = A->m; 298 col_lens[3] = a->s_nz*bs2; 299 300 /* store lengths of each row and write (including header) to file */ 301 count = 0; 302 for (i=0; i<a->mbs; i++) { 303 for (j=0; j<bs; j++) { 304 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 305 } 306 } 307 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 308 ierr = PetscFree(col_lens);CHKERRQ(ierr); 309 310 /* store column indices (zero start index) */ 311 ierr = PetscMalloc((a->s_nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 312 count = 0; 313 for (i=0; i<a->mbs; i++) { 314 for (j=0; j<bs; j++) { 315 for (k=a->i[i]; k<a->i[i+1]; k++) { 316 for (l=0; l<bs; l++) { 317 jj[count++] = bs*a->j[k] + l; 318 } 319 } 320 } 321 } 322 ierr = PetscBinaryWrite(fd,jj,bs2*a->s_nz,PETSC_INT,0);CHKERRQ(ierr); 323 ierr = PetscFree(jj);CHKERRQ(ierr); 324 325 /* store nonzero values */ 326 ierr = PetscMalloc((a->s_nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 327 count = 0; 328 for (i=0; i<a->mbs; i++) { 329 for (j=0; j<bs; j++) { 330 for (k=a->i[i]; k<a->i[i+1]; k++) { 331 for (l=0; l<bs; l++) { 332 aa[count++] = a->a[bs2*k + l*bs + j]; 333 } 334 } 335 } 336 } 337 ierr = PetscBinaryWrite(fd,aa,bs2*a->s_nz,PETSC_SCALAR,0);CHKERRQ(ierr); 338 ierr = PetscFree(aa);CHKERRQ(ierr); 339 340 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 341 if (file) { 342 fprintf(file,"-matload_block_size %d\n",a->bs); 343 } 344 PetscFunctionReturn(0); 345 } 346 347 #undef __FUNCT__ 348 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 349 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 350 { 351 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 352 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 353 char *name; 354 PetscViewerFormat format; 355 356 PetscFunctionBegin; 357 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 358 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 359 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 360 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 361 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 362 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 363 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 364 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 365 for (i=0; i<a->mbs; i++) { 366 for (j=0; j<bs; j++) { 367 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 368 for (k=a->i[i]; k<a->i[i+1]; k++) { 369 for (l=0; l<bs; l++) { 370 #if defined(PETSC_USE_COMPLEX) 371 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 372 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 373 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 374 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 375 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 376 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 377 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 378 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 379 } 380 #else 381 if (a->a[bs2*k + l*bs + j] != 0.0) { 382 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 383 } 384 #endif 385 } 386 } 387 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 388 } 389 } 390 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 391 } else { 392 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 393 for (i=0; i<a->mbs; i++) { 394 for (j=0; j<bs; j++) { 395 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 396 for (k=a->i[i]; k<a->i[i+1]; k++) { 397 for (l=0; l<bs; l++) { 398 #if defined(PETSC_USE_COMPLEX) 399 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 400 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 401 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 402 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 403 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 404 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 405 } else { 406 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 407 } 408 #else 409 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 410 #endif 411 } 412 } 413 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 414 } 415 } 416 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 417 } 418 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 419 PetscFunctionReturn(0); 420 } 421 422 #undef __FUNCT__ 423 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 424 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 425 { 426 Mat A = (Mat) Aa; 427 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 428 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank; 429 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 430 MatScalar *aa; 431 MPI_Comm comm; 432 PetscViewer viewer; 433 434 PetscFunctionBegin; 435 /* 436 This is nasty. If this is called from an originally parallel matrix 437 then all processes call this,but only the first has the matrix so the 438 rest should return immediately. 439 */ 440 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 441 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 442 if (rank) PetscFunctionReturn(0); 443 444 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 445 446 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 447 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 448 449 /* loop over matrix elements drawing boxes */ 450 color = PETSC_DRAW_BLUE; 451 for (i=0,row=0; i<mbs; i++,row+=bs) { 452 for (j=a->i[i]; j<a->i[i+1]; j++) { 453 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 454 x_l = a->j[j]*bs; x_r = x_l + 1.0; 455 aa = a->a + j*bs2; 456 for (k=0; k<bs; k++) { 457 for (l=0; l<bs; l++) { 458 if (PetscRealPart(*aa++) >= 0.) continue; 459 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 460 } 461 } 462 } 463 } 464 color = PETSC_DRAW_CYAN; 465 for (i=0,row=0; i<mbs; i++,row+=bs) { 466 for (j=a->i[i]; j<a->i[i+1]; j++) { 467 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 468 x_l = a->j[j]*bs; x_r = x_l + 1.0; 469 aa = a->a + j*bs2; 470 for (k=0; k<bs; k++) { 471 for (l=0; l<bs; l++) { 472 if (PetscRealPart(*aa++) != 0.) continue; 473 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 474 } 475 } 476 } 477 } 478 479 color = PETSC_DRAW_RED; 480 for (i=0,row=0; i<mbs; i++,row+=bs) { 481 for (j=a->i[i]; j<a->i[i+1]; j++) { 482 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 483 x_l = a->j[j]*bs; x_r = x_l + 1.0; 484 aa = a->a + j*bs2; 485 for (k=0; k<bs; k++) { 486 for (l=0; l<bs; l++) { 487 if (PetscRealPart(*aa++) <= 0.) continue; 488 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 489 } 490 } 491 } 492 } 493 PetscFunctionReturn(0); 494 } 495 496 #undef __FUNCT__ 497 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 498 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 499 { 500 int ierr; 501 PetscReal xl,yl,xr,yr,w,h; 502 PetscDraw draw; 503 PetscTruth isnull; 504 505 PetscFunctionBegin; 506 507 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 508 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 509 510 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 511 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 512 xr += w; yr += h; xl = -w; yl = -h; 513 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 514 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 515 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 516 PetscFunctionReturn(0); 517 } 518 519 #undef __FUNCT__ 520 #define __FUNCT__ "MatView_SeqSBAIJ" 521 int MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 522 { 523 int ierr; 524 PetscTruth issocket,isascii,isbinary,isdraw; 525 526 PetscFunctionBegin; 527 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 528 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 529 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 530 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 531 if (issocket) { 532 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 533 } else if (isascii){ 534 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 535 } else if (isbinary) { 536 ierr = MatView_SeqSBAIJ_Binary(A,viewer);CHKERRQ(ierr); 537 } else if (isdraw) { 538 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 539 } else { 540 SETERRQ1(1,"Viewer type %s not supported by SeqSBAIJ matrices",((PetscObject)viewer)->type_name); 541 } 542 PetscFunctionReturn(0); 543 } 544 545 546 #undef __FUNCT__ 547 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 548 int MatGetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v) 549 { 550 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 551 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 552 int *ai = a->i,*ailen = a->ilen; 553 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 554 MatScalar *ap,*aa = a->a,zero = 0.0; 555 556 PetscFunctionBegin; 557 for (k=0; k<m; k++) { /* loop over rows */ 558 row = im[k]; brow = row/bs; 559 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 560 if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 561 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 562 nrow = ailen[brow]; 563 for (l=0; l<n; l++) { /* loop over columns */ 564 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 565 if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 566 col = in[l] ; 567 bcol = col/bs; 568 cidx = col%bs; 569 ridx = row%bs; 570 high = nrow; 571 low = 0; /* assume unsorted */ 572 while (high-low > 5) { 573 t = (low+high)/2; 574 if (rp[t] > bcol) high = t; 575 else low = t; 576 } 577 for (i=low; i<high; i++) { 578 if (rp[i] > bcol) break; 579 if (rp[i] == bcol) { 580 *v++ = ap[bs2*i+bs*cidx+ridx]; 581 goto finished; 582 } 583 } 584 *v++ = zero; 585 finished:; 586 } 587 } 588 PetscFunctionReturn(0); 589 } 590 591 592 #undef __FUNCT__ 593 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 594 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 595 { 596 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 597 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 598 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 599 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 600 PetscTruth roworiented=a->roworiented; 601 MatScalar *value = v,*ap,*aa = a->a,*bap; 602 603 PetscFunctionBegin; 604 if (roworiented) { 605 stepval = (n-1)*bs; 606 } else { 607 stepval = (m-1)*bs; 608 } 609 for (k=0; k<m; k++) { /* loop over added rows */ 610 row = im[k]; 611 if (row < 0) continue; 612 #if defined(PETSC_USE_BOPT_g) 613 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 614 #endif 615 rp = aj + ai[row]; 616 ap = aa + bs2*ai[row]; 617 rmax = imax[row]; 618 nrow = ailen[row]; 619 low = 0; 620 for (l=0; l<n; l++) { /* loop over added columns */ 621 if (in[l] < 0) continue; 622 #if defined(PETSC_USE_BOPT_g) 623 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 624 #endif 625 col = in[l]; 626 if (col < row) continue; /* ignore lower triangular block */ 627 if (roworiented) { 628 value = v + k*(stepval+bs)*bs + l*bs; 629 } else { 630 value = v + l*(stepval+bs)*bs + k*bs; 631 } 632 if (!sorted) low = 0; high = nrow; 633 while (high-low > 7) { 634 t = (low+high)/2; 635 if (rp[t] > col) high = t; 636 else low = t; 637 } 638 for (i=low; i<high; i++) { 639 if (rp[i] > col) break; 640 if (rp[i] == col) { 641 bap = ap + bs2*i; 642 if (roworiented) { 643 if (is == ADD_VALUES) { 644 for (ii=0; ii<bs; ii++,value+=stepval) { 645 for (jj=ii; jj<bs2; jj+=bs) { 646 bap[jj] += *value++; 647 } 648 } 649 } else { 650 for (ii=0; ii<bs; ii++,value+=stepval) { 651 for (jj=ii; jj<bs2; jj+=bs) { 652 bap[jj] = *value++; 653 } 654 } 655 } 656 } else { 657 if (is == ADD_VALUES) { 658 for (ii=0; ii<bs; ii++,value+=stepval) { 659 for (jj=0; jj<bs; jj++) { 660 *bap++ += *value++; 661 } 662 } 663 } else { 664 for (ii=0; ii<bs; ii++,value+=stepval) { 665 for (jj=0; jj<bs; jj++) { 666 *bap++ = *value++; 667 } 668 } 669 } 670 } 671 goto noinsert2; 672 } 673 } 674 if (nonew == 1) goto noinsert2; 675 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 676 if (nrow >= rmax) { 677 /* there is no extra room in row, therefore enlarge */ 678 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 679 MatScalar *new_a; 680 681 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 682 683 /* malloc new storage space */ 684 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 685 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 686 new_j = (int*)(new_a + bs2*new_nz); 687 new_i = new_j + new_nz; 688 689 /* copy over old data into new slots */ 690 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 691 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 692 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 693 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 694 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 695 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 696 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 697 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 698 /* free up old matrix storage */ 699 ierr = PetscFree(a->a);CHKERRQ(ierr); 700 if (!a->singlemalloc) { 701 ierr = PetscFree(a->i);CHKERRQ(ierr); 702 ierr = PetscFree(a->j);CHKERRQ(ierr); 703 } 704 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 705 a->singlemalloc = PETSC_TRUE; 706 707 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 708 rmax = imax[row] = imax[row] + CHUNKSIZE; 709 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 710 a->s_maxnz += bs2*CHUNKSIZE; 711 a->reallocs++; 712 a->s_nz++; 713 } 714 N = nrow++ - 1; 715 /* shift up all the later entries in this row */ 716 for (ii=N; ii>=i; ii--) { 717 rp[ii+1] = rp[ii]; 718 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 719 } 720 if (N >= i) { 721 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 722 } 723 rp[i] = col; 724 bap = ap + bs2*i; 725 if (roworiented) { 726 for (ii=0; ii<bs; ii++,value+=stepval) { 727 for (jj=ii; jj<bs2; jj+=bs) { 728 bap[jj] = *value++; 729 } 730 } 731 } else { 732 for (ii=0; ii<bs; ii++,value+=stepval) { 733 for (jj=0; jj<bs; jj++) { 734 *bap++ = *value++; 735 } 736 } 737 } 738 noinsert2:; 739 low = i; 740 } 741 ailen[row] = nrow; 742 } 743 PetscFunctionReturn(0); 744 } 745 746 #undef __FUNCT__ 747 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 748 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 749 { 750 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 751 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 752 int m = A->m,*ip,N,*ailen = a->ilen; 753 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 754 MatScalar *aa = a->a,*ap; 755 #if defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_MUMPS) 756 PetscTruth flag; 757 #endif 758 759 PetscFunctionBegin; 760 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 761 762 if (m) rmax = ailen[0]; 763 for (i=1; i<mbs; i++) { 764 /* move each row back by the amount of empty slots (fshift) before it*/ 765 fshift += imax[i-1] - ailen[i-1]; 766 rmax = PetscMax(rmax,ailen[i]); 767 if (fshift) { 768 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 769 N = ailen[i]; 770 for (j=0; j<N; j++) { 771 ip[j-fshift] = ip[j]; 772 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 773 } 774 } 775 ai[i] = ai[i-1] + ailen[i-1]; 776 } 777 if (mbs) { 778 fshift += imax[mbs-1] - ailen[mbs-1]; 779 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 780 } 781 /* reset ilen and imax for each row */ 782 for (i=0; i<mbs; i++) { 783 ailen[i] = imax[i] = ai[i+1] - ai[i]; 784 } 785 a->s_nz = ai[mbs]; 786 787 /* diagonals may have moved, reset it */ 788 if (a->diag) { 789 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr); 790 } 791 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 792 m,A->m,a->bs,fshift*bs2,a->s_nz*bs2); 793 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n", 794 a->reallocs); 795 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 796 a->reallocs = 0; 797 A->info.nz_unneeded = (PetscReal)fshift*bs2; 798 799 #if defined(PETSC_HAVE_SPOOLES) 800 ierr = PetscOptionsHasName(A->prefix,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr); 801 if (flag) { ierr = MatUseSpooles_SeqSBAIJ(A);CHKERRQ(ierr); } 802 #endif 803 #if defined(PETSC_HAVE_MUMPS) 804 ierr = PetscOptionsHasName(A->prefix,"-mat_sbaij_mumps",&flag);CHKERRQ(ierr); 805 if (flag) { ierr = MatUseMUMPS_MPIAIJ(A);CHKERRQ(ierr); } 806 #endif 807 808 PetscFunctionReturn(0); 809 } 810 811 /* 812 This function returns an array of flags which indicate the locations of contiguous 813 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 814 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 815 Assume: sizes should be long enough to hold all the values. 816 */ 817 #undef __FUNCT__ 818 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 819 int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 820 { 821 int i,j,k,row; 822 PetscTruth flg; 823 824 PetscFunctionBegin; 825 for (i=0,j=0; i<n; j++) { 826 row = idx[i]; 827 if (row%bs!=0) { /* Not the begining of a block */ 828 sizes[j] = 1; 829 i++; 830 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 831 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 832 i++; 833 } else { /* Begining of the block, so check if the complete block exists */ 834 flg = PETSC_TRUE; 835 for (k=1; k<bs; k++) { 836 if (row+k != idx[i+k]) { /* break in the block */ 837 flg = PETSC_FALSE; 838 break; 839 } 840 } 841 if (flg == PETSC_TRUE) { /* No break in the bs */ 842 sizes[j] = bs; 843 i+= bs; 844 } else { 845 sizes[j] = 1; 846 i++; 847 } 848 } 849 } 850 *bs_max = j; 851 PetscFunctionReturn(0); 852 } 853 854 #undef __FUNCT__ 855 #define __FUNCT__ "MatZeroRows_SeqSBAIJ" 856 int MatZeroRows_SeqSBAIJ(Mat A,IS is,PetscScalar *diag) 857 { 858 PetscFunctionBegin; 859 SETERRQ(PETSC_ERR_SUP,"No support for this function yet"); 860 } 861 862 /* Only add/insert a(i,j) with i<=j (blocks). 863 Any a(i,j) with i>j input by user is ingored. 864 */ 865 866 #undef __FUNCT__ 867 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 868 int MatSetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 869 { 870 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 871 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 872 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 873 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 874 int ridx,cidx,bs2=a->bs2,ierr; 875 MatScalar *ap,value,*aa=a->a,*bap; 876 877 PetscFunctionBegin; 878 879 for (k=0; k<m; k++) { /* loop over added rows */ 880 row = im[k]; /* row number */ 881 brow = row/bs; /* block row number */ 882 if (row < 0) continue; 883 #if defined(PETSC_USE_BOPT_g) 884 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m); 885 #endif 886 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 887 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 888 rmax = imax[brow]; /* maximum space allocated for this row */ 889 nrow = ailen[brow]; /* actual length of this row */ 890 low = 0; 891 892 for (l=0; l<n; l++) { /* loop over added columns */ 893 if (in[l] < 0) continue; 894 #if defined(PETSC_USE_BOPT_g) 895 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m); 896 #endif 897 col = in[l]; 898 bcol = col/bs; /* block col number */ 899 900 if (brow <= bcol){ 901 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 902 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 903 /* element value a(k,l) */ 904 if (roworiented) { 905 value = v[l + k*n]; 906 } else { 907 value = v[k + l*m]; 908 } 909 910 /* move pointer bap to a(k,l) quickly and add/insert value */ 911 if (!sorted) low = 0; high = nrow; 912 while (high-low > 7) { 913 t = (low+high)/2; 914 if (rp[t] > bcol) high = t; 915 else low = t; 916 } 917 for (i=low; i<high; i++) { 918 /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */ 919 if (rp[i] > bcol) break; 920 if (rp[i] == bcol) { 921 bap = ap + bs2*i + bs*cidx + ridx; 922 if (is == ADD_VALUES) *bap += value; 923 else *bap = value; 924 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 925 if (brow == bcol && ridx < cidx){ 926 bap = ap + bs2*i + bs*ridx + cidx; 927 if (is == ADD_VALUES) *bap += value; 928 else *bap = value; 929 } 930 goto noinsert1; 931 } 932 } 933 934 if (nonew == 1) goto noinsert1; 935 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 936 if (nrow >= rmax) { 937 /* there is no extra room in row, therefore enlarge */ 938 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 939 MatScalar *new_a; 940 941 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 942 943 /* Malloc new storage space */ 944 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 945 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 946 new_j = (int*)(new_a + bs2*new_nz); 947 new_i = new_j + new_nz; 948 949 /* copy over old data into new slots */ 950 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 951 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 952 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 953 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 954 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 955 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 956 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 957 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 958 /* free up old matrix storage */ 959 ierr = PetscFree(a->a);CHKERRQ(ierr); 960 if (!a->singlemalloc) { 961 ierr = PetscFree(a->i);CHKERRQ(ierr); 962 ierr = PetscFree(a->j);CHKERRQ(ierr); 963 } 964 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 965 a->singlemalloc = PETSC_TRUE; 966 967 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 968 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 969 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 970 a->s_maxnz += bs2*CHUNKSIZE; 971 a->reallocs++; 972 a->s_nz++; 973 } 974 975 N = nrow++ - 1; 976 /* shift up all the later entries in this row */ 977 for (ii=N; ii>=i; ii--) { 978 rp[ii+1] = rp[ii]; 979 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 980 } 981 if (N>=i) { 982 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 983 } 984 rp[i] = bcol; 985 ap[bs2*i + bs*cidx + ridx] = value; 986 noinsert1:; 987 low = i; 988 /* } */ 989 } 990 } /* end of if .. if.. */ 991 } /* end of loop over added columns */ 992 ailen[brow] = nrow; 993 } /* end of loop over added rows */ 994 995 PetscFunctionReturn(0); 996 } 997 998 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*); 999 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*); 1000 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS*,int); 1001 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 1002 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**); 1003 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec); 1004 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec); 1005 extern int MatScale_SeqSBAIJ(PetscScalar*,Mat); 1006 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 1007 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 1008 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec); 1009 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 1010 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 1011 extern int MatZeroEntries_SeqSBAIJ(Mat); 1012 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec); 1013 1014 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 1015 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 1016 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 1017 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 1018 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 1019 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 1020 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 1021 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 1022 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec); 1023 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec); 1024 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec); 1025 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec); 1026 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec); 1027 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec); 1028 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec); 1029 1030 extern int MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs); 1031 1032 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 1033 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 1034 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 1035 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 1036 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 1037 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 1038 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 1039 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 1040 1041 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*); 1042 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*); 1043 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*); 1044 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*); 1045 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*); 1046 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*); 1047 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*); 1048 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*); 1049 extern int MatGetInertia_SeqSBAIJ(Mat,int*,int*,int*); 1050 1051 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 1052 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 1053 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 1054 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 1055 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 1056 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 1057 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 1058 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 1059 1060 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 1061 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 1062 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 1063 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 1064 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 1065 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 1066 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 1067 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 1068 1069 #ifdef HAVE_MatICCFactor 1070 /* modefied from MatILUFactor_SeqSBAIJ, needs further work! */ 1071 #undef __FUNCT__ 1072 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 1073 int MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 1074 { 1075 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 1076 Mat outA; 1077 int ierr; 1078 PetscTruth row_identity,col_identity; 1079 1080 PetscFunctionBegin; 1081 /* 1082 if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1083 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1084 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1085 if (!row_identity || !col_identity) { 1086 SETERRQ(1,"Row and column permutations must be identity for in-place ICC"); 1087 } 1088 */ 1089 1090 outA = inA; 1091 inA->factor = FACTOR_CHOLESKY; 1092 1093 if (!a->diag) { 1094 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 1095 } 1096 /* 1097 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1098 for ILU(0) factorization with natural ordering 1099 */ 1100 switch (a->bs) { 1101 case 1: 1102 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1103 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1104 inA->ops->solves = MatSolves_SeqSBAIJ_1; 1105 PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"); 1106 case 2: 1107 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1108 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1109 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1110 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1111 break; 1112 case 3: 1113 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1114 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1115 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1116 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1117 break; 1118 case 4: 1119 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1120 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1121 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1122 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1123 break; 1124 case 5: 1125 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1126 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1127 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1128 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1129 break; 1130 case 6: 1131 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1132 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1133 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1134 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1135 break; 1136 case 7: 1137 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1138 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1139 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1140 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1141 break; 1142 default: 1143 a->row = row; 1144 a->icol = col; 1145 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1146 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1147 1148 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1149 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 1150 PetscLogObjectParent(inA,a->icol); 1151 1152 if (!a->solve_work) { 1153 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1154 PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar)); 1155 } 1156 } 1157 1158 ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr); 1159 1160 PetscFunctionReturn(0); 1161 } 1162 #endif 1163 1164 #undef __FUNCT__ 1165 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1166 int MatPrintHelp_SeqSBAIJ(Mat A) 1167 { 1168 static PetscTruth called = PETSC_FALSE; 1169 MPI_Comm comm = A->comm; 1170 int ierr; 1171 1172 PetscFunctionBegin; 1173 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1174 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1175 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1176 PetscFunctionReturn(0); 1177 } 1178 1179 EXTERN_C_BEGIN 1180 #undef __FUNCT__ 1181 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1182 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices) 1183 { 1184 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1185 int i,nz,n; 1186 1187 PetscFunctionBegin; 1188 nz = baij->s_maxnz; 1189 n = mat->n; 1190 for (i=0; i<nz; i++) { 1191 baij->j[i] = indices[i]; 1192 } 1193 baij->s_nz = nz; 1194 for (i=0; i<n; i++) { 1195 baij->ilen[i] = baij->imax[i]; 1196 } 1197 1198 PetscFunctionReturn(0); 1199 } 1200 EXTERN_C_END 1201 1202 #undef __FUNCT__ 1203 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1204 /*@ 1205 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1206 in the matrix. 1207 1208 Input Parameters: 1209 + mat - the SeqSBAIJ matrix 1210 - indices - the column indices 1211 1212 Level: advanced 1213 1214 Notes: 1215 This can be called if you have precomputed the nonzero structure of the 1216 matrix and want to provide it to the matrix object to improve the performance 1217 of the MatSetValues() operation. 1218 1219 You MUST have set the correct numbers of nonzeros per row in the call to 1220 MatCreateSeqSBAIJ(). 1221 1222 MUST be called before any calls to MatSetValues() 1223 1224 .seealso: MatCreateSeqSBAIJ 1225 @*/ 1226 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices) 1227 { 1228 int ierr,(*f)(Mat,int *); 1229 1230 PetscFunctionBegin; 1231 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1232 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1233 if (f) { 1234 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1235 } else { 1236 SETERRQ(1,"Wrong type of matrix to set column indices"); 1237 } 1238 PetscFunctionReturn(0); 1239 } 1240 1241 #undef __FUNCT__ 1242 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1243 int MatSetUpPreallocation_SeqSBAIJ(Mat A) 1244 { 1245 int ierr; 1246 1247 PetscFunctionBegin; 1248 ierr = MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1249 PetscFunctionReturn(0); 1250 } 1251 1252 #undef __FUNCT__ 1253 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1254 int MatGetArray_SeqSBAIJ(Mat A,PetscScalar **array) 1255 { 1256 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1257 PetscFunctionBegin; 1258 *array = a->a; 1259 PetscFunctionReturn(0); 1260 } 1261 1262 #undef __FUNCT__ 1263 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1264 int MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar **array) 1265 { 1266 PetscFunctionBegin; 1267 PetscFunctionReturn(0); 1268 } 1269 1270 #include "petscblaslapack.h" 1271 #undef __FUNCT__ 1272 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1273 int MatAXPY_SeqSBAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str) 1274 { 1275 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1276 int ierr,one=1,i,bs=y->bs,bs2,j; 1277 1278 PetscFunctionBegin; 1279 if (str == SAME_NONZERO_PATTERN) { 1280 BLaxpy_(&x->s_nz,a,x->a,&one,y->a,&one); 1281 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1282 if (y->xtoy && y->XtoY != X) { 1283 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1284 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1285 } 1286 if (!y->xtoy) { /* get xtoy */ 1287 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1288 y->XtoY = X; 1289 } 1290 bs2 = bs*bs; 1291 for (i=0; i<x->s_nz; i++) { 1292 j = 0; 1293 while (j < bs2){ 1294 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 1295 j++; 1296 } 1297 } 1298 PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %d/%d = %g\n",bs2*x->s_nz,bs2*y->s_nz,(PetscReal)(bs2*x->s_nz)/(bs2*y->s_nz)); 1299 } else { 1300 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1301 } 1302 PetscFunctionReturn(0); 1303 } 1304 1305 /* -------------------------------------------------------------------*/ 1306 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1307 MatGetRow_SeqSBAIJ, 1308 MatRestoreRow_SeqSBAIJ, 1309 MatMult_SeqSBAIJ_N, 1310 MatMultAdd_SeqSBAIJ_N, 1311 MatMultTranspose_SeqSBAIJ, 1312 MatMultTransposeAdd_SeqSBAIJ, 1313 MatSolve_SeqSBAIJ_N, 1314 0, 1315 0, 1316 0, 1317 0, 1318 MatCholeskyFactor_SeqSBAIJ, 1319 MatRelax_SeqSBAIJ, 1320 MatTranspose_SeqSBAIJ, 1321 MatGetInfo_SeqSBAIJ, 1322 MatEqual_SeqSBAIJ, 1323 MatGetDiagonal_SeqSBAIJ, 1324 MatDiagonalScale_SeqSBAIJ, 1325 MatNorm_SeqSBAIJ, 1326 0, 1327 MatAssemblyEnd_SeqSBAIJ, 1328 0, 1329 MatSetOption_SeqSBAIJ, 1330 MatZeroEntries_SeqSBAIJ, 1331 MatZeroRows_SeqSBAIJ, 1332 0, 1333 0, 1334 MatCholeskyFactorSymbolic_SeqSBAIJ, 1335 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1336 MatSetUpPreallocation_SeqSBAIJ, 1337 0, 1338 MatICCFactorSymbolic_SeqSBAIJ, 1339 MatGetArray_SeqSBAIJ, 1340 MatRestoreArray_SeqSBAIJ, 1341 MatDuplicate_SeqSBAIJ, 1342 0, 1343 0, 1344 0, 1345 0, 1346 MatAXPY_SeqSBAIJ, 1347 MatGetSubMatrices_SeqSBAIJ, 1348 MatIncreaseOverlap_SeqSBAIJ, 1349 MatGetValues_SeqSBAIJ, 1350 0, 1351 MatPrintHelp_SeqSBAIJ, 1352 MatScale_SeqSBAIJ, 1353 0, 1354 0, 1355 0, 1356 MatGetBlockSize_SeqSBAIJ, 1357 MatGetRowIJ_SeqSBAIJ, 1358 MatRestoreRowIJ_SeqSBAIJ, 1359 0, 1360 0, 1361 0, 1362 0, 1363 0, 1364 0, 1365 MatSetValuesBlocked_SeqSBAIJ, 1366 MatGetSubMatrix_SeqSBAIJ, 1367 0, 1368 0, 1369 MatGetPetscMaps_Petsc, 1370 0, 1371 0, 1372 0, 1373 0, 1374 0, 1375 0, 1376 MatGetRowMax_SeqSBAIJ, 1377 0, 1378 0, 1379 0, 1380 0, 1381 0, 1382 0, 1383 0, 1384 0, 1385 0, 1386 0, 1387 0, 1388 0, 1389 0, 1390 #if !defined(PETSC_USE_COMPLEX) 1391 MatGetInertia_SeqSBAIJ 1392 #else 1393 0 1394 #endif 1395 }; 1396 1397 EXTERN_C_BEGIN 1398 #undef __FUNCT__ 1399 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1400 int MatStoreValues_SeqSBAIJ(Mat mat) 1401 { 1402 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1403 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1404 int ierr; 1405 1406 PetscFunctionBegin; 1407 if (aij->nonew != 1) { 1408 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1409 } 1410 1411 /* allocate space for values if not already there */ 1412 if (!aij->saved_values) { 1413 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1414 } 1415 1416 /* copy values over */ 1417 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1418 PetscFunctionReturn(0); 1419 } 1420 EXTERN_C_END 1421 1422 EXTERN_C_BEGIN 1423 #undef __FUNCT__ 1424 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1425 int MatRetrieveValues_SeqSBAIJ(Mat mat) 1426 { 1427 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1428 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1429 1430 PetscFunctionBegin; 1431 if (aij->nonew != 1) { 1432 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1433 } 1434 if (!aij->saved_values) { 1435 SETERRQ(1,"Must call MatStoreValues(A);first"); 1436 } 1437 1438 /* copy values over */ 1439 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1440 PetscFunctionReturn(0); 1441 } 1442 EXTERN_C_END 1443 1444 EXTERN_C_BEGIN 1445 #undef __FUNCT__ 1446 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1447 int MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,int bs,int nz,int *nnz) 1448 { 1449 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1450 int i,len,ierr,mbs,bs2; 1451 PetscTruth flg; 1452 int s_nz; 1453 1454 PetscFunctionBegin; 1455 B->preallocated = PETSC_TRUE; 1456 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1457 mbs = B->m/bs; 1458 bs2 = bs*bs; 1459 1460 if (mbs*bs != B->m) { 1461 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1462 } 1463 1464 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1465 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1466 if (nnz) { 1467 for (i=0; i<mbs; i++) { 1468 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1469 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs); 1470 } 1471 } 1472 1473 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1474 if (!flg) { 1475 switch (bs) { 1476 case 1: 1477 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1478 B->ops->solve = MatSolve_SeqSBAIJ_1; 1479 B->ops->solves = MatSolves_SeqSBAIJ_1; 1480 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_1; 1481 B->ops->mult = MatMult_SeqSBAIJ_1; 1482 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1483 break; 1484 case 2: 1485 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1486 B->ops->solve = MatSolve_SeqSBAIJ_2; 1487 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_2; 1488 B->ops->mult = MatMult_SeqSBAIJ_2; 1489 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1490 break; 1491 case 3: 1492 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1493 B->ops->solve = MatSolve_SeqSBAIJ_3; 1494 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_3; 1495 B->ops->mult = MatMult_SeqSBAIJ_3; 1496 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1497 break; 1498 case 4: 1499 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1500 B->ops->solve = MatSolve_SeqSBAIJ_4; 1501 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_4; 1502 B->ops->mult = MatMult_SeqSBAIJ_4; 1503 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1504 break; 1505 case 5: 1506 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1507 B->ops->solve = MatSolve_SeqSBAIJ_5; 1508 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_5; 1509 B->ops->mult = MatMult_SeqSBAIJ_5; 1510 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1511 break; 1512 case 6: 1513 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1514 B->ops->solve = MatSolve_SeqSBAIJ_6; 1515 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_6; 1516 B->ops->mult = MatMult_SeqSBAIJ_6; 1517 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1518 break; 1519 case 7: 1520 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1521 B->ops->solve = MatSolve_SeqSBAIJ_7; 1522 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_7; 1523 B->ops->mult = MatMult_SeqSBAIJ_7; 1524 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1525 break; 1526 } 1527 } 1528 1529 b->mbs = mbs; 1530 b->nbs = mbs; 1531 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1532 if (!nnz) { 1533 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1534 else if (nz <= 0) nz = 1; 1535 for (i=0; i<mbs; i++) { 1536 b->imax[i] = nz; 1537 } 1538 nz = nz*mbs; /* total nz */ 1539 } else { 1540 nz = 0; 1541 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1542 } 1543 /* s_nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1544 s_nz = nz; 1545 1546 /* allocate the matrix space */ 1547 len = s_nz*sizeof(int) + s_nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1548 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1549 ierr = PetscMemzero(b->a,s_nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1550 b->j = (int*)(b->a + s_nz*bs2); 1551 ierr = PetscMemzero(b->j,s_nz*sizeof(int));CHKERRQ(ierr); 1552 b->i = b->j + s_nz; 1553 b->singlemalloc = PETSC_TRUE; 1554 1555 /* pointer to beginning of each row */ 1556 b->i[0] = 0; 1557 for (i=1; i<mbs+1; i++) { 1558 b->i[i] = b->i[i-1] + b->imax[i-1]; 1559 } 1560 1561 /* b->ilen will count nonzeros in each block row so far. */ 1562 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1563 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1564 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1565 1566 b->bs = bs; 1567 b->bs2 = bs2; 1568 b->s_nz = 0; 1569 b->s_maxnz = s_nz*bs2; 1570 1571 b->inew = 0; 1572 b->jnew = 0; 1573 b->anew = 0; 1574 b->a2anew = 0; 1575 b->permute = PETSC_FALSE; 1576 PetscFunctionReturn(0); 1577 } 1578 EXTERN_C_END 1579 1580 EXTERN_C_BEGIN 1581 #undef __FUNCT__ 1582 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1583 int MatCreate_SeqSBAIJ(Mat B) 1584 { 1585 Mat_SeqSBAIJ *b; 1586 int ierr,size; 1587 1588 PetscFunctionBegin; 1589 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1590 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1591 B->m = B->M = PetscMax(B->m,B->M); 1592 B->n = B->N = PetscMax(B->n,B->N); 1593 1594 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1595 B->data = (void*)b; 1596 ierr = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1597 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1598 B->ops->destroy = MatDestroy_SeqSBAIJ; 1599 B->ops->view = MatView_SeqSBAIJ; 1600 B->factor = 0; 1601 B->lupivotthreshold = 1.0; 1602 B->mapping = 0; 1603 b->row = 0; 1604 b->icol = 0; 1605 b->reallocs = 0; 1606 b->saved_values = 0; 1607 1608 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1609 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1610 1611 b->sorted = PETSC_FALSE; 1612 b->roworiented = PETSC_TRUE; 1613 b->nonew = 0; 1614 b->diag = 0; 1615 b->solve_work = 0; 1616 b->mult_work = 0; 1617 B->spptr = 0; 1618 b->keepzeroedrows = PETSC_FALSE; 1619 b->xtoy = 0; 1620 b->XtoY = 0; 1621 1622 b->inew = 0; 1623 b->jnew = 0; 1624 b->anew = 0; 1625 b->a2anew = 0; 1626 b->permute = PETSC_FALSE; 1627 1628 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1629 "MatStoreValues_SeqSBAIJ", 1630 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1631 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1632 "MatRetrieveValues_SeqSBAIJ", 1633 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1634 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1635 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1636 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1637 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1638 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1639 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1640 PetscFunctionReturn(0); 1641 } 1642 EXTERN_C_END 1643 1644 #undef __FUNCT__ 1645 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1646 /*@C 1647 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1648 compressed row) format. For good matrix assembly performance the 1649 user should preallocate the matrix storage by setting the parameter nz 1650 (or the array nnz). By setting these parameters accurately, performance 1651 during matrix assembly can be increased by more than a factor of 50. 1652 1653 Collective on Mat 1654 1655 Input Parameters: 1656 + A - the symmetric matrix 1657 . bs - size of block 1658 . nz - number of block nonzeros per block row (same for all rows) 1659 - nnz - array containing the number of block nonzeros in the upper triangular plus 1660 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1661 1662 Options Database Keys: 1663 . -mat_no_unroll - uses code that does not unroll the loops in the 1664 block calculations (much slower) 1665 . -mat_block_size - size of the blocks to use 1666 1667 Level: intermediate 1668 1669 Notes: 1670 Specify the preallocated storage with either nz or nnz (not both). 1671 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1672 allocation. For additional details, see the users manual chapter on 1673 matrices. 1674 1675 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1676 @*/ 1677 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz) { 1678 int ierr,(*f)(Mat,int,int,int *); 1679 1680 PetscFunctionBegin; 1681 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1682 if (f) { 1683 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 1684 } 1685 PetscFunctionReturn(0); 1686 } 1687 1688 #undef __FUNCT__ 1689 #define __FUNCT__ "MatCreateSeqSBAIJ" 1690 /*@C 1691 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1692 compressed row) format. For good matrix assembly performance the 1693 user should preallocate the matrix storage by setting the parameter nz 1694 (or the array nnz). By setting these parameters accurately, performance 1695 during matrix assembly can be increased by more than a factor of 50. 1696 1697 Collective on MPI_Comm 1698 1699 Input Parameters: 1700 + comm - MPI communicator, set to PETSC_COMM_SELF 1701 . bs - size of block 1702 . m - number of rows, or number of columns 1703 . nz - number of block nonzeros per block row (same for all rows) 1704 - nnz - array containing the number of block nonzeros in the upper triangular plus 1705 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1706 1707 Output Parameter: 1708 . A - the symmetric matrix 1709 1710 Options Database Keys: 1711 . -mat_no_unroll - uses code that does not unroll the loops in the 1712 block calculations (much slower) 1713 . -mat_block_size - size of the blocks to use 1714 1715 Level: intermediate 1716 1717 Notes: 1718 1719 Specify the preallocated storage with either nz or nnz (not both). 1720 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1721 allocation. For additional details, see the users manual chapter on 1722 matrices. 1723 1724 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1725 @*/ 1726 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1727 { 1728 int ierr; 1729 1730 PetscFunctionBegin; 1731 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1732 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1733 ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1734 PetscFunctionReturn(0); 1735 } 1736 1737 #undef __FUNCT__ 1738 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1739 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1740 { 1741 Mat C; 1742 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1743 int i,len,mbs = a->mbs,nz = a->s_nz,bs2 =a->bs2,ierr; 1744 1745 PetscFunctionBegin; 1746 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1747 1748 *B = 0; 1749 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1750 ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr); 1751 c = (Mat_SeqSBAIJ*)C->data; 1752 1753 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1754 C->preallocated = PETSC_TRUE; 1755 C->factor = A->factor; 1756 c->row = 0; 1757 c->icol = 0; 1758 c->saved_values = 0; 1759 c->keepzeroedrows = a->keepzeroedrows; 1760 C->assembled = PETSC_TRUE; 1761 1762 c->bs = a->bs; 1763 c->bs2 = a->bs2; 1764 c->mbs = a->mbs; 1765 c->nbs = a->nbs; 1766 1767 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1768 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1769 for (i=0; i<mbs; i++) { 1770 c->imax[i] = a->imax[i]; 1771 c->ilen[i] = a->ilen[i]; 1772 } 1773 1774 /* allocate the matrix space */ 1775 c->singlemalloc = PETSC_TRUE; 1776 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1777 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1778 c->j = (int*)(c->a + nz*bs2); 1779 c->i = c->j + nz; 1780 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1781 if (mbs > 0) { 1782 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1783 if (cpvalues == MAT_COPY_VALUES) { 1784 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1785 } else { 1786 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1787 } 1788 } 1789 1790 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1791 c->sorted = a->sorted; 1792 c->roworiented = a->roworiented; 1793 c->nonew = a->nonew; 1794 1795 if (a->diag) { 1796 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1797 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1798 for (i=0; i<mbs; i++) { 1799 c->diag[i] = a->diag[i]; 1800 } 1801 } else c->diag = 0; 1802 c->s_nz = a->s_nz; 1803 c->s_maxnz = a->s_maxnz; 1804 c->solve_work = 0; 1805 C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1806 c->mult_work = 0; 1807 *B = C; 1808 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1809 PetscFunctionReturn(0); 1810 } 1811 1812 EXTERN_C_BEGIN 1813 #undef __FUNCT__ 1814 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1815 int MatLoad_SeqSBAIJ(PetscViewer viewer,MatType type,Mat *A) 1816 { 1817 Mat_SeqSBAIJ *a; 1818 Mat B; 1819 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1820 int *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1821 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1822 int *masked,nmask,tmp,bs2,ishift; 1823 PetscScalar *aa; 1824 MPI_Comm comm = ((PetscObject)viewer)->comm; 1825 #if defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_MUMPS) 1826 PetscTruth flag; 1827 #endif 1828 1829 PetscFunctionBegin; 1830 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1831 bs2 = bs*bs; 1832 1833 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1834 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1835 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1836 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1837 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1838 M = header[1]; N = header[2]; nz = header[3]; 1839 1840 if (header[3] < 0) { 1841 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1842 } 1843 1844 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1845 1846 /* 1847 This code adds extra rows to make sure the number of rows is 1848 divisible by the blocksize 1849 */ 1850 mbs = M/bs; 1851 extra_rows = bs - M + bs*(mbs); 1852 if (extra_rows == bs) extra_rows = 0; 1853 else mbs++; 1854 if (extra_rows) { 1855 PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"); 1856 } 1857 1858 /* read in row lengths */ 1859 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1860 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1861 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1862 1863 /* read in column indices */ 1864 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1865 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1866 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1867 1868 /* loop over row lengths determining block row lengths */ 1869 ierr = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr); 1870 ierr = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1871 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1872 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1873 masked = mask + mbs; 1874 rowcount = 0; nzcount = 0; 1875 for (i=0; i<mbs; i++) { 1876 nmask = 0; 1877 for (j=0; j<bs; j++) { 1878 kmax = rowlengths[rowcount]; 1879 for (k=0; k<kmax; k++) { 1880 tmp = jj[nzcount++]/bs; /* block col. index */ 1881 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1882 } 1883 rowcount++; 1884 } 1885 s_browlengths[i] += nmask; 1886 1887 /* zero out the mask elements we set */ 1888 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1889 } 1890 1891 /* create our matrix */ 1892 ierr = MatCreateSeqSBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,s_browlengths,A);CHKERRQ(ierr); 1893 B = *A; 1894 a = (Mat_SeqSBAIJ*)B->data; 1895 1896 /* set matrix "i" values */ 1897 a->i[0] = 0; 1898 for (i=1; i<= mbs; i++) { 1899 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1900 a->ilen[i-1] = s_browlengths[i-1]; 1901 } 1902 a->s_nz = a->i[mbs]; 1903 1904 /* read in nonzero values */ 1905 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1906 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1907 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1908 1909 /* set "a" and "j" values into matrix */ 1910 nzcount = 0; jcount = 0; 1911 for (i=0; i<mbs; i++) { 1912 nzcountb = nzcount; 1913 nmask = 0; 1914 for (j=0; j<bs; j++) { 1915 kmax = rowlengths[i*bs+j]; 1916 for (k=0; k<kmax; k++) { 1917 tmp = jj[nzcount++]/bs; /* block col. index */ 1918 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1919 } 1920 } 1921 /* sort the masked values */ 1922 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1923 1924 /* set "j" values into matrix */ 1925 maskcount = 1; 1926 for (j=0; j<nmask; j++) { 1927 a->j[jcount++] = masked[j]; 1928 mask[masked[j]] = maskcount++; 1929 } 1930 1931 /* set "a" values into matrix */ 1932 ishift = bs2*a->i[i]; 1933 for (j=0; j<bs; j++) { 1934 kmax = rowlengths[i*bs+j]; 1935 for (k=0; k<kmax; k++) { 1936 tmp = jj[nzcountb]/bs ; /* block col. index */ 1937 if (tmp >= i){ 1938 block = mask[tmp] - 1; 1939 point = jj[nzcountb] - bs*tmp; 1940 idx = ishift + bs2*block + j + bs*point; 1941 a->a[idx] = aa[nzcountb]; 1942 } 1943 nzcountb++; 1944 } 1945 } 1946 /* zero out the mask elements we set */ 1947 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1948 } 1949 if (jcount != a->s_nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1950 1951 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1952 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1953 ierr = PetscFree(aa);CHKERRQ(ierr); 1954 ierr = PetscFree(jj);CHKERRQ(ierr); 1955 ierr = PetscFree(mask);CHKERRQ(ierr); 1956 1957 B->assembled = PETSC_TRUE; 1958 #if defined(PETSC_HAVE_SPOOLES) 1959 ierr = PetscOptionsHasName(B->prefix,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr); 1960 if (flag) { ierr = MatUseSpooles_SeqSBAIJ(B);CHKERRQ(ierr); } 1961 #endif 1962 #if defined(PETSC_HAVE_MUMPS) 1963 ierr = PetscOptionsHasName(B->prefix,"-mat_sbaij_mumps",&flag);CHKERRQ(ierr); 1964 if (flag) { ierr = MatUseMUMPS_MPIAIJ(B);CHKERRQ(ierr); } 1965 #endif 1966 ierr = MatView_Private(B);CHKERRQ(ierr); 1967 PetscFunctionReturn(0); 1968 } 1969 EXTERN_C_END 1970 1971 #undef __FUNCT__ 1972 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1973 int MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1974 { 1975 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1976 MatScalar *aa=a->a,*v,*v1; 1977 PetscScalar *x,*b,*t,sum,d; 1978 int m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j,ierr; 1979 int nz,nz1,*vj,*vj1,i; 1980 1981 PetscFunctionBegin; 1982 its = its*lits; 1983 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 1984 1985 if (bs > 1) 1986 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1987 1988 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1989 if (xx != bb) { 1990 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1991 } else { 1992 b = x; 1993 } 1994 1995 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 1996 1997 if (flag & SOR_ZERO_INITIAL_GUESS) { 1998 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1999 for (i=0; i<m; i++) 2000 t[i] = b[i]; 2001 2002 for (i=0; i<m; i++){ 2003 d = *(aa + ai[i]); /* diag[i] */ 2004 v = aa + ai[i] + 1; 2005 vj = aj + ai[i] + 1; 2006 nz = ai[i+1] - ai[i] - 1; 2007 x[i] = omega*t[i]/d; 2008 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 2009 PetscLogFlops(2*nz-1); 2010 } 2011 } 2012 2013 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2014 for (i=0; i<m; i++) 2015 t[i] = b[i]; 2016 2017 for (i=0; i<m-1; i++){ /* update rhs */ 2018 v = aa + ai[i] + 1; 2019 vj = aj + ai[i] + 1; 2020 nz = ai[i+1] - ai[i] - 1; 2021 while (nz--) t[*vj++] -= x[i]*(*v++); 2022 PetscLogFlops(2*nz-1); 2023 } 2024 for (i=m-1; i>=0; i--){ 2025 d = *(aa + ai[i]); 2026 v = aa + ai[i] + 1; 2027 vj = aj + ai[i] + 1; 2028 nz = ai[i+1] - ai[i] - 1; 2029 sum = t[i]; 2030 while (nz--) sum -= x[*vj++]*(*v++); 2031 PetscLogFlops(2*nz-1); 2032 x[i] = (1-omega)*x[i] + omega*sum/d; 2033 } 2034 } 2035 its--; 2036 } 2037 2038 while (its--) { 2039 /* 2040 forward sweep: 2041 for i=0,...,m-1: 2042 sum[i] = (b[i] - U(i,:)x )/d[i]; 2043 x[i] = (1-omega)x[i] + omega*sum[i]; 2044 b = b - x[i]*U^T(i,:); 2045 2046 */ 2047 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2048 for (i=0; i<m; i++) 2049 t[i] = b[i]; 2050 2051 for (i=0; i<m; i++){ 2052 d = *(aa + ai[i]); /* diag[i] */ 2053 v = aa + ai[i] + 1; v1=v; 2054 vj = aj + ai[i] + 1; vj1=vj; 2055 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2056 sum = t[i]; 2057 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2058 x[i] = (1-omega)*x[i] + omega*sum/d; 2059 while (nz--) t[*vj++] -= x[i]*(*v++); 2060 PetscLogFlops(4*nz-2); 2061 } 2062 } 2063 2064 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2065 /* 2066 backward sweep: 2067 b = b - x[i]*U^T(i,:), i=0,...,n-2 2068 for i=m-1,...,0: 2069 sum[i] = (b[i] - U(i,:)x )/d[i]; 2070 x[i] = (1-omega)x[i] + omega*sum[i]; 2071 */ 2072 for (i=0; i<m; i++) 2073 t[i] = b[i]; 2074 2075 for (i=0; i<m-1; i++){ /* update rhs */ 2076 v = aa + ai[i] + 1; 2077 vj = aj + ai[i] + 1; 2078 nz = ai[i+1] - ai[i] - 1; 2079 while (nz--) t[*vj++] -= x[i]*(*v++); 2080 PetscLogFlops(2*nz-1); 2081 } 2082 for (i=m-1; i>=0; i--){ 2083 d = *(aa + ai[i]); 2084 v = aa + ai[i] + 1; 2085 vj = aj + ai[i] + 1; 2086 nz = ai[i+1] - ai[i] - 1; 2087 sum = t[i]; 2088 while (nz--) sum -= x[*vj++]*(*v++); 2089 PetscLogFlops(2*nz-1); 2090 x[i] = (1-omega)*x[i] + omega*sum/d; 2091 } 2092 } 2093 } 2094 2095 ierr = PetscFree(t); CHKERRQ(ierr); 2096 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2097 if (bb != xx) { 2098 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2099 } 2100 2101 PetscFunctionReturn(0); 2102 } 2103 2104 2105 2106 2107 2108 2109