1 /*$Id: sbaij.c,v 1.62 2001/08/07 03:03:01 balay Exp $*/ 2 3 /* 4 Defines the basic matrix operations for the SBAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 #include "src/mat/impls/sbaij/seq/sbaij.h" 11 12 #define CHUNKSIZE 10 13 14 /* 15 Checks for missing diagonals 16 */ 17 #undef __FUNCT__ 18 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 19 int MatMissingDiagonal_SeqSBAIJ(Mat A) 20 { 21 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 22 int *diag,*jj = a->j,i,ierr; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 26 diag = a->diag; 27 for (i=0; i<a->mbs; i++) { 28 if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i); 29 } 30 PetscFunctionReturn(0); 31 } 32 33 #undef __FUNCT__ 34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 35 int MatMarkDiagonal_SeqSBAIJ(Mat A) 36 { 37 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 38 int i,mbs = a->mbs,ierr; 39 40 PetscFunctionBegin; 41 if (a->diag) PetscFunctionReturn(0); 42 43 ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr); 44 PetscLogObjectMemory(A,(mbs+1)*sizeof(int)); 45 for (i=0; i<mbs; i++) a->diag[i] = a->i[i]; 46 PetscFunctionReturn(0); 47 } 48 49 /* extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); */ 50 51 #undef __FUNCT__ 52 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 53 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 54 { 55 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 56 57 PetscFunctionBegin; 58 59 if (ia) { 60 SETERRQ(1,"Function not yet written for SBAIJ format, only supports natural ordering"); 61 } 62 63 #ifdef NEW 64 int ierr; 65 66 if (!ia) PetscFunctionReturn(0); 67 68 /* 69 if (symmetric) { 70 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 71 } else if (oshift == 1) { 72 int nz = a->i[n]; 73 for (i=0; i<nz; i++) a->j[i]++; 74 for (i=0; i<n+1; i++) a->i[i]++; 75 *ia = a->i; *ja = a->j; 76 } else { 77 */ 78 *ia = a->i; *ja = a->j; 79 /* } */ 80 #endif 81 *nn = a->mbs; 82 PetscFunctionReturn(0); 83 } 84 85 #undef __FUNCT__ 86 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 87 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 88 { 89 PetscFunctionBegin; 90 if (!ia) PetscFunctionReturn(0); 91 SETERRQ(1,"Function not yet written for SBAIJ format, only supports natural ordering"); 92 /* PetscFunctionReturn(0); */ 93 } 94 95 #undef __FUNCT__ 96 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ" 97 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs) 98 { 99 Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data; 100 101 PetscFunctionBegin; 102 *bs = sbaij->bs; 103 PetscFunctionReturn(0); 104 } 105 106 #undef __FUNCT__ 107 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 108 int MatDestroy_SeqSBAIJ(Mat A) 109 { 110 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 111 int ierr; 112 113 PetscFunctionBegin; 114 #if defined(PETSC_USE_LOG) 115 PetscLogObjectState((PetscObject)A,"Rows=%d, s_NZ=%d",A->m,a->s_nz); 116 #endif 117 ierr = PetscFree(a->a);CHKERRQ(ierr); 118 if (!a->singlemalloc) { 119 ierr = PetscFree(a->i);CHKERRQ(ierr); 120 ierr = PetscFree(a->j);CHKERRQ(ierr); 121 } 122 if (a->row) { 123 ierr = ISDestroy(a->row);CHKERRQ(ierr); 124 } 125 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 126 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 127 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 128 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 129 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 130 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 131 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 132 133 if (a->inew){ 134 ierr = PetscFree(a->inew);CHKERRQ(ierr); 135 a->inew = 0; 136 } 137 ierr = PetscFree(a);CHKERRQ(ierr); 138 PetscFunctionReturn(0); 139 } 140 141 #undef __FUNCT__ 142 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 143 int MatSetOption_SeqSBAIJ(Mat A,MatOption op) 144 { 145 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 146 147 PetscFunctionBegin; 148 switch (op) { 149 case MAT_ROW_ORIENTED: 150 a->roworiented = PETSC_TRUE; 151 break; 152 case MAT_COLUMN_ORIENTED: 153 a->roworiented = PETSC_FALSE; 154 break; 155 case MAT_COLUMNS_SORTED: 156 a->sorted = PETSC_TRUE; 157 break; 158 case MAT_COLUMNS_UNSORTED: 159 a->sorted = PETSC_FALSE; 160 break; 161 case MAT_KEEP_ZEROED_ROWS: 162 a->keepzeroedrows = PETSC_TRUE; 163 break; 164 case MAT_NO_NEW_NONZERO_LOCATIONS: 165 a->nonew = 1; 166 break; 167 case MAT_NEW_NONZERO_LOCATION_ERR: 168 a->nonew = -1; 169 break; 170 case MAT_NEW_NONZERO_ALLOCATION_ERR: 171 a->nonew = -2; 172 break; 173 case MAT_YES_NEW_NONZERO_LOCATIONS: 174 a->nonew = 0; 175 break; 176 case MAT_ROWS_SORTED: 177 case MAT_ROWS_UNSORTED: 178 case MAT_YES_NEW_DIAGONALS: 179 case MAT_IGNORE_OFF_PROC_ENTRIES: 180 case MAT_USE_HASH_TABLE: 181 case MAT_USE_SINGLE_PRECISION_SOLVES: 182 PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n"); 183 break; 184 case MAT_NO_NEW_DIAGONALS: 185 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 186 default: 187 SETERRQ(PETSC_ERR_SUP,"unknown option"); 188 } 189 PetscFunctionReturn(0); 190 } 191 192 #undef __FUNCT__ 193 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 194 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v) 195 { 196 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 197 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr; 198 MatScalar *aa,*aa_i; 199 PetscScalar *v_i; 200 201 PetscFunctionBegin; 202 bs = a->bs; 203 ai = a->i; 204 aj = a->j; 205 aa = a->a; 206 bs2 = a->bs2; 207 208 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 209 210 bn = row/bs; /* Block number */ 211 bp = row % bs; /* Block position */ 212 M = ai[bn+1] - ai[bn]; 213 *ncols = bs*M; 214 215 if (v) { 216 *v = 0; 217 if (*ncols) { 218 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 219 for (i=0; i<M; i++) { /* for each block in the block row */ 220 v_i = *v + i*bs; 221 aa_i = aa + bs2*(ai[bn] + i); 222 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 223 } 224 } 225 } 226 227 if (cols) { 228 *cols = 0; 229 if (*ncols) { 230 ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr); 231 for (i=0; i<M; i++) { /* for each block in the block row */ 232 cols_i = *cols + i*bs; 233 itmp = bs*aj[ai[bn] + i]; 234 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 235 } 236 } 237 } 238 239 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 240 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 241 #ifdef column_search 242 v_i = *v + M*bs; 243 cols_i = *cols + M*bs; 244 for (i=0; i<bn; i++){ /* for each block row */ 245 M = ai[i+1] - ai[i]; 246 for (j=0; j<M; j++){ 247 itmp = aj[ai[i] + j]; /* block column value */ 248 if (itmp == bn){ 249 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 250 for (k=0; k<bs; k++) { 251 *cols_i++ = i*bs+k; 252 *v_i++ = aa_i[k]; 253 } 254 *ncols += bs; 255 break; 256 } 257 } 258 } 259 #endif 260 261 PetscFunctionReturn(0); 262 } 263 264 #undef __FUNCT__ 265 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 266 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 267 { 268 int ierr; 269 270 PetscFunctionBegin; 271 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 272 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 273 PetscFunctionReturn(0); 274 } 275 276 #undef __FUNCT__ 277 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 278 int MatTranspose_SeqSBAIJ(Mat A,Mat *B) 279 { 280 int ierr; 281 PetscFunctionBegin; 282 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 283 PetscFunctionReturn(0); 284 } 285 286 #undef __FUNCT__ 287 #define __FUNCT__ "MatView_SeqSBAIJ_Binary" 288 static int MatView_SeqSBAIJ_Binary(Mat A,PetscViewer viewer) 289 { 290 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 291 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 292 PetscScalar *aa; 293 FILE *file; 294 295 PetscFunctionBegin; 296 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 297 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 298 col_lens[0] = MAT_FILE_COOKIE; 299 300 col_lens[1] = A->m; 301 col_lens[2] = A->m; 302 col_lens[3] = a->s_nz*bs2; 303 304 /* store lengths of each row and write (including header) to file */ 305 count = 0; 306 for (i=0; i<a->mbs; i++) { 307 for (j=0; j<bs; j++) { 308 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 309 } 310 } 311 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 312 ierr = PetscFree(col_lens);CHKERRQ(ierr); 313 314 /* store column indices (zero start index) */ 315 ierr = PetscMalloc((a->s_nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 316 count = 0; 317 for (i=0; i<a->mbs; i++) { 318 for (j=0; j<bs; j++) { 319 for (k=a->i[i]; k<a->i[i+1]; k++) { 320 for (l=0; l<bs; l++) { 321 jj[count++] = bs*a->j[k] + l; 322 } 323 } 324 } 325 } 326 ierr = PetscBinaryWrite(fd,jj,bs2*a->s_nz,PETSC_INT,0);CHKERRQ(ierr); 327 ierr = PetscFree(jj);CHKERRQ(ierr); 328 329 /* store nonzero values */ 330 ierr = PetscMalloc((a->s_nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 331 count = 0; 332 for (i=0; i<a->mbs; i++) { 333 for (j=0; j<bs; j++) { 334 for (k=a->i[i]; k<a->i[i+1]; k++) { 335 for (l=0; l<bs; l++) { 336 aa[count++] = a->a[bs2*k + l*bs + j]; 337 } 338 } 339 } 340 } 341 ierr = PetscBinaryWrite(fd,aa,bs2*a->s_nz,PETSC_SCALAR,0);CHKERRQ(ierr); 342 ierr = PetscFree(aa);CHKERRQ(ierr); 343 344 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 345 if (file) { 346 fprintf(file,"-matload_block_size %d\n",a->bs); 347 } 348 PetscFunctionReturn(0); 349 } 350 351 #undef __FUNCT__ 352 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 353 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 354 { 355 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 356 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 357 char *name; 358 PetscViewerFormat format; 359 360 PetscFunctionBegin; 361 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 362 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 363 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 364 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 365 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 366 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 367 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 368 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 369 for (i=0; i<a->mbs; i++) { 370 for (j=0; j<bs; j++) { 371 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 372 for (k=a->i[i]; k<a->i[i+1]; k++) { 373 for (l=0; l<bs; l++) { 374 #if defined(PETSC_USE_COMPLEX) 375 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 376 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 377 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 378 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 379 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 380 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 381 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 382 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 383 } 384 #else 385 if (a->a[bs2*k + l*bs + j] != 0.0) { 386 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 387 } 388 #endif 389 } 390 } 391 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 392 } 393 } 394 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 395 } else { 396 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 397 for (i=0; i<a->mbs; i++) { 398 for (j=0; j<bs; j++) { 399 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 400 for (k=a->i[i]; k<a->i[i+1]; k++) { 401 for (l=0; l<bs; l++) { 402 #if defined(PETSC_USE_COMPLEX) 403 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 404 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 405 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 406 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 407 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 408 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 409 } else { 410 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 411 } 412 #else 413 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 414 #endif 415 } 416 } 417 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 418 } 419 } 420 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 421 } 422 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 423 PetscFunctionReturn(0); 424 } 425 426 #undef __FUNCT__ 427 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 428 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 429 { 430 Mat A = (Mat) Aa; 431 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 432 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank; 433 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 434 MatScalar *aa; 435 MPI_Comm comm; 436 PetscViewer viewer; 437 438 PetscFunctionBegin; 439 /* 440 This is nasty. If this is called from an originally parallel matrix 441 then all processes call this,but only the first has the matrix so the 442 rest should return immediately. 443 */ 444 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 445 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 446 if (rank) PetscFunctionReturn(0); 447 448 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 449 450 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 451 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 452 453 /* loop over matrix elements drawing boxes */ 454 color = PETSC_DRAW_BLUE; 455 for (i=0,row=0; i<mbs; i++,row+=bs) { 456 for (j=a->i[i]; j<a->i[i+1]; j++) { 457 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 458 x_l = a->j[j]*bs; x_r = x_l + 1.0; 459 aa = a->a + j*bs2; 460 for (k=0; k<bs; k++) { 461 for (l=0; l<bs; l++) { 462 if (PetscRealPart(*aa++) >= 0.) continue; 463 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 464 } 465 } 466 } 467 } 468 color = PETSC_DRAW_CYAN; 469 for (i=0,row=0; i<mbs; i++,row+=bs) { 470 for (j=a->i[i]; j<a->i[i+1]; j++) { 471 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 472 x_l = a->j[j]*bs; x_r = x_l + 1.0; 473 aa = a->a + j*bs2; 474 for (k=0; k<bs; k++) { 475 for (l=0; l<bs; l++) { 476 if (PetscRealPart(*aa++) != 0.) continue; 477 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 478 } 479 } 480 } 481 } 482 483 color = PETSC_DRAW_RED; 484 for (i=0,row=0; i<mbs; i++,row+=bs) { 485 for (j=a->i[i]; j<a->i[i+1]; j++) { 486 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 487 x_l = a->j[j]*bs; x_r = x_l + 1.0; 488 aa = a->a + j*bs2; 489 for (k=0; k<bs; k++) { 490 for (l=0; l<bs; l++) { 491 if (PetscRealPart(*aa++) <= 0.) continue; 492 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 493 } 494 } 495 } 496 } 497 PetscFunctionReturn(0); 498 } 499 500 #undef __FUNCT__ 501 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 502 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 503 { 504 int ierr; 505 PetscReal xl,yl,xr,yr,w,h; 506 PetscDraw draw; 507 PetscTruth isnull; 508 509 PetscFunctionBegin; 510 511 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 512 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 513 514 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 515 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 516 xr += w; yr += h; xl = -w; yl = -h; 517 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 518 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 519 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 520 PetscFunctionReturn(0); 521 } 522 523 #undef __FUNCT__ 524 #define __FUNCT__ "MatView_SeqSBAIJ" 525 int MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 526 { 527 int ierr; 528 PetscTruth issocket,isascii,isbinary,isdraw; 529 530 PetscFunctionBegin; 531 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 532 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 533 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 534 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 535 if (issocket) { 536 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 537 } else if (isascii){ 538 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 539 } else if (isbinary) { 540 ierr = MatView_SeqSBAIJ_Binary(A,viewer);CHKERRQ(ierr); 541 } else if (isdraw) { 542 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 543 } else { 544 SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 545 } 546 PetscFunctionReturn(0); 547 } 548 549 550 #undef __FUNCT__ 551 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 552 int MatGetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v) 553 { 554 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 555 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 556 int *ai = a->i,*ailen = a->ilen; 557 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 558 MatScalar *ap,*aa = a->a,zero = 0.0; 559 560 PetscFunctionBegin; 561 for (k=0; k<m; k++) { /* loop over rows */ 562 row = im[k]; brow = row/bs; 563 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 564 if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 565 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 566 nrow = ailen[brow]; 567 for (l=0; l<n; l++) { /* loop over columns */ 568 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 569 if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 570 col = in[l] ; 571 bcol = col/bs; 572 cidx = col%bs; 573 ridx = row%bs; 574 high = nrow; 575 low = 0; /* assume unsorted */ 576 while (high-low > 5) { 577 t = (low+high)/2; 578 if (rp[t] > bcol) high = t; 579 else low = t; 580 } 581 for (i=low; i<high; i++) { 582 if (rp[i] > bcol) break; 583 if (rp[i] == bcol) { 584 *v++ = ap[bs2*i+bs*cidx+ridx]; 585 goto finished; 586 } 587 } 588 *v++ = zero; 589 finished:; 590 } 591 } 592 PetscFunctionReturn(0); 593 } 594 595 596 #undef __FUNCT__ 597 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 598 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 599 { 600 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 601 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 602 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 603 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 604 PetscTruth roworiented=a->roworiented; 605 MatScalar *value = v,*ap,*aa = a->a,*bap; 606 607 PetscFunctionBegin; 608 if (roworiented) { 609 stepval = (n-1)*bs; 610 } else { 611 stepval = (m-1)*bs; 612 } 613 for (k=0; k<m; k++) { /* loop over added rows */ 614 row = im[k]; 615 if (row < 0) continue; 616 #if defined(PETSC_USE_BOPT_g) 617 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 618 #endif 619 rp = aj + ai[row]; 620 ap = aa + bs2*ai[row]; 621 rmax = imax[row]; 622 nrow = ailen[row]; 623 low = 0; 624 for (l=0; l<n; l++) { /* loop over added columns */ 625 if (in[l] < 0) continue; 626 #if defined(PETSC_USE_BOPT_g) 627 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 628 #endif 629 col = in[l]; 630 if (col < row) continue; /* ignore lower triangular block */ 631 if (roworiented) { 632 value = v + k*(stepval+bs)*bs + l*bs; 633 } else { 634 value = v + l*(stepval+bs)*bs + k*bs; 635 } 636 if (!sorted) low = 0; high = nrow; 637 while (high-low > 7) { 638 t = (low+high)/2; 639 if (rp[t] > col) high = t; 640 else low = t; 641 } 642 for (i=low; i<high; i++) { 643 if (rp[i] > col) break; 644 if (rp[i] == col) { 645 bap = ap + bs2*i; 646 if (roworiented) { 647 if (is == ADD_VALUES) { 648 for (ii=0; ii<bs; ii++,value+=stepval) { 649 for (jj=ii; jj<bs2; jj+=bs) { 650 bap[jj] += *value++; 651 } 652 } 653 } else { 654 for (ii=0; ii<bs; ii++,value+=stepval) { 655 for (jj=ii; jj<bs2; jj+=bs) { 656 bap[jj] = *value++; 657 } 658 } 659 } 660 } else { 661 if (is == ADD_VALUES) { 662 for (ii=0; ii<bs; ii++,value+=stepval) { 663 for (jj=0; jj<bs; jj++) { 664 *bap++ += *value++; 665 } 666 } 667 } else { 668 for (ii=0; ii<bs; ii++,value+=stepval) { 669 for (jj=0; jj<bs; jj++) { 670 *bap++ = *value++; 671 } 672 } 673 } 674 } 675 goto noinsert2; 676 } 677 } 678 if (nonew == 1) goto noinsert2; 679 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 680 if (nrow >= rmax) { 681 /* there is no extra room in row, therefore enlarge */ 682 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 683 MatScalar *new_a; 684 685 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 686 687 /* malloc new storage space */ 688 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 689 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 690 new_j = (int*)(new_a + bs2*new_nz); 691 new_i = new_j + new_nz; 692 693 /* copy over old data into new slots */ 694 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 695 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 696 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 697 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 698 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 699 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 700 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 701 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 702 /* free up old matrix storage */ 703 ierr = PetscFree(a->a);CHKERRQ(ierr); 704 if (!a->singlemalloc) { 705 ierr = PetscFree(a->i);CHKERRQ(ierr); 706 ierr = PetscFree(a->j);CHKERRQ(ierr); 707 } 708 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 709 a->singlemalloc = PETSC_TRUE; 710 711 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 712 rmax = imax[row] = imax[row] + CHUNKSIZE; 713 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 714 a->s_maxnz += bs2*CHUNKSIZE; 715 a->reallocs++; 716 a->s_nz++; 717 } 718 N = nrow++ - 1; 719 /* shift up all the later entries in this row */ 720 for (ii=N; ii>=i; ii--) { 721 rp[ii+1] = rp[ii]; 722 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 723 } 724 if (N >= i) { 725 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 726 } 727 rp[i] = col; 728 bap = ap + bs2*i; 729 if (roworiented) { 730 for (ii=0; ii<bs; ii++,value+=stepval) { 731 for (jj=ii; jj<bs2; jj+=bs) { 732 bap[jj] = *value++; 733 } 734 } 735 } else { 736 for (ii=0; ii<bs; ii++,value+=stepval) { 737 for (jj=0; jj<bs; jj++) { 738 *bap++ = *value++; 739 } 740 } 741 } 742 noinsert2:; 743 low = i; 744 } 745 ailen[row] = nrow; 746 } 747 PetscFunctionReturn(0); 748 } 749 750 #undef __FUNCT__ 751 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 752 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 753 { 754 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 755 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 756 int m = A->m,*ip,N,*ailen = a->ilen; 757 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 758 MatScalar *aa = a->a,*ap; 759 #if defined(PETSC_HAVE_SPOOLES) 760 PetscTruth flag; 761 #endif 762 763 PetscFunctionBegin; 764 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 765 766 if (m) rmax = ailen[0]; 767 for (i=1; i<mbs; i++) { 768 /* move each row back by the amount of empty slots (fshift) before it*/ 769 fshift += imax[i-1] - ailen[i-1]; 770 rmax = PetscMax(rmax,ailen[i]); 771 if (fshift) { 772 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 773 N = ailen[i]; 774 for (j=0; j<N; j++) { 775 ip[j-fshift] = ip[j]; 776 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 777 } 778 } 779 ai[i] = ai[i-1] + ailen[i-1]; 780 } 781 if (mbs) { 782 fshift += imax[mbs-1] - ailen[mbs-1]; 783 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 784 } 785 /* reset ilen and imax for each row */ 786 for (i=0; i<mbs; i++) { 787 ailen[i] = imax[i] = ai[i+1] - ai[i]; 788 } 789 a->s_nz = ai[mbs]; 790 791 /* diagonals may have moved, reset it */ 792 if (a->diag) { 793 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr); 794 } 795 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 796 m,A->m,a->bs,fshift*bs2,a->s_nz*bs2); 797 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n", 798 a->reallocs); 799 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 800 a->reallocs = 0; 801 A->info.nz_unneeded = (PetscReal)fshift*bs2; 802 803 #if defined(PETSC_HAVE_SPOOLES) 804 ierr = PetscOptionsHasName(A->prefix,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr); 805 if (flag) { ierr = MatUseSpooles_SeqSBAIJ(A);CHKERRQ(ierr); } 806 #endif 807 808 PetscFunctionReturn(0); 809 } 810 811 /* 812 This function returns an array of flags which indicate the locations of contiguous 813 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 814 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 815 Assume: sizes should be long enough to hold all the values. 816 */ 817 #undef __FUNCT__ 818 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 819 static int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 820 { 821 int i,j,k,row; 822 PetscTruth flg; 823 824 PetscFunctionBegin; 825 for (i=0,j=0; i<n; j++) { 826 row = idx[i]; 827 if (row%bs!=0) { /* Not the begining of a block */ 828 sizes[j] = 1; 829 i++; 830 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 831 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 832 i++; 833 } else { /* Begining of the block, so check if the complete block exists */ 834 flg = PETSC_TRUE; 835 for (k=1; k<bs; k++) { 836 if (row+k != idx[i+k]) { /* break in the block */ 837 flg = PETSC_FALSE; 838 break; 839 } 840 } 841 if (flg == PETSC_TRUE) { /* No break in the bs */ 842 sizes[j] = bs; 843 i+= bs; 844 } else { 845 sizes[j] = 1; 846 i++; 847 } 848 } 849 } 850 *bs_max = j; 851 PetscFunctionReturn(0); 852 } 853 854 #undef __FUNCT__ 855 #define __FUNCT__ "MatZeroRows_SeqSBAIJ" 856 int MatZeroRows_SeqSBAIJ(Mat A,IS is,PetscScalar *diag) 857 { 858 Mat_SeqSBAIJ *sbaij=(Mat_SeqSBAIJ*)A->data; 859 int ierr,i,j,k,count,is_n,*is_idx,*rows; 860 int bs=sbaij->bs,bs2=sbaij->bs2,*sizes,row,bs_max; 861 PetscScalar zero = 0.0; 862 MatScalar *aa; 863 864 PetscFunctionBegin; 865 /* Make a copy of the IS and sort it */ 866 ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 867 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 868 869 /* allocate memory for rows,sizes */ 870 ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 871 sizes = rows + is_n; 872 873 /* initialize copy IS values to rows, and sort them */ 874 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 875 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 876 if (sbaij->keepzeroedrows) { /* do not change nonzero structure */ 877 for (i=0; i<is_n; i++) { sizes[i] = 1; } /* sizes: size of blocks, = 1 or bs */ 878 bs_max = is_n; /* bs_max: num. of contiguous block row in the row */ 879 } else { 880 ierr = MatZeroRows_SeqSBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 881 } 882 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 883 884 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 885 row = rows[j]; /* row to be zeroed */ 886 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 887 count = (sbaij->i[row/bs +1] - sbaij->i[row/bs])*bs; /* num. of elements in the row */ 888 aa = sbaij->a + sbaij->i[row/bs]*bs2 + (row%bs); 889 if (sizes[i] == bs && !sbaij->keepzeroedrows) { 890 if (diag) { 891 if (sbaij->ilen[row/bs] > 0) { 892 sbaij->ilen[row/bs] = 1; 893 sbaij->j[sbaij->i[row/bs]] = row/bs; 894 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 895 } 896 /* Now insert all the diagoanl values for this bs */ 897 for (k=0; k<bs; k++) { 898 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 899 } 900 } else { /* (!diag) */ 901 sbaij->ilen[row/bs] = 0; 902 } /* end (!diag) */ 903 } else { /* (sizes[i] != bs), broken block */ 904 #if defined (PETSC_USE_DEBUG) 905 if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 906 #endif 907 for (k=0; k<count; k++) { 908 aa[0] = zero; 909 aa+=bs; 910 } 911 if (diag) { 912 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 913 } 914 } 915 } 916 917 ierr = PetscFree(rows);CHKERRQ(ierr); 918 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 919 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 920 PetscFunctionReturn(0); 921 } 922 923 /* Only add/insert a(i,j) with i<=j (blocks). 924 Any a(i,j) with i>j input by user is ingored. 925 */ 926 927 #undef __FUNCT__ 928 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 929 int MatSetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 930 { 931 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 932 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 933 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 934 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 935 int ridx,cidx,bs2=a->bs2,ierr; 936 MatScalar *ap,value,*aa=a->a,*bap; 937 938 PetscFunctionBegin; 939 940 for (k=0; k<m; k++) { /* loop over added rows */ 941 row = im[k]; /* row number */ 942 brow = row/bs; /* block row number */ 943 if (row < 0) continue; 944 #if defined(PETSC_USE_BOPT_g) 945 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m); 946 #endif 947 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 948 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 949 rmax = imax[brow]; /* maximum space allocated for this row */ 950 nrow = ailen[brow]; /* actual length of this row */ 951 low = 0; 952 953 for (l=0; l<n; l++) { /* loop over added columns */ 954 if (in[l] < 0) continue; 955 #if defined(PETSC_USE_BOPT_g) 956 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m); 957 #endif 958 col = in[l]; 959 bcol = col/bs; /* block col number */ 960 961 if (brow <= bcol){ 962 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 963 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 964 /* element value a(k,l) */ 965 if (roworiented) { 966 value = v[l + k*n]; 967 } else { 968 value = v[k + l*m]; 969 } 970 971 /* move pointer bap to a(k,l) quickly and add/insert value */ 972 if (!sorted) low = 0; high = nrow; 973 while (high-low > 7) { 974 t = (low+high)/2; 975 if (rp[t] > bcol) high = t; 976 else low = t; 977 } 978 for (i=low; i<high; i++) { 979 /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */ 980 if (rp[i] > bcol) break; 981 if (rp[i] == bcol) { 982 bap = ap + bs2*i + bs*cidx + ridx; 983 if (is == ADD_VALUES) *bap += value; 984 else *bap = value; 985 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 986 if (brow == bcol && ridx < cidx){ 987 bap = ap + bs2*i + bs*ridx + cidx; 988 if (is == ADD_VALUES) *bap += value; 989 else *bap = value; 990 } 991 goto noinsert1; 992 } 993 } 994 995 if (nonew == 1) goto noinsert1; 996 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 997 if (nrow >= rmax) { 998 /* there is no extra room in row, therefore enlarge */ 999 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1000 MatScalar *new_a; 1001 1002 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1003 1004 /* Malloc new storage space */ 1005 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1006 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1007 new_j = (int*)(new_a + bs2*new_nz); 1008 new_i = new_j + new_nz; 1009 1010 /* copy over old data into new slots */ 1011 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1012 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1013 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1014 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1015 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1016 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1017 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1018 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1019 /* free up old matrix storage */ 1020 ierr = PetscFree(a->a);CHKERRQ(ierr); 1021 if (!a->singlemalloc) { 1022 ierr = PetscFree(a->i);CHKERRQ(ierr); 1023 ierr = PetscFree(a->j);CHKERRQ(ierr); 1024 } 1025 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1026 a->singlemalloc = PETSC_TRUE; 1027 1028 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1029 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1030 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1031 a->s_maxnz += bs2*CHUNKSIZE; 1032 a->reallocs++; 1033 a->s_nz++; 1034 } 1035 1036 N = nrow++ - 1; 1037 /* shift up all the later entries in this row */ 1038 for (ii=N; ii>=i; ii--) { 1039 rp[ii+1] = rp[ii]; 1040 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1041 } 1042 if (N>=i) { 1043 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1044 } 1045 rp[i] = bcol; 1046 ap[bs2*i + bs*cidx + ridx] = value; 1047 noinsert1:; 1048 low = i; 1049 /* } */ 1050 } 1051 } /* end of if .. if.. */ 1052 } /* end of loop over added columns */ 1053 ailen[brow] = nrow; 1054 } /* end of loop over added rows */ 1055 1056 PetscFunctionReturn(0); 1057 } 1058 1059 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,PetscReal,Mat*); 1060 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,PetscReal); 1061 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS*,int); 1062 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 1063 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**); 1064 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec); 1065 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec); 1066 extern int MatScale_SeqSBAIJ(PetscScalar*,Mat); 1067 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 1068 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 1069 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec); 1070 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 1071 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 1072 extern int MatZeroEntries_SeqSBAIJ(Mat); 1073 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec); 1074 1075 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 1076 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 1077 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 1078 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 1079 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 1080 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 1081 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 1082 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 1083 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec); 1084 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec); 1085 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec); 1086 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec); 1087 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec); 1088 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec); 1089 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec); 1090 1091 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 1092 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 1093 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 1094 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 1095 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 1096 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 1097 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 1098 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 1099 1100 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*); 1101 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*); 1102 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*); 1103 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*); 1104 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*); 1105 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*); 1106 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*); 1107 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*); 1108 1109 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 1110 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 1111 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 1112 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 1113 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 1114 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 1115 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 1116 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 1117 1118 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 1119 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 1120 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 1121 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 1122 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 1123 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 1124 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 1125 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 1126 1127 #ifdef HAVE_MatICCFactor 1128 /* modefied from MatILUFactor_SeqSBAIJ, needs further work! */ 1129 #undef __FUNCT__ 1130 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 1131 int MatICCFactor_SeqSBAIJ(Mat inA,IS row,PetscReal fill,int level) 1132 { 1133 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 1134 Mat outA; 1135 int ierr; 1136 PetscTruth row_identity,col_identity; 1137 1138 PetscFunctionBegin; 1139 /* 1140 if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1141 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1142 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1143 if (!row_identity || !col_identity) { 1144 SETERRQ(1,"Row and column permutations must be identity for in-place ICC"); 1145 } 1146 */ 1147 1148 outA = inA; 1149 inA->factor = FACTOR_CHOLESKY; 1150 1151 if (!a->diag) { 1152 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 1153 } 1154 /* 1155 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1156 for ILU(0) factorization with natural ordering 1157 */ 1158 switch (a->bs) { 1159 case 1: 1160 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1161 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1162 PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"); 1163 case 2: 1164 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1165 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1166 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1167 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1168 break; 1169 case 3: 1170 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1171 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1172 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1173 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1174 break; 1175 case 4: 1176 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1177 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1178 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1179 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1180 break; 1181 case 5: 1182 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1183 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1184 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1185 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1186 break; 1187 case 6: 1188 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1189 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1190 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1191 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1192 break; 1193 case 7: 1194 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1195 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1196 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1197 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1198 break; 1199 default: 1200 a->row = row; 1201 a->icol = col; 1202 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1203 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1204 1205 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1206 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 1207 PetscLogObjectParent(inA,a->icol); 1208 1209 if (!a->solve_work) { 1210 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1211 PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar)); 1212 } 1213 } 1214 1215 ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr); 1216 1217 PetscFunctionReturn(0); 1218 } 1219 #endif 1220 1221 #undef __FUNCT__ 1222 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1223 int MatPrintHelp_SeqSBAIJ(Mat A) 1224 { 1225 static PetscTruth called = PETSC_FALSE; 1226 MPI_Comm comm = A->comm; 1227 int ierr; 1228 1229 PetscFunctionBegin; 1230 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1231 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1232 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1233 PetscFunctionReturn(0); 1234 } 1235 1236 EXTERN_C_BEGIN 1237 #undef __FUNCT__ 1238 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1239 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices) 1240 { 1241 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1242 int i,nz,n; 1243 1244 PetscFunctionBegin; 1245 nz = baij->s_maxnz; 1246 n = mat->n; 1247 for (i=0; i<nz; i++) { 1248 baij->j[i] = indices[i]; 1249 } 1250 baij->s_nz = nz; 1251 for (i=0; i<n; i++) { 1252 baij->ilen[i] = baij->imax[i]; 1253 } 1254 1255 PetscFunctionReturn(0); 1256 } 1257 EXTERN_C_END 1258 1259 #undef __FUNCT__ 1260 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1261 /*@ 1262 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1263 in the matrix. 1264 1265 Input Parameters: 1266 + mat - the SeqSBAIJ matrix 1267 - indices - the column indices 1268 1269 Level: advanced 1270 1271 Notes: 1272 This can be called if you have precomputed the nonzero structure of the 1273 matrix and want to provide it to the matrix object to improve the performance 1274 of the MatSetValues() operation. 1275 1276 You MUST have set the correct numbers of nonzeros per row in the call to 1277 MatCreateSeqSBAIJ(). 1278 1279 MUST be called before any calls to MatSetValues() 1280 1281 .seealso: MatCreateSeqSBAIJ 1282 @*/ 1283 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices) 1284 { 1285 int ierr,(*f)(Mat,int *); 1286 1287 PetscFunctionBegin; 1288 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1289 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1290 if (f) { 1291 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1292 } else { 1293 SETERRQ(1,"Wrong type of matrix to set column indices"); 1294 } 1295 PetscFunctionReturn(0); 1296 } 1297 1298 #undef __FUNCT__ 1299 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1300 int MatSetUpPreallocation_SeqSBAIJ(Mat A) 1301 { 1302 int ierr; 1303 1304 PetscFunctionBegin; 1305 ierr = MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1306 PetscFunctionReturn(0); 1307 } 1308 1309 #include "petscblaslapack.h" 1310 #undef __FUNCT__ 1311 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1312 int MatAXPY_SeqSBAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str) 1313 { 1314 int ierr,one=1; 1315 Mat_SeqSBAIJ *x = (Mat_SeqSBAIJ *)X->data,*y = (Mat_SeqSBAIJ *)Y->data; 1316 1317 PetscFunctionBegin; 1318 if (str == SAME_NONZERO_PATTERN) { 1319 BLaxpy_(&x->s_nz,a,x->a,&one,y->a,&one); 1320 } else { 1321 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1322 } 1323 PetscFunctionReturn(0); 1324 } 1325 1326 /* -------------------------------------------------------------------*/ 1327 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1328 MatGetRow_SeqSBAIJ, 1329 MatRestoreRow_SeqSBAIJ, 1330 MatMult_SeqSBAIJ_N, 1331 MatMultAdd_SeqSBAIJ_N, 1332 MatMultTranspose_SeqSBAIJ, 1333 MatMultTransposeAdd_SeqSBAIJ, 1334 MatSolve_SeqSBAIJ_N, 1335 0, 1336 0, 1337 0, 1338 0, 1339 MatCholeskyFactor_SeqSBAIJ, 1340 MatRelax_SeqSBAIJ, 1341 MatTranspose_SeqSBAIJ, 1342 MatGetInfo_SeqSBAIJ, 1343 MatEqual_SeqSBAIJ, 1344 MatGetDiagonal_SeqSBAIJ, 1345 MatDiagonalScale_SeqSBAIJ, 1346 MatNorm_SeqSBAIJ, 1347 0, 1348 MatAssemblyEnd_SeqSBAIJ, 1349 0, 1350 MatSetOption_SeqSBAIJ, 1351 MatZeroEntries_SeqSBAIJ, 1352 MatZeroRows_SeqSBAIJ, 1353 0, 1354 0, 1355 MatCholeskyFactorSymbolic_SeqSBAIJ, 1356 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1357 MatSetUpPreallocation_SeqSBAIJ, 1358 0, 1359 MatICCFactorSymbolic_SeqSBAIJ, 1360 0, 1361 0, 1362 MatDuplicate_SeqSBAIJ, 1363 0, 1364 0, 1365 0, 1366 0, 1367 MatAXPY_SeqSBAIJ, 1368 MatGetSubMatrices_SeqSBAIJ, 1369 MatIncreaseOverlap_SeqSBAIJ, 1370 MatGetValues_SeqSBAIJ, 1371 0, 1372 MatPrintHelp_SeqSBAIJ, 1373 MatScale_SeqSBAIJ, 1374 0, 1375 0, 1376 0, 1377 MatGetBlockSize_SeqSBAIJ, 1378 MatGetRowIJ_SeqSBAIJ, 1379 MatRestoreRowIJ_SeqSBAIJ, 1380 0, 1381 0, 1382 0, 1383 0, 1384 0, 1385 0, 1386 MatSetValuesBlocked_SeqSBAIJ, 1387 MatGetSubMatrix_SeqSBAIJ, 1388 0, 1389 0, 1390 MatGetPetscMaps_Petsc, 1391 0, 1392 0, 1393 0, 1394 0, 1395 0, 1396 0, 1397 MatGetRowMax_SeqSBAIJ}; 1398 1399 EXTERN_C_BEGIN 1400 #undef __FUNCT__ 1401 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1402 int MatStoreValues_SeqSBAIJ(Mat mat) 1403 { 1404 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1405 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1406 int ierr; 1407 1408 PetscFunctionBegin; 1409 if (aij->nonew != 1) { 1410 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1411 } 1412 1413 /* allocate space for values if not already there */ 1414 if (!aij->saved_values) { 1415 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1416 } 1417 1418 /* copy values over */ 1419 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1420 PetscFunctionReturn(0); 1421 } 1422 EXTERN_C_END 1423 1424 EXTERN_C_BEGIN 1425 #undef __FUNCT__ 1426 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1427 int MatRetrieveValues_SeqSBAIJ(Mat mat) 1428 { 1429 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1430 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1431 1432 PetscFunctionBegin; 1433 if (aij->nonew != 1) { 1434 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1435 } 1436 if (!aij->saved_values) { 1437 SETERRQ(1,"Must call MatStoreValues(A);first"); 1438 } 1439 1440 /* copy values over */ 1441 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1442 PetscFunctionReturn(0); 1443 } 1444 EXTERN_C_END 1445 1446 EXTERN_C_BEGIN 1447 #undef __FUNCT__ 1448 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1449 int MatCreate_SeqSBAIJ(Mat B) 1450 { 1451 Mat_SeqSBAIJ *b; 1452 int ierr,size; 1453 1454 PetscFunctionBegin; 1455 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1456 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1457 B->m = B->M = PetscMax(B->m,B->M); 1458 B->n = B->N = PetscMax(B->n,B->N); 1459 1460 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1461 B->data = (void*)b; 1462 ierr = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1463 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1464 B->ops->destroy = MatDestroy_SeqSBAIJ; 1465 B->ops->view = MatView_SeqSBAIJ; 1466 B->factor = 0; 1467 B->lupivotthreshold = 1.0; 1468 B->mapping = 0; 1469 b->row = 0; 1470 b->icol = 0; 1471 b->reallocs = 0; 1472 b->saved_values = 0; 1473 1474 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1475 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1476 1477 b->sorted = PETSC_FALSE; 1478 b->roworiented = PETSC_TRUE; 1479 b->nonew = 0; 1480 b->diag = 0; 1481 b->solve_work = 0; 1482 b->mult_work = 0; 1483 B->spptr = 0; 1484 b->keepzeroedrows = PETSC_FALSE; 1485 1486 b->inew = 0; 1487 b->jnew = 0; 1488 b->anew = 0; 1489 b->a2anew = 0; 1490 b->permute = PETSC_FALSE; 1491 1492 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1493 "MatStoreValues_SeqSBAIJ", 1494 (void*)MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1495 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1496 "MatRetrieveValues_SeqSBAIJ", 1497 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1498 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1499 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1500 (void*)MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1501 PetscFunctionReturn(0); 1502 } 1503 EXTERN_C_END 1504 1505 #undef __FUNCT__ 1506 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1507 /*@C 1508 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1509 compressed row) format. For good matrix assembly performance the 1510 user should preallocate the matrix storage by setting the parameter nz 1511 (or the array nnz). By setting these parameters accurately, performance 1512 during matrix assembly can be increased by more than a factor of 50. 1513 1514 Collective on Mat 1515 1516 Input Parameters: 1517 + A - the symmetric matrix 1518 . bs - size of block 1519 . nz - number of block nonzeros per block row (same for all rows) 1520 - nnz - array containing the number of block nonzeros in the upper triangular plus 1521 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1522 1523 Options Database Keys: 1524 . -mat_no_unroll - uses code that does not unroll the loops in the 1525 block calculations (much slower) 1526 . -mat_block_size - size of the blocks to use 1527 1528 Level: intermediate 1529 1530 Notes: 1531 Specify the preallocated storage with either nz or nnz (not both). 1532 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1533 allocation. For additional details, see the users manual chapter on 1534 matrices. 1535 1536 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1537 @*/ 1538 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz) 1539 { 1540 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1541 int i,len,ierr,mbs,bs2; 1542 PetscTruth flg; 1543 int s_nz; 1544 1545 PetscFunctionBegin; 1546 B->preallocated = PETSC_TRUE; 1547 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1548 mbs = B->m/bs; 1549 bs2 = bs*bs; 1550 1551 if (mbs*bs != B->m) { 1552 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1553 } 1554 1555 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1556 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1557 if (nnz) { 1558 for (i=0; i<mbs; i++) { 1559 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1560 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs); 1561 } 1562 } 1563 1564 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1565 if (!flg) { 1566 switch (bs) { 1567 case 1: 1568 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1569 B->ops->solve = MatSolve_SeqSBAIJ_1; 1570 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_1; 1571 B->ops->mult = MatMult_SeqSBAIJ_1; 1572 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1573 break; 1574 case 2: 1575 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1576 B->ops->solve = MatSolve_SeqSBAIJ_2; 1577 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_2; 1578 B->ops->mult = MatMult_SeqSBAIJ_2; 1579 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1580 break; 1581 case 3: 1582 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1583 B->ops->solve = MatSolve_SeqSBAIJ_3; 1584 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_3; 1585 B->ops->mult = MatMult_SeqSBAIJ_3; 1586 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1587 break; 1588 case 4: 1589 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1590 B->ops->solve = MatSolve_SeqSBAIJ_4; 1591 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_4; 1592 B->ops->mult = MatMult_SeqSBAIJ_4; 1593 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1594 break; 1595 case 5: 1596 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1597 B->ops->solve = MatSolve_SeqSBAIJ_5; 1598 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_5; 1599 B->ops->mult = MatMult_SeqSBAIJ_5; 1600 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1601 break; 1602 case 6: 1603 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1604 B->ops->solve = MatSolve_SeqSBAIJ_6; 1605 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_6; 1606 B->ops->mult = MatMult_SeqSBAIJ_6; 1607 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1608 break; 1609 case 7: 1610 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1611 B->ops->solve = MatSolve_SeqSBAIJ_7; 1612 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_7; 1613 B->ops->mult = MatMult_SeqSBAIJ_7; 1614 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1615 break; 1616 } 1617 } 1618 1619 b->mbs = mbs; 1620 b->nbs = mbs; 1621 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1622 if (!nnz) { 1623 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1624 else if (nz <= 0) nz = 1; 1625 for (i=0; i<mbs; i++) { 1626 b->imax[i] = nz; 1627 } 1628 nz = nz*mbs; /* total nz */ 1629 } else { 1630 nz = 0; 1631 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1632 } 1633 /* s_nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1634 s_nz = nz; 1635 1636 /* allocate the matrix space */ 1637 len = s_nz*sizeof(int) + s_nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1638 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1639 ierr = PetscMemzero(b->a,s_nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1640 b->j = (int*)(b->a + s_nz*bs2); 1641 ierr = PetscMemzero(b->j,s_nz*sizeof(int));CHKERRQ(ierr); 1642 b->i = b->j + s_nz; 1643 b->singlemalloc = PETSC_TRUE; 1644 1645 /* pointer to beginning of each row */ 1646 b->i[0] = 0; 1647 for (i=1; i<mbs+1; i++) { 1648 b->i[i] = b->i[i-1] + b->imax[i-1]; 1649 } 1650 1651 /* b->ilen will count nonzeros in each block row so far. */ 1652 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1653 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1654 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1655 1656 b->bs = bs; 1657 b->bs2 = bs2; 1658 b->s_nz = 0; 1659 b->s_maxnz = s_nz*bs2; 1660 1661 b->inew = 0; 1662 b->jnew = 0; 1663 b->anew = 0; 1664 b->a2anew = 0; 1665 b->permute = PETSC_FALSE; 1666 PetscFunctionReturn(0); 1667 } 1668 1669 1670 #undef __FUNCT__ 1671 #define __FUNCT__ "MatCreateSeqSBAIJ" 1672 /*@C 1673 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1674 compressed row) format. For good matrix assembly performance the 1675 user should preallocate the matrix storage by setting the parameter nz 1676 (or the array nnz). By setting these parameters accurately, performance 1677 during matrix assembly can be increased by more than a factor of 50. 1678 1679 Collective on MPI_Comm 1680 1681 Input Parameters: 1682 + comm - MPI communicator, set to PETSC_COMM_SELF 1683 . bs - size of block 1684 . m - number of rows, or number of columns 1685 . nz - number of block nonzeros per block row (same for all rows) 1686 - nnz - array containing the number of block nonzeros in the upper triangular plus 1687 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1688 1689 Output Parameter: 1690 . A - the symmetric matrix 1691 1692 Options Database Keys: 1693 . -mat_no_unroll - uses code that does not unroll the loops in the 1694 block calculations (much slower) 1695 . -mat_block_size - size of the blocks to use 1696 1697 Level: intermediate 1698 1699 Notes: 1700 1701 Specify the preallocated storage with either nz or nnz (not both). 1702 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1703 allocation. For additional details, see the users manual chapter on 1704 matrices. 1705 1706 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1707 @*/ 1708 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1709 { 1710 int ierr; 1711 1712 PetscFunctionBegin; 1713 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1714 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1715 ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1716 PetscFunctionReturn(0); 1717 } 1718 1719 #undef __FUNCT__ 1720 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1721 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1722 { 1723 Mat C; 1724 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1725 int i,len,mbs = a->mbs,nz = a->s_nz,bs2 =a->bs2,ierr; 1726 1727 PetscFunctionBegin; 1728 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1729 1730 *B = 0; 1731 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1732 ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr); 1733 c = (Mat_SeqSBAIJ*)C->data; 1734 1735 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1736 C->preallocated = PETSC_TRUE; 1737 C->factor = A->factor; 1738 c->row = 0; 1739 c->icol = 0; 1740 c->saved_values = 0; 1741 c->keepzeroedrows = a->keepzeroedrows; 1742 C->assembled = PETSC_TRUE; 1743 1744 c->bs = a->bs; 1745 c->bs2 = a->bs2; 1746 c->mbs = a->mbs; 1747 c->nbs = a->nbs; 1748 1749 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1750 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1751 for (i=0; i<mbs; i++) { 1752 c->imax[i] = a->imax[i]; 1753 c->ilen[i] = a->ilen[i]; 1754 } 1755 1756 /* allocate the matrix space */ 1757 c->singlemalloc = PETSC_TRUE; 1758 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1759 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1760 c->j = (int*)(c->a + nz*bs2); 1761 c->i = c->j + nz; 1762 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1763 if (mbs > 0) { 1764 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1765 if (cpvalues == MAT_COPY_VALUES) { 1766 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1767 } else { 1768 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1769 } 1770 } 1771 1772 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1773 c->sorted = a->sorted; 1774 c->roworiented = a->roworiented; 1775 c->nonew = a->nonew; 1776 1777 if (a->diag) { 1778 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1779 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1780 for (i=0; i<mbs; i++) { 1781 c->diag[i] = a->diag[i]; 1782 } 1783 } else c->diag = 0; 1784 c->s_nz = a->s_nz; 1785 c->s_maxnz = a->s_maxnz; 1786 c->solve_work = 0; 1787 C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1788 c->mult_work = 0; 1789 *B = C; 1790 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1791 PetscFunctionReturn(0); 1792 } 1793 1794 EXTERN_C_BEGIN 1795 #undef __FUNCT__ 1796 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1797 int MatLoad_SeqSBAIJ(PetscViewer viewer,MatType type,Mat *A) 1798 { 1799 Mat_SeqSBAIJ *a; 1800 Mat B; 1801 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1802 int *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1803 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1804 int *masked,nmask,tmp,bs2,ishift; 1805 PetscScalar *aa; 1806 MPI_Comm comm = ((PetscObject)viewer)->comm; 1807 1808 PetscFunctionBegin; 1809 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1810 bs2 = bs*bs; 1811 1812 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1813 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1814 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1815 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1816 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1817 M = header[1]; N = header[2]; nz = header[3]; 1818 1819 if (header[3] < 0) { 1820 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1821 } 1822 1823 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1824 1825 /* 1826 This code adds extra rows to make sure the number of rows is 1827 divisible by the blocksize 1828 */ 1829 mbs = M/bs; 1830 extra_rows = bs - M + bs*(mbs); 1831 if (extra_rows == bs) extra_rows = 0; 1832 else mbs++; 1833 if (extra_rows) { 1834 PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"); 1835 } 1836 1837 /* read in row lengths */ 1838 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1839 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1840 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1841 1842 /* read in column indices */ 1843 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1844 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1845 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1846 1847 /* loop over row lengths determining block row lengths */ 1848 ierr = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr); 1849 ierr = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1850 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1851 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1852 masked = mask + mbs; 1853 rowcount = 0; nzcount = 0; 1854 for (i=0; i<mbs; i++) { 1855 nmask = 0; 1856 for (j=0; j<bs; j++) { 1857 kmax = rowlengths[rowcount]; 1858 for (k=0; k<kmax; k++) { 1859 tmp = jj[nzcount++]/bs; /* block col. index */ 1860 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1861 } 1862 rowcount++; 1863 } 1864 s_browlengths[i] += nmask; 1865 1866 /* zero out the mask elements we set */ 1867 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1868 } 1869 1870 /* create our matrix */ 1871 ierr = MatCreateSeqSBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,s_browlengths,A);CHKERRQ(ierr); 1872 B = *A; 1873 a = (Mat_SeqSBAIJ*)B->data; 1874 1875 /* set matrix "i" values */ 1876 a->i[0] = 0; 1877 for (i=1; i<= mbs; i++) { 1878 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1879 a->ilen[i-1] = s_browlengths[i-1]; 1880 } 1881 a->s_nz = a->i[mbs]; 1882 1883 /* read in nonzero values */ 1884 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1885 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1886 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1887 1888 /* set "a" and "j" values into matrix */ 1889 nzcount = 0; jcount = 0; 1890 for (i=0; i<mbs; i++) { 1891 nzcountb = nzcount; 1892 nmask = 0; 1893 for (j=0; j<bs; j++) { 1894 kmax = rowlengths[i*bs+j]; 1895 for (k=0; k<kmax; k++) { 1896 tmp = jj[nzcount++]/bs; /* block col. index */ 1897 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1898 } 1899 } 1900 /* sort the masked values */ 1901 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1902 1903 /* set "j" values into matrix */ 1904 maskcount = 1; 1905 for (j=0; j<nmask; j++) { 1906 a->j[jcount++] = masked[j]; 1907 mask[masked[j]] = maskcount++; 1908 } 1909 1910 /* set "a" values into matrix */ 1911 ishift = bs2*a->i[i]; 1912 for (j=0; j<bs; j++) { 1913 kmax = rowlengths[i*bs+j]; 1914 for (k=0; k<kmax; k++) { 1915 tmp = jj[nzcountb]/bs ; /* block col. index */ 1916 if (tmp >= i){ 1917 block = mask[tmp] - 1; 1918 point = jj[nzcountb] - bs*tmp; 1919 idx = ishift + bs2*block + j + bs*point; 1920 a->a[idx] = aa[nzcountb]; 1921 } 1922 nzcountb++; 1923 } 1924 } 1925 /* zero out the mask elements we set */ 1926 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1927 } 1928 if (jcount != a->s_nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1929 1930 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1931 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1932 ierr = PetscFree(aa);CHKERRQ(ierr); 1933 ierr = PetscFree(jj);CHKERRQ(ierr); 1934 ierr = PetscFree(mask);CHKERRQ(ierr); 1935 1936 B->assembled = PETSC_TRUE; 1937 ierr = MatView_Private(B);CHKERRQ(ierr); 1938 PetscFunctionReturn(0); 1939 } 1940 EXTERN_C_END 1941 1942 #undef __FUNCT__ 1943 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1944 int MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1945 { 1946 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1947 MatScalar *aa=a->a,*v,*v1; 1948 PetscScalar *x,*b,*t,sum,d; 1949 int m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j,ierr; 1950 int nz,nz1,*vj,*vj1,i; 1951 1952 PetscFunctionBegin; 1953 its = its*lits; 1954 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 1955 1956 if (bs > 1) 1957 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1958 1959 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1960 if (xx != bb) { 1961 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1962 } else { 1963 b = x; 1964 } 1965 1966 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 1967 1968 if (flag & SOR_ZERO_INITIAL_GUESS) { 1969 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1970 for (i=0; i<m; i++) 1971 t[i] = b[i]; 1972 1973 for (i=0; i<m; i++){ 1974 d = *(aa + ai[i]); /* diag[i] */ 1975 v = aa + ai[i] + 1; 1976 vj = aj + ai[i] + 1; 1977 nz = ai[i+1] - ai[i] - 1; 1978 x[i] = omega*t[i]/d; 1979 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 1980 PetscLogFlops(2*nz-1); 1981 } 1982 } 1983 1984 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1985 for (i=0; i<m; i++) 1986 t[i] = b[i]; 1987 1988 for (i=0; i<m-1; i++){ /* update rhs */ 1989 v = aa + ai[i] + 1; 1990 vj = aj + ai[i] + 1; 1991 nz = ai[i+1] - ai[i] - 1; 1992 while (nz--) t[*vj++] -= x[i]*(*v++); 1993 PetscLogFlops(2*nz-1); 1994 } 1995 for (i=m-1; i>=0; i--){ 1996 d = *(aa + ai[i]); 1997 v = aa + ai[i] + 1; 1998 vj = aj + ai[i] + 1; 1999 nz = ai[i+1] - ai[i] - 1; 2000 sum = t[i]; 2001 while (nz--) sum -= x[*vj++]*(*v++); 2002 PetscLogFlops(2*nz-1); 2003 x[i] = (1-omega)*x[i] + omega*sum/d; 2004 } 2005 } 2006 its--; 2007 } 2008 2009 while (its--) { 2010 /* 2011 forward sweep: 2012 for i=0,...,m-1: 2013 sum[i] = (b[i] - U(i,:)x )/d[i]; 2014 x[i] = (1-omega)x[i] + omega*sum[i]; 2015 b = b - x[i]*U^T(i,:); 2016 2017 */ 2018 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2019 for (i=0; i<m; i++) 2020 t[i] = b[i]; 2021 2022 for (i=0; i<m; i++){ 2023 d = *(aa + ai[i]); /* diag[i] */ 2024 v = aa + ai[i] + 1; v1=v; 2025 vj = aj + ai[i] + 1; vj1=vj; 2026 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2027 sum = t[i]; 2028 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2029 x[i] = (1-omega)*x[i] + omega*sum/d; 2030 while (nz--) t[*vj++] -= x[i]*(*v++); 2031 PetscLogFlops(4*nz-2); 2032 } 2033 } 2034 2035 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2036 /* 2037 backward sweep: 2038 b = b - x[i]*U^T(i,:), i=0,...,n-2 2039 for i=m-1,...,0: 2040 sum[i] = (b[i] - U(i,:)x )/d[i]; 2041 x[i] = (1-omega)x[i] + omega*sum[i]; 2042 */ 2043 for (i=0; i<m; i++) 2044 t[i] = b[i]; 2045 2046 for (i=0; i<m-1; i++){ /* update rhs */ 2047 v = aa + ai[i] + 1; 2048 vj = aj + ai[i] + 1; 2049 nz = ai[i+1] - ai[i] - 1; 2050 while (nz--) t[*vj++] -= x[i]*(*v++); 2051 PetscLogFlops(2*nz-1); 2052 } 2053 for (i=m-1; i>=0; i--){ 2054 d = *(aa + ai[i]); 2055 v = aa + ai[i] + 1; 2056 vj = aj + ai[i] + 1; 2057 nz = ai[i+1] - ai[i] - 1; 2058 sum = t[i]; 2059 while (nz--) sum -= x[*vj++]*(*v++); 2060 PetscLogFlops(2*nz-1); 2061 x[i] = (1-omega)*x[i] + omega*sum/d; 2062 } 2063 } 2064 } 2065 2066 ierr = PetscFree(t); CHKERRQ(ierr); 2067 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2068 if (bb != xx) { 2069 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2070 } 2071 2072 PetscFunctionReturn(0); 2073 } 2074 2075 2076 2077 2078 2079 2080