1 /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "petscsys.h" /*I "petscmat.h" I*/ 8 #include "src/mat/impls/baij/seq/baij.h" 9 #include "src/vec/vecimpl.h" 10 #include "src/inline/spops.h" 11 12 /* UGLY, ugly, ugly 13 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 14 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 15 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 16 converts the entries into single precision and then calls ..._MatScalar() to put them 17 into the single precision data structures. 18 */ 19 #if defined(PETSC_USE_MAT_SINGLE) 20 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 21 #else 22 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 23 #endif 24 25 #define CHUNKSIZE 10 26 27 /* 28 Checks for missing diagonals 29 */ 30 #undef __FUNCT__ 31 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 32 int MatMissingDiagonal_SeqBAIJ(Mat A) 33 { 34 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 35 int *diag,*jj = a->j,i,ierr; 36 37 PetscFunctionBegin; 38 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 39 diag = a->diag; 40 for (i=0; i<a->mbs; i++) { 41 if (jj[diag[i]] != i) { 42 SETERRQ1(1,"Matrix is missing diagonal number %d",i); 43 } 44 } 45 PetscFunctionReturn(0); 46 } 47 48 #undef __FUNCT__ 49 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 50 int MatMarkDiagonal_SeqBAIJ(Mat A) 51 { 52 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 53 int i,j,*diag,m = a->mbs,ierr; 54 55 PetscFunctionBegin; 56 if (a->diag) PetscFunctionReturn(0); 57 58 ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr); 59 PetscLogObjectMemory(A,(m+1)*sizeof(int)); 60 for (i=0; i<m; i++) { 61 diag[i] = a->i[i+1]; 62 for (j=a->i[i]; j<a->i[i+1]; j++) { 63 if (a->j[j] == i) { 64 diag[i] = j; 65 break; 66 } 67 } 68 } 69 a->diag = diag; 70 PetscFunctionReturn(0); 71 } 72 73 74 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 75 76 #undef __FUNCT__ 77 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 78 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 79 { 80 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 81 int ierr,n = a->mbs,i; 82 83 PetscFunctionBegin; 84 *nn = n; 85 if (!ia) PetscFunctionReturn(0); 86 if (symmetric) { 87 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 88 } else if (oshift == 1) { 89 /* temporarily add 1 to i and j indices */ 90 int nz = a->i[n]; 91 for (i=0; i<nz; i++) a->j[i]++; 92 for (i=0; i<n+1; i++) a->i[i]++; 93 *ia = a->i; *ja = a->j; 94 } else { 95 *ia = a->i; *ja = a->j; 96 } 97 98 PetscFunctionReturn(0); 99 } 100 101 #undef __FUNCT__ 102 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 103 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 104 { 105 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 106 int i,n = a->mbs,ierr; 107 108 PetscFunctionBegin; 109 if (!ia) PetscFunctionReturn(0); 110 if (symmetric) { 111 ierr = PetscFree(*ia);CHKERRQ(ierr); 112 ierr = PetscFree(*ja);CHKERRQ(ierr); 113 } else if (oshift == 1) { 114 int nz = a->i[n]-1; 115 for (i=0; i<nz; i++) a->j[i]--; 116 for (i=0; i<n+1; i++) a->i[i]--; 117 } 118 PetscFunctionReturn(0); 119 } 120 121 #undef __FUNCT__ 122 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ" 123 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs) 124 { 125 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 126 127 PetscFunctionBegin; 128 *bs = baij->bs; 129 PetscFunctionReturn(0); 130 } 131 132 133 #undef __FUNCT__ 134 #define __FUNCT__ "MatDestroy_SeqBAIJ" 135 int MatDestroy_SeqBAIJ(Mat A) 136 { 137 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 138 int ierr; 139 140 PetscFunctionBegin; 141 #if defined(PETSC_USE_LOG) 142 PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz); 143 #endif 144 ierr = PetscFree(a->a);CHKERRQ(ierr); 145 if (!a->singlemalloc) { 146 ierr = PetscFree(a->i);CHKERRQ(ierr); 147 ierr = PetscFree(a->j);CHKERRQ(ierr); 148 } 149 if (a->row) { 150 ierr = ISDestroy(a->row);CHKERRQ(ierr); 151 } 152 if (a->col) { 153 ierr = ISDestroy(a->col);CHKERRQ(ierr); 154 } 155 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 156 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 157 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 158 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 159 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 160 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 161 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 162 #if defined(PETSC_USE_MAT_SINGLE) 163 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 164 #endif 165 ierr = PetscFree(a);CHKERRQ(ierr); 166 PetscFunctionReturn(0); 167 } 168 169 #undef __FUNCT__ 170 #define __FUNCT__ "MatSetOption_SeqBAIJ" 171 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 172 { 173 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 174 175 PetscFunctionBegin; 176 switch (op) { 177 case MAT_ROW_ORIENTED: 178 a->roworiented = PETSC_TRUE; 179 break; 180 case MAT_COLUMN_ORIENTED: 181 a->roworiented = PETSC_FALSE; 182 break; 183 case MAT_COLUMNS_SORTED: 184 a->sorted = PETSC_TRUE; 185 break; 186 case MAT_COLUMNS_UNSORTED: 187 a->sorted = PETSC_FALSE; 188 break; 189 case MAT_KEEP_ZEROED_ROWS: 190 a->keepzeroedrows = PETSC_TRUE; 191 break; 192 case MAT_NO_NEW_NONZERO_LOCATIONS: 193 a->nonew = 1; 194 break; 195 case MAT_NEW_NONZERO_LOCATION_ERR: 196 a->nonew = -1; 197 break; 198 case MAT_NEW_NONZERO_ALLOCATION_ERR: 199 a->nonew = -2; 200 break; 201 case MAT_YES_NEW_NONZERO_LOCATIONS: 202 a->nonew = 0; 203 break; 204 case MAT_ROWS_SORTED: 205 case MAT_ROWS_UNSORTED: 206 case MAT_YES_NEW_DIAGONALS: 207 case MAT_IGNORE_OFF_PROC_ENTRIES: 208 case MAT_USE_HASH_TABLE: 209 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 210 break; 211 case MAT_NO_NEW_DIAGONALS: 212 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 213 break; 214 case MAT_USE_SINGLE_PRECISION_SOLVES: 215 if (a->bs==4) { 216 PetscTruth sse_enabled_local,sse_enabled_global; 217 int ierr; 218 219 sse_enabled_local = PETSC_FALSE; 220 sse_enabled_global = PETSC_FALSE; 221 222 ierr = PetscSSEIsEnabled(A->comm,&sse_enabled_local,&sse_enabled_global);CHKERRQ(ierr); 223 #if defined(PETSC_HAVE_SSE) 224 if (sse_enabled_local) { 225 a->single_precision_solves = PETSC_TRUE; 226 A->ops->solve = MatSolve_SeqBAIJ_Update; 227 A->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 228 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES set\n"); 229 break; 230 } else { 231 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n"); 232 } 233 #else 234 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n"); 235 #endif 236 } 237 break; 238 default: 239 SETERRQ(PETSC_ERR_SUP,"unknown option"); 240 } 241 PetscFunctionReturn(0); 242 } 243 244 #undef __FUNCT__ 245 #define __FUNCT__ "MatGetRow_SeqBAIJ" 246 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 247 { 248 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 249 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr; 250 MatScalar *aa,*aa_i; 251 PetscScalar *v_i; 252 253 PetscFunctionBegin; 254 bs = a->bs; 255 ai = a->i; 256 aj = a->j; 257 aa = a->a; 258 bs2 = a->bs2; 259 260 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 261 262 bn = row/bs; /* Block number */ 263 bp = row % bs; /* Block Position */ 264 M = ai[bn+1] - ai[bn]; 265 *nz = bs*M; 266 267 if (v) { 268 *v = 0; 269 if (*nz) { 270 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 271 for (i=0; i<M; i++) { /* for each block in the block row */ 272 v_i = *v + i*bs; 273 aa_i = aa + bs2*(ai[bn] + i); 274 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 275 } 276 } 277 } 278 279 if (idx) { 280 *idx = 0; 281 if (*nz) { 282 ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr); 283 for (i=0; i<M; i++) { /* for each block in the block row */ 284 idx_i = *idx + i*bs; 285 itmp = bs*aj[ai[bn] + i]; 286 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 287 } 288 } 289 } 290 PetscFunctionReturn(0); 291 } 292 293 #undef __FUNCT__ 294 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 295 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 296 { 297 int ierr; 298 299 PetscFunctionBegin; 300 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 301 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 302 PetscFunctionReturn(0); 303 } 304 305 #undef __FUNCT__ 306 #define __FUNCT__ "MatTranspose_SeqBAIJ" 307 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 308 { 309 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 310 Mat C; 311 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 312 int *rows,*cols,bs2=a->bs2; 313 PetscScalar *array; 314 315 PetscFunctionBegin; 316 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 317 ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr); 318 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 319 320 #if defined(PETSC_USE_MAT_SINGLE) 321 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 322 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 323 #else 324 array = a->a; 325 #endif 326 327 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 328 ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 329 ierr = PetscFree(col);CHKERRQ(ierr); 330 ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr); 331 cols = rows + bs; 332 for (i=0; i<mbs; i++) { 333 cols[0] = i*bs; 334 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 335 len = ai[i+1] - ai[i]; 336 for (j=0; j<len; j++) { 337 rows[0] = (*aj++)*bs; 338 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 339 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 340 array += bs2; 341 } 342 } 343 ierr = PetscFree(rows);CHKERRQ(ierr); 344 #if defined(PETSC_USE_MAT_SINGLE) 345 ierr = PetscFree(array); 346 #endif 347 348 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 349 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 350 351 if (B) { 352 *B = C; 353 } else { 354 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 355 } 356 PetscFunctionReturn(0); 357 } 358 359 #undef __FUNCT__ 360 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 361 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 362 { 363 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 364 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 365 PetscScalar *aa; 366 FILE *file; 367 368 PetscFunctionBegin; 369 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 370 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 371 col_lens[0] = MAT_COOKIE; 372 373 col_lens[1] = A->m; 374 col_lens[2] = A->n; 375 col_lens[3] = a->nz*bs2; 376 377 /* store lengths of each row and write (including header) to file */ 378 count = 0; 379 for (i=0; i<a->mbs; i++) { 380 for (j=0; j<bs; j++) { 381 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 382 } 383 } 384 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 385 ierr = PetscFree(col_lens);CHKERRQ(ierr); 386 387 /* store column indices (zero start index) */ 388 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 389 count = 0; 390 for (i=0; i<a->mbs; i++) { 391 for (j=0; j<bs; j++) { 392 for (k=a->i[i]; k<a->i[i+1]; k++) { 393 for (l=0; l<bs; l++) { 394 jj[count++] = bs*a->j[k] + l; 395 } 396 } 397 } 398 } 399 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 400 ierr = PetscFree(jj);CHKERRQ(ierr); 401 402 /* store nonzero values */ 403 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 404 count = 0; 405 for (i=0; i<a->mbs; i++) { 406 for (j=0; j<bs; j++) { 407 for (k=a->i[i]; k<a->i[i+1]; k++) { 408 for (l=0; l<bs; l++) { 409 aa[count++] = a->a[bs2*k + l*bs + j]; 410 } 411 } 412 } 413 } 414 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 415 ierr = PetscFree(aa);CHKERRQ(ierr); 416 417 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 418 if (file) { 419 fprintf(file,"-matload_block_size %d\n",a->bs); 420 } 421 PetscFunctionReturn(0); 422 } 423 424 #undef __FUNCT__ 425 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 426 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 427 { 428 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 429 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 430 PetscViewerFormat format; 431 432 PetscFunctionBegin; 433 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 434 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) { 435 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 436 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 437 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 438 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 439 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 440 for (i=0; i<a->mbs; i++) { 441 for (j=0; j<bs; j++) { 442 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 443 for (k=a->i[i]; k<a->i[i+1]; k++) { 444 for (l=0; l<bs; l++) { 445 #if defined(PETSC_USE_COMPLEX) 446 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 447 ierr = PetscViewerASCIIPrintf(viewer," %d %g + %gi",bs*a->j[k]+l, 448 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 449 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 450 ierr = PetscViewerASCIIPrintf(viewer," %d %g - %gi",bs*a->j[k]+l, 451 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 452 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 453 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 454 } 455 #else 456 if (a->a[bs2*k + l*bs + j] != 0.0) { 457 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 458 } 459 #endif 460 } 461 } 462 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 463 } 464 } 465 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 466 } else { 467 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 468 for (i=0; i<a->mbs; i++) { 469 for (j=0; j<bs; j++) { 470 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 471 for (k=a->i[i]; k<a->i[i+1]; k++) { 472 for (l=0; l<bs; l++) { 473 #if defined(PETSC_USE_COMPLEX) 474 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 475 ierr = PetscViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l, 476 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 477 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 478 ierr = PetscViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l, 479 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 480 } else { 481 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 482 } 483 #else 484 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 485 #endif 486 } 487 } 488 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 489 } 490 } 491 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 492 } 493 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 494 PetscFunctionReturn(0); 495 } 496 497 #undef __FUNCT__ 498 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 499 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 500 { 501 Mat A = (Mat) Aa; 502 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 503 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 504 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 505 MatScalar *aa; 506 PetscViewer viewer; 507 508 PetscFunctionBegin; 509 510 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 511 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 512 513 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 514 515 /* loop over matrix elements drawing boxes */ 516 color = PETSC_DRAW_BLUE; 517 for (i=0,row=0; i<mbs; i++,row+=bs) { 518 for (j=a->i[i]; j<a->i[i+1]; j++) { 519 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 520 x_l = a->j[j]*bs; x_r = x_l + 1.0; 521 aa = a->a + j*bs2; 522 for (k=0; k<bs; k++) { 523 for (l=0; l<bs; l++) { 524 if (PetscRealPart(*aa++) >= 0.) continue; 525 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 526 } 527 } 528 } 529 } 530 color = PETSC_DRAW_CYAN; 531 for (i=0,row=0; i<mbs; i++,row+=bs) { 532 for (j=a->i[i]; j<a->i[i+1]; j++) { 533 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 534 x_l = a->j[j]*bs; x_r = x_l + 1.0; 535 aa = a->a + j*bs2; 536 for (k=0; k<bs; k++) { 537 for (l=0; l<bs; l++) { 538 if (PetscRealPart(*aa++) != 0.) continue; 539 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 540 } 541 } 542 } 543 } 544 545 color = PETSC_DRAW_RED; 546 for (i=0,row=0; i<mbs; i++,row+=bs) { 547 for (j=a->i[i]; j<a->i[i+1]; j++) { 548 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 549 x_l = a->j[j]*bs; x_r = x_l + 1.0; 550 aa = a->a + j*bs2; 551 for (k=0; k<bs; k++) { 552 for (l=0; l<bs; l++) { 553 if (PetscRealPart(*aa++) <= 0.) continue; 554 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 555 } 556 } 557 } 558 } 559 PetscFunctionReturn(0); 560 } 561 562 #undef __FUNCT__ 563 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 564 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 565 { 566 int ierr; 567 PetscReal xl,yl,xr,yr,w,h; 568 PetscDraw draw; 569 PetscTruth isnull; 570 571 PetscFunctionBegin; 572 573 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 574 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 575 576 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 577 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 578 xr += w; yr += h; xl = -w; yl = -h; 579 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 580 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 581 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 582 PetscFunctionReturn(0); 583 } 584 585 #undef __FUNCT__ 586 #define __FUNCT__ "MatView_SeqBAIJ" 587 int MatView_SeqBAIJ(Mat A,PetscViewer viewer) 588 { 589 int ierr; 590 PetscTruth issocket,isascii,isbinary,isdraw; 591 592 PetscFunctionBegin; 593 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 594 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 595 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 596 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 597 if (issocket) { 598 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 599 } else if (isascii){ 600 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 601 } else if (isbinary) { 602 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 603 } else if (isdraw) { 604 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 605 } else { 606 SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 607 } 608 PetscFunctionReturn(0); 609 } 610 611 612 #undef __FUNCT__ 613 #define __FUNCT__ "MatGetValues_SeqBAIJ" 614 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v) 615 { 616 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 617 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 618 int *ai = a->i,*ailen = a->ilen; 619 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 620 MatScalar *ap,*aa = a->a,zero = 0.0; 621 622 PetscFunctionBegin; 623 for (k=0; k<m; k++) { /* loop over rows */ 624 row = im[k]; brow = row/bs; 625 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 626 if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 627 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 628 nrow = ailen[brow]; 629 for (l=0; l<n; l++) { /* loop over columns */ 630 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 631 if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 632 col = in[l] ; 633 bcol = col/bs; 634 cidx = col%bs; 635 ridx = row%bs; 636 high = nrow; 637 low = 0; /* assume unsorted */ 638 while (high-low > 5) { 639 t = (low+high)/2; 640 if (rp[t] > bcol) high = t; 641 else low = t; 642 } 643 for (i=low; i<high; i++) { 644 if (rp[i] > bcol) break; 645 if (rp[i] == bcol) { 646 *v++ = ap[bs2*i+bs*cidx+ridx]; 647 goto finished; 648 } 649 } 650 *v++ = zero; 651 finished:; 652 } 653 } 654 PetscFunctionReturn(0); 655 } 656 657 #if defined(PETSC_USE_MAT_SINGLE) 658 #undef __FUNCT__ 659 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 660 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 661 { 662 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 663 int ierr,i,N = m*n*b->bs2; 664 MatScalar *vsingle; 665 666 PetscFunctionBegin; 667 if (N > b->setvalueslen) { 668 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 669 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 670 b->setvalueslen = N; 671 } 672 vsingle = b->setvaluescopy; 673 for (i=0; i<N; i++) { 674 vsingle[i] = v[i]; 675 } 676 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 677 PetscFunctionReturn(0); 678 } 679 #endif 680 681 682 #undef __FUNCT__ 683 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 684 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is) 685 { 686 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 687 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 688 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 689 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 690 PetscTruth roworiented=a->roworiented; 691 MatScalar *value = v,*ap,*aa = a->a,*bap; 692 693 PetscFunctionBegin; 694 if (roworiented) { 695 stepval = (n-1)*bs; 696 } else { 697 stepval = (m-1)*bs; 698 } 699 for (k=0; k<m; k++) { /* loop over added rows */ 700 row = im[k]; 701 if (row < 0) continue; 702 #if defined(PETSC_USE_BOPT_g) 703 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 704 #endif 705 rp = aj + ai[row]; 706 ap = aa + bs2*ai[row]; 707 rmax = imax[row]; 708 nrow = ailen[row]; 709 low = 0; 710 for (l=0; l<n; l++) { /* loop over added columns */ 711 if (in[l] < 0) continue; 712 #if defined(PETSC_USE_BOPT_g) 713 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 714 #endif 715 col = in[l]; 716 if (roworiented) { 717 value = v + k*(stepval+bs)*bs + l*bs; 718 } else { 719 value = v + l*(stepval+bs)*bs + k*bs; 720 } 721 if (!sorted) low = 0; high = nrow; 722 while (high-low > 7) { 723 t = (low+high)/2; 724 if (rp[t] > col) high = t; 725 else low = t; 726 } 727 for (i=low; i<high; i++) { 728 if (rp[i] > col) break; 729 if (rp[i] == col) { 730 bap = ap + bs2*i; 731 if (roworiented) { 732 if (is == ADD_VALUES) { 733 for (ii=0; ii<bs; ii++,value+=stepval) { 734 for (jj=ii; jj<bs2; jj+=bs) { 735 bap[jj] += *value++; 736 } 737 } 738 } else { 739 for (ii=0; ii<bs; ii++,value+=stepval) { 740 for (jj=ii; jj<bs2; jj+=bs) { 741 bap[jj] = *value++; 742 } 743 } 744 } 745 } else { 746 if (is == ADD_VALUES) { 747 for (ii=0; ii<bs; ii++,value+=stepval) { 748 for (jj=0; jj<bs; jj++) { 749 *bap++ += *value++; 750 } 751 } 752 } else { 753 for (ii=0; ii<bs; ii++,value+=stepval) { 754 for (jj=0; jj<bs; jj++) { 755 *bap++ = *value++; 756 } 757 } 758 } 759 } 760 goto noinsert2; 761 } 762 } 763 if (nonew == 1) goto noinsert2; 764 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 765 if (nrow >= rmax) { 766 /* there is no extra room in row, therefore enlarge */ 767 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 768 MatScalar *new_a; 769 770 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 771 772 /* malloc new storage space */ 773 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 774 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 775 new_j = (int*)(new_a + bs2*new_nz); 776 new_i = new_j + new_nz; 777 778 /* copy over old data into new slots */ 779 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 780 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 781 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 782 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 783 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 784 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 785 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 786 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 787 /* free up old matrix storage */ 788 ierr = PetscFree(a->a);CHKERRQ(ierr); 789 if (!a->singlemalloc) { 790 ierr = PetscFree(a->i);CHKERRQ(ierr); 791 ierr = PetscFree(a->j);CHKERRQ(ierr); 792 } 793 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 794 a->singlemalloc = PETSC_TRUE; 795 796 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 797 rmax = imax[row] = imax[row] + CHUNKSIZE; 798 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 799 a->maxnz += bs2*CHUNKSIZE; 800 a->reallocs++; 801 a->nz++; 802 } 803 N = nrow++ - 1; 804 /* shift up all the later entries in this row */ 805 for (ii=N; ii>=i; ii--) { 806 rp[ii+1] = rp[ii]; 807 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 808 } 809 if (N >= i) { 810 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 811 } 812 rp[i] = col; 813 bap = ap + bs2*i; 814 if (roworiented) { 815 for (ii=0; ii<bs; ii++,value+=stepval) { 816 for (jj=ii; jj<bs2; jj+=bs) { 817 bap[jj] = *value++; 818 } 819 } 820 } else { 821 for (ii=0; ii<bs; ii++,value+=stepval) { 822 for (jj=0; jj<bs; jj++) { 823 *bap++ = *value++; 824 } 825 } 826 } 827 noinsert2:; 828 low = i; 829 } 830 ailen[row] = nrow; 831 } 832 PetscFunctionReturn(0); 833 } 834 835 836 #undef __FUNCT__ 837 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 838 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 839 { 840 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 841 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 842 int m = A->m,*ip,N,*ailen = a->ilen; 843 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 844 MatScalar *aa = a->a,*ap; 845 846 PetscFunctionBegin; 847 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 848 849 if (m) rmax = ailen[0]; 850 for (i=1; i<mbs; i++) { 851 /* move each row back by the amount of empty slots (fshift) before it*/ 852 fshift += imax[i-1] - ailen[i-1]; 853 rmax = PetscMax(rmax,ailen[i]); 854 if (fshift) { 855 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 856 N = ailen[i]; 857 for (j=0; j<N; j++) { 858 ip[j-fshift] = ip[j]; 859 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 860 } 861 } 862 ai[i] = ai[i-1] + ailen[i-1]; 863 } 864 if (mbs) { 865 fshift += imax[mbs-1] - ailen[mbs-1]; 866 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 867 } 868 /* reset ilen and imax for each row */ 869 for (i=0; i<mbs; i++) { 870 ailen[i] = imax[i] = ai[i+1] - ai[i]; 871 } 872 a->nz = ai[mbs]; 873 874 /* diagonals may have moved, so kill the diagonal pointers */ 875 if (fshift && a->diag) { 876 ierr = PetscFree(a->diag);CHKERRQ(ierr); 877 PetscLogObjectMemory(A,-(m+1)*sizeof(int)); 878 a->diag = 0; 879 } 880 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2); 881 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs); 882 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 883 a->reallocs = 0; 884 A->info.nz_unneeded = (PetscReal)fshift*bs2; 885 886 PetscFunctionReturn(0); 887 } 888 889 890 891 /* 892 This function returns an array of flags which indicate the locations of contiguous 893 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 894 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 895 Assume: sizes should be long enough to hold all the values. 896 */ 897 #undef __FUNCT__ 898 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 899 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 900 { 901 int i,j,k,row; 902 PetscTruth flg; 903 904 PetscFunctionBegin; 905 for (i=0,j=0; i<n; j++) { 906 row = idx[i]; 907 if (row%bs!=0) { /* Not the begining of a block */ 908 sizes[j] = 1; 909 i++; 910 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 911 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 912 i++; 913 } else { /* Begining of the block, so check if the complete block exists */ 914 flg = PETSC_TRUE; 915 for (k=1; k<bs; k++) { 916 if (row+k != idx[i+k]) { /* break in the block */ 917 flg = PETSC_FALSE; 918 break; 919 } 920 } 921 if (flg == PETSC_TRUE) { /* No break in the bs */ 922 sizes[j] = bs; 923 i+= bs; 924 } else { 925 sizes[j] = 1; 926 i++; 927 } 928 } 929 } 930 *bs_max = j; 931 PetscFunctionReturn(0); 932 } 933 934 #undef __FUNCT__ 935 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 936 int MatZeroRows_SeqBAIJ(Mat A,IS is,PetscScalar *diag) 937 { 938 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 939 int ierr,i,j,k,count,is_n,*is_idx,*rows; 940 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 941 PetscScalar zero = 0.0; 942 MatScalar *aa; 943 944 PetscFunctionBegin; 945 /* Make a copy of the IS and sort it */ 946 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 947 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 948 949 /* allocate memory for rows,sizes */ 950 ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 951 sizes = rows + is_n; 952 953 /* copy IS values to rows, and sort them */ 954 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 955 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 956 if (baij->keepzeroedrows) { 957 for (i=0; i<is_n; i++) { sizes[i] = 1; } 958 bs_max = is_n; 959 } else { 960 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 961 } 962 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 963 964 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 965 row = rows[j]; 966 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 967 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 968 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 969 if (sizes[i] == bs && !baij->keepzeroedrows) { 970 if (diag) { 971 if (baij->ilen[row/bs] > 0) { 972 baij->ilen[row/bs] = 1; 973 baij->j[baij->i[row/bs]] = row/bs; 974 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 975 } 976 /* Now insert all the diagonal values for this bs */ 977 for (k=0; k<bs; k++) { 978 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 979 } 980 } else { /* (!diag) */ 981 baij->ilen[row/bs] = 0; 982 } /* end (!diag) */ 983 } else { /* (sizes[i] != bs) */ 984 #if defined (PETSC_USE_DEBUG) 985 if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 986 #endif 987 for (k=0; k<count; k++) { 988 aa[0] = zero; 989 aa += bs; 990 } 991 if (diag) { 992 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 993 } 994 } 995 } 996 997 ierr = PetscFree(rows);CHKERRQ(ierr); 998 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 999 PetscFunctionReturn(0); 1000 } 1001 1002 #undef __FUNCT__ 1003 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1004 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 1005 { 1006 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1007 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1008 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1009 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1010 int ridx,cidx,bs2=a->bs2,ierr; 1011 PetscTruth roworiented=a->roworiented; 1012 MatScalar *ap,value,*aa=a->a,*bap; 1013 1014 PetscFunctionBegin; 1015 for (k=0; k<m; k++) { /* loop over added rows */ 1016 row = im[k]; brow = row/bs; 1017 if (row < 0) continue; 1018 #if defined(PETSC_USE_BOPT_g) 1019 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m); 1020 #endif 1021 rp = aj + ai[brow]; 1022 ap = aa + bs2*ai[brow]; 1023 rmax = imax[brow]; 1024 nrow = ailen[brow]; 1025 low = 0; 1026 for (l=0; l<n; l++) { /* loop over added columns */ 1027 if (in[l] < 0) continue; 1028 #if defined(PETSC_USE_BOPT_g) 1029 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n); 1030 #endif 1031 col = in[l]; bcol = col/bs; 1032 ridx = row % bs; cidx = col % bs; 1033 if (roworiented) { 1034 value = v[l + k*n]; 1035 } else { 1036 value = v[k + l*m]; 1037 } 1038 if (!sorted) low = 0; high = nrow; 1039 while (high-low > 7) { 1040 t = (low+high)/2; 1041 if (rp[t] > bcol) high = t; 1042 else low = t; 1043 } 1044 for (i=low; i<high; i++) { 1045 if (rp[i] > bcol) break; 1046 if (rp[i] == bcol) { 1047 bap = ap + bs2*i + bs*cidx + ridx; 1048 if (is == ADD_VALUES) *bap += value; 1049 else *bap = value; 1050 goto noinsert1; 1051 } 1052 } 1053 if (nonew == 1) goto noinsert1; 1054 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1055 if (nrow >= rmax) { 1056 /* there is no extra room in row, therefore enlarge */ 1057 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1058 MatScalar *new_a; 1059 1060 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1061 1062 /* Malloc new storage space */ 1063 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1064 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1065 new_j = (int*)(new_a + bs2*new_nz); 1066 new_i = new_j + new_nz; 1067 1068 /* copy over old data into new slots */ 1069 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1070 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1071 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1072 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1073 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1074 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1075 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1076 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1077 /* free up old matrix storage */ 1078 ierr = PetscFree(a->a);CHKERRQ(ierr); 1079 if (!a->singlemalloc) { 1080 ierr = PetscFree(a->i);CHKERRQ(ierr); 1081 ierr = PetscFree(a->j);CHKERRQ(ierr); 1082 } 1083 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1084 a->singlemalloc = PETSC_TRUE; 1085 1086 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1087 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1088 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1089 a->maxnz += bs2*CHUNKSIZE; 1090 a->reallocs++; 1091 a->nz++; 1092 } 1093 N = nrow++ - 1; 1094 /* shift up all the later entries in this row */ 1095 for (ii=N; ii>=i; ii--) { 1096 rp[ii+1] = rp[ii]; 1097 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1098 } 1099 if (N>=i) { 1100 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1101 } 1102 rp[i] = bcol; 1103 ap[bs2*i + bs*cidx + ridx] = value; 1104 noinsert1:; 1105 low = i; 1106 } 1107 ailen[brow] = nrow; 1108 } 1109 PetscFunctionReturn(0); 1110 } 1111 1112 1113 #undef __FUNCT__ 1114 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1115 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info) 1116 { 1117 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1118 Mat outA; 1119 int ierr; 1120 PetscTruth row_identity,col_identity; 1121 1122 PetscFunctionBegin; 1123 if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1124 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1125 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1126 if (!row_identity || !col_identity) { 1127 SETERRQ(1,"Row and column permutations must be identity for in-place ILU"); 1128 } 1129 1130 outA = inA; 1131 inA->factor = FACTOR_LU; 1132 1133 if (!a->diag) { 1134 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1135 } 1136 1137 a->row = row; 1138 a->col = col; 1139 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1140 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1141 1142 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1143 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1144 PetscLogObjectParent(inA,a->icol); 1145 1146 /* 1147 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1148 for ILU(0) factorization with natural ordering 1149 */ 1150 if (a->bs < 8) { 1151 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1152 } else { 1153 if (!a->solve_work) { 1154 ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1155 PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar)); 1156 } 1157 } 1158 1159 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1160 1161 PetscFunctionReturn(0); 1162 } 1163 #undef __FUNCT__ 1164 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1165 int MatPrintHelp_SeqBAIJ(Mat A) 1166 { 1167 static PetscTruth called = PETSC_FALSE; 1168 MPI_Comm comm = A->comm; 1169 int ierr; 1170 1171 PetscFunctionBegin; 1172 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1173 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1174 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1175 PetscFunctionReturn(0); 1176 } 1177 1178 EXTERN_C_BEGIN 1179 #undef __FUNCT__ 1180 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1181 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1182 { 1183 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1184 int i,nz,n; 1185 1186 PetscFunctionBegin; 1187 nz = baij->maxnz; 1188 n = mat->n; 1189 for (i=0; i<nz; i++) { 1190 baij->j[i] = indices[i]; 1191 } 1192 baij->nz = nz; 1193 for (i=0; i<n; i++) { 1194 baij->ilen[i] = baij->imax[i]; 1195 } 1196 1197 PetscFunctionReturn(0); 1198 } 1199 EXTERN_C_END 1200 1201 #undef __FUNCT__ 1202 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1203 /*@ 1204 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1205 in the matrix. 1206 1207 Input Parameters: 1208 + mat - the SeqBAIJ matrix 1209 - indices - the column indices 1210 1211 Level: advanced 1212 1213 Notes: 1214 This can be called if you have precomputed the nonzero structure of the 1215 matrix and want to provide it to the matrix object to improve the performance 1216 of the MatSetValues() operation. 1217 1218 You MUST have set the correct numbers of nonzeros per row in the call to 1219 MatCreateSeqBAIJ(). 1220 1221 MUST be called before any calls to MatSetValues(); 1222 1223 @*/ 1224 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1225 { 1226 int ierr,(*f)(Mat,int *); 1227 1228 PetscFunctionBegin; 1229 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1230 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)())&f);CHKERRQ(ierr); 1231 if (f) { 1232 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1233 } else { 1234 SETERRQ(1,"Wrong type of matrix to set column indices"); 1235 } 1236 PetscFunctionReturn(0); 1237 } 1238 1239 #undef __FUNCT__ 1240 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1241 int MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1242 { 1243 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1244 int ierr,i,j,n,row,bs,*ai,*aj,mbs; 1245 PetscReal atmp; 1246 PetscScalar *x,zero = 0.0; 1247 MatScalar *aa; 1248 int ncols,brow,krow,kcol; 1249 1250 PetscFunctionBegin; 1251 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1252 bs = a->bs; 1253 aa = a->a; 1254 ai = a->i; 1255 aj = a->j; 1256 mbs = a->mbs; 1257 1258 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1259 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1260 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1261 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1262 for (i=0; i<mbs; i++) { 1263 ncols = ai[1] - ai[0]; ai++; 1264 brow = bs*i; 1265 for (j=0; j<ncols; j++){ 1266 /* bcol = bs*(*aj); */ 1267 for (kcol=0; kcol<bs; kcol++){ 1268 for (krow=0; krow<bs; krow++){ 1269 atmp = PetscAbsScalar(*aa); aa++; 1270 row = brow + krow; /* row index */ 1271 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1272 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1273 } 1274 } 1275 aj++; 1276 } 1277 } 1278 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1279 PetscFunctionReturn(0); 1280 } 1281 1282 #undef __FUNCT__ 1283 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1284 int MatSetUpPreallocation_SeqBAIJ(Mat A) 1285 { 1286 int ierr; 1287 1288 PetscFunctionBegin; 1289 ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1290 PetscFunctionReturn(0); 1291 } 1292 1293 #undef __FUNCT__ 1294 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1295 int MatGetArray_SeqBAIJ(Mat A,PetscScalar **array) 1296 { 1297 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1298 PetscFunctionBegin; 1299 *array = a->a; 1300 PetscFunctionReturn(0); 1301 } 1302 1303 #undef __FUNCT__ 1304 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1305 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar **array) 1306 { 1307 PetscFunctionBegin; 1308 PetscFunctionReturn(0); 1309 } 1310 1311 /* -------------------------------------------------------------------*/ 1312 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1313 MatGetRow_SeqBAIJ, 1314 MatRestoreRow_SeqBAIJ, 1315 MatMult_SeqBAIJ_N, 1316 MatMultAdd_SeqBAIJ_N, 1317 MatMultTranspose_SeqBAIJ, 1318 MatMultTransposeAdd_SeqBAIJ, 1319 MatSolve_SeqBAIJ_N, 1320 0, 1321 0, 1322 0, 1323 MatLUFactor_SeqBAIJ, 1324 0, 1325 0, 1326 MatTranspose_SeqBAIJ, 1327 MatGetInfo_SeqBAIJ, 1328 MatEqual_SeqBAIJ, 1329 MatGetDiagonal_SeqBAIJ, 1330 MatDiagonalScale_SeqBAIJ, 1331 MatNorm_SeqBAIJ, 1332 0, 1333 MatAssemblyEnd_SeqBAIJ, 1334 0, 1335 MatSetOption_SeqBAIJ, 1336 MatZeroEntries_SeqBAIJ, 1337 MatZeroRows_SeqBAIJ, 1338 MatLUFactorSymbolic_SeqBAIJ, 1339 MatLUFactorNumeric_SeqBAIJ_N, 1340 0, 1341 0, 1342 MatSetUpPreallocation_SeqBAIJ, 1343 0, 1344 MatILUFactorSymbolic_SeqBAIJ, 1345 0, 1346 MatGetArray_SeqBAIJ, 1347 MatRestoreArray_SeqBAIJ, 1348 MatDuplicate_SeqBAIJ, 1349 0, 1350 0, 1351 MatILUFactor_SeqBAIJ, 1352 0, 1353 0, 1354 MatGetSubMatrices_SeqBAIJ, 1355 MatIncreaseOverlap_SeqBAIJ, 1356 MatGetValues_SeqBAIJ, 1357 0, 1358 MatPrintHelp_SeqBAIJ, 1359 MatScale_SeqBAIJ, 1360 0, 1361 0, 1362 0, 1363 MatGetBlockSize_SeqBAIJ, 1364 MatGetRowIJ_SeqBAIJ, 1365 MatRestoreRowIJ_SeqBAIJ, 1366 0, 1367 0, 1368 0, 1369 0, 1370 0, 1371 0, 1372 MatSetValuesBlocked_SeqBAIJ, 1373 MatGetSubMatrix_SeqBAIJ, 1374 MatDestroy_SeqBAIJ, 1375 MatView_SeqBAIJ, 1376 MatGetPetscMaps_Petsc, 1377 0, 1378 0, 1379 0, 1380 0, 1381 0, 1382 0, 1383 MatGetRowMax_SeqBAIJ, 1384 MatConvert_Basic}; 1385 1386 EXTERN_C_BEGIN 1387 #undef __FUNCT__ 1388 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 1389 int MatStoreValues_SeqBAIJ(Mat mat) 1390 { 1391 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1392 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1393 int ierr; 1394 1395 PetscFunctionBegin; 1396 if (aij->nonew != 1) { 1397 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1398 } 1399 1400 /* allocate space for values if not already there */ 1401 if (!aij->saved_values) { 1402 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1403 } 1404 1405 /* copy values over */ 1406 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1407 PetscFunctionReturn(0); 1408 } 1409 EXTERN_C_END 1410 1411 EXTERN_C_BEGIN 1412 #undef __FUNCT__ 1413 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 1414 int MatRetrieveValues_SeqBAIJ(Mat mat) 1415 { 1416 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1417 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1418 1419 PetscFunctionBegin; 1420 if (aij->nonew != 1) { 1421 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1422 } 1423 if (!aij->saved_values) { 1424 SETERRQ(1,"Must call MatStoreValues(A);first"); 1425 } 1426 1427 /* copy values over */ 1428 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1429 PetscFunctionReturn(0); 1430 } 1431 EXTERN_C_END 1432 1433 EXTERN_C_BEGIN 1434 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*); 1435 EXTERN_C_END 1436 1437 EXTERN_C_BEGIN 1438 #undef __FUNCT__ 1439 #define __FUNCT__ "MatCreate_SeqBAIJ" 1440 int MatCreate_SeqBAIJ(Mat B) 1441 { 1442 int ierr,size; 1443 Mat_SeqBAIJ *b; 1444 1445 PetscFunctionBegin; 1446 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1447 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1448 1449 B->m = B->M = PetscMax(B->m,B->M); 1450 B->n = B->N = PetscMax(B->n,B->N); 1451 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 1452 B->data = (void*)b; 1453 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1454 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1455 B->factor = 0; 1456 B->lupivotthreshold = 1.0; 1457 B->mapping = 0; 1458 b->row = 0; 1459 b->col = 0; 1460 b->icol = 0; 1461 b->reallocs = 0; 1462 b->saved_values = 0; 1463 #if defined(PETSC_USE_MAT_SINGLE) 1464 b->setvalueslen = 0; 1465 b->setvaluescopy = PETSC_NULL; 1466 #endif 1467 b->single_precision_solves = PETSC_FALSE; 1468 1469 ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 1470 ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 1471 1472 b->sorted = PETSC_FALSE; 1473 b->roworiented = PETSC_TRUE; 1474 b->nonew = 0; 1475 b->diag = 0; 1476 b->solve_work = 0; 1477 b->mult_work = 0; 1478 b->spptr = 0; 1479 B->info.nz_unneeded = (PetscReal)b->maxnz; 1480 b->keepzeroedrows = PETSC_FALSE; 1481 1482 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1483 "MatStoreValues_SeqBAIJ", 1484 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1485 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1486 "MatRetrieveValues_SeqBAIJ", 1487 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1488 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 1489 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1490 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1491 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C", 1492 "MatConvert_SeqBAIJ_SeqAIJ", 1493 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 1494 PetscFunctionReturn(0); 1495 } 1496 EXTERN_C_END 1497 1498 #undef __FUNCT__ 1499 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 1500 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1501 { 1502 Mat C; 1503 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 1504 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1505 1506 PetscFunctionBegin; 1507 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1508 1509 *B = 0; 1510 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1511 ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr); 1512 c = (Mat_SeqBAIJ*)C->data; 1513 1514 c->bs = a->bs; 1515 c->bs2 = a->bs2; 1516 c->mbs = a->mbs; 1517 c->nbs = a->nbs; 1518 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1519 1520 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1521 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1522 for (i=0; i<mbs; i++) { 1523 c->imax[i] = a->imax[i]; 1524 c->ilen[i] = a->ilen[i]; 1525 } 1526 1527 /* allocate the matrix space */ 1528 c->singlemalloc = PETSC_TRUE; 1529 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1530 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1531 c->j = (int*)(c->a + nz*bs2); 1532 c->i = c->j + nz; 1533 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1534 if (mbs > 0) { 1535 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1536 if (cpvalues == MAT_COPY_VALUES) { 1537 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1538 } else { 1539 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1540 } 1541 } 1542 1543 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1544 c->sorted = a->sorted; 1545 c->roworiented = a->roworiented; 1546 c->nonew = a->nonew; 1547 1548 if (a->diag) { 1549 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1550 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1551 for (i=0; i<mbs; i++) { 1552 c->diag[i] = a->diag[i]; 1553 } 1554 } else c->diag = 0; 1555 c->nz = a->nz; 1556 c->maxnz = a->maxnz; 1557 c->solve_work = 0; 1558 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1559 c->mult_work = 0; 1560 C->preallocated = PETSC_TRUE; 1561 C->assembled = PETSC_TRUE; 1562 *B = C; 1563 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1564 PetscFunctionReturn(0); 1565 } 1566 1567 EXTERN_C_BEGIN 1568 #undef __FUNCT__ 1569 #define __FUNCT__ "MatLoad_SeqBAIJ" 1570 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A) 1571 { 1572 Mat_SeqBAIJ *a; 1573 Mat B; 1574 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1575 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1576 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1577 int *masked,nmask,tmp,bs2,ishift; 1578 PetscScalar *aa; 1579 MPI_Comm comm = ((PetscObject)viewer)->comm; 1580 1581 PetscFunctionBegin; 1582 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1583 bs2 = bs*bs; 1584 1585 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1586 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1587 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1588 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1589 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1590 M = header[1]; N = header[2]; nz = header[3]; 1591 1592 if (header[3] < 0) { 1593 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 1594 } 1595 1596 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1597 1598 /* 1599 This code adds extra rows to make sure the number of rows is 1600 divisible by the blocksize 1601 */ 1602 mbs = M/bs; 1603 extra_rows = bs - M + bs*(mbs); 1604 if (extra_rows == bs) extra_rows = 0; 1605 else mbs++; 1606 if (extra_rows) { 1607 PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1608 } 1609 1610 /* read in row lengths */ 1611 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1612 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1613 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1614 1615 /* read in column indices */ 1616 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1617 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1618 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1619 1620 /* loop over row lengths determining block row lengths */ 1621 ierr = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr); 1622 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1623 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1624 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1625 masked = mask + mbs; 1626 rowcount = 0; nzcount = 0; 1627 for (i=0; i<mbs; i++) { 1628 nmask = 0; 1629 for (j=0; j<bs; j++) { 1630 kmax = rowlengths[rowcount]; 1631 for (k=0; k<kmax; k++) { 1632 tmp = jj[nzcount++]/bs; 1633 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1634 } 1635 rowcount++; 1636 } 1637 browlengths[i] += nmask; 1638 /* zero out the mask elements we set */ 1639 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1640 } 1641 1642 /* create our matrix */ 1643 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 1644 B = *A; 1645 a = (Mat_SeqBAIJ*)B->data; 1646 1647 /* set matrix "i" values */ 1648 a->i[0] = 0; 1649 for (i=1; i<= mbs; i++) { 1650 a->i[i] = a->i[i-1] + browlengths[i-1]; 1651 a->ilen[i-1] = browlengths[i-1]; 1652 } 1653 a->nz = 0; 1654 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 1655 1656 /* read in nonzero values */ 1657 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1658 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1659 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1660 1661 /* set "a" and "j" values into matrix */ 1662 nzcount = 0; jcount = 0; 1663 for (i=0; i<mbs; i++) { 1664 nzcountb = nzcount; 1665 nmask = 0; 1666 for (j=0; j<bs; j++) { 1667 kmax = rowlengths[i*bs+j]; 1668 for (k=0; k<kmax; k++) { 1669 tmp = jj[nzcount++]/bs; 1670 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1671 } 1672 } 1673 /* sort the masked values */ 1674 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1675 1676 /* set "j" values into matrix */ 1677 maskcount = 1; 1678 for (j=0; j<nmask; j++) { 1679 a->j[jcount++] = masked[j]; 1680 mask[masked[j]] = maskcount++; 1681 } 1682 /* set "a" values into matrix */ 1683 ishift = bs2*a->i[i]; 1684 for (j=0; j<bs; j++) { 1685 kmax = rowlengths[i*bs+j]; 1686 for (k=0; k<kmax; k++) { 1687 tmp = jj[nzcountb]/bs ; 1688 block = mask[tmp] - 1; 1689 point = jj[nzcountb] - bs*tmp; 1690 idx = ishift + bs2*block + j + bs*point; 1691 a->a[idx] = (MatScalar)aa[nzcountb++]; 1692 } 1693 } 1694 /* zero out the mask elements we set */ 1695 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1696 } 1697 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1698 1699 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1700 ierr = PetscFree(browlengths);CHKERRQ(ierr); 1701 ierr = PetscFree(aa);CHKERRQ(ierr); 1702 ierr = PetscFree(jj);CHKERRQ(ierr); 1703 ierr = PetscFree(mask);CHKERRQ(ierr); 1704 1705 B->assembled = PETSC_TRUE; 1706 1707 ierr = MatView_Private(B);CHKERRQ(ierr); 1708 PetscFunctionReturn(0); 1709 } 1710 EXTERN_C_END 1711 1712 #undef __FUNCT__ 1713 #define __FUNCT__ "MatCreateSeqBAIJ" 1714 /*@C 1715 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1716 compressed row) format. For good matrix assembly performance the 1717 user should preallocate the matrix storage by setting the parameter nz 1718 (or the array nnz). By setting these parameters accurately, performance 1719 during matrix assembly can be increased by more than a factor of 50. 1720 1721 Collective on MPI_Comm 1722 1723 Input Parameters: 1724 + comm - MPI communicator, set to PETSC_COMM_SELF 1725 . bs - size of block 1726 . m - number of rows 1727 . n - number of columns 1728 . nz - number of nonzero blocks per block row (same for all rows) 1729 - nnz - array containing the number of nonzero blocks in the various block rows 1730 (possibly different for each block row) or PETSC_NULL 1731 1732 Output Parameter: 1733 . A - the matrix 1734 1735 Options Database Keys: 1736 . -mat_no_unroll - uses code that does not unroll the loops in the 1737 block calculations (much slower) 1738 . -mat_block_size - size of the blocks to use 1739 1740 Level: intermediate 1741 1742 Notes: 1743 A nonzero block is any block that as 1 or more nonzeros in it 1744 1745 The block AIJ format is fully compatible with standard Fortran 77 1746 storage. That is, the stored row and column indices can begin at 1747 either one (as in Fortran) or zero. See the users' manual for details. 1748 1749 Specify the preallocated storage with either nz or nnz (not both). 1750 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1751 allocation. For additional details, see the users manual chapter on 1752 matrices. 1753 1754 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1755 @*/ 1756 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1757 { 1758 int ierr; 1759 1760 PetscFunctionBegin; 1761 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1762 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 1763 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1764 PetscFunctionReturn(0); 1765 } 1766 1767 #undef __FUNCT__ 1768 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 1769 /*@C 1770 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 1771 per row in the matrix. For good matrix assembly performance the 1772 user should preallocate the matrix storage by setting the parameter nz 1773 (or the array nnz). By setting these parameters accurately, performance 1774 during matrix assembly can be increased by more than a factor of 50. 1775 1776 Collective on MPI_Comm 1777 1778 Input Parameters: 1779 + A - the matrix 1780 . bs - size of block 1781 . nz - number of block nonzeros per block row (same for all rows) 1782 - nnz - array containing the number of block nonzeros in the various block rows 1783 (possibly different for each block row) or PETSC_NULL 1784 1785 Options Database Keys: 1786 . -mat_no_unroll - uses code that does not unroll the loops in the 1787 block calculations (much slower) 1788 . -mat_block_size - size of the blocks to use 1789 1790 Level: intermediate 1791 1792 Notes: 1793 The block AIJ format is fully compatible with standard Fortran 77 1794 storage. That is, the stored row and column indices can begin at 1795 either one (as in Fortran) or zero. See the users' manual for details. 1796 1797 Specify the preallocated storage with either nz or nnz (not both). 1798 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1799 allocation. For additional details, see the users manual chapter on 1800 matrices. 1801 1802 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1803 @*/ 1804 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz) 1805 { 1806 Mat_SeqBAIJ *b; 1807 int i,len,ierr,mbs,nbs,bs2,newbs = bs; 1808 PetscTruth flg; 1809 1810 PetscFunctionBegin; 1811 ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);CHKERRQ(ierr); 1812 if (!flg) PetscFunctionReturn(0); 1813 1814 B->preallocated = PETSC_TRUE; 1815 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 1816 if (nnz && newbs != bs) { 1817 SETERRQ(1,"Cannot change blocksize from command line if setting nnz"); 1818 } 1819 bs = newbs; 1820 1821 mbs = B->m/bs; 1822 nbs = B->n/bs; 1823 bs2 = bs*bs; 1824 1825 if (mbs*bs!=B->m || nbs*bs!=B->n) { 1826 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs); 1827 } 1828 1829 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1830 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1831 if (nnz) { 1832 for (i=0; i<mbs; i++) { 1833 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1834 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs); 1835 } 1836 } 1837 1838 b = (Mat_SeqBAIJ*)B->data; 1839 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1840 B->ops->solve = MatSolve_SeqBAIJ_Update; 1841 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 1842 if (!flg) { 1843 switch (bs) { 1844 case 1: 1845 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1846 B->ops->mult = MatMult_SeqBAIJ_1; 1847 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 1848 break; 1849 case 2: 1850 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1851 B->ops->mult = MatMult_SeqBAIJ_2; 1852 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 1853 break; 1854 case 3: 1855 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1856 B->ops->mult = MatMult_SeqBAIJ_3; 1857 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 1858 break; 1859 case 4: 1860 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1861 B->ops->mult = MatMult_SeqBAIJ_4; 1862 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 1863 break; 1864 case 5: 1865 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1866 B->ops->mult = MatMult_SeqBAIJ_5; 1867 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 1868 break; 1869 case 6: 1870 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 1871 B->ops->mult = MatMult_SeqBAIJ_6; 1872 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 1873 break; 1874 case 7: 1875 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 1876 B->ops->mult = MatMult_SeqBAIJ_7; 1877 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 1878 break; 1879 default: 1880 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 1881 B->ops->mult = MatMult_SeqBAIJ_N; 1882 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 1883 break; 1884 } 1885 } 1886 b->bs = bs; 1887 b->mbs = mbs; 1888 b->nbs = nbs; 1889 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1890 if (!nnz) { 1891 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1892 else if (nz <= 0) nz = 1; 1893 for (i=0; i<mbs; i++) b->imax[i] = nz; 1894 nz = nz*mbs; 1895 } else { 1896 nz = 0; 1897 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1898 } 1899 1900 /* allocate the matrix space */ 1901 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1902 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1903 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1904 b->j = (int*)(b->a + nz*bs2); 1905 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 1906 b->i = b->j + nz; 1907 b->singlemalloc = PETSC_TRUE; 1908 1909 b->i[0] = 0; 1910 for (i=1; i<mbs+1; i++) { 1911 b->i[i] = b->i[i-1] + b->imax[i-1]; 1912 } 1913 1914 /* b->ilen will count nonzeros in each block row so far. */ 1915 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1916 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1917 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1918 1919 b->bs = bs; 1920 b->bs2 = bs2; 1921 b->mbs = mbs; 1922 b->nz = 0; 1923 b->maxnz = nz*bs2; 1924 B->info.nz_unneeded = (PetscReal)b->maxnz; 1925 PetscFunctionReturn(0); 1926 } 1927 1928