1 /*$Id: baij.c,v 1.239 2001/07/16 18:15:53 buschelm Exp buschelm $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "petscsys.h" /*I "petscmat.h" I*/ 8 #include "src/mat/impls/baij/seq/baij.h" 9 #include "src/vec/vecimpl.h" 10 #include "src/inline/spops.h" 11 12 /* UGLY, ugly, ugly 13 When MatScalar == Scalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 14 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 15 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 16 converts the entries into single precision and then calls ..._MatScalar() to put them 17 into the single precision data structures. 18 */ 19 #if defined(PETSC_USE_MAT_SINGLE) 20 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 21 #else 22 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 23 #endif 24 25 #define CHUNKSIZE 10 26 27 /* 28 Checks for missing diagonals 29 */ 30 #undef __FUNCT__ 31 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 32 int MatMissingDiagonal_SeqBAIJ(Mat A) 33 { 34 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 35 int *diag,*jj = a->j,i,ierr; 36 37 PetscFunctionBegin; 38 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 39 diag = a->diag; 40 for (i=0; i<a->mbs; i++) { 41 if (jj[diag[i]] != i) { 42 SETERRQ1(1,"Matrix is missing diagonal number %d",i); 43 } 44 } 45 PetscFunctionReturn(0); 46 } 47 48 #undef __FUNCT__ 49 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 50 int MatMarkDiagonal_SeqBAIJ(Mat A) 51 { 52 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 53 int i,j,*diag,m = a->mbs,ierr; 54 55 PetscFunctionBegin; 56 if (a->diag) PetscFunctionReturn(0); 57 58 ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr); 59 PetscLogObjectMemory(A,(m+1)*sizeof(int)); 60 for (i=0; i<m; i++) { 61 diag[i] = a->i[i+1]; 62 for (j=a->i[i]; j<a->i[i+1]; j++) { 63 if (a->j[j] == i) { 64 diag[i] = j; 65 break; 66 } 67 } 68 } 69 a->diag = diag; 70 PetscFunctionReturn(0); 71 } 72 73 74 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 75 76 #undef __FUNCT__ 77 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 78 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 79 { 80 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 81 int ierr,n = a->mbs,i; 82 83 PetscFunctionBegin; 84 *nn = n; 85 if (!ia) PetscFunctionReturn(0); 86 if (symmetric) { 87 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 88 } else if (oshift == 1) { 89 /* temporarily add 1 to i and j indices */ 90 int nz = a->i[n]; 91 for (i=0; i<nz; i++) a->j[i]++; 92 for (i=0; i<n+1; i++) a->i[i]++; 93 *ia = a->i; *ja = a->j; 94 } else { 95 *ia = a->i; *ja = a->j; 96 } 97 98 PetscFunctionReturn(0); 99 } 100 101 #undef __FUNCT__ 102 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 103 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 104 { 105 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 106 int i,n = a->mbs,ierr; 107 108 PetscFunctionBegin; 109 if (!ia) PetscFunctionReturn(0); 110 if (symmetric) { 111 ierr = PetscFree(*ia);CHKERRQ(ierr); 112 ierr = PetscFree(*ja);CHKERRQ(ierr); 113 } else if (oshift == 1) { 114 int nz = a->i[n]-1; 115 for (i=0; i<nz; i++) a->j[i]--; 116 for (i=0; i<n+1; i++) a->i[i]--; 117 } 118 PetscFunctionReturn(0); 119 } 120 121 #undef __FUNCT__ 122 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ" 123 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs) 124 { 125 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 126 127 PetscFunctionBegin; 128 *bs = baij->bs; 129 PetscFunctionReturn(0); 130 } 131 132 133 #undef __FUNCT__ 134 #define __FUNCT__ "MatDestroy_SeqBAIJ" 135 int MatDestroy_SeqBAIJ(Mat A) 136 { 137 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 138 int ierr; 139 140 PetscFunctionBegin; 141 #if defined(PETSC_USE_LOG) 142 PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz); 143 #endif 144 ierr = PetscFree(a->a);CHKERRQ(ierr); 145 if (!a->singlemalloc) { 146 ierr = PetscFree(a->i);CHKERRQ(ierr); 147 ierr = PetscFree(a->j);CHKERRQ(ierr); 148 } 149 if (a->row) { 150 ierr = ISDestroy(a->row);CHKERRQ(ierr); 151 } 152 if (a->col) { 153 ierr = ISDestroy(a->col);CHKERRQ(ierr); 154 } 155 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 156 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 157 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 158 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 159 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 160 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 161 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 162 #if defined(PETSC_USE_MAT_SINGLE) 163 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 164 #endif 165 ierr = PetscFree(a);CHKERRQ(ierr); 166 PetscFunctionReturn(0); 167 } 168 169 #undef __FUNCT__ 170 #define __FUNCT__ "MatSetOption_SeqBAIJ" 171 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 172 { 173 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 174 175 PetscFunctionBegin; 176 switch (op) { 177 case MAT_ROW_ORIENTED: 178 a->roworiented = PETSC_TRUE; 179 break; 180 case MAT_COLUMN_ORIENTED: 181 a->roworiented = PETSC_FALSE; 182 break; 183 case MAT_COLUMNS_SORTED: 184 a->sorted = PETSC_TRUE; 185 break; 186 case MAT_COLUMNS_UNSORTED: 187 a->sorted = PETSC_FALSE; 188 break; 189 case MAT_KEEP_ZEROED_ROWS: 190 a->keepzeroedrows = PETSC_TRUE; 191 break; 192 case MAT_NO_NEW_NONZERO_LOCATIONS: 193 a->nonew = 1; 194 break; 195 case MAT_NEW_NONZERO_LOCATION_ERR: 196 a->nonew = -1; 197 break; 198 case MAT_NEW_NONZERO_ALLOCATION_ERR: 199 a->nonew = -2; 200 break; 201 case MAT_YES_NEW_NONZERO_LOCATIONS: 202 a->nonew = 0; 203 break; 204 case MAT_ROWS_SORTED: 205 case MAT_ROWS_UNSORTED: 206 case MAT_YES_NEW_DIAGONALS: 207 case MAT_IGNORE_OFF_PROC_ENTRIES: 208 case MAT_USE_HASH_TABLE: 209 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 210 break; 211 case MAT_NO_NEW_DIAGONALS: 212 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 213 break; 214 case MAT_USE_SINGLE_PRECISION_SOLVES: 215 if (a->bs==4) { 216 PetscTruth sse_enabled=PETSC_FALSE; 217 int ierr; 218 ierr = PetscSSEIsEnabled(&sse_enabled);CHKERRQ(ierr); 219 #if defined(PETSC_HAVE_SSE) 220 if (sse_enabled) { 221 a->single_precision_solves = PETSC_TRUE; 222 A->ops->solve = MatSolve_SeqBAIJ_Update; 223 A->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 224 break; 225 } 226 #else 227 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n"); 228 #endif 229 } 230 break; 231 default: 232 SETERRQ(PETSC_ERR_SUP,"unknown option"); 233 } 234 PetscFunctionReturn(0); 235 } 236 237 #undef __FUNCT__ 238 #define __FUNCT__ "MatGetOwnershipRange_SeqBAIJ" 239 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 240 { 241 PetscFunctionBegin; 242 if (m) *m = 0; 243 if (n) *n = A->m; 244 PetscFunctionReturn(0); 245 } 246 247 #undef __FUNCT__ 248 #define __FUNCT__ "MatGetRow_SeqBAIJ" 249 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 250 { 251 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 252 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr; 253 MatScalar *aa,*aa_i; 254 Scalar *v_i; 255 256 PetscFunctionBegin; 257 bs = a->bs; 258 ai = a->i; 259 aj = a->j; 260 aa = a->a; 261 bs2 = a->bs2; 262 263 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 264 265 bn = row/bs; /* Block number */ 266 bp = row % bs; /* Block Position */ 267 M = ai[bn+1] - ai[bn]; 268 *nz = bs*M; 269 270 if (v) { 271 *v = 0; 272 if (*nz) { 273 ierr = PetscMalloc((*nz)*sizeof(Scalar),v);CHKERRQ(ierr); 274 for (i=0; i<M; i++) { /* for each block in the block row */ 275 v_i = *v + i*bs; 276 aa_i = aa + bs2*(ai[bn] + i); 277 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 278 } 279 } 280 } 281 282 if (idx) { 283 *idx = 0; 284 if (*nz) { 285 ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr); 286 for (i=0; i<M; i++) { /* for each block in the block row */ 287 idx_i = *idx + i*bs; 288 itmp = bs*aj[ai[bn] + i]; 289 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 290 } 291 } 292 } 293 PetscFunctionReturn(0); 294 } 295 296 #undef __FUNCT__ 297 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 298 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 299 { 300 int ierr; 301 302 PetscFunctionBegin; 303 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 304 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 305 PetscFunctionReturn(0); 306 } 307 308 #undef __FUNCT__ 309 #define __FUNCT__ "MatTranspose_SeqBAIJ" 310 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 311 { 312 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 313 Mat C; 314 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 315 int *rows,*cols,bs2=a->bs2; 316 Scalar *array; 317 318 PetscFunctionBegin; 319 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 320 ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr); 321 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 322 323 #if defined(PETSC_USE_MAT_SINGLE) 324 ierr = PetscMalloc(a->bs2*a->nz*sizeof(Scalar),&array);CHKERRQ(ierr); 325 for (i=0; i<a->bs2*a->nz; i++) array[i] = (Scalar)a->a[i]; 326 #else 327 array = a->a; 328 #endif 329 330 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 331 ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 332 ierr = PetscFree(col);CHKERRQ(ierr); 333 ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr); 334 cols = rows + bs; 335 for (i=0; i<mbs; i++) { 336 cols[0] = i*bs; 337 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 338 len = ai[i+1] - ai[i]; 339 for (j=0; j<len; j++) { 340 rows[0] = (*aj++)*bs; 341 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 342 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 343 array += bs2; 344 } 345 } 346 ierr = PetscFree(rows);CHKERRQ(ierr); 347 #if defined(PETSC_USE_MAT_SINGLE) 348 ierr = PetscFree(array); 349 #endif 350 351 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 352 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 353 354 if (B) { 355 *B = C; 356 } else { 357 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 358 } 359 PetscFunctionReturn(0); 360 } 361 362 #undef __FUNCT__ 363 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 364 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 365 { 366 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 367 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 368 Scalar *aa; 369 FILE *file; 370 371 PetscFunctionBegin; 372 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 373 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 374 col_lens[0] = MAT_COOKIE; 375 376 col_lens[1] = A->m; 377 col_lens[2] = A->n; 378 col_lens[3] = a->nz*bs2; 379 380 /* store lengths of each row and write (including header) to file */ 381 count = 0; 382 for (i=0; i<a->mbs; i++) { 383 for (j=0; j<bs; j++) { 384 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 385 } 386 } 387 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 388 ierr = PetscFree(col_lens);CHKERRQ(ierr); 389 390 /* store column indices (zero start index) */ 391 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 392 count = 0; 393 for (i=0; i<a->mbs; i++) { 394 for (j=0; j<bs; j++) { 395 for (k=a->i[i]; k<a->i[i+1]; k++) { 396 for (l=0; l<bs; l++) { 397 jj[count++] = bs*a->j[k] + l; 398 } 399 } 400 } 401 } 402 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 403 ierr = PetscFree(jj);CHKERRQ(ierr); 404 405 /* store nonzero values */ 406 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(Scalar),&aa);CHKERRQ(ierr); 407 count = 0; 408 for (i=0; i<a->mbs; i++) { 409 for (j=0; j<bs; j++) { 410 for (k=a->i[i]; k<a->i[i+1]; k++) { 411 for (l=0; l<bs; l++) { 412 aa[count++] = a->a[bs2*k + l*bs + j]; 413 } 414 } 415 } 416 } 417 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 418 ierr = PetscFree(aa);CHKERRQ(ierr); 419 420 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 421 if (file) { 422 fprintf(file,"-matload_block_size %d\n",a->bs); 423 } 424 PetscFunctionReturn(0); 425 } 426 427 #undef __FUNCT__ 428 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 429 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 430 { 431 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 432 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 433 PetscViewerFormat format; 434 435 PetscFunctionBegin; 436 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 437 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) { 438 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 439 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 440 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 441 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 442 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 443 for (i=0; i<a->mbs; i++) { 444 for (j=0; j<bs; j++) { 445 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 446 for (k=a->i[i]; k<a->i[i+1]; k++) { 447 for (l=0; l<bs; l++) { 448 #if defined(PETSC_USE_COMPLEX) 449 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 450 ierr = PetscViewerASCIIPrintf(viewer," %d %g + %gi",bs*a->j[k]+l, 451 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 452 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 453 ierr = PetscViewerASCIIPrintf(viewer," %d %g - %gi",bs*a->j[k]+l, 454 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 455 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 456 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 457 } 458 #else 459 if (a->a[bs2*k + l*bs + j] != 0.0) { 460 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 461 } 462 #endif 463 } 464 } 465 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 466 } 467 } 468 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 469 } else { 470 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 471 for (i=0; i<a->mbs; i++) { 472 for (j=0; j<bs; j++) { 473 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 474 for (k=a->i[i]; k<a->i[i+1]; k++) { 475 for (l=0; l<bs; l++) { 476 #if defined(PETSC_USE_COMPLEX) 477 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 478 ierr = PetscViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l, 479 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 480 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 481 ierr = PetscViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l, 482 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 483 } else { 484 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 485 } 486 #else 487 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 488 #endif 489 } 490 } 491 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 492 } 493 } 494 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 495 } 496 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 497 PetscFunctionReturn(0); 498 } 499 500 #undef __FUNCT__ 501 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 502 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 503 { 504 Mat A = (Mat) Aa; 505 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 506 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 507 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 508 MatScalar *aa; 509 PetscViewer viewer; 510 511 PetscFunctionBegin; 512 513 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 514 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 515 516 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 517 518 /* loop over matrix elements drawing boxes */ 519 color = PETSC_DRAW_BLUE; 520 for (i=0,row=0; i<mbs; i++,row+=bs) { 521 for (j=a->i[i]; j<a->i[i+1]; j++) { 522 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 523 x_l = a->j[j]*bs; x_r = x_l + 1.0; 524 aa = a->a + j*bs2; 525 for (k=0; k<bs; k++) { 526 for (l=0; l<bs; l++) { 527 if (PetscRealPart(*aa++) >= 0.) continue; 528 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 529 } 530 } 531 } 532 } 533 color = PETSC_DRAW_CYAN; 534 for (i=0,row=0; i<mbs; i++,row+=bs) { 535 for (j=a->i[i]; j<a->i[i+1]; j++) { 536 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 537 x_l = a->j[j]*bs; x_r = x_l + 1.0; 538 aa = a->a + j*bs2; 539 for (k=0; k<bs; k++) { 540 for (l=0; l<bs; l++) { 541 if (PetscRealPart(*aa++) != 0.) continue; 542 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 543 } 544 } 545 } 546 } 547 548 color = PETSC_DRAW_RED; 549 for (i=0,row=0; i<mbs; i++,row+=bs) { 550 for (j=a->i[i]; j<a->i[i+1]; j++) { 551 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 552 x_l = a->j[j]*bs; x_r = x_l + 1.0; 553 aa = a->a + j*bs2; 554 for (k=0; k<bs; k++) { 555 for (l=0; l<bs; l++) { 556 if (PetscRealPart(*aa++) <= 0.) continue; 557 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 558 } 559 } 560 } 561 } 562 PetscFunctionReturn(0); 563 } 564 565 #undef __FUNCT__ 566 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 567 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 568 { 569 int ierr; 570 PetscReal xl,yl,xr,yr,w,h; 571 PetscDraw draw; 572 PetscTruth isnull; 573 574 PetscFunctionBegin; 575 576 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 577 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 578 579 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 580 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 581 xr += w; yr += h; xl = -w; yl = -h; 582 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 583 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 584 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 585 PetscFunctionReturn(0); 586 } 587 588 #undef __FUNCT__ 589 #define __FUNCT__ "MatView_SeqBAIJ" 590 int MatView_SeqBAIJ(Mat A,PetscViewer viewer) 591 { 592 int ierr; 593 PetscTruth issocket,isascii,isbinary,isdraw; 594 595 PetscFunctionBegin; 596 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 597 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 598 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 599 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 600 if (issocket) { 601 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 602 } else if (isascii){ 603 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 604 } else if (isbinary) { 605 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 606 } else if (isdraw) { 607 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 608 } else { 609 SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 610 } 611 PetscFunctionReturn(0); 612 } 613 614 615 #undef __FUNCT__ 616 #define __FUNCT__ "MatGetValues_SeqBAIJ" 617 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 618 { 619 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 620 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 621 int *ai = a->i,*ailen = a->ilen; 622 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 623 MatScalar *ap,*aa = a->a,zero = 0.0; 624 625 PetscFunctionBegin; 626 for (k=0; k<m; k++) { /* loop over rows */ 627 row = im[k]; brow = row/bs; 628 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 629 if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 630 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 631 nrow = ailen[brow]; 632 for (l=0; l<n; l++) { /* loop over columns */ 633 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 634 if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 635 col = in[l] ; 636 bcol = col/bs; 637 cidx = col%bs; 638 ridx = row%bs; 639 high = nrow; 640 low = 0; /* assume unsorted */ 641 while (high-low > 5) { 642 t = (low+high)/2; 643 if (rp[t] > bcol) high = t; 644 else low = t; 645 } 646 for (i=low; i<high; i++) { 647 if (rp[i] > bcol) break; 648 if (rp[i] == bcol) { 649 *v++ = ap[bs2*i+bs*cidx+ridx]; 650 goto finished; 651 } 652 } 653 *v++ = zero; 654 finished:; 655 } 656 } 657 PetscFunctionReturn(0); 658 } 659 660 #if defined(PETSC_USE_MAT_SINGLE) 661 #undef __FUNCT__ 662 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 663 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 664 { 665 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 666 int ierr,i,N = m*n*b->bs2; 667 MatScalar *vsingle; 668 669 PetscFunctionBegin; 670 if (N > b->setvalueslen) { 671 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 672 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 673 b->setvalueslen = N; 674 } 675 vsingle = b->setvaluescopy; 676 for (i=0; i<N; i++) { 677 vsingle[i] = v[i]; 678 } 679 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 680 PetscFunctionReturn(0); 681 } 682 #endif 683 684 685 #undef __FUNCT__ 686 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 687 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is) 688 { 689 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 690 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 691 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 692 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 693 PetscTruth roworiented=a->roworiented; 694 MatScalar *value = v,*ap,*aa = a->a,*bap; 695 696 PetscFunctionBegin; 697 if (roworiented) { 698 stepval = (n-1)*bs; 699 } else { 700 stepval = (m-1)*bs; 701 } 702 for (k=0; k<m; k++) { /* loop over added rows */ 703 row = im[k]; 704 if (row < 0) continue; 705 #if defined(PETSC_USE_BOPT_g) 706 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 707 #endif 708 rp = aj + ai[row]; 709 ap = aa + bs2*ai[row]; 710 rmax = imax[row]; 711 nrow = ailen[row]; 712 low = 0; 713 for (l=0; l<n; l++) { /* loop over added columns */ 714 if (in[l] < 0) continue; 715 #if defined(PETSC_USE_BOPT_g) 716 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 717 #endif 718 col = in[l]; 719 if (roworiented) { 720 value = v + k*(stepval+bs)*bs + l*bs; 721 } else { 722 value = v + l*(stepval+bs)*bs + k*bs; 723 } 724 if (!sorted) low = 0; high = nrow; 725 while (high-low > 7) { 726 t = (low+high)/2; 727 if (rp[t] > col) high = t; 728 else low = t; 729 } 730 for (i=low; i<high; i++) { 731 if (rp[i] > col) break; 732 if (rp[i] == col) { 733 bap = ap + bs2*i; 734 if (roworiented) { 735 if (is == ADD_VALUES) { 736 for (ii=0; ii<bs; ii++,value+=stepval) { 737 for (jj=ii; jj<bs2; jj+=bs) { 738 bap[jj] += *value++; 739 } 740 } 741 } else { 742 for (ii=0; ii<bs; ii++,value+=stepval) { 743 for (jj=ii; jj<bs2; jj+=bs) { 744 bap[jj] = *value++; 745 } 746 } 747 } 748 } else { 749 if (is == ADD_VALUES) { 750 for (ii=0; ii<bs; ii++,value+=stepval) { 751 for (jj=0; jj<bs; jj++) { 752 *bap++ += *value++; 753 } 754 } 755 } else { 756 for (ii=0; ii<bs; ii++,value+=stepval) { 757 for (jj=0; jj<bs; jj++) { 758 *bap++ = *value++; 759 } 760 } 761 } 762 } 763 goto noinsert2; 764 } 765 } 766 if (nonew == 1) goto noinsert2; 767 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 768 if (nrow >= rmax) { 769 /* there is no extra room in row, therefore enlarge */ 770 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 771 MatScalar *new_a; 772 773 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 774 775 /* malloc new storage space */ 776 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 777 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 778 new_j = (int*)(new_a + bs2*new_nz); 779 new_i = new_j + new_nz; 780 781 /* copy over old data into new slots */ 782 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 783 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 784 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 785 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 786 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 787 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 788 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 789 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 790 /* free up old matrix storage */ 791 ierr = PetscFree(a->a);CHKERRQ(ierr); 792 if (!a->singlemalloc) { 793 ierr = PetscFree(a->i);CHKERRQ(ierr); 794 ierr = PetscFree(a->j);CHKERRQ(ierr); 795 } 796 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 797 a->singlemalloc = PETSC_TRUE; 798 799 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 800 rmax = imax[row] = imax[row] + CHUNKSIZE; 801 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 802 a->maxnz += bs2*CHUNKSIZE; 803 a->reallocs++; 804 a->nz++; 805 } 806 N = nrow++ - 1; 807 /* shift up all the later entries in this row */ 808 for (ii=N; ii>=i; ii--) { 809 rp[ii+1] = rp[ii]; 810 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 811 } 812 if (N >= i) { 813 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 814 } 815 rp[i] = col; 816 bap = ap + bs2*i; 817 if (roworiented) { 818 for (ii=0; ii<bs; ii++,value+=stepval) { 819 for (jj=ii; jj<bs2; jj+=bs) { 820 bap[jj] = *value++; 821 } 822 } 823 } else { 824 for (ii=0; ii<bs; ii++,value+=stepval) { 825 for (jj=0; jj<bs; jj++) { 826 *bap++ = *value++; 827 } 828 } 829 } 830 noinsert2:; 831 low = i; 832 } 833 ailen[row] = nrow; 834 } 835 PetscFunctionReturn(0); 836 } 837 838 839 #undef __FUNCT__ 840 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 841 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 842 { 843 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 844 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 845 int m = A->m,*ip,N,*ailen = a->ilen; 846 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 847 MatScalar *aa = a->a,*ap; 848 849 PetscFunctionBegin; 850 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 851 852 if (m) rmax = ailen[0]; 853 for (i=1; i<mbs; i++) { 854 /* move each row back by the amount of empty slots (fshift) before it*/ 855 fshift += imax[i-1] - ailen[i-1]; 856 rmax = PetscMax(rmax,ailen[i]); 857 if (fshift) { 858 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 859 N = ailen[i]; 860 for (j=0; j<N; j++) { 861 ip[j-fshift] = ip[j]; 862 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 863 } 864 } 865 ai[i] = ai[i-1] + ailen[i-1]; 866 } 867 if (mbs) { 868 fshift += imax[mbs-1] - ailen[mbs-1]; 869 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 870 } 871 /* reset ilen and imax for each row */ 872 for (i=0; i<mbs; i++) { 873 ailen[i] = imax[i] = ai[i+1] - ai[i]; 874 } 875 a->nz = ai[mbs]; 876 877 /* diagonals may have moved, so kill the diagonal pointers */ 878 if (fshift && a->diag) { 879 ierr = PetscFree(a->diag);CHKERRQ(ierr); 880 PetscLogObjectMemory(A,-(m+1)*sizeof(int)); 881 a->diag = 0; 882 } 883 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 884 m,A->n,a->bs,fshift*bs2,a->nz*bs2); 885 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n", 886 a->reallocs); 887 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 888 a->reallocs = 0; 889 A->info.nz_unneeded = (PetscReal)fshift*bs2; 890 891 PetscFunctionReturn(0); 892 } 893 894 895 896 /* 897 This function returns an array of flags which indicate the locations of contiguous 898 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 899 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 900 Assume: sizes should be long enough to hold all the values. 901 */ 902 #undef __FUNCT__ 903 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 904 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 905 { 906 int i,j,k,row; 907 PetscTruth flg; 908 909 PetscFunctionBegin; 910 for (i=0,j=0; i<n; j++) { 911 row = idx[i]; 912 if (row%bs!=0) { /* Not the begining of a block */ 913 sizes[j] = 1; 914 i++; 915 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 916 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 917 i++; 918 } else { /* Begining of the block, so check if the complete block exists */ 919 flg = PETSC_TRUE; 920 for (k=1; k<bs; k++) { 921 if (row+k != idx[i+k]) { /* break in the block */ 922 flg = PETSC_FALSE; 923 break; 924 } 925 } 926 if (flg == PETSC_TRUE) { /* No break in the bs */ 927 sizes[j] = bs; 928 i+= bs; 929 } else { 930 sizes[j] = 1; 931 i++; 932 } 933 } 934 } 935 *bs_max = j; 936 PetscFunctionReturn(0); 937 } 938 939 #undef __FUNCT__ 940 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 941 int MatZeroRows_SeqBAIJ(Mat A,IS is,Scalar *diag) 942 { 943 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 944 int ierr,i,j,k,count,is_n,*is_idx,*rows; 945 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 946 Scalar zero = 0.0; 947 MatScalar *aa; 948 949 PetscFunctionBegin; 950 /* Make a copy of the IS and sort it */ 951 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 952 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 953 954 /* allocate memory for rows,sizes */ 955 ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 956 sizes = rows + is_n; 957 958 /* copy IS values to rows, and sort them */ 959 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 960 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 961 if (baij->keepzeroedrows) { 962 for (i=0; i<is_n; i++) { sizes[i] = 1; } 963 bs_max = is_n; 964 } else { 965 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 966 } 967 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 968 969 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 970 row = rows[j]; 971 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 972 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 973 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 974 if (sizes[i] == bs && !baij->keepzeroedrows) { 975 if (diag) { 976 if (baij->ilen[row/bs] > 0) { 977 baij->ilen[row/bs] = 1; 978 baij->j[baij->i[row/bs]] = row/bs; 979 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 980 } 981 /* Now insert all the diagonal values for this bs */ 982 for (k=0; k<bs; k++) { 983 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 984 } 985 } else { /* (!diag) */ 986 baij->ilen[row/bs] = 0; 987 } /* end (!diag) */ 988 } else { /* (sizes[i] != bs) */ 989 #if defined (PETSC_USE_DEBUG) 990 if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 991 #endif 992 for (k=0; k<count; k++) { 993 aa[0] = zero; 994 aa += bs; 995 } 996 if (diag) { 997 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 998 } 999 } 1000 } 1001 1002 ierr = PetscFree(rows);CHKERRQ(ierr); 1003 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1004 PetscFunctionReturn(0); 1005 } 1006 1007 #undef __FUNCT__ 1008 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1009 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 1010 { 1011 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1012 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1013 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1014 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1015 int ridx,cidx,bs2=a->bs2,ierr; 1016 PetscTruth roworiented=a->roworiented; 1017 MatScalar *ap,value,*aa=a->a,*bap; 1018 1019 PetscFunctionBegin; 1020 for (k=0; k<m; k++) { /* loop over added rows */ 1021 row = im[k]; brow = row/bs; 1022 if (row < 0) continue; 1023 #if defined(PETSC_USE_BOPT_g) 1024 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m); 1025 #endif 1026 rp = aj + ai[brow]; 1027 ap = aa + bs2*ai[brow]; 1028 rmax = imax[brow]; 1029 nrow = ailen[brow]; 1030 low = 0; 1031 for (l=0; l<n; l++) { /* loop over added columns */ 1032 if (in[l] < 0) continue; 1033 #if defined(PETSC_USE_BOPT_g) 1034 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n); 1035 #endif 1036 col = in[l]; bcol = col/bs; 1037 ridx = row % bs; cidx = col % bs; 1038 if (roworiented) { 1039 value = v[l + k*n]; 1040 } else { 1041 value = v[k + l*m]; 1042 } 1043 if (!sorted) low = 0; high = nrow; 1044 while (high-low > 7) { 1045 t = (low+high)/2; 1046 if (rp[t] > bcol) high = t; 1047 else low = t; 1048 } 1049 for (i=low; i<high; i++) { 1050 if (rp[i] > bcol) break; 1051 if (rp[i] == bcol) { 1052 bap = ap + bs2*i + bs*cidx + ridx; 1053 if (is == ADD_VALUES) *bap += value; 1054 else *bap = value; 1055 goto noinsert1; 1056 } 1057 } 1058 if (nonew == 1) goto noinsert1; 1059 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1060 if (nrow >= rmax) { 1061 /* there is no extra room in row, therefore enlarge */ 1062 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1063 MatScalar *new_a; 1064 1065 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1066 1067 /* Malloc new storage space */ 1068 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1069 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1070 new_j = (int*)(new_a + bs2*new_nz); 1071 new_i = new_j + new_nz; 1072 1073 /* copy over old data into new slots */ 1074 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1075 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1076 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1077 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1078 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1079 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1080 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1081 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1082 /* free up old matrix storage */ 1083 ierr = PetscFree(a->a);CHKERRQ(ierr); 1084 if (!a->singlemalloc) { 1085 ierr = PetscFree(a->i);CHKERRQ(ierr); 1086 ierr = PetscFree(a->j);CHKERRQ(ierr); 1087 } 1088 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1089 a->singlemalloc = PETSC_TRUE; 1090 1091 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1092 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1093 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1094 a->maxnz += bs2*CHUNKSIZE; 1095 a->reallocs++; 1096 a->nz++; 1097 } 1098 N = nrow++ - 1; 1099 /* shift up all the later entries in this row */ 1100 for (ii=N; ii>=i; ii--) { 1101 rp[ii+1] = rp[ii]; 1102 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1103 } 1104 if (N>=i) { 1105 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1106 } 1107 rp[i] = bcol; 1108 ap[bs2*i + bs*cidx + ridx] = value; 1109 noinsert1:; 1110 low = i; 1111 } 1112 ailen[brow] = nrow; 1113 } 1114 PetscFunctionReturn(0); 1115 } 1116 1117 1118 #undef __FUNCT__ 1119 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1120 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info) 1121 { 1122 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1123 Mat outA; 1124 int ierr; 1125 PetscTruth row_identity,col_identity; 1126 1127 PetscFunctionBegin; 1128 if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1129 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1130 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1131 if (!row_identity || !col_identity) { 1132 SETERRQ(1,"Row and column permutations must be identity for in-place ILU"); 1133 } 1134 1135 outA = inA; 1136 inA->factor = FACTOR_LU; 1137 1138 if (!a->diag) { 1139 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1140 } 1141 /* 1142 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1143 for ILU(0) factorization with natural ordering 1144 */ 1145 if (a->bs < 8) { 1146 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1147 } else { 1148 a->row = row; 1149 a->col = col; 1150 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1151 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1152 1153 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1154 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1155 PetscLogObjectParent(inA,a->icol); 1156 1157 if (!a->solve_work) { 1158 ierr = PetscMalloc((inA->m+a->bs)*sizeof(Scalar),&a->solve_work);CHKERRQ(ierr); 1159 PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(Scalar)); 1160 } 1161 } 1162 1163 if (row_identity && col_identity) { 1164 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA); 1165 } 1166 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1167 1168 PetscFunctionReturn(0); 1169 } 1170 #undef __FUNCT__ 1171 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1172 int MatPrintHelp_SeqBAIJ(Mat A) 1173 { 1174 static PetscTruth called = PETSC_FALSE; 1175 MPI_Comm comm = A->comm; 1176 int ierr; 1177 1178 PetscFunctionBegin; 1179 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1180 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1181 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1182 PetscFunctionReturn(0); 1183 } 1184 1185 EXTERN_C_BEGIN 1186 #undef __FUNCT__ 1187 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1188 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1189 { 1190 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1191 int i,nz,n; 1192 1193 PetscFunctionBegin; 1194 nz = baij->maxnz; 1195 n = mat->n; 1196 for (i=0; i<nz; i++) { 1197 baij->j[i] = indices[i]; 1198 } 1199 baij->nz = nz; 1200 for (i=0; i<n; i++) { 1201 baij->ilen[i] = baij->imax[i]; 1202 } 1203 1204 PetscFunctionReturn(0); 1205 } 1206 EXTERN_C_END 1207 1208 #undef __FUNCT__ 1209 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1210 /*@ 1211 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1212 in the matrix. 1213 1214 Input Parameters: 1215 + mat - the SeqBAIJ matrix 1216 - indices - the column indices 1217 1218 Level: advanced 1219 1220 Notes: 1221 This can be called if you have precomputed the nonzero structure of the 1222 matrix and want to provide it to the matrix object to improve the performance 1223 of the MatSetValues() operation. 1224 1225 You MUST have set the correct numbers of nonzeros per row in the call to 1226 MatCreateSeqBAIJ(). 1227 1228 MUST be called before any calls to MatSetValues(); 1229 1230 @*/ 1231 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1232 { 1233 int ierr,(*f)(Mat,int *); 1234 1235 PetscFunctionBegin; 1236 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1237 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)())&f);CHKERRQ(ierr); 1238 if (f) { 1239 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1240 } else { 1241 SETERRQ(1,"Wrong type of matrix to set column indices"); 1242 } 1243 PetscFunctionReturn(0); 1244 } 1245 1246 #undef __FUNCT__ 1247 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1248 int MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1249 { 1250 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1251 int ierr,i,j,n,row,bs,*ai,*aj,mbs; 1252 PetscReal atmp; 1253 Scalar *x,zero = 0.0; 1254 MatScalar *aa; 1255 int ncols,brow,krow,kcol; 1256 1257 PetscFunctionBegin; 1258 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1259 bs = a->bs; 1260 aa = a->a; 1261 ai = a->i; 1262 aj = a->j; 1263 mbs = a->mbs; 1264 1265 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1266 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1267 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1268 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1269 for (i=0; i<mbs; i++) { 1270 ncols = ai[1] - ai[0]; ai++; 1271 brow = bs*i; 1272 for (j=0; j<ncols; j++){ 1273 /* bcol = bs*(*aj); */ 1274 for (kcol=0; kcol<bs; kcol++){ 1275 for (krow=0; krow<bs; krow++){ 1276 atmp = PetscAbsScalar(*aa); aa++; 1277 row = brow + krow; /* row index */ 1278 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1279 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1280 } 1281 } 1282 aj++; 1283 } 1284 } 1285 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1286 PetscFunctionReturn(0); 1287 } 1288 1289 #undef __FUNCT__ 1290 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1291 int MatSetUpPreallocation_SeqBAIJ(Mat A) 1292 { 1293 int ierr; 1294 1295 PetscFunctionBegin; 1296 ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1297 PetscFunctionReturn(0); 1298 } 1299 1300 #undef __FUNCT__ 1301 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1302 int MatGetArray_SeqBAIJ(Mat A,Scalar **array) 1303 { 1304 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1305 PetscFunctionBegin; 1306 *array = a->a; 1307 PetscFunctionReturn(0); 1308 } 1309 1310 #undef __FUNCT__ 1311 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1312 int MatRestoreArray_SeqBAIJ(Mat A,Scalar **array) 1313 { 1314 PetscFunctionBegin; 1315 PetscFunctionReturn(0); 1316 } 1317 1318 /* -------------------------------------------------------------------*/ 1319 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1320 MatGetRow_SeqBAIJ, 1321 MatRestoreRow_SeqBAIJ, 1322 MatMult_SeqBAIJ_N, 1323 MatMultAdd_SeqBAIJ_N, 1324 MatMultTranspose_SeqBAIJ, 1325 MatMultTransposeAdd_SeqBAIJ, 1326 MatSolve_SeqBAIJ_N, 1327 0, 1328 0, 1329 0, 1330 MatLUFactor_SeqBAIJ, 1331 0, 1332 0, 1333 MatTranspose_SeqBAIJ, 1334 MatGetInfo_SeqBAIJ, 1335 MatEqual_SeqBAIJ, 1336 MatGetDiagonal_SeqBAIJ, 1337 MatDiagonalScale_SeqBAIJ, 1338 MatNorm_SeqBAIJ, 1339 0, 1340 MatAssemblyEnd_SeqBAIJ, 1341 0, 1342 MatSetOption_SeqBAIJ, 1343 MatZeroEntries_SeqBAIJ, 1344 MatZeroRows_SeqBAIJ, 1345 MatLUFactorSymbolic_SeqBAIJ, 1346 MatLUFactorNumeric_SeqBAIJ_N, 1347 0, 1348 0, 1349 MatSetUpPreallocation_SeqBAIJ, 1350 0, 1351 MatGetOwnershipRange_SeqBAIJ, 1352 MatILUFactorSymbolic_SeqBAIJ, 1353 0, 1354 MatGetArray_SeqBAIJ, 1355 MatRestoreArray_SeqBAIJ, 1356 MatDuplicate_SeqBAIJ, 1357 0, 1358 0, 1359 MatILUFactor_SeqBAIJ, 1360 0, 1361 0, 1362 MatGetSubMatrices_SeqBAIJ, 1363 MatIncreaseOverlap_SeqBAIJ, 1364 MatGetValues_SeqBAIJ, 1365 0, 1366 MatPrintHelp_SeqBAIJ, 1367 MatScale_SeqBAIJ, 1368 0, 1369 0, 1370 0, 1371 MatGetBlockSize_SeqBAIJ, 1372 MatGetRowIJ_SeqBAIJ, 1373 MatRestoreRowIJ_SeqBAIJ, 1374 0, 1375 0, 1376 0, 1377 0, 1378 0, 1379 0, 1380 MatSetValuesBlocked_SeqBAIJ, 1381 MatGetSubMatrix_SeqBAIJ, 1382 MatDestroy_SeqBAIJ, 1383 MatView_SeqBAIJ, 1384 MatGetMaps_Petsc, 1385 0, 1386 0, 1387 0, 1388 0, 1389 0, 1390 0, 1391 MatGetRowMax_SeqBAIJ, 1392 MatConvert_Basic}; 1393 1394 EXTERN_C_BEGIN 1395 #undef __FUNCT__ 1396 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 1397 int MatStoreValues_SeqBAIJ(Mat mat) 1398 { 1399 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1400 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1401 int ierr; 1402 1403 PetscFunctionBegin; 1404 if (aij->nonew != 1) { 1405 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1406 } 1407 1408 /* allocate space for values if not already there */ 1409 if (!aij->saved_values) { 1410 ierr = PetscMalloc((nz+1)*sizeof(Scalar),&aij->saved_values);CHKERRQ(ierr); 1411 } 1412 1413 /* copy values over */ 1414 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(Scalar));CHKERRQ(ierr); 1415 PetscFunctionReturn(0); 1416 } 1417 EXTERN_C_END 1418 1419 EXTERN_C_BEGIN 1420 #undef __FUNCT__ 1421 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 1422 int MatRetrieveValues_SeqBAIJ(Mat mat) 1423 { 1424 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1425 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1426 1427 PetscFunctionBegin; 1428 if (aij->nonew != 1) { 1429 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1430 } 1431 if (!aij->saved_values) { 1432 SETERRQ(1,"Must call MatStoreValues(A);first"); 1433 } 1434 1435 /* copy values over */ 1436 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(Scalar));CHKERRQ(ierr); 1437 PetscFunctionReturn(0); 1438 } 1439 EXTERN_C_END 1440 1441 EXTERN_C_BEGIN 1442 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*); 1443 EXTERN_C_END 1444 1445 EXTERN_C_BEGIN 1446 #undef __FUNCT__ 1447 #define __FUNCT__ "MatCreate_SeqBAIJ" 1448 int MatCreate_SeqBAIJ(Mat B) 1449 { 1450 int ierr,size; 1451 Mat_SeqBAIJ *b; 1452 1453 PetscFunctionBegin; 1454 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1455 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1456 1457 B->m = B->M = PetscMax(B->m,B->M); 1458 B->n = B->N = PetscMax(B->n,B->N); 1459 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 1460 B->data = (void*)b; 1461 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1462 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1463 B->factor = 0; 1464 B->lupivotthreshold = 1.0; 1465 B->mapping = 0; 1466 b->row = 0; 1467 b->col = 0; 1468 b->icol = 0; 1469 b->reallocs = 0; 1470 b->saved_values = 0; 1471 #if defined(PETSC_USE_MAT_SINGLE) 1472 b->setvalueslen = 0; 1473 b->setvaluescopy = PETSC_NULL; 1474 #endif 1475 b->single_precision_solves = PETSC_FALSE; 1476 1477 ierr = MapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 1478 ierr = MapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 1479 1480 b->sorted = PETSC_FALSE; 1481 b->roworiented = PETSC_TRUE; 1482 b->nonew = 0; 1483 b->diag = 0; 1484 b->solve_work = 0; 1485 b->mult_work = 0; 1486 b->spptr = 0; 1487 B->info.nz_unneeded = (PetscReal)b->maxnz; 1488 b->keepzeroedrows = PETSC_FALSE; 1489 1490 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1491 "MatStoreValues_SeqBAIJ", 1492 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1493 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1494 "MatRetrieveValues_SeqBAIJ", 1495 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1496 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 1497 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1498 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1499 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C", 1500 "MatConvert_SeqBAIJ_SeqAIJ", 1501 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 1502 PetscFunctionReturn(0); 1503 } 1504 EXTERN_C_END 1505 1506 #undef __FUNCT__ 1507 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 1508 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1509 { 1510 Mat C; 1511 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 1512 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1513 1514 PetscFunctionBegin; 1515 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1516 1517 *B = 0; 1518 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1519 ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr); 1520 c = (Mat_SeqBAIJ*)C->data; 1521 1522 c->bs = a->bs; 1523 c->bs2 = a->bs2; 1524 c->mbs = a->mbs; 1525 c->nbs = a->nbs; 1526 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1527 1528 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1529 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1530 for (i=0; i<mbs; i++) { 1531 c->imax[i] = a->imax[i]; 1532 c->ilen[i] = a->ilen[i]; 1533 } 1534 1535 /* allocate the matrix space */ 1536 c->singlemalloc = PETSC_TRUE; 1537 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1538 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1539 c->j = (int*)(c->a + nz*bs2); 1540 c->i = c->j + nz; 1541 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1542 if (mbs > 0) { 1543 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1544 if (cpvalues == MAT_COPY_VALUES) { 1545 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1546 } else { 1547 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1548 } 1549 } 1550 1551 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1552 c->sorted = a->sorted; 1553 c->roworiented = a->roworiented; 1554 c->nonew = a->nonew; 1555 1556 if (a->diag) { 1557 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1558 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1559 for (i=0; i<mbs; i++) { 1560 c->diag[i] = a->diag[i]; 1561 } 1562 } else c->diag = 0; 1563 c->nz = a->nz; 1564 c->maxnz = a->maxnz; 1565 c->solve_work = 0; 1566 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1567 c->mult_work = 0; 1568 C->preallocated = PETSC_TRUE; 1569 C->assembled = PETSC_TRUE; 1570 *B = C; 1571 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1572 PetscFunctionReturn(0); 1573 } 1574 1575 EXTERN_C_BEGIN 1576 #undef __FUNCT__ 1577 #define __FUNCT__ "MatLoad_SeqBAIJ" 1578 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A) 1579 { 1580 Mat_SeqBAIJ *a; 1581 Mat B; 1582 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1583 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1584 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1585 int *masked,nmask,tmp,bs2,ishift; 1586 Scalar *aa; 1587 MPI_Comm comm = ((PetscObject)viewer)->comm; 1588 1589 PetscFunctionBegin; 1590 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1591 bs2 = bs*bs; 1592 1593 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1594 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1595 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1596 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1597 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1598 M = header[1]; N = header[2]; nz = header[3]; 1599 1600 if (header[3] < 0) { 1601 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 1602 } 1603 1604 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1605 1606 /* 1607 This code adds extra rows to make sure the number of rows is 1608 divisible by the blocksize 1609 */ 1610 mbs = M/bs; 1611 extra_rows = bs - M + bs*(mbs); 1612 if (extra_rows == bs) extra_rows = 0; 1613 else mbs++; 1614 if (extra_rows) { 1615 PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1616 } 1617 1618 /* read in row lengths */ 1619 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1620 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1621 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1622 1623 /* read in column indices */ 1624 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1625 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1626 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1627 1628 /* loop over row lengths determining block row lengths */ 1629 ierr = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr); 1630 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1631 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1632 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1633 masked = mask + mbs; 1634 rowcount = 0; nzcount = 0; 1635 for (i=0; i<mbs; i++) { 1636 nmask = 0; 1637 for (j=0; j<bs; j++) { 1638 kmax = rowlengths[rowcount]; 1639 for (k=0; k<kmax; k++) { 1640 tmp = jj[nzcount++]/bs; 1641 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1642 } 1643 rowcount++; 1644 } 1645 browlengths[i] += nmask; 1646 /* zero out the mask elements we set */ 1647 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1648 } 1649 1650 /* create our matrix */ 1651 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 1652 B = *A; 1653 a = (Mat_SeqBAIJ*)B->data; 1654 1655 /* set matrix "i" values */ 1656 a->i[0] = 0; 1657 for (i=1; i<= mbs; i++) { 1658 a->i[i] = a->i[i-1] + browlengths[i-1]; 1659 a->ilen[i-1] = browlengths[i-1]; 1660 } 1661 a->nz = 0; 1662 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 1663 1664 /* read in nonzero values */ 1665 ierr = PetscMalloc((nz+extra_rows)*sizeof(Scalar),&aa);CHKERRQ(ierr); 1666 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1667 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1668 1669 /* set "a" and "j" values into matrix */ 1670 nzcount = 0; jcount = 0; 1671 for (i=0; i<mbs; i++) { 1672 nzcountb = nzcount; 1673 nmask = 0; 1674 for (j=0; j<bs; j++) { 1675 kmax = rowlengths[i*bs+j]; 1676 for (k=0; k<kmax; k++) { 1677 tmp = jj[nzcount++]/bs; 1678 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1679 } 1680 } 1681 /* sort the masked values */ 1682 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1683 1684 /* set "j" values into matrix */ 1685 maskcount = 1; 1686 for (j=0; j<nmask; j++) { 1687 a->j[jcount++] = masked[j]; 1688 mask[masked[j]] = maskcount++; 1689 } 1690 /* set "a" values into matrix */ 1691 ishift = bs2*a->i[i]; 1692 for (j=0; j<bs; j++) { 1693 kmax = rowlengths[i*bs+j]; 1694 for (k=0; k<kmax; k++) { 1695 tmp = jj[nzcountb]/bs ; 1696 block = mask[tmp] - 1; 1697 point = jj[nzcountb] - bs*tmp; 1698 idx = ishift + bs2*block + j + bs*point; 1699 a->a[idx] = (MatScalar)aa[nzcountb++]; 1700 } 1701 } 1702 /* zero out the mask elements we set */ 1703 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1704 } 1705 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1706 1707 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1708 ierr = PetscFree(browlengths);CHKERRQ(ierr); 1709 ierr = PetscFree(aa);CHKERRQ(ierr); 1710 ierr = PetscFree(jj);CHKERRQ(ierr); 1711 ierr = PetscFree(mask);CHKERRQ(ierr); 1712 1713 B->assembled = PETSC_TRUE; 1714 1715 ierr = MatView_Private(B);CHKERRQ(ierr); 1716 PetscFunctionReturn(0); 1717 } 1718 EXTERN_C_END 1719 1720 #undef __FUNCT__ 1721 #define __FUNCT__ "MatCreateSeqBAIJ" 1722 /*@C 1723 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1724 compressed row) format. For good matrix assembly performance the 1725 user should preallocate the matrix storage by setting the parameter nz 1726 (or the array nnz). By setting these parameters accurately, performance 1727 during matrix assembly can be increased by more than a factor of 50. 1728 1729 Collective on MPI_Comm 1730 1731 Input Parameters: 1732 + comm - MPI communicator, set to PETSC_COMM_SELF 1733 . bs - size of block 1734 . m - number of rows 1735 . n - number of columns 1736 . nz - number of nonzero blocks per block row (same for all rows) 1737 - nnz - array containing the number of nonzero blocks in the various block rows 1738 (possibly different for each block row) or PETSC_NULL 1739 1740 Output Parameter: 1741 . A - the matrix 1742 1743 Options Database Keys: 1744 . -mat_no_unroll - uses code that does not unroll the loops in the 1745 block calculations (much slower) 1746 . -mat_block_size - size of the blocks to use 1747 1748 Level: intermediate 1749 1750 Notes: 1751 A nonzero block is any block that as 1 or more nonzeros in it 1752 1753 The block AIJ format is fully compatible with standard Fortran 77 1754 storage. That is, the stored row and column indices can begin at 1755 either one (as in Fortran) or zero. See the users' manual for details. 1756 1757 Specify the preallocated storage with either nz or nnz (not both). 1758 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1759 allocation. For additional details, see the users manual chapter on 1760 matrices. 1761 1762 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1763 @*/ 1764 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1765 { 1766 int ierr; 1767 1768 PetscFunctionBegin; 1769 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1770 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 1771 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1772 PetscFunctionReturn(0); 1773 } 1774 1775 #undef __FUNCT__ 1776 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 1777 /*@C 1778 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 1779 per row in the matrix. For good matrix assembly performance the 1780 user should preallocate the matrix storage by setting the parameter nz 1781 (or the array nnz). By setting these parameters accurately, performance 1782 during matrix assembly can be increased by more than a factor of 50. 1783 1784 Collective on MPI_Comm 1785 1786 Input Parameters: 1787 + A - the matrix 1788 . bs - size of block 1789 . nz - number of block nonzeros per block row (same for all rows) 1790 - nnz - array containing the number of block nonzeros in the various block rows 1791 (possibly different for each block row) or PETSC_NULL 1792 1793 Options Database Keys: 1794 . -mat_no_unroll - uses code that does not unroll the loops in the 1795 block calculations (much slower) 1796 . -mat_block_size - size of the blocks to use 1797 1798 Level: intermediate 1799 1800 Notes: 1801 The block AIJ format is fully compatible with standard Fortran 77 1802 storage. That is, the stored row and column indices can begin at 1803 either one (as in Fortran) or zero. See the users' manual for details. 1804 1805 Specify the preallocated storage with either nz or nnz (not both). 1806 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1807 allocation. For additional details, see the users manual chapter on 1808 matrices. 1809 1810 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1811 @*/ 1812 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz) 1813 { 1814 Mat_SeqBAIJ *b; 1815 int i,len,ierr,mbs,nbs,bs2,newbs = bs; 1816 PetscTruth flg; 1817 1818 PetscFunctionBegin; 1819 ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);CHKERRQ(ierr); 1820 if (!flg) PetscFunctionReturn(0); 1821 1822 B->preallocated = PETSC_TRUE; 1823 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 1824 if (nnz && newbs != bs) { 1825 SETERRQ(1,"Cannot change blocksize from command line if setting nnz"); 1826 } 1827 bs = newbs; 1828 1829 mbs = B->m/bs; 1830 nbs = B->n/bs; 1831 bs2 = bs*bs; 1832 1833 if (mbs*bs!=B->m || nbs*bs!=B->n) { 1834 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs); 1835 } 1836 1837 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1838 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1839 if (nnz) { 1840 for (i=0; i<mbs; i++) { 1841 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1842 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs); 1843 } 1844 } 1845 1846 b = (Mat_SeqBAIJ*)B->data; 1847 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1848 B->ops->solve = MatSolve_SeqBAIJ_Update; 1849 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 1850 if (!flg) { 1851 switch (bs) { 1852 case 1: 1853 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1854 B->ops->mult = MatMult_SeqBAIJ_1; 1855 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 1856 break; 1857 case 2: 1858 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1859 B->ops->mult = MatMult_SeqBAIJ_2; 1860 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 1861 break; 1862 case 3: 1863 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1864 B->ops->mult = MatMult_SeqBAIJ_3; 1865 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 1866 break; 1867 case 4: 1868 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1869 B->ops->mult = MatMult_SeqBAIJ_4; 1870 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 1871 break; 1872 case 5: 1873 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1874 B->ops->mult = MatMult_SeqBAIJ_5; 1875 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 1876 break; 1877 case 6: 1878 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 1879 B->ops->mult = MatMult_SeqBAIJ_6; 1880 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 1881 break; 1882 case 7: 1883 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 1884 B->ops->mult = MatMult_SeqBAIJ_7; 1885 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 1886 break; 1887 default: 1888 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 1889 B->ops->mult = MatMult_SeqBAIJ_N; 1890 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 1891 break; 1892 } 1893 } 1894 b->bs = bs; 1895 b->mbs = mbs; 1896 b->nbs = nbs; 1897 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1898 if (!nnz) { 1899 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1900 else if (nz <= 0) nz = 1; 1901 for (i=0; i<mbs; i++) b->imax[i] = nz; 1902 nz = nz*mbs; 1903 } else { 1904 nz = 0; 1905 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1906 } 1907 1908 /* allocate the matrix space */ 1909 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1910 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1911 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1912 b->j = (int*)(b->a + nz*bs2); 1913 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 1914 b->i = b->j + nz; 1915 b->singlemalloc = PETSC_TRUE; 1916 1917 b->i[0] = 0; 1918 for (i=1; i<mbs+1; i++) { 1919 b->i[i] = b->i[i-1] + b->imax[i-1]; 1920 } 1921 1922 /* b->ilen will count nonzeros in each block row so far. */ 1923 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1924 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1925 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1926 1927 b->bs = bs; 1928 b->bs2 = bs2; 1929 b->mbs = mbs; 1930 b->nz = 0; 1931 b->maxnz = nz*bs2; 1932 B->info.nz_unneeded = (PetscReal)b->maxnz; 1933 PetscFunctionReturn(0); 1934 } 1935 1936