1 /*$Id: baij.c,v 1.185 1999/10/13 20:37:28 bsmith Exp bsmith $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "sys.h" 8 #include "src/mat/impls/baij/seq/baij.h" 9 #include "src/vec/vecimpl.h" 10 #include "src/inline/spops.h" 11 12 #define CHUNKSIZE 10 13 14 /* 15 Checks for missing diagonals 16 */ 17 #undef __FUNC__ 18 #define __FUNC__ "MatMissingDiag_SeqBAIJ" 19 int MatMissingDiag_SeqBAIJ(Mat A) 20 { 21 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 22 int *diag = a->diag, *jj = a->j,i; 23 24 PetscFunctionBegin; 25 for ( i=0; i<a->mbs; i++ ) { 26 if (jj[diag[i]] != i) { 27 SETERRQ1(1,1,"Matrix is missing diagonal number %d",i); 28 } 29 } 30 PetscFunctionReturn(0); 31 } 32 33 #undef __FUNC__ 34 #define __FUNC__ "MatMarkDiag_SeqBAIJ" 35 int MatMarkDiag_SeqBAIJ(Mat A) 36 { 37 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 38 int i,j, *diag, m = a->mbs; 39 40 PetscFunctionBegin; 41 diag = (int *) PetscMalloc( (m+1)*sizeof(int));CHKPTRQ(diag); 42 PLogObjectMemory(A,(m+1)*sizeof(int)); 43 for ( i=0; i<m; i++ ) { 44 diag[i] = a->i[i+1]; 45 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 46 if (a->j[j] == i) { 47 diag[i] = j; 48 break; 49 } 50 } 51 } 52 a->diag = diag; 53 PetscFunctionReturn(0); 54 } 55 56 57 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 58 59 #undef __FUNC__ 60 #define __FUNC__ "MatGetRowIJ_SeqBAIJ" 61 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 62 PetscTruth *done) 63 { 64 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 65 int ierr, n = a->mbs,i; 66 67 PetscFunctionBegin; 68 *nn = n; 69 if (!ia) PetscFunctionReturn(0); 70 if (symmetric) { 71 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 72 } else if (oshift == 1) { 73 /* temporarily add 1 to i and j indices */ 74 int nz = a->i[n] + 1; 75 for ( i=0; i<nz; i++ ) a->j[i]++; 76 for ( i=0; i<n+1; i++ ) a->i[i]++; 77 *ia = a->i; *ja = a->j; 78 } else { 79 *ia = a->i; *ja = a->j; 80 } 81 82 PetscFunctionReturn(0); 83 } 84 85 #undef __FUNC__ 86 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ" 87 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 88 PetscTruth *done) 89 { 90 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 91 int i,n = a->mbs,ierr; 92 93 PetscFunctionBegin; 94 if (!ia) PetscFunctionReturn(0); 95 if (symmetric) { 96 ierr = PetscFree(*ia);CHKERRQ(ierr); 97 ierr = PetscFree(*ja);CHKERRQ(ierr); 98 } else if (oshift == 1) { 99 int nz = a->i[n]; 100 for ( i=0; i<nz; i++ ) a->j[i]--; 101 for ( i=0; i<n+1; i++ ) a->i[i]--; 102 } 103 PetscFunctionReturn(0); 104 } 105 106 #undef __FUNC__ 107 #define __FUNC__ "MatGetBlockSize_SeqBAIJ" 108 int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs) 109 { 110 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data; 111 112 PetscFunctionBegin; 113 *bs = baij->bs; 114 PetscFunctionReturn(0); 115 } 116 117 118 #undef __FUNC__ 119 #define __FUNC__ "MatDestroy_SeqBAIJ" 120 int MatDestroy_SeqBAIJ(Mat A) 121 { 122 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 123 int ierr; 124 125 PetscFunctionBegin; 126 if (A->mapping) { 127 ierr = ISLocalToGlobalMappingDestroy(A->mapping);CHKERRQ(ierr); 128 } 129 if (A->bmapping) { 130 ierr = ISLocalToGlobalMappingDestroy(A->bmapping);CHKERRQ(ierr); 131 } 132 if (A->rmap) { 133 ierr = MapDestroy(A->rmap);CHKERRQ(ierr); 134 } 135 if (A->cmap) { 136 ierr = MapDestroy(A->cmap);CHKERRQ(ierr); 137 } 138 #if defined(PETSC_USE_LOG) 139 PLogObjectState((PetscObject) A,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 140 #endif 141 ierr = PetscFree(a->a);CHKERRQ(ierr); 142 if (!a->singlemalloc) { 143 ierr = PetscFree(a->i);CHKERRQ(ierr); 144 ierr = PetscFree(a->j);CHKERRQ(ierr); 145 } 146 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 147 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 148 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 149 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 150 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 151 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 152 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 153 ierr = PetscFree(a);CHKERRQ(ierr); 154 PLogObjectDestroy(A); 155 PetscHeaderDestroy(A); 156 PetscFunctionReturn(0); 157 } 158 159 #undef __FUNC__ 160 #define __FUNC__ "MatSetOption_SeqBAIJ" 161 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 162 { 163 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 164 165 PetscFunctionBegin; 166 if (op == MAT_ROW_ORIENTED) a->roworiented = 1; 167 else if (op == MAT_COLUMN_ORIENTED) a->roworiented = 0; 168 else if (op == MAT_COLUMNS_SORTED) a->sorted = 1; 169 else if (op == MAT_COLUMNS_UNSORTED) a->sorted = 0; 170 else if (op == MAT_NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 171 else if (op == MAT_NEW_NONZERO_LOCATION_ERR) a->nonew = -1; 172 else if (op == MAT_NEW_NONZERO_ALLOCATION_ERR) a->nonew = -2; 173 else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 174 else if (op == MAT_ROWS_SORTED || 175 op == MAT_ROWS_UNSORTED || 176 op == MAT_SYMMETRIC || 177 op == MAT_STRUCTURALLY_SYMMETRIC || 178 op == MAT_YES_NEW_DIAGONALS || 179 op == MAT_IGNORE_OFF_PROC_ENTRIES || 180 op == MAT_USE_HASH_TABLE) { 181 PLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 182 } else if (op == MAT_NO_NEW_DIAGONALS) { 183 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 184 } else { 185 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 186 } 187 PetscFunctionReturn(0); 188 } 189 190 191 #undef __FUNC__ 192 #define __FUNC__ "MatGetSize_SeqBAIJ" 193 int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 194 { 195 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 196 197 PetscFunctionBegin; 198 if (m) *m = a->m; 199 if (n) *n = a->n; 200 PetscFunctionReturn(0); 201 } 202 203 #undef __FUNC__ 204 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ" 205 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 206 { 207 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 208 209 PetscFunctionBegin; 210 *m = 0; *n = a->m; 211 PetscFunctionReturn(0); 212 } 213 214 #undef __FUNC__ 215 #define __FUNC__ "MatGetRow_SeqBAIJ" 216 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 217 { 218 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 219 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 220 MatScalar *aa,*aa_i; 221 Scalar *v_i; 222 223 PetscFunctionBegin; 224 bs = a->bs; 225 ai = a->i; 226 aj = a->j; 227 aa = a->a; 228 bs2 = a->bs2; 229 230 if (row < 0 || row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row out of range"); 231 232 bn = row/bs; /* Block number */ 233 bp = row % bs; /* Block Position */ 234 M = ai[bn+1] - ai[bn]; 235 *nz = bs*M; 236 237 if (v) { 238 *v = 0; 239 if (*nz) { 240 *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) );CHKPTRQ(*v); 241 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 242 v_i = *v + i*bs; 243 aa_i = aa + bs2*(ai[bn] + i); 244 for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 245 } 246 } 247 } 248 249 if (idx) { 250 *idx = 0; 251 if (*nz) { 252 *idx = (int *) PetscMalloc( (*nz)*sizeof(int) );CHKPTRQ(*idx); 253 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 254 idx_i = *idx + i*bs; 255 itmp = bs*aj[ai[bn] + i]; 256 for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 257 } 258 } 259 } 260 PetscFunctionReturn(0); 261 } 262 263 #undef __FUNC__ 264 #define __FUNC__ "MatRestoreRow_SeqBAIJ" 265 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 266 { 267 int ierr; 268 269 PetscFunctionBegin; 270 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 271 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 272 PetscFunctionReturn(0); 273 } 274 275 #undef __FUNC__ 276 #define __FUNC__ "MatTranspose_SeqBAIJ" 277 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 278 { 279 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 280 Mat C; 281 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 282 int *rows,*cols,bs2=a->bs2; 283 MatScalar *array = a->a; 284 285 PetscFunctionBegin; 286 if (B==PETSC_NULL && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Square matrix only for in-place"); 287 col = (int *) PetscMalloc((1+nbs)*sizeof(int));CHKPTRQ(col); 288 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 289 290 for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1; 291 ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 292 ierr = PetscFree(col);CHKERRQ(ierr); 293 rows = (int *) PetscMalloc(2*bs*sizeof(int));CHKPTRQ(rows); 294 cols = rows + bs; 295 for ( i=0; i<mbs; i++ ) { 296 cols[0] = i*bs; 297 for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 298 len = ai[i+1] - ai[i]; 299 for ( j=0; j<len; j++ ) { 300 rows[0] = (*aj++)*bs; 301 for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 302 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 303 array += bs2; 304 } 305 } 306 ierr = PetscFree(rows);CHKERRQ(ierr); 307 308 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 309 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 310 311 if (B != PETSC_NULL) { 312 *B = C; 313 } else { 314 PetscOps *Abops; 315 MatOps Aops; 316 317 /* This isn't really an in-place transpose */ 318 ierr = PetscFree(a->a);CHKERRQ(ierr); 319 if (!a->singlemalloc) { 320 ierr = PetscFree(a->i);CHKERRQ(ierr); 321 ierr = PetscFree(a->j);CHKERRQ(ierr); 322 } 323 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 324 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 325 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 326 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 327 ierr = PetscFree(a);CHKERRQ(ierr); 328 329 330 ierr = MapDestroy(A->rmap);CHKERRQ(ierr); 331 ierr = MapDestroy(A->cmap);CHKERRQ(ierr); 332 333 /* 334 This is horrible, horrible code. We need to keep the 335 A pointers for the bops and ops but copy everything 336 else from C. 337 */ 338 Abops = A->bops; 339 Aops = A->ops; 340 ierr = PetscMemcpy(A,C,sizeof(struct _p_Mat));CHKERRQ(ierr); 341 A->bops = Abops; 342 A->ops = Aops; 343 A->qlist = 0; 344 345 PetscHeaderDestroy(C); 346 } 347 PetscFunctionReturn(0); 348 } 349 350 #undef __FUNC__ 351 #define __FUNC__ "MatView_SeqBAIJ_Binary" 352 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 353 { 354 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 355 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 356 Scalar *aa; 357 FILE *file; 358 359 PetscFunctionBegin; 360 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 361 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 362 col_lens[0] = MAT_COOKIE; 363 364 col_lens[1] = a->m; 365 col_lens[2] = a->n; 366 col_lens[3] = a->nz*bs2; 367 368 /* store lengths of each row and write (including header) to file */ 369 count = 0; 370 for ( i=0; i<a->mbs; i++ ) { 371 for ( j=0; j<bs; j++ ) { 372 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 373 } 374 } 375 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,PETSC_INT,1);CHKERRQ(ierr); 376 ierr = PetscFree(col_lens);CHKERRQ(ierr); 377 378 /* store column indices (zero start index) */ 379 jj = (int *) PetscMalloc( (a->nz+1)*bs2*sizeof(int) );CHKPTRQ(jj); 380 count = 0; 381 for ( i=0; i<a->mbs; i++ ) { 382 for ( j=0; j<bs; j++ ) { 383 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 384 for ( l=0; l<bs; l++ ) { 385 jj[count++] = bs*a->j[k] + l; 386 } 387 } 388 } 389 } 390 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 391 ierr = PetscFree(jj);CHKERRQ(ierr); 392 393 /* store nonzero values */ 394 aa = (Scalar *) PetscMalloc((a->nz+1)*bs2*sizeof(Scalar));CHKPTRQ(aa); 395 count = 0; 396 for ( i=0; i<a->mbs; i++ ) { 397 for ( j=0; j<bs; j++ ) { 398 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 399 for ( l=0; l<bs; l++ ) { 400 aa[count++] = a->a[bs2*k + l*bs + j]; 401 } 402 } 403 } 404 } 405 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 406 ierr = PetscFree(aa);CHKERRQ(ierr); 407 408 ierr = ViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 409 if (file) { 410 fprintf(file,"-matload_block_size %d\n",a->bs); 411 } 412 PetscFunctionReturn(0); 413 } 414 415 #undef __FUNC__ 416 #define __FUNC__ "MatView_SeqBAIJ_ASCII" 417 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 418 { 419 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 420 int ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2; 421 char *outputname; 422 423 PetscFunctionBegin; 424 ierr = ViewerGetOutputname(viewer,&outputname);CHKERRQ(ierr); 425 ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr); 426 if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) { 427 ierr = ViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 428 } else if (format == VIEWER_FORMAT_ASCII_MATLAB) { 429 SETERRQ(PETSC_ERR_SUP,0,"Matlab format not supported"); 430 } else if (format == VIEWER_FORMAT_ASCII_COMMON) { 431 ierr = ViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 432 for ( i=0; i<a->mbs; i++ ) { 433 for ( j=0; j<bs; j++ ) { 434 ierr = ViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 435 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 436 for ( l=0; l<bs; l++ ) { 437 #if defined(PETSC_USE_COMPLEX) 438 if (PetscImaginary(a->a[bs2*k + l*bs + j]) > 0.0 && PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) { 439 ierr = ViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l, 440 PetscReal(a->a[bs2*k + l*bs + j]),PetscImaginary(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 441 } else if (PetscImaginary(a->a[bs2*k + l*bs + j]) < 0.0 && PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) { 442 ierr = ViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l, 443 PetscReal(a->a[bs2*k + l*bs + j]),-PetscImaginary(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 444 } else if (PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) { 445 ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscReal(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 446 } 447 #else 448 if (a->a[bs2*k + l*bs + j] != 0.0) { 449 ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 450 } 451 #endif 452 } 453 } 454 ierr = ViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 455 } 456 } 457 ierr = ViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 458 } else { 459 ierr = ViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 460 for ( i=0; i<a->mbs; i++ ) { 461 for ( j=0; j<bs; j++ ) { 462 ierr = ViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 463 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 464 for ( l=0; l<bs; l++ ) { 465 #if defined(PETSC_USE_COMPLEX) 466 if (PetscImaginary(a->a[bs2*k + l*bs + j]) > 0.0) { 467 ierr = ViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l, 468 PetscReal(a->a[bs2*k + l*bs + j]),PetscImaginary(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 469 } else if (PetscImaginary(a->a[bs2*k + l*bs + j]) < 0.0) { 470 ierr = ViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l, 471 PetscReal(a->a[bs2*k + l*bs + j]),-PetscImaginary(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 472 } else { 473 ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscReal(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 474 } 475 #else 476 ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 477 #endif 478 } 479 } 480 ierr = ViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 481 } 482 } 483 ierr = ViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 484 } 485 ierr = ViewerFlush(viewer);CHKERRQ(ierr); 486 PetscFunctionReturn(0); 487 } 488 489 #undef __FUNC__ 490 #define __FUNC__ "MatView_SeqBAIJ_Draw_Zoom" 491 static int MatView_SeqBAIJ_Draw_Zoom(Draw draw,void *Aa) 492 { 493 Mat A = (Mat) Aa; 494 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *) A->data; 495 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank; 496 double xl,yl,xr,yr,x_l,x_r,y_l,y_r; 497 MatScalar *aa; 498 MPI_Comm comm; 499 Viewer viewer; 500 501 PetscFunctionBegin; 502 /* 503 This is nasty. If this is called from an originally parallel matrix 504 then all processes call this, but only the first has the matrix so the 505 rest should return immediately. 506 */ 507 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 508 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 509 if (rank) PetscFunctionReturn(0); 510 511 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*) &viewer);CHKERRQ(ierr); 512 513 ierr = DrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 514 515 /* loop over matrix elements drawing boxes */ 516 color = DRAW_BLUE; 517 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 518 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 519 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 520 x_l = a->j[j]*bs; x_r = x_l + 1.0; 521 aa = a->a + j*bs2; 522 for ( k=0; k<bs; k++ ) { 523 for ( l=0; l<bs; l++ ) { 524 if (PetscReal(*aa++) >= 0.) continue; 525 ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 526 } 527 } 528 } 529 } 530 color = DRAW_CYAN; 531 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 532 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 533 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 534 x_l = a->j[j]*bs; x_r = x_l + 1.0; 535 aa = a->a + j*bs2; 536 for ( k=0; k<bs; k++ ) { 537 for ( l=0; l<bs; l++ ) { 538 if (PetscReal(*aa++) != 0.) continue; 539 ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 540 } 541 } 542 } 543 } 544 545 color = DRAW_RED; 546 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 547 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 548 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 549 x_l = a->j[j]*bs; x_r = x_l + 1.0; 550 aa = a->a + j*bs2; 551 for ( k=0; k<bs; k++ ) { 552 for ( l=0; l<bs; l++ ) { 553 if (PetscReal(*aa++) <= 0.) continue; 554 ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 555 } 556 } 557 } 558 } 559 PetscFunctionReturn(0); 560 } 561 562 #undef __FUNC__ 563 #define __FUNC__ "MatView_SeqBAIJ_Draw" 564 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer) 565 { 566 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *) A->data; 567 int ierr; 568 double xl,yl,xr,yr,w,h; 569 Draw draw; 570 PetscTruth isnull; 571 572 PetscFunctionBegin; 573 574 ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 575 ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 576 577 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 578 xr = a->n; yr = a->m; h = yr/10.0; w = xr/10.0; 579 xr += w; yr += h; xl = -w; yl = -h; 580 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 581 ierr = DrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 582 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 583 PetscFunctionReturn(0); 584 } 585 586 #undef __FUNC__ 587 #define __FUNC__ "MatView_SeqBAIJ" 588 int MatView_SeqBAIJ(Mat A,Viewer viewer) 589 { 590 int ierr; 591 PetscTruth issocket,isascii,isbinary,isdraw; 592 593 PetscFunctionBegin; 594 ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr); 595 ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 596 ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr); 597 ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 598 if (issocket) { 599 SETERRQ(PETSC_ERR_SUP,0,"Socket viewer not supported"); 600 } else if (isascii){ 601 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 602 } else if (isbinary) { 603 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 604 } else if (isdraw) { 605 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 606 } else { 607 SETERRQ1(1,1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 608 } 609 PetscFunctionReturn(0); 610 } 611 612 613 #undef __FUNC__ 614 #define __FUNC__ "MatGetValues_SeqBAIJ" 615 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 616 { 617 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 618 int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 619 int *ai = a->i, *ailen = a->ilen; 620 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 621 MatScalar *ap, *aa = a->a, zero = 0.0; 622 623 PetscFunctionBegin; 624 for ( k=0; k<m; k++ ) { /* loop over rows */ 625 row = im[k]; brow = row/bs; 626 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 627 if (row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 628 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 629 nrow = ailen[brow]; 630 for ( l=0; l<n; l++ ) { /* loop over columns */ 631 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 632 if (in[l] >= a->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 633 col = in[l] ; 634 bcol = col/bs; 635 cidx = col%bs; 636 ridx = row%bs; 637 high = nrow; 638 low = 0; /* assume unsorted */ 639 while (high-low > 5) { 640 t = (low+high)/2; 641 if (rp[t] > bcol) high = t; 642 else low = t; 643 } 644 for ( i=low; i<high; i++ ) { 645 if (rp[i] > bcol) break; 646 if (rp[i] == bcol) { 647 *v++ = ap[bs2*i+bs*cidx+ridx]; 648 goto finished; 649 } 650 } 651 *v++ = zero; 652 finished:; 653 } 654 } 655 PetscFunctionReturn(0); 656 } 657 658 659 #undef __FUNC__ 660 #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ" 661 int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 662 { 663 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 664 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 665 int *imax=a->imax,*ai=a->i,*ailen=a->ilen, roworiented=a->roworiented; 666 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 667 Scalar *value = v; 668 MatScalar *ap,*aa=a->a,*bap; 669 670 PetscFunctionBegin; 671 if (roworiented) { 672 stepval = (n-1)*bs; 673 } else { 674 stepval = (m-1)*bs; 675 } 676 for ( k=0; k<m; k++ ) { /* loop over added rows */ 677 row = im[k]; 678 if (row < 0) continue; 679 #if defined(PETSC_USE_BOPT_g) 680 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 681 #endif 682 rp = aj + ai[row]; 683 ap = aa + bs2*ai[row]; 684 rmax = imax[row]; 685 nrow = ailen[row]; 686 low = 0; 687 for ( l=0; l<n; l++ ) { /* loop over added columns */ 688 if (in[l] < 0) continue; 689 #if defined(PETSC_USE_BOPT_g) 690 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 691 #endif 692 col = in[l]; 693 if (roworiented) { 694 value = v + k*(stepval+bs)*bs + l*bs; 695 } else { 696 value = v + l*(stepval+bs)*bs + k*bs; 697 } 698 if (!sorted) low = 0; high = nrow; 699 while (high-low > 7) { 700 t = (low+high)/2; 701 if (rp[t] > col) high = t; 702 else low = t; 703 } 704 for ( i=low; i<high; i++ ) { 705 if (rp[i] > col) break; 706 if (rp[i] == col) { 707 bap = ap + bs2*i; 708 if (roworiented) { 709 if (is == ADD_VALUES) { 710 for ( ii=0; ii<bs; ii++,value+=stepval ) { 711 for (jj=ii; jj<bs2; jj+=bs ) { 712 bap[jj] += *value++; 713 } 714 } 715 } else { 716 for ( ii=0; ii<bs; ii++,value+=stepval ) { 717 for (jj=ii; jj<bs2; jj+=bs ) { 718 bap[jj] = *value++; 719 } 720 } 721 } 722 } else { 723 if (is == ADD_VALUES) { 724 for ( ii=0; ii<bs; ii++,value+=stepval ) { 725 for (jj=0; jj<bs; jj++ ) { 726 *bap++ += *value++; 727 } 728 } 729 } else { 730 for ( ii=0; ii<bs; ii++,value+=stepval ) { 731 for (jj=0; jj<bs; jj++ ) { 732 *bap++ = *value++; 733 } 734 } 735 } 736 } 737 goto noinsert2; 738 } 739 } 740 if (nonew == 1) goto noinsert2; 741 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 742 if (nrow >= rmax) { 743 /* there is no extra room in row, therefore enlarge */ 744 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 745 MatScalar *new_a; 746 747 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 748 749 /* malloc new storage space */ 750 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 751 new_a = (MatScalar *) PetscMalloc( len );CHKPTRQ(new_a); 752 new_j = (int *) (new_a + bs2*new_nz); 753 new_i = new_j + new_nz; 754 755 /* copy over old data into new slots */ 756 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} 757 for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 758 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 759 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 760 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 761 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 762 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 763 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 764 /* free up old matrix storage */ 765 ierr = PetscFree(a->a);CHKERRQ(ierr); 766 if (!a->singlemalloc) { 767 ierr = PetscFree(a->i);CHKERRQ(ierr); 768 ierr = PetscFree(a->j);CHKERRQ(ierr); 769 } 770 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 771 a->singlemalloc = 1; 772 773 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 774 rmax = imax[row] = imax[row] + CHUNKSIZE; 775 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 776 a->maxnz += bs2*CHUNKSIZE; 777 a->reallocs++; 778 a->nz++; 779 } 780 N = nrow++ - 1; 781 /* shift up all the later entries in this row */ 782 for ( ii=N; ii>=i; ii-- ) { 783 rp[ii+1] = rp[ii]; 784 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 785 } 786 if (N >= i) { 787 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 788 } 789 rp[i] = col; 790 bap = ap + bs2*i; 791 if (roworiented) { 792 for ( ii=0; ii<bs; ii++,value+=stepval) { 793 for (jj=ii; jj<bs2; jj+=bs ) { 794 bap[jj] = *value++; 795 } 796 } 797 } else { 798 for ( ii=0; ii<bs; ii++,value+=stepval ) { 799 for (jj=0; jj<bs; jj++ ) { 800 *bap++ = *value++; 801 } 802 } 803 } 804 noinsert2:; 805 low = i; 806 } 807 ailen[row] = nrow; 808 } 809 PetscFunctionReturn(0); 810 } 811 812 813 #undef __FUNC__ 814 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ" 815 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 816 { 817 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 818 int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 819 int m = a->m,*ip, N, *ailen = a->ilen; 820 int mbs = a->mbs, bs2 = a->bs2,rmax = 0,ierr; 821 MatScalar *aa = a->a, *ap; 822 823 PetscFunctionBegin; 824 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 825 826 if (m) rmax = ailen[0]; 827 for ( i=1; i<mbs; i++ ) { 828 /* move each row back by the amount of empty slots (fshift) before it*/ 829 fshift += imax[i-1] - ailen[i-1]; 830 rmax = PetscMax(rmax,ailen[i]); 831 if (fshift) { 832 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 833 N = ailen[i]; 834 for ( j=0; j<N; j++ ) { 835 ip[j-fshift] = ip[j]; 836 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 837 } 838 } 839 ai[i] = ai[i-1] + ailen[i-1]; 840 } 841 if (mbs) { 842 fshift += imax[mbs-1] - ailen[mbs-1]; 843 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 844 } 845 /* reset ilen and imax for each row */ 846 for ( i=0; i<mbs; i++ ) { 847 ailen[i] = imax[i] = ai[i+1] - ai[i]; 848 } 849 a->nz = ai[mbs]; 850 851 /* diagonals may have moved, so kill the diagonal pointers */ 852 if (fshift && a->diag) { 853 ierr = PetscFree(a->diag);CHKERRQ(ierr); 854 PLogObjectMemory(A,-(m+1)*sizeof(int)); 855 a->diag = 0; 856 } 857 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 858 m,a->n,a->bs,fshift*bs2,a->nz*bs2); 859 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n", 860 a->reallocs); 861 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 862 a->reallocs = 0; 863 A->info.nz_unneeded = (double)fshift*bs2; 864 865 PetscFunctionReturn(0); 866 } 867 868 869 870 /* 871 This function returns an array of flags which indicate the locations of contiguous 872 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 873 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 874 Assume: sizes should be long enough to hold all the values. 875 */ 876 #undef __FUNC__ 877 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Blocks" 878 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 879 { 880 int i,j,k,row; 881 PetscTruth flg; 882 883 PetscFunctionBegin; 884 for ( i=0,j=0; i<n; j++ ) { 885 row = idx[i]; 886 if (row%bs!=0) { /* Not the begining of a block */ 887 sizes[j] = 1; 888 i++; 889 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 890 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 891 i++; 892 } else { /* Begining of the block, so check if the complete block exists */ 893 flg = PETSC_TRUE; 894 for ( k=1; k<bs; k++ ) { 895 if (row+k != idx[i+k]) { /* break in the block */ 896 flg = PETSC_FALSE; 897 break; 898 } 899 } 900 if (flg == PETSC_TRUE) { /* No break in the bs */ 901 sizes[j] = bs; 902 i+= bs; 903 } else { 904 sizes[j] = 1; 905 i++; 906 } 907 } 908 } 909 *bs_max = j; 910 PetscFunctionReturn(0); 911 } 912 913 #undef __FUNC__ 914 #define __FUNC__ "MatZeroRows_SeqBAIJ" 915 int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag) 916 { 917 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 918 int ierr,i,j,k,count,is_n,*is_idx,*rows; 919 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 920 Scalar zero = 0.0; 921 MatScalar *aa; 922 923 PetscFunctionBegin; 924 /* Make a copy of the IS and sort it */ 925 ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 926 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 927 928 /* allocate memory for rows,sizes */ 929 rows = (int*)PetscMalloc((3*is_n+1)*sizeof(int));CHKPTRQ(rows); 930 sizes = rows + is_n; 931 932 /* initialize copy IS valurs to rows, and sort them */ 933 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 934 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 935 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 936 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 937 938 for ( i=0,j=0; i<bs_max; j+=sizes[i],i++ ) { 939 row = rows[j]; 940 if (row < 0 || row > baij->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"row %d out of range",row); 941 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 942 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 943 if (sizes[i] == bs) { 944 if (diag) { 945 if (baij->ilen[row/bs] > 0) { 946 baij->ilen[row/bs] = 1; 947 baij->j[baij->i[row/bs]] = row/bs; 948 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 949 } 950 /* Now insert all the diagoanl values for this bs */ 951 for ( k=0; k<bs; k++ ) { 952 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 953 } 954 } else { /* (!diag) */ 955 baij->ilen[row/bs] = 0; 956 } /* end (!diag) */ 957 } else { /* (sizes[i] != bs) */ 958 #if defined (PETSC_USE_DEBUG) 959 if (sizes[i] != 1) SETERRQ(1,0,"Internal Error. Value should be 1"); 960 #endif 961 for ( k=0; k<count; k++ ) { 962 aa[0] = zero; 963 aa+=bs; 964 } 965 if (diag) { 966 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 967 } 968 } 969 } 970 971 ierr = PetscFree(rows);CHKERRQ(ierr); 972 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 973 PetscFunctionReturn(0); 974 } 975 976 #undef __FUNC__ 977 #define __FUNC__ "MatSetValues_SeqBAIJ" 978 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 979 { 980 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 981 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 982 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 983 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 984 int ridx,cidx,bs2=a->bs2,ierr; 985 MatScalar *ap,value,*aa=a->a,*bap; 986 987 PetscFunctionBegin; 988 for ( k=0; k<m; k++ ) { /* loop over added rows */ 989 row = im[k]; brow = row/bs; 990 if (row < 0) continue; 991 #if defined(PETSC_USE_BOPT_g) 992 if (row >= a->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large: row %d max %d",row,a->m); 993 #endif 994 rp = aj + ai[brow]; 995 ap = aa + bs2*ai[brow]; 996 rmax = imax[brow]; 997 nrow = ailen[brow]; 998 low = 0; 999 for ( l=0; l<n; l++ ) { /* loop over added columns */ 1000 if (in[l] < 0) continue; 1001 #if defined(PETSC_USE_BOPT_g) 1002 if (in[l] >= a->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large: col %d max %d",in[l],a->n); 1003 #endif 1004 col = in[l]; bcol = col/bs; 1005 ridx = row % bs; cidx = col % bs; 1006 if (roworiented) { 1007 value = v[l + k*n]; 1008 } else { 1009 value = v[k + l*m]; 1010 } 1011 if (!sorted) low = 0; high = nrow; 1012 while (high-low > 7) { 1013 t = (low+high)/2; 1014 if (rp[t] > bcol) high = t; 1015 else low = t; 1016 } 1017 for ( i=low; i<high; i++ ) { 1018 if (rp[i] > bcol) break; 1019 if (rp[i] == bcol) { 1020 bap = ap + bs2*i + bs*cidx + ridx; 1021 if (is == ADD_VALUES) *bap += value; 1022 else *bap = value; 1023 goto noinsert1; 1024 } 1025 } 1026 if (nonew == 1) goto noinsert1; 1027 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 1028 if (nrow >= rmax) { 1029 /* there is no extra room in row, therefore enlarge */ 1030 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1031 MatScalar *new_a; 1032 1033 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 1034 1035 /* Malloc new storage space */ 1036 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1037 new_a = (MatScalar *) PetscMalloc( len );CHKPTRQ(new_a); 1038 new_j = (int *) (new_a + bs2*new_nz); 1039 new_i = new_j + new_nz; 1040 1041 /* copy over old data into new slots */ 1042 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 1043 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1044 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1045 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1046 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1047 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1048 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1049 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1050 /* free up old matrix storage */ 1051 ierr = PetscFree(a->a);CHKERRQ(ierr); 1052 if (!a->singlemalloc) { 1053 ierr = PetscFree(a->i);CHKERRQ(ierr); 1054 ierr = PetscFree(a->j);CHKERRQ(ierr); 1055 } 1056 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1057 a->singlemalloc = 1; 1058 1059 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1060 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1061 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1062 a->maxnz += bs2*CHUNKSIZE; 1063 a->reallocs++; 1064 a->nz++; 1065 } 1066 N = nrow++ - 1; 1067 /* shift up all the later entries in this row */ 1068 for ( ii=N; ii>=i; ii-- ) { 1069 rp[ii+1] = rp[ii]; 1070 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1071 } 1072 if (N>=i) { 1073 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1074 } 1075 rp[i] = bcol; 1076 ap[bs2*i + bs*cidx + ridx] = value; 1077 noinsert1:; 1078 low = i; 1079 } 1080 ailen[brow] = nrow; 1081 } 1082 PetscFunctionReturn(0); 1083 } 1084 1085 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 1086 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 1087 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1088 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 1089 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**); 1090 extern int MatMultTrans_SeqBAIJ(Mat,Vec,Vec); 1091 extern int MatMultTransAdd_SeqBAIJ(Mat,Vec,Vec,Vec); 1092 extern int MatScale_SeqBAIJ(Scalar*,Mat); 1093 extern int MatNorm_SeqBAIJ(Mat,NormType,double *); 1094 extern int MatEqual_SeqBAIJ(Mat,Mat, PetscTruth*); 1095 extern int MatGetDiagonal_SeqBAIJ(Mat,Vec); 1096 extern int MatDiagonalScale_SeqBAIJ(Mat,Vec,Vec); 1097 extern int MatGetInfo_SeqBAIJ(Mat,MatInfoType,MatInfo *); 1098 extern int MatZeroEntries_SeqBAIJ(Mat); 1099 1100 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 1101 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 1102 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 1103 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 1104 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 1105 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 1106 extern int MatSolve_SeqBAIJ_6(Mat,Vec,Vec); 1107 extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec); 1108 1109 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 1110 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 1111 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 1112 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 1113 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 1114 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 1115 extern int MatLUFactorNumeric_SeqBAIJ_6(Mat,Mat*); 1116 1117 extern int MatMult_SeqBAIJ_1(Mat,Vec,Vec); 1118 extern int MatMult_SeqBAIJ_2(Mat,Vec,Vec); 1119 extern int MatMult_SeqBAIJ_3(Mat,Vec,Vec); 1120 extern int MatMult_SeqBAIJ_4(Mat,Vec,Vec); 1121 extern int MatMult_SeqBAIJ_5(Mat,Vec,Vec); 1122 extern int MatMult_SeqBAIJ_6(Mat,Vec,Vec); 1123 extern int MatMult_SeqBAIJ_7(Mat,Vec,Vec); 1124 extern int MatMult_SeqBAIJ_N(Mat,Vec,Vec); 1125 1126 extern int MatMultAdd_SeqBAIJ_1(Mat,Vec,Vec,Vec); 1127 extern int MatMultAdd_SeqBAIJ_2(Mat,Vec,Vec,Vec); 1128 extern int MatMultAdd_SeqBAIJ_3(Mat,Vec,Vec,Vec); 1129 extern int MatMultAdd_SeqBAIJ_4(Mat,Vec,Vec,Vec); 1130 extern int MatMultAdd_SeqBAIJ_5(Mat,Vec,Vec,Vec); 1131 extern int MatMultAdd_SeqBAIJ_6(Mat,Vec,Vec,Vec); 1132 extern int MatMultAdd_SeqBAIJ_7(Mat,Vec,Vec,Vec); 1133 extern int MatMultAdd_SeqBAIJ_N(Mat,Vec,Vec,Vec); 1134 1135 #undef __FUNC__ 1136 #define __FUNC__ "MatILUFactor_SeqBAIJ" 1137 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info) 1138 { 1139 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1140 Mat outA; 1141 int ierr; 1142 PetscTruth row_identity, col_identity; 1143 1144 PetscFunctionBegin; 1145 if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,0,"Only levels = 0 supported for in-place ILU"); 1146 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1147 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1148 if (!row_identity || !col_identity) { 1149 SETERRQ(1,1,"Row and column permutations must be identity for in-place ILU"); 1150 } 1151 1152 outA = inA; 1153 inA->factor = FACTOR_LU; 1154 a->row = row; 1155 a->col = col; 1156 1157 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1158 ierr = ISInvertPermutation(col,&(a->icol));CHKERRQ(ierr); 1159 PLogObjectParent(inA,a->icol); 1160 1161 if (!a->solve_work) { 1162 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 1163 PLogObjectMemory(inA,(a->m+a->bs)*sizeof(Scalar)); 1164 } 1165 1166 if (!a->diag) { 1167 ierr = MatMarkDiag_SeqBAIJ(inA);CHKERRQ(ierr); 1168 } 1169 /* 1170 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1171 for ILU(0) factorization with natural ordering 1172 */ 1173 switch (a->bs) { 1174 case 2: 1175 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering; 1176 inA->ops->solve = MatSolve_SeqBAIJ_2_NaturalOrdering; 1177 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1178 break; 1179 case 3: 1180 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering; 1181 inA->ops->solve = MatSolve_SeqBAIJ_3_NaturalOrdering; 1182 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1183 break; 1184 case 4: 1185 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; 1186 inA->ops->solve = MatSolve_SeqBAIJ_4_NaturalOrdering; 1187 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1188 break; 1189 case 5: 1190 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering; 1191 inA->ops->solve = MatSolve_SeqBAIJ_5_NaturalOrdering; 1192 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1193 break; 1194 case 6: 1195 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering; 1196 inA->ops->solve = MatSolve_SeqBAIJ_6_NaturalOrdering; 1197 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1198 break; 1199 case 7: 1200 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering; 1201 inA->ops->solve = MatSolve_SeqBAIJ_7_NaturalOrdering; 1202 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1203 break; 1204 } 1205 1206 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1207 1208 PetscFunctionReturn(0); 1209 } 1210 #undef __FUNC__ 1211 #define __FUNC__ "MatPrintHelp_SeqBAIJ" 1212 int MatPrintHelp_SeqBAIJ(Mat A) 1213 { 1214 static int called = 0; 1215 MPI_Comm comm = A->comm; 1216 int ierr; 1217 1218 PetscFunctionBegin; 1219 if (called) {PetscFunctionReturn(0);} else called = 1; 1220 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1221 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1222 PetscFunctionReturn(0); 1223 } 1224 1225 EXTERN_C_BEGIN 1226 #undef __FUNC__ 1227 #define __FUNC__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1228 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1229 { 1230 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1231 int i,nz,n; 1232 1233 PetscFunctionBegin; 1234 nz = baij->maxnz; 1235 n = baij->n; 1236 for (i=0; i<nz; i++) { 1237 baij->j[i] = indices[i]; 1238 } 1239 baij->nz = nz; 1240 for ( i=0; i<n; i++ ) { 1241 baij->ilen[i] = baij->imax[i]; 1242 } 1243 1244 PetscFunctionReturn(0); 1245 } 1246 EXTERN_C_END 1247 1248 #undef __FUNC__ 1249 #define __FUNC__ "MatSeqBAIJSetColumnIndices" 1250 /*@ 1251 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1252 in the matrix. 1253 1254 Input Parameters: 1255 + mat - the SeqBAIJ matrix 1256 - indices - the column indices 1257 1258 Level: advanced 1259 1260 Notes: 1261 This can be called if you have precomputed the nonzero structure of the 1262 matrix and want to provide it to the matrix object to improve the performance 1263 of the MatSetValues() operation. 1264 1265 You MUST have set the correct numbers of nonzeros per row in the call to 1266 MatCreateSeqBAIJ(). 1267 1268 MUST be called before any calls to MatSetValues(); 1269 1270 @*/ 1271 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1272 { 1273 int ierr,(*f)(Mat,int *); 1274 1275 PetscFunctionBegin; 1276 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1277 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void **)&f);CHKERRQ(ierr); 1278 if (f) { 1279 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1280 } else { 1281 SETERRQ(1,1,"Wrong type of matrix to set column indices"); 1282 } 1283 PetscFunctionReturn(0); 1284 } 1285 1286 /* -------------------------------------------------------------------*/ 1287 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1288 MatGetRow_SeqBAIJ, 1289 MatRestoreRow_SeqBAIJ, 1290 MatMult_SeqBAIJ_N, 1291 MatMultAdd_SeqBAIJ_N, 1292 MatMultTrans_SeqBAIJ, 1293 MatMultTransAdd_SeqBAIJ, 1294 MatSolve_SeqBAIJ_N, 1295 0, 1296 0, 1297 0, 1298 MatLUFactor_SeqBAIJ, 1299 0, 1300 0, 1301 MatTranspose_SeqBAIJ, 1302 MatGetInfo_SeqBAIJ, 1303 MatEqual_SeqBAIJ, 1304 MatGetDiagonal_SeqBAIJ, 1305 MatDiagonalScale_SeqBAIJ, 1306 MatNorm_SeqBAIJ, 1307 0, 1308 MatAssemblyEnd_SeqBAIJ, 1309 0, 1310 MatSetOption_SeqBAIJ, 1311 MatZeroEntries_SeqBAIJ, 1312 MatZeroRows_SeqBAIJ, 1313 MatLUFactorSymbolic_SeqBAIJ, 1314 MatLUFactorNumeric_SeqBAIJ_N, 1315 0, 1316 0, 1317 MatGetSize_SeqBAIJ, 1318 MatGetSize_SeqBAIJ, 1319 MatGetOwnershipRange_SeqBAIJ, 1320 MatILUFactorSymbolic_SeqBAIJ, 1321 0, 1322 0, 1323 0, 1324 MatDuplicate_SeqBAIJ, 1325 0, 1326 0, 1327 MatILUFactor_SeqBAIJ, 1328 0, 1329 0, 1330 MatGetSubMatrices_SeqBAIJ, 1331 MatIncreaseOverlap_SeqBAIJ, 1332 MatGetValues_SeqBAIJ, 1333 0, 1334 MatPrintHelp_SeqBAIJ, 1335 MatScale_SeqBAIJ, 1336 0, 1337 0, 1338 0, 1339 MatGetBlockSize_SeqBAIJ, 1340 MatGetRowIJ_SeqBAIJ, 1341 MatRestoreRowIJ_SeqBAIJ, 1342 0, 1343 0, 1344 0, 1345 0, 1346 0, 1347 0, 1348 MatSetValuesBlocked_SeqBAIJ, 1349 MatGetSubMatrix_SeqBAIJ, 1350 0, 1351 0, 1352 MatGetMaps_Petsc}; 1353 1354 EXTERN_C_BEGIN 1355 #undef __FUNC__ 1356 #define __FUNC__ "MatStoreValues_SeqBAIJ" 1357 int MatStoreValues_SeqBAIJ(Mat mat) 1358 { 1359 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1360 int nz = aij->i[aij->m]*aij->bs*aij->bs2; 1361 int ierr; 1362 1363 PetscFunctionBegin; 1364 if (aij->nonew != 1) { 1365 SETERRQ(1,1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1366 } 1367 1368 /* allocate space for values if not already there */ 1369 if (!aij->saved_values) { 1370 aij->saved_values = (Scalar *) PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(aij->saved_values); 1371 } 1372 1373 /* copy values over */ 1374 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(Scalar));CHKERRQ(ierr); 1375 PetscFunctionReturn(0); 1376 } 1377 EXTERN_C_END 1378 1379 EXTERN_C_BEGIN 1380 #undef __FUNC__ 1381 #define __FUNC__ "MatRetrieveValues_SeqBAIJ" 1382 int MatRetrieveValues_SeqBAIJ(Mat mat) 1383 { 1384 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1385 int nz = aij->i[aij->m]*aij->bs*aij->bs2,ierr; 1386 1387 PetscFunctionBegin; 1388 if (aij->nonew != 1) { 1389 SETERRQ(1,1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1390 } 1391 if (!aij->saved_values) { 1392 SETERRQ(1,1,"Must call MatStoreValues(A);first"); 1393 } 1394 1395 /* copy values over */ 1396 ierr = PetscMemcpy(aij->a, aij->saved_values,nz*sizeof(Scalar));CHKERRQ(ierr); 1397 PetscFunctionReturn(0); 1398 } 1399 EXTERN_C_END 1400 1401 #undef __FUNC__ 1402 #define __FUNC__ "MatCreateSeqBAIJ" 1403 /*@C 1404 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1405 compressed row) format. For good matrix assembly performance the 1406 user should preallocate the matrix storage by setting the parameter nz 1407 (or the array nnz). By setting these parameters accurately, performance 1408 during matrix assembly can be increased by more than a factor of 50. 1409 1410 Collective on MPI_Comm 1411 1412 Input Parameters: 1413 + comm - MPI communicator, set to PETSC_COMM_SELF 1414 . bs - size of block 1415 . m - number of rows 1416 . n - number of columns 1417 . nz - number of block nonzeros per block row (same for all rows) 1418 - nnz - array containing the number of block nonzeros in the various block rows 1419 (possibly different for each block row) or PETSC_NULL 1420 1421 Output Parameter: 1422 . A - the matrix 1423 1424 Options Database Keys: 1425 . -mat_no_unroll - uses code that does not unroll the loops in the 1426 block calculations (much slower) 1427 . -mat_block_size - size of the blocks to use 1428 1429 Level: intermediate 1430 1431 Notes: 1432 The block AIJ format is fully compatible with standard Fortran 77 1433 storage. That is, the stored row and column indices can begin at 1434 either one (as in Fortran) or zero. See the users' manual for details. 1435 1436 Specify the preallocated storage with either nz or nnz (not both). 1437 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1438 allocation. For additional details, see the users manual chapter on 1439 matrices. 1440 1441 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1442 @*/ 1443 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 1444 { 1445 Mat B; 1446 Mat_SeqBAIJ *b; 1447 int i,len,ierr,flg,mbs,nbs,bs2,size; 1448 1449 PetscFunctionBegin; 1450 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1451 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"Comm must be of size 1"); 1452 1453 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1454 mbs = m/bs; 1455 nbs = n/bs; 1456 bs2 = bs*bs; 1457 1458 if (mbs*bs!=m || nbs*bs!=n) { 1459 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Number rows, cols must be divisible by blocksize"); 1460 } 1461 1462 if (nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"nz cannot be less than -2: value %d",nz); 1463 if (nnz) { 1464 for (i=0; i<mbs; i++) { 1465 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1466 } 1467 } 1468 1469 *A = 0; 1470 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",comm,MatDestroy,MatView); 1471 PLogObjectCreate(B); 1472 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ));CHKPTRQ(b); 1473 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1474 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1475 ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1476 if (!flg) { 1477 switch (bs) { 1478 case 1: 1479 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1480 B->ops->solve = MatSolve_SeqBAIJ_1; 1481 B->ops->mult = MatMult_SeqBAIJ_1; 1482 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 1483 break; 1484 case 2: 1485 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1486 B->ops->solve = MatSolve_SeqBAIJ_2; 1487 B->ops->mult = MatMult_SeqBAIJ_2; 1488 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 1489 break; 1490 case 3: 1491 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1492 B->ops->solve = MatSolve_SeqBAIJ_3; 1493 B->ops->mult = MatMult_SeqBAIJ_3; 1494 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 1495 break; 1496 case 4: 1497 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1498 B->ops->solve = MatSolve_SeqBAIJ_4; 1499 B->ops->mult = MatMult_SeqBAIJ_4; 1500 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 1501 break; 1502 case 5: 1503 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1504 B->ops->solve = MatSolve_SeqBAIJ_5; 1505 B->ops->mult = MatMult_SeqBAIJ_5; 1506 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 1507 break; 1508 case 6: 1509 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 1510 B->ops->solve = MatSolve_SeqBAIJ_6; 1511 B->ops->mult = MatMult_SeqBAIJ_6; 1512 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 1513 break; 1514 case 7: 1515 B->ops->mult = MatMult_SeqBAIJ_7; 1516 B->ops->solve = MatSolve_SeqBAIJ_7; 1517 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 1518 break; 1519 } 1520 } 1521 B->ops->destroy = MatDestroy_SeqBAIJ; 1522 B->ops->view = MatView_SeqBAIJ; 1523 B->factor = 0; 1524 B->lupivotthreshold = 1.0; 1525 B->mapping = 0; 1526 b->row = 0; 1527 b->col = 0; 1528 b->icol = 0; 1529 b->reallocs = 0; 1530 b->saved_values = 0; 1531 1532 b->m = m; B->m = m; B->M = m; 1533 b->n = n; B->n = n; B->N = n; 1534 1535 ierr = MapCreateMPI(comm,m,m,&B->rmap);CHKERRQ(ierr); 1536 ierr = MapCreateMPI(comm,n,n,&B->cmap);CHKERRQ(ierr); 1537 1538 b->mbs = mbs; 1539 b->nbs = nbs; 1540 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) );CHKPTRQ(b->imax); 1541 if (nnz == PETSC_NULL) { 1542 if (nz == PETSC_DEFAULT) nz = 5; 1543 else if (nz <= 0) nz = 1; 1544 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 1545 nz = nz*mbs; 1546 } else { 1547 nz = 0; 1548 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 1549 } 1550 1551 /* allocate the matrix space */ 1552 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (b->m+1)*sizeof(int); 1553 b->a = (MatScalar *) PetscMalloc( len );CHKPTRQ(b->a); 1554 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1555 b->j = (int *) (b->a + nz*bs2); 1556 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 1557 b->i = b->j + nz; 1558 b->singlemalloc = 1; 1559 1560 b->i[0] = 0; 1561 for (i=1; i<mbs+1; i++) { 1562 b->i[i] = b->i[i-1] + b->imax[i-1]; 1563 } 1564 1565 /* b->ilen will count nonzeros in each block row so far. */ 1566 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 1567 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1568 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 1569 1570 b->bs = bs; 1571 b->bs2 = bs2; 1572 b->mbs = mbs; 1573 b->nz = 0; 1574 b->maxnz = nz*bs2; 1575 b->sorted = 0; 1576 b->roworiented = 1; 1577 b->nonew = 0; 1578 b->diag = 0; 1579 b->solve_work = 0; 1580 b->mult_work = 0; 1581 b->spptr = 0; 1582 B->info.nz_unneeded = (double)b->maxnz; 1583 1584 *A = B; 1585 ierr = OptionsHasName(PETSC_NULL,"-help", &flg);CHKERRQ(ierr); 1586 if (flg) {ierr = MatPrintHelp(B);CHKERRQ(ierr); } 1587 1588 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C", 1589 "MatStoreValues_SeqBAIJ", 1590 (void*)MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1591 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C", 1592 "MatRetrieveValues_SeqBAIJ", 1593 (void*)MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1594 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 1595 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1596 (void*)MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1597 PetscFunctionReturn(0); 1598 } 1599 1600 #undef __FUNC__ 1601 #define __FUNC__ "MatDuplicate_SeqBAIJ" 1602 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1603 { 1604 Mat C; 1605 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 1606 int i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1607 1608 PetscFunctionBegin; 1609 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,0,"Corrupt matrix"); 1610 1611 *B = 0; 1612 PetscHeaderCreate(C,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",A->comm,MatDestroy,MatView); 1613 PLogObjectCreate(C); 1614 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ));CHKPTRQ(c); 1615 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1616 C->ops->destroy = MatDestroy_SeqBAIJ; 1617 C->ops->view = MatView_SeqBAIJ; 1618 C->factor = A->factor; 1619 c->row = 0; 1620 c->col = 0; 1621 c->icol = 0; 1622 c->saved_values = 0; 1623 C->assembled = PETSC_TRUE; 1624 1625 c->m = C->m = a->m; 1626 c->n = C->n = a->n; 1627 C->M = a->m; 1628 C->N = a->n; 1629 1630 c->bs = a->bs; 1631 c->bs2 = a->bs2; 1632 c->mbs = a->mbs; 1633 c->nbs = a->nbs; 1634 1635 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->imax); 1636 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->ilen); 1637 for ( i=0; i<mbs; i++ ) { 1638 c->imax[i] = a->imax[i]; 1639 c->ilen[i] = a->ilen[i]; 1640 } 1641 1642 /* allocate the matrix space */ 1643 c->singlemalloc = 1; 1644 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1645 c->a = (MatScalar *) PetscMalloc( len );CHKPTRQ(c->a); 1646 c->j = (int *) (c->a + nz*bs2); 1647 c->i = c->j + nz; 1648 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1649 if (mbs > 0) { 1650 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1651 if (cpvalues == MAT_COPY_VALUES) { 1652 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1653 } else { 1654 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1655 } 1656 } 1657 1658 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1659 c->sorted = a->sorted; 1660 c->roworiented = a->roworiented; 1661 c->nonew = a->nonew; 1662 1663 if (a->diag) { 1664 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) );CHKPTRQ(c->diag); 1665 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 1666 for ( i=0; i<mbs; i++ ) { 1667 c->diag[i] = a->diag[i]; 1668 } 1669 } else c->diag = 0; 1670 c->nz = a->nz; 1671 c->maxnz = a->maxnz; 1672 c->solve_work = 0; 1673 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1674 c->mult_work = 0; 1675 *B = C; 1676 ierr = FListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1677 PetscFunctionReturn(0); 1678 } 1679 1680 #undef __FUNC__ 1681 #define __FUNC__ "MatLoad_SeqBAIJ" 1682 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 1683 { 1684 Mat_SeqBAIJ *a; 1685 Mat B; 1686 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 1687 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1688 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1689 int *masked, nmask,tmp,bs2,ishift; 1690 Scalar *aa; 1691 MPI_Comm comm = ((PetscObject) viewer)->comm; 1692 1693 PetscFunctionBegin; 1694 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 1695 bs2 = bs*bs; 1696 1697 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1698 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"view must have one processor"); 1699 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1700 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1701 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not Mat object"); 1702 M = header[1]; N = header[2]; nz = header[3]; 1703 1704 if (header[3] < 0) { 1705 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as SeqBAIJ"); 1706 } 1707 1708 if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 1709 1710 /* 1711 This code adds extra rows to make sure the number of rows is 1712 divisible by the blocksize 1713 */ 1714 mbs = M/bs; 1715 extra_rows = bs - M + bs*(mbs); 1716 if (extra_rows == bs) extra_rows = 0; 1717 else mbs++; 1718 if (extra_rows) { 1719 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1720 } 1721 1722 /* read in row lengths */ 1723 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 1724 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1725 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 1726 1727 /* read in column indices */ 1728 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) );CHKPTRQ(jj); 1729 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1730 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 1731 1732 /* loop over row lengths determining block row lengths */ 1733 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 1734 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1735 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) );CHKPTRQ(mask); 1736 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1737 masked = mask + mbs; 1738 rowcount = 0; nzcount = 0; 1739 for ( i=0; i<mbs; i++ ) { 1740 nmask = 0; 1741 for ( j=0; j<bs; j++ ) { 1742 kmax = rowlengths[rowcount]; 1743 for ( k=0; k<kmax; k++ ) { 1744 tmp = jj[nzcount++]/bs; 1745 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1746 } 1747 rowcount++; 1748 } 1749 browlengths[i] += nmask; 1750 /* zero out the mask elements we set */ 1751 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1752 } 1753 1754 /* create our matrix */ 1755 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 1756 B = *A; 1757 a = (Mat_SeqBAIJ *) B->data; 1758 1759 /* set matrix "i" values */ 1760 a->i[0] = 0; 1761 for ( i=1; i<= mbs; i++ ) { 1762 a->i[i] = a->i[i-1] + browlengths[i-1]; 1763 a->ilen[i-1] = browlengths[i-1]; 1764 } 1765 a->nz = 0; 1766 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 1767 1768 /* read in nonzero values */ 1769 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 1770 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1771 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 1772 1773 /* set "a" and "j" values into matrix */ 1774 nzcount = 0; jcount = 0; 1775 for ( i=0; i<mbs; i++ ) { 1776 nzcountb = nzcount; 1777 nmask = 0; 1778 for ( j=0; j<bs; j++ ) { 1779 kmax = rowlengths[i*bs+j]; 1780 for ( k=0; k<kmax; k++ ) { 1781 tmp = jj[nzcount++]/bs; 1782 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1783 } 1784 rowcount++; 1785 } 1786 /* sort the masked values */ 1787 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1788 1789 /* set "j" values into matrix */ 1790 maskcount = 1; 1791 for ( j=0; j<nmask; j++ ) { 1792 a->j[jcount++] = masked[j]; 1793 mask[masked[j]] = maskcount++; 1794 } 1795 /* set "a" values into matrix */ 1796 ishift = bs2*a->i[i]; 1797 for ( j=0; j<bs; j++ ) { 1798 kmax = rowlengths[i*bs+j]; 1799 for ( k=0; k<kmax; k++ ) { 1800 tmp = jj[nzcountb]/bs ; 1801 block = mask[tmp] - 1; 1802 point = jj[nzcountb] - bs*tmp; 1803 idx = ishift + bs2*block + j + bs*point; 1804 a->a[idx] = aa[nzcountb++]; 1805 } 1806 } 1807 /* zero out the mask elements we set */ 1808 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1809 } 1810 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"Bad binary matrix"); 1811 1812 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1813 ierr = PetscFree(browlengths);CHKERRQ(ierr); 1814 ierr = PetscFree(aa);CHKERRQ(ierr); 1815 ierr = PetscFree(jj);CHKERRQ(ierr); 1816 ierr = PetscFree(mask);CHKERRQ(ierr); 1817 1818 B->assembled = PETSC_TRUE; 1819 1820 ierr = MatView_Private(B);CHKERRQ(ierr); 1821 PetscFunctionReturn(0); 1822 } 1823 1824 1825 1826