| 1 |
/* $OpenBSD: pat_rep.c,v 1.30 2005/08/05 08:30:10 djm Exp $ */ |
| 2 |
/* $NetBSD: pat_rep.c,v 1.4 1995/03/21 09:07:33 cgd Exp $ */ |
| 3 |
|
| 4 |
/*- |
| 5 |
* Copyright (c) 1992 Keith Muller. |
| 6 |
* Copyright (c) 1992, 1993 |
| 7 |
* The Regents of the University of California. All rights reserved. |
| 8 |
* |
| 9 |
* This code is derived from software contributed to Berkeley by |
| 10 |
* Keith Muller of the University of California, San Diego. |
| 11 |
* |
| 12 |
* Redistribution and use in source and binary forms, with or without |
| 13 |
* modification, are permitted provided that the following conditions |
| 14 |
* are met: |
| 15 |
* 1. Redistributions of source code must retain the above copyright |
| 16 |
* notice, this list of conditions and the following disclaimer. |
| 17 |
* 2. Redistributions in binary form must reproduce the above copyright |
| 18 |
* notice, this list of conditions and the following disclaimer in the |
| 19 |
* documentation and/or other materials provided with the distribution. |
| 20 |
* 3. Neither the name of the University nor the names of its contributors |
| 21 |
* may be used to endorse or promote products derived from this software |
| 22 |
* without specific prior written permission. |
| 23 |
* |
| 24 |
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
| 25 |
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 26 |
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 27 |
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
| 28 |
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| 29 |
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
| 30 |
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 31 |
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| 32 |
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| 33 |
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| 34 |
* SUCH DAMAGE. |
| 35 |
*/ |
| 36 |
|
| 37 |
#include <sys/param.h> |
| 38 |
#include <sys/time.h> |
| 39 |
#include <sys/stat.h> |
| 40 |
#include <stdio.h> |
| 41 |
#include <string.h> |
| 42 |
#include <unistd.h> |
| 43 |
#include <stdlib.h> |
| 44 |
#include <errno.h> |
| 45 |
#include <regex.h> |
| 46 |
#include "pax.h" |
| 47 |
#include "pat_rep.h" |
| 48 |
#include "extern.h" |
| 49 |
|
| 50 |
__SCCSID("@(#)pat_rep.c 8.2 (Berkeley) 4/18/94"); |
| 51 |
__RCSID("$MirOS: src/bin/pax/pat_rep.c,v 1.2 2007/02/17 04:52:41 tg Exp $"); |
| 52 |
|
| 53 |
/* |
| 54 |
* routines to handle pattern matching, name modification (regular expression |
| 55 |
* substitution and interactive renames), and destination name modification for |
| 56 |
* copy (-rw). Both file name and link names are adjusted as required in these |
| 57 |
* routines. |
| 58 |
*/ |
| 59 |
|
| 60 |
#define MAXSUBEXP 10 /* max subexpressions, DO NOT CHANGE */ |
| 61 |
static PATTERN *pathead = NULL; /* file pattern match list head */ |
| 62 |
static PATTERN *pattail = NULL; /* file pattern match list tail */ |
| 63 |
static REPLACE *rephead = NULL; /* replacement string list head */ |
| 64 |
static REPLACE *reptail = NULL; /* replacement string list tail */ |
| 65 |
|
| 66 |
static int rep_name(char *, size_t, int *, int); |
| 67 |
static int tty_rename(ARCHD *); |
| 68 |
static int fix_path(char *, int *, char *, int); |
| 69 |
static int fn_match(char *, char *, char **); |
| 70 |
static char * range_match(char *, int); |
| 71 |
static int resub(regex_t *, regmatch_t *, char *, char *, char *, char *); |
| 72 |
|
| 73 |
/* |
| 74 |
* rep_add() |
| 75 |
* parses the -s replacement string; compiles the regular expression |
| 76 |
* and stores the compiled value and it's replacement string together in |
| 77 |
* replacement string list. Input to this function is of the form: |
| 78 |
* /old/new/pg |
| 79 |
* The first char in the string specifies the delimiter used by this |
| 80 |
* replacement string. "Old" is a regular expression in "ed" format which |
| 81 |
* is compiled by regcomp() and is applied to filenames. "new" is the |
| 82 |
* substitution string; p and g are options flags for printing and global |
| 83 |
* replacement (over the single filename) |
| 84 |
* Return: |
| 85 |
* 0 if a proper replacement string and regular expression was added to |
| 86 |
* the list of replacement patterns; -1 otherwise. |
| 87 |
*/ |
| 88 |
|
| 89 |
int |
| 90 |
rep_add(char *str) |
| 91 |
{ |
| 92 |
char *pt1; |
| 93 |
char *pt2; |
| 94 |
REPLACE *rep; |
| 95 |
int res; |
| 96 |
char rebuf[BUFSIZ]; |
| 97 |
|
| 98 |
/* |
| 99 |
* throw out the bad parameters |
| 100 |
*/ |
| 101 |
if ((str == NULL) || (*str == '\0')) { |
| 102 |
paxwarn(1, "Empty replacement string"); |
| 103 |
return(-1); |
| 104 |
} |
| 105 |
|
| 106 |
/* |
| 107 |
* first character in the string specifies what the delimiter is for |
| 108 |
* this expression |
| 109 |
*/ |
| 110 |
for (pt1 = str+1; *pt1; pt1++) { |
| 111 |
if (*pt1 == '\\') { |
| 112 |
pt1++; |
| 113 |
continue; |
| 114 |
} |
| 115 |
if (*pt1 == *str) |
| 116 |
break; |
| 117 |
} |
| 118 |
if (*pt1 == '\0') { |
| 119 |
paxwarn(1, "Invalid replacement string %s", str); |
| 120 |
return(-1); |
| 121 |
} |
| 122 |
|
| 123 |
/* |
| 124 |
* allocate space for the node that handles this replacement pattern |
| 125 |
* and split out the regular expression and try to compile it |
| 126 |
*/ |
| 127 |
if ((rep = (REPLACE *)malloc(sizeof(REPLACE))) == NULL) { |
| 128 |
paxwarn(1, "Unable to allocate memory for replacement string"); |
| 129 |
return(-1); |
| 130 |
} |
| 131 |
|
| 132 |
*pt1 = '\0'; |
| 133 |
if ((res = regcomp(&(rep->rcmp), str+1, 0)) != 0) { |
| 134 |
regerror(res, &(rep->rcmp), rebuf, sizeof(rebuf)); |
| 135 |
paxwarn(1, "%s while compiling regular expression %s", rebuf, str); |
| 136 |
(void)free((char *)rep); |
| 137 |
return(-1); |
| 138 |
} |
| 139 |
|
| 140 |
/* |
| 141 |
* put the delimiter back in case we need an error message and |
| 142 |
* locate the delimiter at the end of the replacement string |
| 143 |
* we then point the node at the new substitution string |
| 144 |
*/ |
| 145 |
*pt1++ = *str; |
| 146 |
for (pt2 = pt1; *pt2; pt2++) { |
| 147 |
if (*pt2 == '\\') { |
| 148 |
pt2++; |
| 149 |
continue; |
| 150 |
} |
| 151 |
if (*pt2 == *str) |
| 152 |
break; |
| 153 |
} |
| 154 |
if (*pt2 == '\0') { |
| 155 |
regfree(&(rep->rcmp)); |
| 156 |
(void)free((char *)rep); |
| 157 |
paxwarn(1, "Invalid replacement string %s", str); |
| 158 |
return(-1); |
| 159 |
} |
| 160 |
|
| 161 |
*pt2 = '\0'; |
| 162 |
rep->nstr = pt1; |
| 163 |
pt1 = pt2++; |
| 164 |
rep->flgs = 0; |
| 165 |
|
| 166 |
/* |
| 167 |
* set the options if any |
| 168 |
*/ |
| 169 |
while (*pt2 != '\0') { |
| 170 |
switch (*pt2) { |
| 171 |
case 'g': |
| 172 |
case 'G': |
| 173 |
rep->flgs |= GLOB; |
| 174 |
break; |
| 175 |
case 'p': |
| 176 |
case 'P': |
| 177 |
rep->flgs |= PRNT; |
| 178 |
break; |
| 179 |
default: |
| 180 |
regfree(&(rep->rcmp)); |
| 181 |
(void)free((char *)rep); |
| 182 |
*pt1 = *str; |
| 183 |
paxwarn(1, "Invalid replacement string option %s", str); |
| 184 |
return(-1); |
| 185 |
} |
| 186 |
++pt2; |
| 187 |
} |
| 188 |
|
| 189 |
/* |
| 190 |
* all done, link it in at the end |
| 191 |
*/ |
| 192 |
rep->fow = NULL; |
| 193 |
if (rephead == NULL) { |
| 194 |
reptail = rephead = rep; |
| 195 |
return(0); |
| 196 |
} |
| 197 |
reptail->fow = rep; |
| 198 |
reptail = rep; |
| 199 |
return(0); |
| 200 |
} |
| 201 |
|
| 202 |
/* |
| 203 |
* pat_add() |
| 204 |
* add a pattern match to the pattern match list. Pattern matches are used |
| 205 |
* to select which archive members are extracted. (They appear as |
| 206 |
* arguments to pax in the list and read modes). If no patterns are |
| 207 |
* supplied to pax, all members in the archive will be selected (and the |
| 208 |
* pattern match list is empty). |
| 209 |
* Return: |
| 210 |
* 0 if the pattern was added to the list, -1 otherwise |
| 211 |
*/ |
| 212 |
|
| 213 |
int |
| 214 |
pat_add(char *str, char *chd_name) |
| 215 |
{ |
| 216 |
PATTERN *pt; |
| 217 |
|
| 218 |
/* |
| 219 |
* throw out the junk |
| 220 |
*/ |
| 221 |
if ((str == NULL) || (*str == '\0')) { |
| 222 |
paxwarn(1, "Empty pattern string"); |
| 223 |
return(-1); |
| 224 |
} |
| 225 |
|
| 226 |
/* |
| 227 |
* allocate space for the pattern and store the pattern. the pattern is |
| 228 |
* part of argv so do not bother to copy it, just point at it. Add the |
| 229 |
* node to the end of the pattern list |
| 230 |
*/ |
| 231 |
if ((pt = (PATTERN *)malloc(sizeof(PATTERN))) == NULL) { |
| 232 |
paxwarn(1, "Unable to allocate memory for pattern string"); |
| 233 |
return(-1); |
| 234 |
} |
| 235 |
|
| 236 |
pt->pstr = str; |
| 237 |
pt->pend = NULL; |
| 238 |
pt->plen = strlen(str); |
| 239 |
pt->fow = NULL; |
| 240 |
pt->flgs = 0; |
| 241 |
pt->chdname = chd_name; |
| 242 |
|
| 243 |
if (pathead == NULL) { |
| 244 |
pattail = pathead = pt; |
| 245 |
return(0); |
| 246 |
} |
| 247 |
pattail->fow = pt; |
| 248 |
pattail = pt; |
| 249 |
return(0); |
| 250 |
} |
| 251 |
|
| 252 |
/* |
| 253 |
* pat_chk() |
| 254 |
* complain if any the user supplied pattern did not result in a match to |
| 255 |
* a selected archive member. |
| 256 |
*/ |
| 257 |
|
| 258 |
void |
| 259 |
pat_chk(void) |
| 260 |
{ |
| 261 |
PATTERN *pt; |
| 262 |
int wban = 0; |
| 263 |
|
| 264 |
/* |
| 265 |
* walk down the list checking the flags to make sure MTCH was set, |
| 266 |
* if not complain |
| 267 |
*/ |
| 268 |
for (pt = pathead; pt != NULL; pt = pt->fow) { |
| 269 |
if (pt->flgs & MTCH) |
| 270 |
continue; |
| 271 |
if (!wban) { |
| 272 |
paxwarn(1, "WARNING! These patterns were not matched:"); |
| 273 |
++wban; |
| 274 |
} |
| 275 |
(void)fprintf(stderr, "%s\n", pt->pstr); |
| 276 |
} |
| 277 |
} |
| 278 |
|
| 279 |
/* |
| 280 |
* pat_sel() |
| 281 |
* the archive member which matches a pattern was selected. Mark the |
| 282 |
* pattern as having selected an archive member. arcn->pat points at the |
| 283 |
* pattern that was matched. arcn->pat is set in pat_match() |
| 284 |
* |
| 285 |
* NOTE: When the -c option is used, we are called when there was no match |
| 286 |
* by pat_match() (that means we did match before the inverted sense of |
| 287 |
* the logic). Now this seems really strange at first, but with -c we |
| 288 |
* need to keep track of those patterns that cause an archive member to NOT |
| 289 |
* be selected (it found an archive member with a specified pattern) |
| 290 |
* Return: |
| 291 |
* 0 if the pattern pointed at by arcn->pat was tagged as creating a |
| 292 |
* match, -1 otherwise. |
| 293 |
*/ |
| 294 |
|
| 295 |
int |
| 296 |
pat_sel(ARCHD *arcn) |
| 297 |
{ |
| 298 |
PATTERN *pt; |
| 299 |
PATTERN **ppt; |
| 300 |
int len; |
| 301 |
|
| 302 |
/* |
| 303 |
* if no patterns just return |
| 304 |
*/ |
| 305 |
if ((pathead == NULL) || ((pt = arcn->pat) == NULL)) |
| 306 |
return(0); |
| 307 |
|
| 308 |
/* |
| 309 |
* when we are NOT limited to a single match per pattern mark the |
| 310 |
* pattern and return |
| 311 |
*/ |
| 312 |
if (!nflag) { |
| 313 |
pt->flgs |= MTCH; |
| 314 |
return(0); |
| 315 |
} |
| 316 |
|
| 317 |
/* |
| 318 |
* we reach this point only when we allow a single selected match per |
| 319 |
* pattern, if the pattern matches a directory and we do not have -d |
| 320 |
* (dflag) we are done with this pattern. We may also be handed a file |
| 321 |
* in the subtree of a directory. in that case when we are operating |
| 322 |
* with -d, this pattern was already selected and we are done |
| 323 |
*/ |
| 324 |
if (pt->flgs & DIR_MTCH) |
| 325 |
return(0); |
| 326 |
|
| 327 |
if (!dflag && ((pt->pend != NULL) || (arcn->type == PAX_DIR))) { |
| 328 |
/* |
| 329 |
* ok we matched a directory and we are allowing |
| 330 |
* subtree matches but because of the -n only its children will |
| 331 |
* match. This is tagged as a DIR_MTCH type. |
| 332 |
* WATCH IT, the code assumes that pt->pend points |
| 333 |
* into arcn->name and arcn->name has not been modified. |
| 334 |
* If not we will have a big mess. Yup this is another kludge |
| 335 |
*/ |
| 336 |
|
| 337 |
/* |
| 338 |
* if this was a prefix match, remove trailing part of path |
| 339 |
* so we can copy it. Future matches will be exact prefix match |
| 340 |
*/ |
| 341 |
if (pt->pend != NULL) |
| 342 |
*pt->pend = '\0'; |
| 343 |
|
| 344 |
if ((pt->pstr = strdup(arcn->name)) == NULL) { |
| 345 |
paxwarn(1, "Pattern select out of memory"); |
| 346 |
if (pt->pend != NULL) |
| 347 |
*pt->pend = '/'; |
| 348 |
pt->pend = NULL; |
| 349 |
return(-1); |
| 350 |
} |
| 351 |
|
| 352 |
/* |
| 353 |
* put the trailing / back in the source string |
| 354 |
*/ |
| 355 |
if (pt->pend != NULL) { |
| 356 |
*pt->pend = '/'; |
| 357 |
pt->pend = NULL; |
| 358 |
} |
| 359 |
pt->plen = strlen(pt->pstr); |
| 360 |
|
| 361 |
/* |
| 362 |
* strip off any trailing /, this should really never happen |
| 363 |
*/ |
| 364 |
len = pt->plen - 1; |
| 365 |
if (*(pt->pstr + len) == '/') { |
| 366 |
*(pt->pstr + len) = '\0'; |
| 367 |
pt->plen = len; |
| 368 |
} |
| 369 |
pt->flgs = DIR_MTCH | MTCH; |
| 370 |
arcn->pat = pt; |
| 371 |
return(0); |
| 372 |
} |
| 373 |
|
| 374 |
/* |
| 375 |
* we are then done with this pattern, so we delete it from the list |
| 376 |
* because it can never be used for another match. |
| 377 |
* Seems kind of strange to do for a -c, but the pax spec is really |
| 378 |
* vague on the interaction of -c, -n and -d. We assume that when -c |
| 379 |
* and the pattern rejects a member (i.e. it matched it) it is done. |
| 380 |
* In effect we place the order of the flags as having -c last. |
| 381 |
*/ |
| 382 |
pt = pathead; |
| 383 |
ppt = &pathead; |
| 384 |
while ((pt != NULL) && (pt != arcn->pat)) { |
| 385 |
ppt = &(pt->fow); |
| 386 |
pt = pt->fow; |
| 387 |
} |
| 388 |
|
| 389 |
if (pt == NULL) { |
| 390 |
/* |
| 391 |
* should never happen.... |
| 392 |
*/ |
| 393 |
paxwarn(1, "Pattern list inconsistent"); |
| 394 |
return(-1); |
| 395 |
} |
| 396 |
*ppt = pt->fow; |
| 397 |
(void)free((char *)pt); |
| 398 |
arcn->pat = NULL; |
| 399 |
return(0); |
| 400 |
} |
| 401 |
|
| 402 |
/* |
| 403 |
* pat_match() |
| 404 |
* see if this archive member matches any supplied pattern, if a match |
| 405 |
* is found, arcn->pat is set to point at the potential pattern. Later if |
| 406 |
* this archive member is "selected" we process and mark the pattern as |
| 407 |
* one which matched a selected archive member (see pat_sel()) |
| 408 |
* Return: |
| 409 |
* 0 if this archive member should be processed, 1 if it should be |
| 410 |
* skipped and -1 if we are done with all patterns (and pax should quit |
| 411 |
* looking for more members) |
| 412 |
*/ |
| 413 |
|
| 414 |
int |
| 415 |
pat_match(ARCHD *arcn) |
| 416 |
{ |
| 417 |
PATTERN *pt; |
| 418 |
|
| 419 |
arcn->pat = NULL; |
| 420 |
|
| 421 |
/* |
| 422 |
* if there are no more patterns and we have -n (and not -c) we are |
| 423 |
* done. otherwise with no patterns to match, matches all |
| 424 |
*/ |
| 425 |
if (pathead == NULL) { |
| 426 |
if (nflag && !cflag) |
| 427 |
return(-1); |
| 428 |
return(0); |
| 429 |
} |
| 430 |
|
| 431 |
/* |
| 432 |
* have to search down the list one at a time looking for a match. |
| 433 |
*/ |
| 434 |
pt = pathead; |
| 435 |
while (pt != NULL) { |
| 436 |
/* |
| 437 |
* check for a file name match unless we have DIR_MTCH set in |
| 438 |
* this pattern then we want a prefix match |
| 439 |
*/ |
| 440 |
if (pt->flgs & DIR_MTCH) { |
| 441 |
/* |
| 442 |
* this pattern was matched before to a directory |
| 443 |
* as we must have -n set for this (but not -d). We can |
| 444 |
* only match CHILDREN of that directory so we must use |
| 445 |
* an exact prefix match (no wildcards). |
| 446 |
*/ |
| 447 |
if ((arcn->name[pt->plen] == '/') && |
| 448 |
(strncmp(pt->pstr, arcn->name, pt->plen) == 0)) |
| 449 |
break; |
| 450 |
} else if (fn_match(pt->pstr, arcn->name, &pt->pend) == 0) |
| 451 |
break; |
| 452 |
pt = pt->fow; |
| 453 |
} |
| 454 |
|
| 455 |
/* |
| 456 |
* return the result, remember that cflag (-c) inverts the sense of a |
| 457 |
* match |
| 458 |
*/ |
| 459 |
if (pt == NULL) |
| 460 |
return(cflag ? 0 : 1); |
| 461 |
|
| 462 |
/* |
| 463 |
* we had a match, now when we invert the sense (-c) we reject this |
| 464 |
* member. However we have to tag the pattern a being successful, (in a |
| 465 |
* match, not in selecting a archive member) so we call pat_sel() here. |
| 466 |
*/ |
| 467 |
arcn->pat = pt; |
| 468 |
if (!cflag) |
| 469 |
return(0); |
| 470 |
|
| 471 |
if (pat_sel(arcn) < 0) |
| 472 |
return(-1); |
| 473 |
arcn->pat = NULL; |
| 474 |
return(1); |
| 475 |
} |
| 476 |
|
| 477 |
/* |
| 478 |
* fn_match() |
| 479 |
* Return: |
| 480 |
* 0 if this archive member should be processed, 1 if it should be |
| 481 |
* skipped and -1 if we are done with all patterns (and pax should quit |
| 482 |
* looking for more members) |
| 483 |
* Note: *pend may be changed to show where the prefix ends. |
| 484 |
*/ |
| 485 |
|
| 486 |
static int |
| 487 |
fn_match(char *pattern, char *string, char **pend) |
| 488 |
{ |
| 489 |
char c; |
| 490 |
char test; |
| 491 |
|
| 492 |
*pend = NULL; |
| 493 |
for (;;) { |
| 494 |
switch (c = *pattern++) { |
| 495 |
case '\0': |
| 496 |
/* |
| 497 |
* Ok we found an exact match |
| 498 |
*/ |
| 499 |
if (*string == '\0') |
| 500 |
return(0); |
| 501 |
|
| 502 |
/* |
| 503 |
* Check if it is a prefix match |
| 504 |
*/ |
| 505 |
if ((dflag == 1) || (*string != '/')) |
| 506 |
return(-1); |
| 507 |
|
| 508 |
/* |
| 509 |
* It is a prefix match, remember where the trailing |
| 510 |
* / is located |
| 511 |
*/ |
| 512 |
*pend = string; |
| 513 |
return(0); |
| 514 |
case '?': |
| 515 |
if ((test = *string++) == '\0') |
| 516 |
return (-1); |
| 517 |
break; |
| 518 |
case '*': |
| 519 |
c = *pattern; |
| 520 |
/* |
| 521 |
* Collapse multiple *'s. |
| 522 |
*/ |
| 523 |
while (c == '*') |
| 524 |
c = *++pattern; |
| 525 |
|
| 526 |
/* |
| 527 |
* Optimized hack for pattern with a * at the end |
| 528 |
*/ |
| 529 |
if (c == '\0') |
| 530 |
return (0); |
| 531 |
|
| 532 |
/* |
| 533 |
* General case, use recursion. |
| 534 |
*/ |
| 535 |
while ((test = *string) != '\0') { |
| 536 |
if (!fn_match(pattern, string, pend)) |
| 537 |
return (0); |
| 538 |
++string; |
| 539 |
} |
| 540 |
return (-1); |
| 541 |
case '[': |
| 542 |
/* |
| 543 |
* range match |
| 544 |
*/ |
| 545 |
if (((test = *string++) == '\0') || |
| 546 |
((pattern = range_match(pattern, test)) == NULL)) |
| 547 |
return (-1); |
| 548 |
break; |
| 549 |
case '\\': |
| 550 |
default: |
| 551 |
if (c != *string++) |
| 552 |
return (-1); |
| 553 |
break; |
| 554 |
} |
| 555 |
} |
| 556 |
/* NOTREACHED */ |
| 557 |
} |
| 558 |
|
| 559 |
/*
 * range_match()
 *	match the character test against a bracket expression. pattern points
 *	just past the opening '['. A leading '!' negates the set. Members are
 *	single characters or ranges written as "lo-hi"; a '-' that is followed
 *	by ']' or by the end of the pattern is an ordinary character.
 * Return:
 *	pointer to the character following the closing ']' if test is
 *	accepted; NULL if it is rejected or the ']' is missing.
 */

static char *
range_match(char *pattern, int test)
{
	int invert = 0;
	int hit = 0;
	char lo;
	char hi;

	if (*pattern == '!') {
		invert = 1;
		++pattern;
	}

	for (;;) {
		lo = *pattern++;
		if (lo == ']')
			break;

		/*
		 * ran off the end without a closing bracket
		 */
		if (lo == '\0')
			return (NULL);

		if ((pattern[0] == '-') && (pattern[1] != '\0') &&
		    (pattern[1] != ']')) {
			hi = pattern[1];
			pattern += 2;
			if ((lo <= test) && (test <= hi))
				hit = 1;
		} else if (lo == test) {
			hit = 1;
		}
	}

	if (hit == invert)
		return (NULL);
	return (pattern);
}
| 587 |
|
| 588 |
/* |
| 589 |
* mod_name() |
| 590 |
* modify a selected file name. first attempt to apply replacement string |
| 591 |
* expressions, then apply interactive file rename. We apply replacement |
| 592 |
* string expressions to both filenames and file links (if we didn't the |
| 593 |
* links would point to the wrong place, and we could never be able to |
| 594 |
* move an archive that has a file link in it). When we rename files |
| 595 |
* interactively, we store that mapping (old name to user input name) so |
| 596 |
* if we spot any file links to the old file name in the future, we will |
| 597 |
* know exactly how to fix the file link. |
| 598 |
* Return: |
| 599 |
* 0 continue to process file, 1 skip this file, -1 pax is finished |
| 600 |
*/ |
| 601 |
|
| 602 |
int |
| 603 |
mod_name(ARCHD *arcn) |
| 604 |
{ |
| 605 |
int res = 0; |
| 606 |
|
| 607 |
/* |
| 608 |
* Strip off leading '/' if appropriate. |
| 609 |
* Currently, this option is only set for the tar format. |
| 610 |
*/ |
| 611 |
while (rmleadslash && arcn->name[0] == '/') { |
| 612 |
if (arcn->name[1] == '\0') { |
| 613 |
arcn->name[0] = '.'; |
| 614 |
} else { |
| 615 |
(void)memmove(arcn->name, &arcn->name[1], |
| 616 |
strlen(arcn->name)); |
| 617 |
arcn->nlen--; |
| 618 |
} |
| 619 |
if (rmleadslash < 2) { |
| 620 |
rmleadslash = 2; |
| 621 |
paxwarn(0, "Removing leading / from absolute path names in the archive"); |
| 622 |
} |
| 623 |
} |
| 624 |
while (rmleadslash && arcn->ln_name[0] == '/' && |
| 625 |
(arcn->type == PAX_HLK || arcn->type == PAX_HRG)) { |
| 626 |
if (arcn->ln_name[1] == '\0') { |
| 627 |
arcn->ln_name[0] = '.'; |
| 628 |
} else { |
| 629 |
(void)memmove(arcn->ln_name, &arcn->ln_name[1], |
| 630 |
strlen(arcn->ln_name)); |
| 631 |
arcn->ln_nlen--; |
| 632 |
} |
| 633 |
if (rmleadslash < 2) { |
| 634 |
rmleadslash = 2; |
| 635 |
paxwarn(0, "Removing leading / from absolute path names in the archive"); |
| 636 |
} |
| 637 |
} |
| 638 |
|
| 639 |
/* |
| 640 |
* IMPORTANT: We have a problem. what do we do with symlinks? |
| 641 |
* Modifying a hard link name makes sense, as we know the file it |
| 642 |
* points at should have been seen already in the archive (and if it |
| 643 |
* wasn't seen because of a read error or a bad archive, we lose |
| 644 |
* anyway). But there are no such requirements for symlinks. On one |
| 645 |
* hand the symlink that refers to a file in the archive will have to |
| 646 |
* be modified to so it will still work at its new location in the |
| 647 |
* file system. On the other hand a symlink that points elsewhere (and |
| 648 |
* should continue to do so) should not be modified. There is clearly |
| 649 |
* no perfect solution here. So we handle them like hardlinks. Clearly |
| 650 |
* a replacement made by the interactive rename mapping is very likely |
| 651 |
* to be correct since it applies to a single file and is an exact |
| 652 |
* match. The regular expression replacements are a little harder to |
| 653 |
* justify though. We claim that the symlink name is only likely |
| 654 |
* to be replaced when it points within the file tree being moved and |
| 655 |
* in that case it should be modified. what we really need to do is to |
| 656 |
* call an oracle here. :) |
| 657 |
*/ |
| 658 |
if (rephead != NULL) { |
| 659 |
/* |
| 660 |
* we have replacement strings, modify the name and the link |
| 661 |
* name if any. |
| 662 |
*/ |
| 663 |
if ((res = rep_name(arcn->name, sizeof(arcn->name), &(arcn->nlen), 1)) != 0) |
| 664 |
return(res); |
| 665 |
|
| 666 |
if (((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) || |
| 667 |
(arcn->type == PAX_HRG)) && |
| 668 |
((res = rep_name(arcn->ln_name, sizeof(arcn->ln_name), &(arcn->ln_nlen), 0)) != 0)) |
| 669 |
return(res); |
| 670 |
} |
| 671 |
|
| 672 |
if (iflag) { |
| 673 |
/* |
| 674 |
* perform interactive file rename, then map the link if any |
| 675 |
*/ |
| 676 |
if ((res = tty_rename(arcn)) != 0) |
| 677 |
return(res); |
| 678 |
if ((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) || |
| 679 |
(arcn->type == PAX_HRG)) |
| 680 |
sub_name(arcn->ln_name, &(arcn->ln_nlen), sizeof(arcn->ln_name)); |
| 681 |
} |
| 682 |
return(res); |
| 683 |
} |
| 684 |
|
| 685 |
/* |
| 686 |
* tty_rename() |
| 687 |
* Prompt the user for a replacement file name. A "." keeps the old name, |
| 688 |
* a empty line skips the file, and an EOF on reading the tty, will cause |
| 689 |
* pax to stop processing and exit. Otherwise the file name input, replaces |
| 690 |
* the old one. |
| 691 |
* Return: |
| 692 |
* 0 process this file, 1 skip this file, -1 we need to exit pax |
| 693 |
*/ |
| 694 |
|
| 695 |
static int |
| 696 |
tty_rename(ARCHD *arcn) |
| 697 |
{ |
| 698 |
char tmpname[PAXPATHLEN+2]; |
| 699 |
int res; |
| 700 |
|
| 701 |
/* |
| 702 |
* prompt user for the replacement name for a file, keep trying until |
| 703 |
* we get some reasonable input. Archives may have more than one file |
| 704 |
* on them with the same name (from updates etc). We print verbose info |
| 705 |
* on the file so the user knows what is up. |
| 706 |
*/ |
| 707 |
tty_prnt("\nATTENTION: %s interactive file rename operation.\n", argv0); |
| 708 |
|
| 709 |
for (;;) { |
| 710 |
ls_tty(arcn); |
| 711 |
tty_prnt("Input new name, or a \".\" to keep the old name, "); |
| 712 |
tty_prnt("or a \"return\" to skip this file.\n"); |
| 713 |
tty_prnt("Input > "); |
| 714 |
if (tty_read(tmpname, sizeof(tmpname)) < 0) |
| 715 |
return(-1); |
| 716 |
if (strcmp(tmpname, "..") == 0) { |
| 717 |
tty_prnt("Try again, illegal file name: ..\n"); |
| 718 |
continue; |
| 719 |
} |
| 720 |
if (strlen(tmpname) > PAXPATHLEN) { |
| 721 |
tty_prnt("Try again, file name too long\n"); |
| 722 |
continue; |
| 723 |
} |
| 724 |
break; |
| 725 |
} |
| 726 |
|
| 727 |
/* |
| 728 |
* empty file name, skips this file. a "." leaves it alone |
| 729 |
*/ |
| 730 |
if (tmpname[0] == '\0') { |
| 731 |
tty_prnt("Skipping file.\n"); |
| 732 |
return(1); |
| 733 |
} |
| 734 |
if ((tmpname[0] == '.') && (tmpname[1] == '\0')) { |
| 735 |
tty_prnt("Processing continues, name unchanged.\n"); |
| 736 |
return(0); |
| 737 |
} |
| 738 |
|
| 739 |
/* |
| 740 |
* ok the name changed. We may run into links that point at this |
| 741 |
* file later. we have to remember where the user sent the file |
| 742 |
* in order to repair any links. |
| 743 |
*/ |
| 744 |
tty_prnt("Processing continues, name changed to: %s\n", tmpname); |
| 745 |
res = add_name(arcn->name, arcn->nlen, tmpname); |
| 746 |
arcn->nlen = strlcpy(arcn->name, tmpname, sizeof(arcn->name)); |
| 747 |
if ((size_t)arcn->nlen >= sizeof(arcn->name)) |
| 748 |
arcn->nlen = sizeof(arcn->name) - 1; /* XXX truncate? */ |
| 749 |
if (res < 0) |
| 750 |
return(-1); |
| 751 |
return(0); |
| 752 |
} |
| 753 |
|
| 754 |
/* |
| 755 |
* set_dest() |
| 756 |
* fix up the file name and the link name (if any) so this file will land |
| 757 |
* in the destination directory (used during copy() -rw). |
| 758 |
* Return: |
| 759 |
* 0 if ok, -1 if failure (name too long) |
| 760 |
*/ |
| 761 |
|
| 762 |
int |
| 763 |
set_dest(ARCHD *arcn, char *dest_dir, int dir_len) |
| 764 |
{ |
| 765 |
if (fix_path(arcn->name, &(arcn->nlen), dest_dir, dir_len) < 0) |
| 766 |
return(-1); |
| 767 |
|
| 768 |
/* |
| 769 |
* It is really hard to deal with symlinks here, we cannot be sure |
| 770 |
* if the name they point was moved (or will be moved). It is best to |
| 771 |
* leave them alone. |
| 772 |
*/ |
| 773 |
if ((arcn->type != PAX_HLK) && (arcn->type != PAX_HRG)) |
| 774 |
return(0); |
| 775 |
|
| 776 |
if (fix_path(arcn->ln_name, &(arcn->ln_nlen), dest_dir, dir_len) < 0) |
| 777 |
return(-1); |
| 778 |
return(0); |
| 779 |
} |
| 780 |
|
| 781 |
/* |
| 782 |
* fix_path |
| 783 |
* concatenate dir_name and or_name and store the result in or_name (if |
| 784 |
* it fits). This is one ugly function. |
| 785 |
* Return: |
| 786 |
* 0 if ok, -1 if the final name is too long |
| 787 |
*/ |
| 788 |
|
| 789 |
static int |
| 790 |
fix_path(char *or_name, int *or_len, char *dir_name, int dir_len) |
| 791 |
{ |
| 792 |
char *src; |
| 793 |
char *dest; |
| 794 |
char *start; |
| 795 |
int len; |
| 796 |
|
| 797 |
/* |
| 798 |
* we shift the or_name to the right enough to tack in the dir_name |
| 799 |
* at the front. We make sure we have enough space for it all before |
| 800 |
* we start. since dest always ends in a slash, we skip of or_name |
| 801 |
* if it also starts with one. |
| 802 |
*/ |
| 803 |
start = or_name; |
| 804 |
src = start + *or_len; |
| 805 |
dest = src + dir_len; |
| 806 |
if (*start == '/') { |
| 807 |
++start; |
| 808 |
--dest; |
| 809 |
} |
| 810 |
if ((len = dest - or_name) > PAXPATHLEN) { |
| 811 |
paxwarn(1, "File name %s/%s, too long", dir_name, start); |
| 812 |
return(-1); |
| 813 |
} |
| 814 |
*or_len = len; |
| 815 |
|
| 816 |
/* |
| 817 |
* enough space, shift |
| 818 |
*/ |
| 819 |
while (src >= start) |
| 820 |
*dest-- = *src--; |
| 821 |
src = dir_name + dir_len - 1; |
| 822 |
|
| 823 |
/* |
| 824 |
* splice in the destination directory name |
| 825 |
*/ |
| 826 |
while (src >= dir_name) |
| 827 |
*dest-- = *src--; |
| 828 |
|
| 829 |
*(or_name + len) = '\0'; |
| 830 |
return(0); |
| 831 |
} |
| 832 |
|
| 833 |
/* |
| 834 |
* rep_name() |
| 835 |
* walk down the list of replacement strings applying each one in order. |
| 836 |
* when we find one with a successful substitution, we modify the name |
| 837 |
* as specified. if required, we print the results. if the resulting name |
| 838 |
* is empty, we will skip this archive member. We use the regexp(3) |
| 839 |
* routines (regexp() ought to win a prize as having the most cryptic |
| 840 |
* library function manual page). |
| 841 |
* --Parameters-- |
| 842 |
* name is the file name we are going to apply the regular expressions to |
| 843 |
* (and may be modified) |
| 844 |
* nsize is the size of the name buffer. |
| 845 |
* nlen is the length of this name (and is modified to hold the length of |
| 846 |
* the final string). |
| 847 |
* prnt is a flag that says whether to print the final result. |
| 848 |
* Return: |
| 849 |
* 0 if substitution was successful, 1 if we are to skip the file (the name |
| 850 |
* ended up empty) |
| 851 |
*/ |
| 852 |
|
| 853 |
static int |
| 854 |
rep_name(char *name, size_t nsize, int *nlen, int prnt) |
| 855 |
{ |
| 856 |
REPLACE *pt; |
| 857 |
char *inpt; |
| 858 |
char *outpt; |
| 859 |
char *endpt; |
| 860 |
char *rpt; |
| 861 |
int found = 0; |
| 862 |
int res; |
| 863 |
regmatch_t pm[MAXSUBEXP]; |
| 864 |
char nname[PAXPATHLEN+1]; /* final result of all replacements */ |
| 865 |
char buf1[PAXPATHLEN+1]; /* where we work on the name */ |
| 866 |
|
| 867 |
/* |
| 868 |
* copy the name into buf1, where we will work on it. We need to keep |
| 869 |
* the orig string around so we can print out the result of the final |
| 870 |
* replacement. We build up the final result in nname. inpt points at |
| 871 |
* the string we apply the regular expression to. prnt is used to |
| 872 |
* suppress printing when we handle replacements on the link field |
| 873 |
* (the user already saw that substitution go by) |
| 874 |
*/ |
| 875 |
pt = rephead; |
| 876 |
(void)strlcpy(buf1, name, sizeof(buf1)); |
| 877 |
inpt = buf1; |
| 878 |
outpt = nname; |
| 879 |
endpt = outpt + PAXPATHLEN; |
| 880 |
|
| 881 |
/* |
| 882 |
* try each replacement string in order |
| 883 |
*/ |
| 884 |
while (pt != NULL) { |
| 885 |
do { |
| 886 |
char *oinpt = inpt; |
| 887 |
/* |
| 888 |
* check for a successful substitution, if not go to |
| 889 |
* the next pattern, or cleanup if we were global |
| 890 |
*/ |
| 891 |
if (regexec(&(pt->rcmp), inpt, MAXSUBEXP, pm, 0) != 0) |
| 892 |
break; |
| 893 |
|
| 894 |
/* |
| 895 |
* ok we found one. We have three parts, the prefix |
| 896 |
* which did not match, the section that did and the |
| 897 |
* tail (that also did not match). Copy the prefix to |
| 898 |
* the final output buffer (watching to make sure we |
| 899 |
* do not create a string too long). |
| 900 |
*/ |
| 901 |
found = 1; |
| 902 |
rpt = inpt + pm[0].rm_so; |
| 903 |
|
| 904 |
while ((inpt < rpt) && (outpt < endpt)) |
| 905 |
*outpt++ = *inpt++; |
| 906 |
if (outpt == endpt) |
| 907 |
break; |
| 908 |
|
| 909 |
/* |
| 910 |
* for the second part (which matched the regular |
| 911 |
* expression) apply the substitution using the |
| 912 |
* replacement string and place it the prefix in the |
| 913 |
* final output. If we have problems, skip it. |
| 914 |
*/ |
| 915 |
if ((res = resub(&(pt->rcmp),pm,pt->nstr,oinpt,outpt,endpt)) |
| 916 |
< 0) { |
| 917 |
if (prnt) |
| 918 |
paxwarn(1, "Replacement name error %s", |
| 919 |
name); |
| 920 |
return(1); |
| 921 |
} |
| 922 |
outpt += res; |
| 923 |
|
| 924 |
/* |
| 925 |
* we set up to look again starting at the first |
| 926 |
* character in the tail (of the input string right |
| 927 |
* after the last character matched by the regular |
| 928 |
* expression (inpt always points at the first char in |
| 929 |
* the string to process). If we are not doing a global |
| 930 |
* substitution, we will use inpt to copy the tail to |
| 931 |
* the final result. Make sure we do not overrun the |
| 932 |
* output buffer |
| 933 |
*/ |
| 934 |
inpt += pm[0].rm_eo - pm[0].rm_so; |
| 935 |
|
| 936 |
if ((outpt == endpt) || (*inpt == '\0')) |
| 937 |
break; |
| 938 |
|
| 939 |
/* |
| 940 |
* if the user wants global we keep trying to |
| 941 |
* substitute until it fails, then we are done. |
| 942 |
*/ |
| 943 |
} while (pt->flgs & GLOB); |
| 944 |
|
| 945 |
if (found) |
| 946 |
break; |
| 947 |
|
| 948 |
/* |
| 949 |
* a successful substitution did NOT occur, try the next one |
| 950 |
*/ |
| 951 |
pt = pt->fow; |
| 952 |
} |
| 953 |
|
| 954 |
if (found) { |
| 955 |
/* |
| 956 |
* we had a substitution, copy the last tail piece (if there is |
| 957 |
* room) to the final result |
| 958 |
*/ |
| 959 |
while ((outpt < endpt) && (*inpt != '\0')) |
| 960 |
*outpt++ = *inpt++; |
| 961 |
|
| 962 |
*outpt = '\0'; |
| 963 |
if ((outpt == endpt) && (*inpt != '\0')) { |
| 964 |
if (prnt) |
| 965 |
paxwarn(1,"Replacement name too long %s >> %s", |
| 966 |
name, nname); |
| 967 |
return(1); |
| 968 |
} |
| 969 |
|
| 970 |
/* |
| 971 |
* inform the user of the result if wanted |
| 972 |
*/ |
| 973 |
if (prnt && (pt->flgs & PRNT)) { |
| 974 |
if (*nname == '\0') |
| 975 |
(void)fprintf(stderr,"%s >> <empty string>\n", |
| 976 |
name); |
| 977 |
else |
| 978 |
(void)fprintf(stderr,"%s >> %s\n", name, nname); |
| 979 |
} |
| 980 |
|
| 981 |
/* |
| 982 |
* if empty inform the caller this file is to be skipped |
| 983 |
* otherwise copy the new name over the orig name and return |
| 984 |
*/ |
| 985 |
if (*nname == '\0') |
| 986 |
return(1); |
| 987 |
*nlen = strlcpy(name, nname, nsize); |
| 988 |
} |
| 989 |
return(0); |
| 990 |
} |
| 991 |
|
| 992 |
/*
 * resub()
 *	apply the replacement to the matched expression. expand out the old
 *	style ed(1) subexpression expansion: '&' and \0 stand for the whole
 *	match, \1 through \9 for the corresponding subexpression, and a
 *	backslash in front of any other character makes that character
 *	literal.
 * --Parameters--
 *	rp is the compiled expression (only its subexpression count is used)
 *	pm is the array of match offsets for the expression
 *	src is the replacement string
 *	inpt is the string the offsets in pm are relative to
 *	dest is where the expansion is written (it is NOT NUL terminated)
 *	destend is one past the last byte of dest that may be written
 * Return:
 *	-1 if error (a reference to a subexpression the expression does not
 *	have, or the expansion does not fit in the destination), or the
 *	number of characters added to the destination.
 */

static int
resub(regex_t *rp, regmatch_t *pm, char *src, char *inpt, char *dest,
	char *destend)
{
	char *spt;
	char *dpt;
	char c;
	regmatch_t *pmpt;
	int len;
	int subexcnt;

	spt = src;
	dpt = dest;
	subexcnt = rp->re_nsub;

	/*
	 * walk the whole replacement string. Space in the destination is
	 * checked at each write, so running out of room is reported as an
	 * error instead of quietly dropping the rest of the replacement.
	 */
	while ((c = *spt++) != '\0') {
		/*
		 * see if we just have an ordinary replacement character
		 * or we refer to a subexpression.
		 */
		if (c == '&') {
			pmpt = pm;
		} else if ((c == '\\') && (*spt >= '0') && (*spt <= '9')) {
			/*
			 * make sure there is a subexpression as specified
			 */
			if ((len = *spt++ - '0') > subexcnt)
				return(-1);
			pmpt = pm + len;
		} else {
			/*
			 * Ordinary character, just copy it. A backslash
			 * escapes the character after it; a trailing
			 * backslash is copied as is.
			 */
			if ((c == '\\') && (*spt != '\0'))
				c = *spt++;
			if (dpt >= destend)
				return(-1);
			*dpt++ = c;
			continue;
		}

		/*
		 * continue if the subexpression is bogus (did not take part
		 * in the match) or is empty
		 */
		if ((pmpt->rm_so < 0) || (pmpt->rm_eo < 0) ||
		    ((len = pmpt->rm_eo - pmpt->rm_so) <= 0))
			continue;

		/*
		 * copy the subexpression to the destination.
		 * fail if we run out of space
		 */
		if (len > (destend - dpt))
			return(-1);
		memcpy(dpt, inpt + pmpt->rm_so, len);
		dpt += len;
	}
	return(dpt - dest);
}