Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 70 additions & 0 deletions docs/grep.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# grep

## NAME

grep - print lines that match patterns

## SYNOPSIS

```
grep [-R] [-F] [-v] _pattern_ [file...]
```

## DESCRIPTION

`grep` searches for the specified pattern in each file. If no file is provided,
`grep` matches against standard input. Patterns are a limited regular
expression, using `^` for the beginning of a line, `$` for the end of a line,
`.` to denote any character, and `\*` for Kleene stars.

If `-R` or multiple files are specified, then the path name is printed in red
before the matching line.

## OPTIONS

`-R` Read all files under each directory recursively.

`-F` Use a fixed string as the pattern rather than a regular expression. Can
yield a slight performance boost.

`-v` Invert the matching, printing only the lines that _don't_ match.

Any other flag will fail.

## EXIT STATUS

`grep` exits with 0 if at least one line was selected (a matching line, or a
non-matching line with `-v`), 1 if no lines were selected, and 2 if an error
occurred.

## BUGS/CONSIDERATIONS

* Flags _must_ be specified before the pattern, and each flag must be given as
its own argument (e.g. `-R -v`, not `-Rv`).
* 2 is returned if _any_ error occurs, even if there is a match. This is
generally a problem when matching against multiple files, where one file
matches while another produces an error.
* Running `grep -R _pattern_` is **not** equivalent to `grep -R _pattern_ .`,
instead being equivalent to `grep _pattern_`. This is a bug.
* Recursion is depth-first, and is ordered by inode creation time rather than
lexically.
* Breaks up lines longer than 1024 characters. Shouldn't affect most people.

## EXAMPLES

`$ grep -R pattern dir`

matches `pattern` against all files in `dir` and its subdirectories (that can
be opened).

`$ grep -F F.g README.md`

matches README.md against the fixed string `F.g`. The pattern is not
interpreted as a regular expression, so `Fog` does not match.

`$ grep -v Fog README.md`

matches README.md and prints the lines that _don't_ match the pattern `Fog`.

`$ ls | grep README`

finds a file in the current directory whose name matches the pattern `README`.
145 changes: 127 additions & 18 deletions user/grep.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,27 +2,47 @@

#include "kernel/types.h"
#include "kernel/stat.h"
#include "kernel/fs.h"
#include "user/user.h"

// Shared read buffer: grep() reads file data into it and scans lines in place.
char buf[1024];
// Forward declarations for functions defined further down in this file.
int match(char*, char*);
int dirgrep(char*, int, char*);

// -F, -R, -v flags
// No bool type, so use uint8 instead
// Each is set to 1 by main() when the corresponding option is seen:
//   Fflag - treat the pattern as a fixed string (regex handling is skipped)
//   Rflag - search arguments through dirgrep(), descending into directories
//   vflag - invert the match: print the lines that do NOT match
uint8 Fflag = 0;
uint8 Rflag = 0;
uint8 vflag = 0;

// Nonzero when every printed line is prefixed with the path it came from;
// main() enables it when -R is given or more than one file is named.
uint8 printPath = 0;

// 0 if a match, 1 if not (2 if error)
// Passed to exit() by main(); grep() sets it to 0 when it prints a line and
// dirgrep() sets it to 2 on stat/open failures.
uint8 exitVal = 1;

void
grep(char *pattern, int fd)
grep(char *pattern, int fd, char *path)
{
int n, m;
char *p, *q;

m = 0;
// Write continuously to the buffer. If runs out of space, move current
// position to beginning of buffer and read again.
while((n = read(fd, buf+m, sizeof(buf)-m-1)) > 0){
m += n;
buf[m] = '\0';
p = buf;
while((q = strchr(p, '\n')) != 0){
*q = 0;
if(match(pattern, p)){
// Print line if match and no -v flag or if no match and -v flag
// Simple xor operation
if (match(pattern, p) != vflag) {
*q = '\n';
if (printPath)
fprintf(1, "\x1b[31m%s:\x1b[0m", path);
write(1, p, q+1 - p);
exitVal = 0;
}
p = q+1;
}
Expand All @@ -40,25 +60,110 @@ main(int argc, char *argv[])
char *pattern;

if(argc <= 1){
fprintf(2, "usage: grep pattern [file ...]\n");
exit(1);
fprintf(2, "usage: grep [-F] [-R] [-v] pattern [file ...]\n");
exit(2);
}

// check for flags
i = 1;
while(1) {
if (argv[i][0] == '-') {
switch (argv[i][1]) {
case 'F':
Fflag = 1;
break;
case 'R':
Rflag = 1;
break;
case 'v':
vflag = 1;
break;
default:
fprintf(2, "Unrecognized flag: -%c\n", argv[i][1]);
exit(2);
}
i++;
} else {
break;
}
}
pattern = argv[1];

if(argc <= 2){
grep(pattern, 0);
exit(0);
pattern = argv[i++];

if(argc <= i){
printPath = 0;
grep(pattern, 0, "\0");
exit(exitVal);
}

for(i = 2; i < argc; i++){
// Print path if recursion is used or if multiple files are passed
if(Rflag || i + 1 < argc) {
printPath = 1;
}

while(i < argc){
if((fd = open(argv[i], 0)) < 0){
printf("grep: cannot open %s\n", argv[i]);
fprintf(2, "grep: cannot open %s\n", argv[i]);
exit(1);
}
grep(pattern, fd);

// Recursion for -R
if (Rflag) {
dirgrep(pattern, fd, argv[i]);
} else {
grep(pattern, fd, argv[i]);
}

// cleanup
close(fd);
i++;
}
exit(0);
exit(exitVal);
}

// Search `path`, already opened as `fd`, for `pattern`, descending into
// directories.
//
// A regular file is handed straight to grep(). For a directory, every entry
// other than "." and ".." is opened, searched recursively and closed again.
// Any other file type is ignored.
//
// The caller keeps ownership of `fd`; this function never closes it.
//
// Returns 0 normally, or 1 if `path` could not be stat'ed or is too long to
// build child paths from. Every failure (including a child that cannot be
// opened) is also recorded by setting exitVal to 2.
int
dirgrep(char *pattern, int fd, char *path) {
  struct stat st;

  if (fstat(fd, &st) < 0) {
    fprintf(2, "grep: cannot stat %s\n", path);
    exitVal = 2;
    return 1;
  }

  switch (st.type) {
  case T_FILE:
    grep(pattern, fd, path);
    break;
  case T_DIR: {
    // Braces give the declarations below their own scope; a declaration may
    // not directly follow a case label in C prior to C23.
    char child[128], *p;
    struct dirent de;
    int child_fd;

    // Need room for path + '/' + up to DIRSIZ name bytes + terminating NUL.
    if (strlen(path) + 1 + DIRSIZ + 1 > sizeof(child)) {
      fprintf(2, "grep: path too long: %s\n", path);
      exitVal = 2;
      return 1;
    }

    strcpy(child, path);
    p = child + strlen(child);
    *p++ = '/';

    while (read(fd, &de, sizeof(de)) == sizeof(de)) {
      // skip unused slots and the self/parent links (the latter would make
      // the recursion loop forever)
      if (de.inum == 0 || strcmp(de.name, ".") == 0 || strcmp(de.name, "..") == 0)
        continue;

      // de.name is not NUL-terminated when the name fills all DIRSIZ bytes,
      // so terminate the assembled path explicitly.
      memmove(p, de.name, DIRSIZ);
      p[DIRSIZ] = '\0';

      // run dirgrep on the new path
      if ((child_fd = open(child, 0)) < 0) {
        fprintf(2, "grep: cannot open %s\n", child);
        exitVal = 2;
        continue;
      }

      dirgrep(pattern, child_fd, child);
      close(child_fd);
    }
    break;
  }
  default:
    // Neither a regular file nor a directory: nothing to search.
    break;
  }
  return 0;
}

// Regexp matcher from Kernighan & Pike,
Expand All @@ -71,7 +176,8 @@ int matchstar(int, char*, char*);
int
match(char *re, char *text)
{
if(re[0] == '^')
// Ignore regex stuff when -F is passed
if(re[0] == '^' && !Fflag)
return matchhere(re+1, text);
do{ // must look at empty string
if(matchhere(re, text))
Expand All @@ -85,11 +191,14 @@ int matchhere(char *re, char *text)
{
if(re[0] == '\0')
return 1;
if(re[1] == '*')
return matchstar(re[0], re+2, text);
if(re[0] == '$' && re[1] == '\0')
return *text == '\0';
if(*text!='\0' && (re[0]=='.' || re[0]==*text))
// Ignore regex stuff when -F is passed
if (!Fflag){
if(re[1] == '*')
return matchstar(re[0], re+2, text);
if(re[0] == '$' && re[1] == '\0')
return *text == '\0';
}
if(*text!='\0' && ((!Fflag && re[0]=='.') || re[0]==*text))
return matchhere(re+1, text+1);
return 0;
}
Expand Down