Compiler Design (CS-701) : Develop A Lexical Analyzer To Recognize A Few Patterns in C

Compiler Design (CS-701)
S .No. Name of Experiment

1 Write a Program Develop a lexical analyzer to recognize a few patterns in
C
2 Write a program when the generated parse is executed we get the following
3 Implement operator precedence parser in C
4 WAP to develop recursive descent parser in C.
5 Develop a lexical analyzer to recognize a few patterns in C.
6 Implement LL(1) operator precedence parser in C
EXPERIMENT NO. 1
Develop a lexical analyzer to recognize a few patterns in C.
Write a Program Develop a lexical analyzer to recognize a few patterns in C

APPARATUS : Windows, C,
THEORY : MultiLex is a lexer generator designed to facilitate creation of lexical
analyzers, particularly lexical analyzers for LALR(1) parsers of legacy languages.
Innovative features of MultiLex include its pipeline architecture, lexical pattern-matching,
manipulation of a larger space of objects than just characters, reconfigurability for
languages that include sub-languages, and a lexically-scoped dictionary mechanism. We
discuss the place of lexers in reengineering of legacy languages, the features of
MultiLex, and compare it to prior work on lexers.
PROCEDURE :
This program is for creating a Lexical Analyzer in c .
Necessary Header files used in program.
#include<ctype.h>
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<conio.h>
Functions prototype.
/*
 * Forward declarations for the routines of the lexical analyzer.
 * The definitions of Demage_Lexeme, Skip_Comment, Read_Number and the
 * Is_*_Or_Not checkers are not visible in this part of the listing.
 */
void Open_File();
void Demage_Lexeme();
int Search(char[256],int);      /* returns non-zero when the lexeme is already stored */
void analyze();
void Skip_Comment();
void Read_String();
void Is_Keyword_Or_Not();
void Is_Identifier_Or_Not();
void Is_Operator_Or_Not();
void Read_Number();
void Is_Special_Or_Not();
void Is_Comparison_Or_Not();
void Add_To_Lexical (char[256],int,char[256]);  /* arguments: lexeme value, line number, token type */
void Print_ST();
void Print_TOKEN();
void Token_Attribute();
Data structure used in program.
/*
 * One node of the singly linked list of recognised lexemes.
 * line[] and times belong together: the first `times` entries of line[]
 * are the line numbers recorded for this lexeme.
 */
struct lexical
{
    char data[256];          //Value of token.
    int line[256];           //Line # which token appear in input file.
    int times;               //# of times that token appear in input file.
    char type[256];          //Type of each token.
    struct lexical *next;    //Next node of the list.
};
typedef struct lexical Lex;
typedef Lex *lex;
File pointer for accessing the file.
/* Input stream: opened on "source.txt" by Open_File() and again by Print_TOKEN(). */
FILE *fp;
/* Output stream for "ST.TXT", opened and closed inside Print_ST(). */
FILE *st;
/* Output stream for "TOKENS.TXT", opened by Print_TOKEN() and written by Token_Attribute(). */
FILE *token;
/* lexeme: buffer filled by Read_String(); ch: the character most recently read from fp. */
char lexeme[256],ch;
/*
 * line: current input line, incremented by analyze() on every newline.
 * i:    line counter advanced by Print_TOKEN() and compared against line[0]
 *       in Token_Attribute().
 * f, flag: not referenced in the visible part of the listing (Search and
 *       Print_TOKEN declare their own local `flag`).
 */
int f,flag,line=1,i=1;
/* First and last node of the lexeme list maintained by Add_To_Lexical(). */
lex head=NULL,tail=NULL;
Array holding all keywords for checking.
/*
 * Reserved words of the analysed language.
 * NOTE(review): presumably consulted by Is_Keyword_Or_Not(), whose definition
 * is not visible here - confirm.
 */
char
*keywords[]={"procedure","is","begin","end","var","cin","cout","if",
"then","else","and","or","not","loop","exit","when",
"while","until"};
Array holding all arithmetic operations for checking.
/* Single-character arithmetic operators; a plain char array, not a NUL-terminated string. */
char arithmetic_operator[]={'+','-','*','/'};
Array holding all comparison operations for checking.
/* Comparison operators as strings, because some of them are two characters long. */
char *comparison_operator[]={"<",">","=","<=","<>",">="};
Array holding all special for checking.
char special[]={'%','!','@','~','$'};
************** MAIN PROGRAM **************
/*
 * Program entry point: scan source.txt, then write the symbol table
 * (ST.TXT) and the token listing (TOKENS.TXT).
 *
 * Declared as int main(void): the C standard requires main to return int,
 * and the original `void main()` left the exit status undefined.
 */
int main(void)
{
    Open_File();        /* opens source.txt into fp, exits on failure */
    analyze();          /* fills the lexeme list */
    fclose(fp);         /* Print_TOKEN() reopens the file itself */
    Print_ST();
    Print_TOKEN();
    return 0;
}
This function opens the input source file.

/*
 * Open "source.txt" for reading and store the stream in the global fp.
 *
 * If the file cannot be opened, report it, wait for a key press and
 * terminate. The exit status is EXIT_FAILURE; the original exit(0)
 * reported success to the caller even though nothing was analysed.
 */
void Open_File()
{
    fp=fopen("source.txt","r"); //provide path for source.txt here
    if(fp!=NULL)
        return;

    printf("!!!Can't open input file - source.txt!!!");
    getch();
    exit(EXIT_FAILURE);
}
Function to add an item to the linked list of structures that stores the data and
information of lexical items.
/*
 * Record one occurrence of a lexeme.
 *
 * value - text of the lexeme
 * line  - input line on which it was seen
 * type  - token class name (e.g. "semicolon", "parenthesis")
 *
 * Search() returns non-zero when it found the lexeme (and has already
 * recorded the new line number on the existing node); only when it returns
 * zero is a fresh node appended to the list. If the allocation fails the
 * lexeme is silently dropped.
 */
void Add_To_Lexical (char value[256],int line,char type[256])
{
    lex node;

    if (Search(value,line))
        return;                     /* already stored */

    node=malloc(sizeof(Lex));
    if (node==NULL)
        return;

    strcpy(node->data,value);
    node->line[0]=line;
    node->times=1;
    strcpy(node->type,type);
    node->next=NULL;

    /* Append: the first node becomes the head, later ones hang off the tail. */
    if (head!=NULL)
        tail->next=node;
    else
        head=node;
    tail=node;
}
Function to search token.

/*
 * Look a lexeme up in the list and, if present, record another occurrence.
 *
 * value - text of the lexeme to look for
 * line  - input line of the current occurrence
 *
 * Returns 1 when the lexeme is already stored, 0 otherwise.
 *
 * Fixes over the original loop `while (x->next!=NULL && !flag)`:
 *  - an empty list (head == NULL) no longer dereferences a null pointer;
 *  - the last node is compared too, so the most recently added lexeme is
 *    no longer stored a second time;
 *  - occurrences beyond the capacity of line[] are not recorded, so the
 *    array is never written (or later read) out of bounds.
 */
int Search (char value[256],int line)
{
    lex x;

    for (x=head;x!=NULL;x=x->next)
    {
        if (strcmp(x->data,value)!=0)
            continue;

        /* times doubles as the index of the next free slot in line[]. */
        if (x->times<(int)(sizeof x->line/sizeof x->line[0]))
        {
            x->line[x->times]=line;
            x->times++;
        }
        return 1;
    }
    return 0;
}
Function to print the ST.TXT .
void Print_ST()
{
lex x=head;
int j;
if ((st=fopen("ST.TXT","w"))==NULL)
printf("The file ST.TXT cat not open.
");
else
{
fprintf(st," %s %s %s
","Line#","Lexeme","Type");
fprintf(st," ---- ------ ----
");
while (x!=NULL)
{
if ((strcmp(x->type,"num")==0) ||
(strcmp(x->type,"keyword")==0) ||
(strcmp(x->type,"identifier")==0))
{
fprintf(st," ");
for (j=0;j<x->times;j++)
{
fprintf(st,"%d",x->line[j]);
if (j!=x->times-1) //This condition to prevent the comma
fprintf(st,",",x->line[j]); //"," to not print after last line #.
}
fprintf(st," %-6s %-6s

",x->data,x->type);
}
x=x->next;
}
fclose(st);
}
}
Function to print the TOKENS.TXT .

void Print_TOKEN()
{
int flag=0;
fp=fopen("source.txt","r");
if(fp==NULL)
{
printf("!!!Can't open input file - source.txt!!!");
getch();
exit(0);
}
else
{
if ((token=fopen("TOKENS.TXT","w"))==NULL)
printf("The file ST.TXT cat not open.
");
else
{
ch=fgetc(fp);
while (!(feof(fp)))
{
if (ch==' ' && !flag)

{
do
ch=fgetc(fp);
while (ch==' ');
fseek(fp,-2,1);
ch=fgetc(fp);
flag=1;
}
if (ch!='
' && ch!=' ')
fprintf(token,"%c",ch);
if (ch=='
')
{
fprintf(token,"
");
Token_Attribute();
i++;
flag=0;
}
ch=fgetc(fp);
}
}
}
fclose(fp);
fclose(token);
}
Function to put the token and attribute in TOKENS.TXT.

/*
 * Append to TOKENS.TXT one "token : ... attribute : ..." row for every
 * stored lexeme whose first recorded line equals the global i, then a
 * blank line.
 *
 * For "num", "keyword" and "identifier" the attribute is the line number;
 * for every other type it is the lexeme text itself.
 *
 * The "\n" escapes of the format strings, shown as real line breaks in the
 * listing, are restored and the unused local j is removed.
 */
void Token_Attribute()
{
    lex x;

    for (x=head;x!=NULL;x=x->next)
    {
        if (x->line[0]!=i)
            continue;

        fprintf(token,"token : %-4s ",x->type);
        if ((strcmp(x->type,"num")==0) ||
            (strcmp(x->type,"keyword")==0) ||
            (strcmp(x->type,"identifier")==0))
            fprintf(token,"attribute : line#=%-4d\n",i);
        else
            fprintf(token,"attribute : %-4s\n",x->data);
    }
    fprintf(token,"\n");
}
Function to create lexical analysis.
void analyze()
{
ch=fgetc(fp); //Read character.
while(!feof(fp)) //While the file is not end.

{
if(ch=='
') //Compute # of lines in source.txt
.
{
line++;
ch=fgetc(fp);
}
if(isspace(ch) && ch=='

')
{
line++;
ch=fgetc(fp);
}
if(isspace(ch) && ch!='
') //The character is space.
ch=fgetc(fp);
if(ch=='/' || ch=='"') //Function for skipping comments in the

file
Skip_Comment(); //and '"' with display statements.
if(isalpha(ch)) //The character is leter.

{
Read_String();
Is_Keyword_Or_Not();
Is_Operator_Or_Not();
Is_Identifier_Or_Not();
}
if(isdigit(ch)) //The character is digit.

Read_Number();
if (ch==';') //The character is semicolon.

Add_To_Lexical(";",line,"semicolon");
if (ch==':') //The character is colon.

Add_To_Lexical(":",line,"colon");
if (ch==',') //The character is comma.

Add_To_Lexical(",",line,"comma");
if (ch=='(') //The character is parenthesis.

Add_To_Lexical("(",line,"parenthesis");
if (ch==')') //The character is parenthesis.

Add_To_Lexical(")",line,"parenthesis");
//The character is
comparison_operator
if (ch=='<' || ch=='=' || ch=='>')
Is_Comparison_Or_Not();
Is_Special_Or_Not(); //After failed scaning in before cases

//check the character is special or
not.
Demage_Lexeme();
if(isspace(ch) && ch=='

')
{
line++;
ch=fgetc(fp);
}
else
ch=fgetc(fp);
}
}
This function read all character of strings.
void Read_String()
{
int j=0;
do
{
lexeme[j++]=ch;
ch=fgetc(fp);
} while(isalpha(ch));
fseek(fp,-1,1);
lexeme[j]=';
getch();
EXPERIMENT NO. 2
Write a program when the generated parse is executed we get the
following
AIM : Write a program when the generated parse is executed we get the following
APPARATUS : Windows, C,
THEORY : A parser reads the input symbols from left to right, until it has read the
right-hand side of a production. Then it REDUCES the input, running the
production backwards and replacing the right-hand side with the left-hand side. This is the
point at which code is generated, and expressions evaluated: when a reduction occurs.
PROCEDURE :
%{
/* printf is used inside the semantic actions below. */
#include <stdio.h>
/* NOTE(review): yyparse() also needs yylex() and yyerror(); presumably both
   come from the included lex.yy.c - confirm, or define yyerror here. */
%}
/*
 * Calculator grammar: evaluates + - * / over numbers with the usual
 * precedence (T binds tighter than E) and prints the result.
 *
 * Repairs over the listing: "% token", "douoble", "T.F", "(op>", the
 * unbalanced "(" ... "}" around the case bodies, "!" used in place of the
 * alternative separator "|", the typographic quotes, "." in place of ","
 * in the printf call, and the missing ";" after the S and E rules.
 */
%token <value> NUM
%start S
%union {
    double value;   /* value of a number or of a sub-expression */
    char op;        /* operator character carried by op1 / op2 */
}
%type <value> E T F
%type <op> op1 op2
%%
S : E { printf("The value of Expression is %1f:\n", $1); }
  ;
E : E op1 T {
        switch ($2)
        {
        case '+': $$ = $1 + $3; break;
        case '-': $$ = $1 - $3; break;
        }
    }
  | T {
        $$ = $1;
    }
  ;
T : T op2 F {
        switch ($2)
        {
        case '*': $$ = $1 * $3; break;
        case '/': $$ = $1 / $3; break;
        }
    }
  | F {
        $$ = $1;
    }
  ;
F : NUM {
        $$ = $1;
    }
  ;
op1 : '+' {
        $$ = '+';
    }
  | '-' {
        $$ = '-';
    }
  ;
op2 : '*' {
        $$ = '*';
    }
  | '/' {
        $$ = '/';
    }
  ;
%%
#include "lex.yy.c"
int main(void)
{
    return(yyparse());
}
EXPERIMENT NO. 3
Implement operator precedence parser in C
AIM : Implement operator precedence parser in C
APPARATUS : Windows, C
THEORY : An operator precedence parser is a bottom-up parser that interprets an operator
precedence grammar. For example, most calculators use operator precedence parsers to convert
from the human-readable infix notation with order of operations format into an internally optimized
computer-readable format like Reverse Polish notation (RPN).
PROCEDURE :
#include <stdio.h>
/*
 * Fully parenthesise an infix expression given as command-line arguments.
 *
 * Every argument that is exactly one of the characters ^ * / + - is
 * replaced by that operator wrapped in closing and opening parentheses
 * (more of them for lower precedence); every other argument is echoed
 * unchanged. The whole output is wrapped in "((((" and "))))".
 *
 * Fixes over the original:
 *  - switch(argv[i]) switched on a pointer, which is not valid C; the
 *    first character *argv[i] is what has to be examined;
 *  - an empty argument "" made argv[i][1] read past the terminator; the
 *    first character is now checked before the second;
 *  - the loop uses i<argc, so it also terminates when argc is 0.
 */
int main(int argc, char *argv[]){
    printf("((((");
    for(int i=1;i<argc;i++){
        /* Exactly one character long: may be an operator. */
        if(argv[i] && argv[i][0] && !argv[i][1]){
            switch(*argv[i]){
            case '^': printf(")^("); continue;
            case '*': printf("))*(("); continue;
            case '/': printf("))/(("); continue;
            case '+': printf(")))+((("); continue;
            case '-': printf(")))-((("); continue;
            default: break;     /* ordinary one-character operand */
            }
        }
        printf("%s", argv[i]);
    }
    printf("))))\n");
    return 0;
}
EXPERIMENT NO. 4
WAP to develop recursive descent parser in C.
AIM : WAP to develop recursive descent parser in C.
THEORY : What is "parsing"? Parsing is processing a series of symbols to extract their
meaning. Typically, this means reading the words of a sentence and drawing information
from them. When application programs need to process data that is provided as text, they
must use some form of parsing logic. This logic scans the text characters and character
groups (words) and recognizes patterns of groups to extract the underlying commands or
information.
PROCEDURE :
/* Token kinds that the parser distinguishes. */
typedef enum {ident, number, lparen, rparen, times, slash, plus,
minus, eql, neq, lss, leq, gtr, geq, callsym, beginsym, semicolon,
endsym, ifsym, whilesym, becomes, thensym, dosym, constsym, comma,
varsym, procsym, period, oddsym} Symbol;
/* The current look-ahead token; every parsing routine below inspects it. */
Symbol sym;
/* Declared here, defined elsewhere (not visible in this listing).
   NOTE(review): getsym presumably advances sym to the next input token and
   error reports msg - confirm against their definitions. */
void getsym(void);
void error(const char msg[]);
/* Forward declaration: factor() calls expression() before its definition. */
void expression(void);
/*
 * If the current symbol equals s, fetch the next symbol and return 1;
 * otherwise leave the input untouched and return 0.
 */
int accept(Symbol s) {
    if (sym != s)
        return 0;
    getsym();
    return 1;
}
/*
 * Like accept(), but a mismatch is reported through error().
 * Returns 1 when s was consumed, 0 after reporting the error.
 */
int expect(Symbol s) {
    if (!accept(s)) {
        error("expect: unexpected symbol");
        return 0;
    }
    return 1;
}
/*
 * factor: an identifier, a number, or a parenthesised expression.
 * Anything else is reported and the offending symbol is skipped.
 */
void factor(void) {
    /* Identifiers and numbers are complete factors once consumed. */
    if (accept(ident) || accept(number))
        return;

    if (accept(lparen)) {
        expression();
        expect(rparen);
        return;
    }

    error("factor: syntax error");
    getsym();
}
/* term: one factor followed by any number of ("*" | "/") factor pairs. */
void term(void) {
    factor();
    for (;;) {
        if (sym != times && sym != slash)
            break;
        getsym();       /* consume the operator */
        factor();
    }
}
/*
 * expression: an optional leading sign, one term, then any number of
 * ("+" | "-") term pairs.
 */
void expression(void) {
    int signed_start = (sym == plus || sym == minus);

    if (signed_start)
        getsym();       /* consume the leading sign */
    term();
    for (;;) {
        if (sym != plus && sym != minus)
            break;
        getsym();       /* consume the operator */
        term();
    }
}
/*
 * condition: either "odd" expression, or expression relop expression
 * where relop is one of eql, neq, lss, leq, gtr, geq.
 * A missing relational operator is reported and the symbol skipped.
 */
void condition(void) {
    int is_odd = accept(oddsym);

    expression();
    if (is_odd)
        return;

    switch (sym) {
    case eql:
    case neq:
    case lss:
    case leq:
    case gtr:
    case geq:
        getsym();       /* consume the relational operator */
        expression();
        break;
    default:
        error("condition: invalid operator");
        getsym();
        break;
    }
}
/*
 * statement: assignment, call, begin ... end block, if ... then, or
 * while ... do. When none of the leading symbols matches, nothing is
 * consumed and no error is reported.
 */
void statement(void) {
    if (accept(ident)) {                /* ident becomes expression */
        expect(becomes);
        expression();
        return;
    }
    if (accept(callsym)) {              /* call ident */
        expect(ident);
        return;
    }
    if (accept(beginsym)) {             /* begin stmt { ; stmt } end */
        statement();
        while (accept(semicolon))
            statement();
        expect(endsym);
        return;
    }
    if (accept(ifsym)) {                /* if condition then stmt */
        condition();
        expect(thensym);
        statement();
        return;
    }
    if (accept(whilesym)) {             /* while condition do stmt */
        condition();
        expect(dosym);
        statement();
    }
}
/*
 * block: optional const section, optional var section, any number of
 * nested procedure declarations, then one statement.
 */
void block(void) {
    if (accept(constsym)) {
        /* const ident = number { , ident = number } ; */
        for (;;) {
            expect(ident);
            expect(eql);
            expect(number);
            if (!accept(comma))
                break;
        }
        expect(semicolon);
    }

    if (accept(varsym)) {
        /* var ident { , ident } ; */
        for (;;) {
            expect(ident);
            if (!accept(comma))
                break;
        }
        expect(semicolon);
    }

    /* procedure ident ; block ; */
    for (;;) {
        if (!accept(procsym))
            break;
        expect(ident);
        expect(semicolon);
        block();
        expect(semicolon);
    }

    statement();
}
/* program: a block terminated by a period. Entry point of the parser. */
void program(void) {
getsym();           /* load the first symbol before any rule inspects sym */
block();
expect(period);
}
EXPERIMENT NO. 5
Develop a lexical analyzer to recognize a few patterns in C.
AIM : Develop a lexical analyzer to recognize a few patterns in C.
THEORY : data files but reading (and perhaps interpreting) a scripting language input
file, Lexical analysis is the lowest level translation activity. The purpose of a lexical
analyzer or scanner is to convert an incoming stream of characters into an outgoing
stream of tokens. The scanner operates by matching patterns of characters into lexemes.
Each pattern describes what an instance of a particular token must match. For example, a
common pattern for an identifier (for example, user-specified variable or constant) in a
script language is a letter followed by one or more occurrences of a letter or digit. Some
lexem
PROCEDURE :
%{
/*
 * Counts lines, words and characters of the input.
 *
 * Repairs over the listing: the catch-all rule had lost its "." pattern
 * (a bare action is not a valid rule), main was missing its closing brace,
 * and exit() was called without <stdlib.h>; main now simply returns 0.
 */
#include <stdio.h>
#include <string.h>
static int chars, words, lines;
%}
%option noyywrap
alpha [a-zA-Z]
word {alpha}+
%%
{word} { chars += (int)strlen(yytext); ++words; }
\n { ++chars; ++lines; }
. { ++chars; }
%%
int main()
{
    chars = words = lines = 0;
    yylex();
    printf("\t%d\t%d\t%d\n",lines,words,chars);
    return 0;
}
EXPERIMENT NO. 6
Implement LL(1) operator precedence parser in C
THEORY : An operator precedence parser is a bottom-up parser that interprets an operator
precedence grammar. For example, most calculators use operator precedence parsers to convert
from the human-readable infix notation with order of operations format into an internally optimized
computer-readable format like Reverse Polish notation (RPN).
Edsger Dijkstra's shunting yard algorithm is commonly used to implement operator precedence
parsers which convert from infix notation to RPN.
PROCEDURE :
expression ::= equality-expression
equality-expression ::= additive-expression ( ( '==' | '!=' ) additive-expression ) *
additive-expression ::= multiplicative-expression ( ( '+' | '-' ) multiplicative-expression ) *
multiplicative-expression ::= primary ( ( '*' | '/' ) primary ) *
primary ::= '(' expression ')' | NUMBER | VARIABLE | '-' primary
parse_expression ()
return parse_expression_1 (parse_primary (), 0)
parse_expression_1 (lhs, min_precedence)

while the next token is a binary operator whose precedence is >= min_precedence
op := next token
rhs := parse_primary ()
while the next token is a binary operator whose precedence is greater
than op's, or a right-associative operator
whose precedence is equal to op's
lookahead := next token
rhs := parse_expression_1 (rhs, lookahead's precedence)
lhs := the result of applying op with operands lhs and rhs
return lhs
#include <stdio.h>
/*
 * Fully parenthesise an infix expression given as command-line arguments.
 *
 * Every argument that is exactly one of the characters ^ * / + - is
 * replaced by that operator wrapped in closing and opening parentheses
 * (more of them for lower precedence); every other argument is echoed
 * unchanged. The whole output is wrapped in "((((" and "))))".
 *
 * Fixes over the original:
 *  - an empty argument "" made argv[i][1] read past the terminator; the
 *    first character is now checked before the second;
 *  - the loop uses i<argc, so it also terminates when argc is 0.
 */
int main(int argc, char *argv[]){
    int i;

    printf("((((");
    for(i=1;i<argc;i++){
        /* Exactly one character long: may be an operator. */
        if(argv[i] && argv[i][0] && !argv[i][1]){
            switch(*argv[i]){
            case '^': printf(")^("); continue;
            case '*': printf("))*(("); continue;
            case '/': printf("))/(("); continue;
            case '+': printf(")))+((("); continue;
            case '-': printf(")))-((("); continue;
            default: break;     /* ordinary one-character operand */
            }
        }
        printf("%s", argv[i]);
    }
    printf("))))\n");
    return 0;
}

Compiler Design (CS-701) : Develop A Lexical Analyzer To Recognize A Few Patterns in C

Uploaded by

Document Information

Original Description:

Original Title

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

Compiler Design (CS-701) : Develop A Lexical Analyzer To Recognize A Few Patterns in C

Uploaded by

Copyright:

Available Formats

Compiler Design (CS-701)

S .No. Name of Experiment

Write a Program Develop a lexical analyzer to recognize a few patterns in C

Array holding all keywords for checking.

Array holding all arithmetic operations for checking.

Array holding all special for checking.

************** MAIN PROGRAM **************

This function open input sourse file.

fp=fopen("source.txt","r"); //provide path for source.txt here

Function to add item to structure of array to store data and

void Add_To_Lexical (char value[256],int line,char type[256])

if (!Search(value,line)) //When return 1 the token not found.

Function to search token.

while (x->next!=NULL && !flag)

Function to print the ST.TXT .

fprintf(st," %-6s %-6s

Function to print the TOKENS.TXT .

if (ch==' ' && !flag)

Function to put the token and atrribute in TOKENS.TXT .

Function to create lexical analysis.

ch=fgetc(fp); //Read character.

while(!feof(fp)) //While the file is not end.

if(isspace(ch) && ch=='

if(ch=='/' || ch=='"') //Function for skipping comments in the

if(isalpha(ch)) //The character is leter.

if(isdigit(ch)) //The character is digit.

if (ch==';') //The character is semicolon.

if (ch==':') //The character is colon.

if (ch==',') //The character is comma.

if (ch=='(') //The character is parenthesis.

if (ch==')') //The character is parenthesis.

Is_Special_Or_Not(); //After failed scaning in before cases

if(isspace(ch) && ch=='

This function read all character of strings.

static int chars, words, lines;

{word} { chars += strlen(yytext); ++words; }

parse_expression_1 (lhs, min_precedence)

int main(int argc, char *argv[]){

You might also like

MAIN PROGRAM