Compiler Design Lab work | 5th Sem

August 12, 2024

Compiler Design Lab work | 5th Sem | CSE

Date: 06/08/2024

EXPERIMENT NO. – 1

AIM: Write a C++ program to count the number of different types of tokens in a given program, simulating tokenisation in the lexical phase of compiler design.

THEORY:

Tokenization in the lexical phase of compiler design is the process of breaking down the source code into fundamental units called tokens. These tokens represent the smallest elements of the code with meaningful semantics, such as keywords, identifiers, literals, operators, and special symbols. During this phase, the lexer (or lexical analyzer) scans the input source code sequentially, identifies these tokens based on predefined patterns, and categorizes them accordingly. This step is crucial as it transforms raw code into a structured format that can be further analyzed and processed by the subsequent phases of the compiler, such as syntax analysis and semantic analysis.

CODE:

#include <iostream>

#include <sstream>

#include <string>

#include <unordered_set>

#include <vector>

#include <cctype>

#include <algorithm>

using namespace std;

/**
 * @brief Tells whether a lexeme is one of the C++ keywords this lexer knows.
 *
 * The comparison is exact and case-sensitive; only the keywords listed in the
 * table below are recognised.
 *
 * @param token Candidate lexeme.
 * @return true if token appears in the keyword table, false otherwise.
 */
bool isKeyword(const std::string &token)
{
    // Built once on first use instead of re-allocating 30 strings per call;
    // this function is invoked for every token of the input.
    static const std::unordered_set<std::string> keywords = {
        "int", "float", "double", "char", "void", "return", "if", "else",
        "for", "while", "do", "switch", "case", "default", "break", "continue",
        "class", "public", "private", "protected", "static", "const", "typedef",
        "namespace", "using", "template", "try", "catch", "throw", "virtual"};

    return keywords.count(token) != 0;
}

/**
 * @brief Tells whether a lexeme is one of the operators this lexer knows.
 *
 * Covers arithmetic, relational, logical, assignment, shift and bitwise
 * operators of one or two characters. The match is exact.
 *
 * @param token Candidate lexeme.
 * @return true if token appears in the operator table, false otherwise.
 */
bool isOperator(const std::string &token)
{
    // Built once on first use rather than on every call.
    static const std::unordered_set<std::string> operators = {
        "+", "-", "*", "/", "%", "++", "--", "==", "!=", "<", ">", "<=", ">=",
        "&&", "||", "!", "=", "+=", "-=", "*=", "/=", "%=", ">>", "<<",
        "&", "|", "^", "~"};

    return operators.count(token) != 0;
}

/**
 * @brief Tells whether a lexeme is one of the special symbols this lexer knows.
 *
 * Special symbols are brackets, statement/argument separators, the member
 * access arrow and the scope resolution operator. The match is exact.
 *
 * @param token Candidate lexeme.
 * @return true if token appears in the special-symbol table, false otherwise.
 */
bool isSpecialSymbol(const std::string &token)
{
    // Built once on first use rather than on every call.
    static const std::unordered_set<std::string> specialSymbols = {
        "{", "}", "(", ")", ";", ",", "[", "]", "->", "::"};

    return specialSymbols.count(token) != 0;
}

/**
 * @brief Reads a C++ snippet from standard input and reports its distinct tokens.
 *
 * Input is read line by line until two consecutive empty lines (or end of
 * input). Every line is split into tokens, each token is classified as a
 * keyword, literal, operator, special symbol or identifier, and the distinct
 * tokens of each category are printed together with their count.
 *
 * Limitations: comments and preprocessor directives are not treated
 * specially, escaped quotes inside string literals are not recognised, and
 * tokens that fit no category (for example a lone '.' or ':') are ignored.
 *
 * @return 0 on completion.
 */
int main()
{
    std::unordered_set<std::string> identifiers;
    std::unordered_set<std::string> operators;
    std::unordered_set<std::string> specialSymbols;
    std::unordered_set<std::string> keywords;
    std::unordered_set<std::string> literals;

    std::cout << "Enter the C++ program (end input with two consecutive Enter presses):" << std::endl;

    // Collect the program text; blank lines are dropped, and two in a row end the input.
    std::stringstream ss;
    std::string line;
    bool prevLineEmpty = false;
    while (std::getline(std::cin, line))
    {
        if (line.empty())
        {
            if (prevLineEmpty)
            {
                break;
            }
            prevLineEmpty = true;
        }
        else
        {
            prevLineEmpty = false;
            ss << line << '\n';
        }
    }

    // Splits one source line into lexemes.
    auto tokenize = [](const std::string &str)
    {
        // Characters that terminate the word being built and form a token of
        // their own (possibly together with the following character).
        const std::string delimiters = ";,{}()[].-+*/%=!<>&|^~:";

        std::vector<std::string> tokens;
        std::string token;
        bool inString = false;

        // Emits the word accumulated so far, if any.
        auto flush = [&tokens, &token]()
        {
            if (!token.empty())
            {
                tokens.push_back(token);
                token.clear();
            }
        };

        for (std::size_t i = 0; i < str.size(); ++i)
        {
            const char ch = str[i];
            const unsigned char uch = static_cast<unsigned char>(ch);

            if (inString)
            {
                // Everything up to the closing quote belongs to the literal.
                token += ch;
                if (ch == '"')
                {
                    inString = false;
                    flush();
                }
                continue;
            }

            if (ch == '"')
            {
                flush();
                inString = true;
                token += ch;
                continue;
            }

            if (std::isspace(uch))
            {
                // Any whitespace (not only ' ') separates tokens and is discarded.
                flush();
                continue;
            }

            const bool hasNext = i + 1 < str.size();

            // Keep the decimal point of a number such as 3.14 inside the literal.
            if (ch == '.' && !token.empty() &&
                std::isdigit(static_cast<unsigned char>(token.front())) &&
                hasNext && std::isdigit(static_cast<unsigned char>(str[i + 1])))
            {
                token += ch;
                continue;
            }

            if (delimiters.find(ch) != std::string::npos)
            {
                flush();
                // Prefer a two-character operator/symbol ("++", "==", "->", "::", ...).
                if (hasNext)
                {
                    const std::string pair = str.substr(i, 2);
                    if (isOperator(pair) || isSpecialSymbol(pair))
                    {
                        tokens.push_back(pair);
                        ++i;
                        continue;
                    }
                }
                tokens.push_back(std::string(1, ch));
                continue;
            }

            token += ch;
        }

        // An unterminated string literal or trailing word is still emitted.
        flush();
        return tokens;
    };

    // Classify every token of every collected line.
    std::string lineContent;
    while (std::getline(ss, lineContent))
    {
        for (const std::string &token : tokenize(lineContent))
        {
            const unsigned char first = static_cast<unsigned char>(token.front());

            if (isKeyword(token))
            {
                keywords.insert(token);
            }
            else if (std::isdigit(first) ||
                     (token.size() >= 2 && token.front() == '"' && token.back() == '"'))
            {
                literals.insert(token);
            }
            else if (isOperator(token))
            {
                operators.insert(token);
            }
            else if (isSpecialSymbol(token))
            {
                specialSymbols.insert(token);
            }
            else if (std::isalpha(first) || first == '_')
            {
                identifiers.insert(token);
            }
            // Anything else (stray punctuation, unterminated strings) is not counted.
        }
    }

    // Prints every element of one category on its own line.
    auto printAll = [](const std::unordered_set<std::string> &items)
    {
        for (const std::string &item : items)
        {
            std::cout << item << '\n';
        }
    };

    std::cout << "Identifiers (" << identifiers.size() << " distinct):" << '\n';
    printAll(identifiers);

    std::cout << "\nOperators (" << operators.size() << " distinct):" << '\n';
    printAll(operators);

    std::cout << "\nSpecial Symbols (" << specialSymbols.size() << " distinct):" << '\n';
    printAll(specialSymbols);

    std::cout << "\nKeywords (" << keywords.size() << " distinct):" << '\n';
    printAll(keywords);

    std::cout << "\nLiterals (" << literals.size() << " distinct):" << '\n';
    printAll(literals);

    std::cout.flush();
    return 0;
}

OUTPUT:

Date: 13/08/2024

EXPERIMENT NO. – 2

AIM: Write a LEX program to count the number of different types of tokens in a given program, simulating tokenisation in the lexical phase of compiler design.

SOFTWARE USED : FLEX

THEORY:

LEX is a tool used to generate lexical analyzers that process text and classify tokens based on predefined patterns. It is integral in compiling and interpreting as it breaks down input text into manageable components for further analysis.

Structure of a LEX Program:

%{
// Definitions Section: Include headers and macros
%}

%%

// Rules Section: Define patterns and actions

%%

// User Code Section: Implement main function and additional logic

Workflow:

Write a LEX File: Define patterns and actions in the ‘.l’ file.
Generate C Source File: Use ‘flex filename.l’ to produce ‘lex.yy.c’.
Compile the C File: Use ‘gcc lex.yy.c’ to create the executable ‘a.exe’.
Run the Lexer: Execute ‘./a.exe’ to process input and produce token output.

CODE:

#include <stdio.h>

#include <string.h>

#include <stdlib.h>

/* Number of slots in each of the per-category token tables below. */
#define MAX_TOKENS 100

/* One table of token strings per category; the scanner rules pass these to
 * addToken() together with the matching counter. */
char *identifiers[MAX_TOKENS];

char *operators[MAX_TOKENS];

char *specialSymbols[MAX_TOKENS];

char *keywords[MAX_TOKENS];

char *literals[MAX_TOKENS];

/* Number of entries currently stored in identifiers, operators,
 * specialSymbols, keywords and literals respectively. */
int idCount = 0, opCount = 0, ssCount = 0, kwCount = 0, litCount = 0;

/*
 * Reports whether `token` is already present in `list`.
 *
 * list  - array of NUL-terminated strings to search
 * count - number of valid entries at the start of `list`
 * token - NUL-terminated string to look for (compared with strcmp)
 *
 * Returns 1 if an equal string is found among the first `count` entries,
 * 0 otherwise (including when `count` is 0).
 */
int exists(char **list, int count, char *token) {
    for (int i = 0; i < count; i++) {
        if (strcmp(list[i], token) == 0) {
            return 1;
        }
    }

    /* Reached only after every entry has been compared without a match. */
    return 0;
}

void addToken(char **list, int *count, char *token) {

if (!exists(list, *count, token)) {

list[*count] = strdup(token);

(*count)++;

}

/*
 * Scanner options and rules.
 *
 * noyywrap: the scanner stops at the end of the buffer it is given instead of
 * calling a user-supplied yywrap().
 *
 * Each rule records the matched text (yytext) in the table of its category.
 * flex picks the longest match and, on ties, the rule listed first, so a
 * keyword wins over the identifier rule while a longer word such as
 * "integer" is still an identifier.
 *
 * NOTE(review): flex expects the C declarations that precede this section to
 * be enclosed in a %{ ... %} block; those markers are not present in this
 * listing and must be restored before running flex.
 */
%option noyywrap

%%

"int"|"float"|"double"|"char"|"void"|"return"|"if"|"else"|"for"|"while"|"do"|"switch"|"case"|"default"|"break"|"continue"|"class"|"public"|"private"|"protected"|"static"|"const"|"typedef"|"namespace"|"using"|"template"|"try"|"catch"|"throw"|"virtual" {
    /* Reserved words. */
    addToken(keywords, &kwCount, yytext);
}

[0-9]+(\.[0-9]+)?|\"[^\"\n]*\" {
    /* Integer or decimal number, or a double-quoted string on one line.
     * The string pattern must not be wrapped in flex quotes: inside "..."
     * the brackets and '*' would be matched as literal characters. */
    addToken(literals, &litCount, yytext);
}

"+"|"-"|"*"|"/"|"%"|"++"|"--"|"=="|"!="|"<"|">"|"<="|">="|"&&"|"||"|"!"|"="|"+="|"-="|"*="|"/="|"%="|">>"|"<<"|"&"|"|"|"^"|"~" {
    /* Arithmetic, relational, logical, assignment, shift and bitwise operators. */
    addToken(operators, &opCount, yytext);
}

"{"|"}"|"("|")"|";"|","|"["|"]"|"->"|"::" {
    /* Brackets, separators, member arrow and scope resolution. */
    addToken(specialSymbols, &ssCount, yytext);
}

[a-zA-Z_][a-zA-Z0-9_]* {
    /* Any other word is an identifier. */
    addToken(identifiers, &idCount, yytext);
}

[ \t\r\n] { /* Skip whitespace. */ }

. { /* Ignore any character no other rule accepts. */ }

%%

/*
 * Reads source text from stdin one line at a time, runs the scanner over
 * every non-empty line and, once two consecutive empty lines (or end of
 * input) are seen, prints the distinct tokens collected in each category.
 *
 * Returns 0 on completion.
 */
int main() {
    printf("Enter the C++ code (end input by pressing Enter twice):\n");

    char input[1024];
    int emptyLineCount = 0;

    while (fgets(input, sizeof(input), stdin)) {
        /* Accept both LF and CRLF line endings as an empty line. */
        if (strcmp(input, "\n") == 0 || strcmp(input, "\r\n") == 0) {
            emptyLineCount++;
            if (emptyLineCount == 2) {
                break;
            }
        } else {
            emptyLineCount = 0;

            /* Scan this line from an in-memory buffer; yylex() returns when
             * the buffer is exhausted, after which the buffer is released. */
            YY_BUFFER_STATE buffer = yy_scan_string(input);
            yylex();
            yy_delete_buffer(buffer);
        }
    }

    printf("Identifiers (%d distinct):\n", idCount);
    for (int i = 0; i < idCount; i++) {
        printf("%s\n", identifiers[i]);
    }

    printf("\nOperators (%d distinct):\n", opCount);
    for (int i = 0; i < opCount; i++) {
        printf("%s\n", operators[i]);
    }

    printf("\nSpecial Symbols (%d distinct):\n", ssCount);
    for (int i = 0; i < ssCount; i++) {
        printf("%s\n", specialSymbols[i]);
    }

    printf("\nKeywords (%d distinct):\n", kwCount);
    for (int i = 0; i < kwCount; i++) {
        printf("%s\n", keywords[i]);
    }

    printf("\nLiterals (%d distinct):\n", litCount);
    for (int i = 0; i < litCount; i++) {
        printf("%s\n", literals[i]);
    }

    return 0;
}

OUTPUT:

Search This Blog

Velle Ventures: Mazaak Mein Master

Compiler Design Lab work | 5th Sem | CSE

Comments

Post a Comment

Popular Posts

Design and Analysis of Algorithms | Assignment 1 | DAA | 5th sem

6th sem syllabus | AIML | Btech CSE