Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • operating-systems/assignment-template
  • alin_andrei.enasoiu/assignment-async-web-server
  • operating-systems/assignment-parallel-graph
  • alexandru.braslasu/assignment-parallel-graph
  • operating-systems/assignment-async-web-server
  • bogdan.ciocea/assignment-mini-libc
  • rconstantinescu2006/assignment-mini-libc
  • alexandru.chirac/assignment-mini-libc
  • alexandru.bologan/assignment-mini-libc
  • rconstantinescu2006/assignment-parallel-graph
  • bogdan.ciocea/assignment-parallel-graph
  • matei.mantu/assignment-parallel-graph
  • alexandru.bologan/assignment-parallel-graph
  • vadim.plamadeala/assignment-parallel-graph
  • alexandru.chirac/assignment-parallel-graph
  • teodora.teodorescu/assignment-parallel-graph
  • radu.popescu0910/assignment-parallel-graph
  • bogdan.ciocea/assignment-mini-shell
  • andrei.miga/assignment-parallel-graph
  • alexandru.chirac/assignment-mini-shell
  • alexandru.chirac/assignment-async-web-server
  • rares_andrei.ticus/assignment-mini-shell
  • rconstantinescu2006/assignment-mini-shell
  • alexandru.braslasu/assignment-mini-shell
  • bogdan.ciocea/assignment-async-web-server
  • gheorghe.petrica/assignment-mini-shell
  • rconstantinescu2006/assignment-async-web-server
  • gheorghe.petrica/assignment-async-web-server
  • alexandru.braslasu/assignment-async-web-server
  • alexandru.bologan/assignment-mini-shell
  • alexandru.bologan/assignment-async-web-server
  • iudith_maria.sara/assignment-async-web-server
  • mircea.beznea/assignment-mini-libc
  • alexandru.dabija04/assignment-mini-libc
  • dinu.merceanu/assignment-mini-libc
  • george.simion2005/assignment-mini-libc
  • andrei.nicola/assignment-mini-libc
  • george.simion2005/assignment-mini-shell
  • operating-systems/assignment-parallel-firewall
  • sebastian.marcu/assignment-parallel-firewall
  • mihail.necula/assignment-parallel-firewall
  • alexandru.dabija04/assignment-parallel-firewall
  • george.simion2005/assignment-parallel-firewall
  • albert_mark.stan/assignment-parallel-firewall
  • alexandru.barbu2809/assignment-parallel-firewall
  • dana_maria.caruntu/assignment-parallel-firewall
  • rares.celescu/assignment-parallel-firewall
  • antonio.ciocodeica/assignment-parallel-firewall
  • bianca.perian/assignment-parallel-firewall
  • darius.constantin04/assignment-async-web-server
  • mihail.necula/assignment-async-web-server
  • antonio.ciocodeica/assignment-async-web-server
  • costin.spataru/assignment-async-web-server
  • dana_maria.caruntu/assignment-async-web-server
  • george.simion2005/assignment-async-web-server
  • albert_mark.stan/assignment-async-web-server
56 results
Show changes
Commits on Source (2)
Showing
with 2644 additions and 2 deletions
...@@ -29,3 +29,6 @@ ...@@ -29,3 +29,6 @@
# Ignore ARRAY_SIZE, let `sizeof()` be used to determine array size. # Ignore ARRAY_SIZE, let `sizeof()` be used to determine array size.
# This makes the code compatible with other compilers / environments. # This makes the code compatible with other compilers / environments.
--ignore ARRAY_SIZE --ignore ARRAY_SIZE
# Ignore SPLIT_STRING, quoted strings can be split on multiple lines.
--ignore SPLIT_STRING
...@@ -26,6 +26,6 @@ build: ...@@ -26,6 +26,6 @@ build:
checker: checker:
stage: test stage: test
image: image:
name: gitlab.cs.pub.ro:5050/operating-systems/assignment-template name: gitlab.cs.pub.ro:5050/operating-systems/assignment-async-web-server
script: script:
- echo "" - echo ""
FROM gitlab.cs.pub.ro:5050/operating-systems/assignments-docker-base FROM gitlab.cs.pub.ro:5050/operating-systems/assignments-docker-base
RUN apt update -yqq
RUN apt install -yqq libaio-dev
RUN apt install -yqq lsof
RUN apt install -yqq netcat
COPY ./checker ${CHECKER_DATA_DIRECTORY} COPY ./checker ${CHECKER_DATA_DIRECTORY}
RUN mkdir ${CHECKER_DATA_DIRECTORY}/../tests RUN mkdir ${CHECKER_DATA_DIRECTORY}/../tests
COPY ./tests ${CHECKER_DATA_DIRECTORY}/../tests COPY ./tests ${CHECKER_DATA_DIRECTORY}/../tests
# Asynchronous Web Server
## Objectives
- Deepening the concepts related to working with sockets.
- Developing skills in implementing and designing applications that use asynchronous operations and other advanced I/O operations.
- Deepening the use of the API for advanced I/O operations in the Linux operating system.
## Statement
Implement a web server that uses the following advanced I/O operations:
- Asynchronous operations on files
- Non-blocking operations on sockets
- Zero-copying
- Multiplexing I/O operations
The server implements a limited functionality of the HTTP protocol: passing files to clients.
The web server will use the multiplexing API to wait for connections from clients - [epoll](https://man7.org/linux/man-pages/man7/epoll.7.html).
On the established connections, requests from clients will be received and then responses will be distributed to them.
The server will serve files from the `AWS_DOCUMENT_ROOT` directory, defined within the assignments' [header](./skel/aws.h).
Files are only found in subdirectories `AWS_DOCUMENT_ROOT/static/` and `AWS_DOCUMENT_ROOT/dynamic/`.
The corresponding request paths will be, for example, `AWS_DOCUMENT_ROOT/static/test.dat` and `AWS_DOCUMENT_ROOT/dynamic/test.dat`.
The file processing will be:
- The files in the `AWS_DOCUMENT_ROOT/static/` directory are static files that will be transmitted to clients using the zero-copying API - [sendfile](https://man7.org/linux/man-pages/man2/sendfile.2.html).
- Files in the `AWS_DOCUMENT_ROOT/dynamic/` directory are files that are supposed to require a server-side post-processing phase. These files will be read from disk using the asynchronous API and then pushed to the clients. Streaming will use non-blocking sockets (Linux)
- An [HTTP 404](https://en.wikipedia.org/wiki/HTTP_404) message will be sent for invalid request paths
After transmitting a file, according to the HTTP protocol, the connection is closed.
### Details and recommendations for the implementation
- Implementing the assignment requires having a state machine for each connection, which you periodically query and update as the transfer proceeds.
Check the `connection_state` data structure defined in the [assignment header](./skel/aws.h).
- Find the `connection` data structure defined in the [assignment header](./skel/aws.h).
This can be used to keep track of an open connection.
- Definitions of other useful macros and data structures can be found in the assignment header.
- HTTP responses will have the code `200` for existing files and `404` for not existing files.
- A valid response consists of the HTTP header, containing the related directives, two newlines (`\r\n\r\n`), followed by the actual content (the file).
- Sample answers can be found in the parser test file or in the provided sample.
- You can use predefined request directives such as `Date`, `Last-Modified`, etc.
- The `Content-Length` directive **must** specify the size of the HTTP content (actual data) in bytes.
- The `Connection` directive **must** be initialized to `close`.
- The port on which the web server listens for connections is defined within the assignment header: the `AWS_LISTEN_PORT` macro.
- The root directory relative to which the resources/files are searched is defined within the assignment header as the `AWS_DOCUMENT_ROOT` macro.
## Support Code
### HTTP Parser
The clients and server will communicate using the HTTP protocol.
For parsing HTTP requests from clients we recommend using [this HTTP parser](https://github.com/nodejs/http-parser), also available in the assignments' [http-parser](./skel/http-parser).
You will need to use a callback to get the path to the local resource requested by the client.
Find a simplified example of using the parser in the [samples directory](./skel/http-parser/samples/).
### API and Implementation Tasks
The [skel/aws.c](./skel/aws.c) file contains the code skeleton with several functions that have to be implemented.
Follow the `TODO` areas in the file to start your implementation.
> It can be reorganized as desired, as long as all the requirements of the assignment are implemented.
## Testing and Grading
The testing is automated.
Tests are located in the `tests/` directory.
To test your implementation, do the following steps:
- Run the `make` command inside the `skel/` directory and make sure it compiles with no errors and that the `aws` executable is generated.
- Run the `make check` command in the `tests/` directory.
There are 35 tests for this assignment, of which 13 are doubled by a memory leak check test.
A successful run looks as the following:
```
student@so:~/operating-systems/content/assignments/async-web-server/tests$ make check
make -C _test
make[1]: Entering directory '/home/student/operating-systems/content/assignments/async-web-server/tests/_test'
make[1]: Nothing to be done for 'all'.
make[1]: Leaving directory '/home/student/operating-systems/content/assignments/async-web-server/tests/_test'
= Testing - Asynchronous Web Server =
01) Test executable exists.............................................passed [01/90]
02) Test executable runs...............................................passed [01/90]
03) Test listening.....................................................passed [01/90]
04) Test listening on port.............................................passed [01/90]
05) Test accepts connections...........................................passed [01/90]
06) Test accepts multiple connections..................................passed [01/90]
07) Test epoll usage...................................................passed [01/90]
08) Test disconnect....................................................passed [01/90]
09) Test multiple disconnect...........................................passed [01/90]
10) Test connect disconnect connect....................................passed [01/90]
11) Test multiple connect disconnect connect...........................passed [01/90]
12) Test unordered connect disconnect connect..........................passed [01/90]
13) Test replies http request..........................................passed [02/90]
13) Test replies http request - memcheck...............................passed [01/90]
14) Test second replies http request...................................passed [01/90]
15) Test sendfile usage................................................passed [02/90]
16) Test small static file wget........................................passed [02/90]
17) Test small static file wget cmp....................................passed [04/90]
17) Test small static file wget cmp - memcheck.........................passed [01/90]
18) Test large static file wget........................................passed [02/90]
19) Test large static file wget cmp....................................passed [04/90]
19) Test large static file wget cmp - memcheck.........................passed [01/90]
20) Test bad static file 404...........................................passed [02/90]
21) Test bad path 404..................................................passed [02/90]
22) Test get one static file then another..............................passed [02/90]
22) Test get one static file then another - memcheck...................passed [01/90]
23) Test get two simultaneous static files.............................passed [03/90]
23) Test get two simultaneous static files - memcheck..................passed [01/90]
24) Test get multiple simultaneous static files........................passed [04/90]
24) Test get multiple simultaneous static files - memcheck.............passed [01/90]
25) Test io submit uses................................................passed [02/90]
26) Test small dynamic file wget.......................................passed [02/90]
27) Test small dynamic file wget cmp...................................passed [04/90]
27) Test small dynamic file wget cmp - memcheck........................passed [01/90]
28) Test large dynamic file wget.......................................passed [02/90]
29) Test large dynamic file wget cmp...................................passed [04/90]
29) Test large dynamic file wget cmp - memcheck........................passed [01/90]
30) Test bad dynamic file 404..........................................passed [02/90]
31) Test get one dynamic file then another.............................passed [03/90]
31) Test get one dynamic file then another - memcheck..................passed [01/90]
32) Test get two simultaneous dynamic files............................passed [04/90]
32) Test get two simultaneous dynamic files - memcheck.................passed [01/90]
33) Test get multiple simultaneous dynamic files.......................passed [05/90]
33) Test get multiple simultaneous dynamic files - memcheck............passed [01/90]
34) Test get two simultaneous static and dynamic files.................passed [03/90]
34) Test get two simultaneous static and dynamic files - memcheck......passed [01/90]
35) Test get multiple simultaneous static and dynamic files............passed [04/90]
35) Test get multiple simultaneous static and dynamic files - memcheck.passed [01/90]
Total: [90/100]
```
Individual tests can be run using the `./_test/run_test.sh` bash script as follows:
```
student@so:~/operating-systems/content/assignments/async-web-server/tests$ ./_test/run_test.sh 3
03) Test listening.....................................................passed [01/90]
```
Where `3` is the test you want to run.
Some tests are doubled by a memory check test.
This will only run if the regular test passed.
For example, test 31 will output the following in case of success:
```
student@so:~/operating-systems/content/assignments/async-web-server/tests$ ./_test/run_test.sh 31
31) Test get one dynamic file then another.............................passed [03/90]
31) Test get one dynamic file then another - memcheck..................passed [01/90]
```
and one of the following in case of error:
```
# if the regular test failed, the memory check test is not performed
student@so:~/operating-systems/content/assignments/async-web-server/tests$ ./_test/run_test.sh 31
31) Test get one dynamic file then another.............................failed [ 0/90]
31) Test get one dynamic file then another - memcheck..................passed [01/90]
```
> Note: The memcheck test for failed regular tests will not be taken into consideration for the final score.
This output will be fixed in the next commit.
Tests use the `static/` and `dynamic/` folders.
These folders are created and removed using the `init` and `cleanup` arguments to `_test/run_test.sh`.
### Behind the Scenes
Tests are basically unit tests.
Each test function follows the unit test pattern: initialization, action,
evaluation.
Each test starts the server, creates a given context, checks for validity and
then terminates the server process.
### Debugging
Logs are collected in `test.log` and `wget.log` files.
## Resources
- [sendfile](https://man7.org/linux/man-pages/man2/sendfile.2.html)
- [io_setup & friends](https://man7.org/linux/man-pages/man2/io_setup.2.html)
- [epoll](https://man7.org/linux/man-pages/man7/epoll.7.html)
...@@ -9,7 +9,7 @@ cd "$(dirname "$0")" || exit 1 ...@@ -9,7 +9,7 @@ cd "$(dirname "$0")" || exit 1
RED='\033[0;31m' RED='\033[0;31m'
NC='\033[0m' NC='\033[0m'
DEFAULT_IMAGE_NAME=operating-systems/assignment-tutorial DEFAULT_IMAGE_NAME=operating-systems/assignment-async-web-server
DEFAULT_TAG='latest' DEFAULT_TAG='latest'
DEFAULT_REGISTRY='gitlab.cs.pub.ro:5050' DEFAULT_REGISTRY='gitlab.cs.pub.ro:5050'
......
aws
*.o
utils/*.o
http-parser/*o
\ No newline at end of file
# Build rules for the `aws` executable of the assignment skeleton.
CC = gcc
# DEBUG and LOG_LEVEL are defined on the preprocessor command line of every compile.
CPPFLAGS = -DDEBUG -DLOG_LEVEL=LOG_DEBUG
CFLAGS = -Wall -g
# Link against libaio (aws.c includes <libaio.h>).
LDLIBS = -laio
.PHONY: all build clean pack
# `build` is an alias for `all`, which builds the `aws` executable.
build: all
all: aws
# No recipe is given for `aws` and `aws.o`; they are left to make's built-in rules.
aws: aws.o sock_util.o http_parser.o
aws.o: aws.c utils/sock_util.h utils/debug.h utils/util.h http-parser/http_parser.h aws.h
# Objects whose sources live in subdirectories get explicit compile rules;
# -I. adds the current directory to the include search path.
http_parser.o: http-parser/http_parser.c http-parser/http_parser.h
$(CC) $(CPPFLAGS) -I. $(CFLAGS) -c -o $@ $<
sock_util.o: utils/sock_util.c utils/sock_util.h
$(CC) $(CPPFLAGS) -I. $(CFLAGS) -c -o $@ $<
# Recreate ../src.zip from the listed sources after running `clean`.
pack: clean
-rm -f ../src.zip
zip -r ../src.zip aws.c aws.h http-parser/http_parser.c http-parser/http_parser.h \
utils/sock_util.c utils/sock_util.h utils/debug.h utils/util.h utils/w_epoll.h \
Makefile
# Remove the archive, the object files and the executable; the leading `-`
# makes make ignore a failing rm.
clean:
-rm -f ../src.zip
-rm -f *.o
-rm -f aws
// SPDX-License-Identifier: BSD-3-Clause
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <sys/types.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/epoll.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <sys/sendfile.h>
#include <sys/eventfd.h>
#include <libaio.h>
#include <errno.h>
#include "aws.h"
#include "utils/util.h"
#include "utils/debug.h"
#include "utils/sock_util.h"
#include "utils/w_epoll.h"
/* server socket file descriptor */
static int listenfd;
/* epoll file descriptor */
static int epollfd;
/*
 * libaio context handle (io_context_t comes from <libaio.h>).
 * Nothing in the visible skeleton initializes or uses it yet.
 */
static io_context_t ctx;
/*
 * http_parser "on_path" data callback: stores the request path reported by
 * the parser in the connection that owns the parser.
 *
 * p   - parser; p->data is cast to struct connection * (assumes whoever
 *       initialized the parser stored the owning connection there - TODO
 *       confirm once the parser setup is implemented).
 * buf - start of the path bytes; not NUL-terminated.
 * len - number of path bytes at buf.
 *
 * Returns 0 after copying the path, NUL-terminating it and setting have_path.
 * Returns -1 without touching the connection when the path plus its
 * terminating NUL would not fit in conn->request_path; a non-zero return
 * makes the parser stop with an error instead of overflowing the buffer.
 */
static int aws_on_path_cb(http_parser *p, const char *buf, size_t len)
{
	struct connection *conn = (struct connection *)p->data;

	/* One byte of request_path must stay free for the terminating NUL. */
	if (len >= sizeof(conn->request_path))
		return -1;

	memcpy(conn->request_path, buf, len);
	conn->request_path[len] = '\0';
	conn->have_path = 1;

	return 0;
}
/*
 * Skeleton stub: the body is empty, so a call currently leaves conn
 * untouched. Per the TODO, the implementation is expected to prepare the
 * connection buffer for sending the reply header.
 */
static void prepare_connection_send_reply_header(struct connection *conn)
{
/* TODO: Prepare the connection buffer to send the reply header. */
}
/*
 * Skeleton stub: the body is empty, so a call currently leaves conn
 * untouched. Per the TODO, the implementation is expected to prepare the
 * connection buffer for sending the 404 header.
 */
static void prepare_connection_send_404(struct connection *conn)
{
/* TODO: Prepare the connection buffer to send the 404 header. */
}
/*
 * Classify the resource requested on a connection.
 * Skeleton stub: conn is not inspected and RESOURCE_TYPE_NONE is always
 * returned until the TODO below is implemented.
 */
static enum resource_type connection_get_resource_type(struct connection *conn)
{
/*
 * TODO: Get resource type depending on request path/filename. Filename should
 * point to the static or dynamic folder.
 */
return RESOURCE_TYPE_NONE;
}
/*
 * Create the connection handler for an accepted socket.
 * Skeleton stub: sockfd is ignored and NULL is always returned, so callers
 * must not dereference the result until the TODO is implemented.
 */
struct connection *connection_create(int sockfd)
{
/* TODO: Initialize connection structure on given socket. */
return NULL;
}
/*
 * Start the asynchronous file read for a connection.
 * Skeleton stub: the body is empty, so no I/O is submitted yet.
 */
void connection_start_async_io(struct connection *conn)
{
/* TODO: Start asynchronous operation (read from file).
 * Use io_submit(2) & friends for reading data asynchronously.
 */
}
/*
 * Tear down a connection handler.
 * Skeleton stub: the body is empty, so nothing is closed or released yet.
 */
void connection_remove(struct connection *conn)
{
/* TODO: Remove connection handler. */
}
/*
 * Handle a connection request on the server socket.
 * Skeleton stub: the body only lists the intended steps (a-e) as TODOs and
 * performs no work yet.
 */
void handle_new_connection(void)
{
/* TODO: Handle a new connection request on the server socket. */
/* TODO a: Accept new connection. */
/* TODO b: Set socket to be non-blocking. */
/* TODO c: Instantiate new connection handler. */
/* TODO d: Add socket to epoll. */
/* TODO e: Initialize HTTP_REQUEST parser. */
}
/*
 * Receive request bytes on a connection's socket.
 * Skeleton stub: the body is empty, so nothing is read yet.
 */
void receive_data(struct connection *conn)
{
/* TODO: Receive message on socket.
 * Store message in recv_buffer in struct connection.
 */
}
/*
 * Open the file requested on a connection.
 * Skeleton stub: no file is opened and 0 is always returned. The meaning of
 * the return value is not defined by the skeleton - TODO document it when
 * implementing.
 */
int connection_open_file(struct connection *conn)
{
/* TODO: Open file and update connection fields. */
return 0;
}
/*
 * Finish an asynchronous read that completed successfully.
 * Skeleton stub: the body is empty, so no completion handling happens yet.
 */
void connection_complete_async_io(struct connection *conn)
{
/* TODO: Complete asynchronous operation; operation returns successfully.
 * Prepare socket for sending.
 */
}
/*
 * Parse the HTTP request header received on a connection.
 *
 * Skeleton: only the parser settings are prepared; the parser itself is not
 * run yet, conn is not touched and 0 is always returned.
 */
int parse_header(struct connection *conn)
{
	/* TODO: Parse the HTTP header and extract the file path. */
	/*
	 * Only the path callback is installed. Members left out of a designated
	 * initializer are zero-initialized, which means "no callback" for every
	 * other parser event.
	 */
	http_parser_settings settings_on_path = {
		.on_path = aws_on_path_cb
	};

	return 0;
}
/*
 * Send a static file on a connection.
 * Skeleton stub: nothing is sent and STATE_NO_STATE is always returned.
 */
enum connection_state connection_send_static(struct connection *conn)
{
/* TODO: Send static data using sendfile(2). */
/* Return value is the state of the connection at the time this function reaches the end. */
return STATE_NO_STATE;
}
/*
 * Send pending bytes from a connection's send buffer.
 * Skeleton stub: nothing is sent and 0 is always returned; the intended
 * return contract is described in the comment below.
 */
int connection_send_data(struct connection *conn)
{
/* May be used as a helper function.
 * TODO: Send as much data as possible from the connection send buffer.
 * Returns the number of bytes sent or -1 if an error occurred
 */
return 0;
}
/*
 * Serve a dynamic file on a connection.
 * Skeleton stub: no read is started and 0 is always returned.
 */
int connection_send_dynamic(struct connection *conn)
{
/* TODO: Read data asynchronously.
 * Returns 0 on success and -1 on error.
 */
return 0;
}
/*
 * Input-side handler for a connection (skeleton).
 * As written, the switch on conn->state has only a default branch, so every
 * call reports "Unexpected state" through the ERR macro and then terminates
 * the process with exit(1). Per-state cases are meant to be added above the
 * default branch.
 */
void handle_input(struct connection *conn)
{
/* TODO: Handle input information: may be a new message or notification of
 * completion of an asynchronous I/O operation.
 */
switch (conn->state) {
default:
ERR("Unexpected state\n");
exit(1);
}
}
/*
 * Output-side handler for a connection (skeleton).
 * As written, the switch on conn->state has only a default branch, so every
 * call reports "Unexpected state" through the ERR macro and then terminates
 * the process with exit(1). Per-state cases are meant to be added above the
 * default branch.
 */
void handle_output(struct connection *conn)
{
/* TODO: Handle output information: may be a new valid requests or notification of
 * completion of an asynchronous I/O operation or invalid requests.
 */
switch (conn->state) {
default:
ERR("Unexpected state\n");
exit(1);
}
}
/*
 * Dispatch an event reported for a client connection.
 * event is presumably the epoll event mask for conn's socket - TODO confirm
 * when implementing.
 * Skeleton stub: the body is empty, so both arguments are ignored for now.
 */
void handle_client(uint32_t event, struct connection *conn)
{
/* TODO: Handle new client. There can be input and output connections.
 * Take care of what happened at the end of a connection.
 */
}
/*
 * Program entry point (skeleton).
 * All setup steps are still TODOs; the function goes straight into an
 * endless loop that has no break.
 */
int main(void)
{
/* TODO: Initialize asynchronous operations. */
/* TODO: Initialize multiplexing. */
/* TODO: Create server socket. */
/* TODO: Add server socket to epoll object */
/* Uncomment the following line for debugging. */
// dlog(LOG_INFO, "Server waiting for connections on port %d\n", AWS_LISTEN_PORT);
/* server main loop */
while (1) {
struct epoll_event rev;
/* TODO: Wait for events. */
/*
 * NOTE(review): rev is only declared above. Until the wait is implemented
 * and fills it in, the switch below reads an uninitialized value and the
 * loop spins without blocking.
 */
/*
 * TODO: Switch event types; consider
 * - new connection requests (on server socket)
 * - socket communication (on connection sockets)
 */
switch (rev.data.fd) { }
}
/* Not reached as written: the loop above never terminates. */
return 0;
}
/* SPDX-License-Identifier: BSD-3-Clause */
#ifndef AWS_H_
#define AWS_H_ 1
#include "http-parser/http_parser.h"
#ifdef __cplusplus
extern "C" {
#endif
/* Port on which the web server listens for connections. */
#define AWS_LISTEN_PORT 8888
/* Root directory relative to which the served files are looked up. */
#define AWS_DOCUMENT_ROOT "./"
/* Sub-folders of the document root that hold static and dynamic files. */
#define AWS_REL_STATIC_FOLDER "static/"
#define AWS_REL_DYNAMIC_FOLDER "dynamic/"
/*
 * Document root and sub-folder joined by string-literal concatenation
 * ("./static/" and "./dynamic/"). The expansion is parenthesized, so it
 * cannot itself be concatenated with a further adjacent string literal.
 */
#define AWS_ABS_STATIC_FOLDER (AWS_DOCUMENT_ROOT AWS_REL_STATIC_FOLDER)
#define AWS_ABS_DYNAMIC_FOLDER (AWS_DOCUMENT_ROOT AWS_REL_DYNAMIC_FOLDER)
/*
 * States of the per-connection state machine; stored in
 * struct connection::state. The transitions between states are left to the
 * implementation in aws.c.
 */
enum connection_state {
STATE_INITIAL,
STATE_RECEIVING_DATA,
STATE_REQUEST_RECEIVED,
STATE_SENDING_DATA,
STATE_SENDING_HEADER,
STATE_SENDING_404,
STATE_ASYNC_ONGOING,
STATE_DATA_SENT,
STATE_HEADER_SENT,
STATE_404_SENT,
STATE_CONNECTION_CLOSED,
STATE_NO_STATE // Used for assignment skeleton
};
/*
 * Non-zero when s is one of the three "sending" states (data, header, 404).
 * The argument is evaluated up to three times, so pass an expression without
 * side effects.
 */
#define OUT_STATE(s) (((s) == STATE_SENDING_DATA) || \
((s) == STATE_SENDING_HEADER) || ((s) == STATE_SENDING_404))
/*
 * Resource type requested over HTTP (either static or dynamic).
 * RESOURCE_TYPE_NONE is the value the skeleton's
 * connection_get_resource_type() returns.
 */
enum resource_type {
RESOURCE_TYPE_NONE,
RESOURCE_TYPE_STATIC,
RESOURCE_TYPE_DYNAMIC
};
/*
 * Structure acting as a connection handler.
 * One instance is meant to track one open client connection. Every buffer is
 * a fixed BUFSIZ-byte array embedded in the structure.
 */
struct connection {
/* file to be sent */
int fd;
char filename[BUFSIZ];
/* asynchronous notification */
int eventfd;
/* socket of the connection (connection_create() receives a sockfd) */
int sockfd;
/*
 * libaio state: context, one I/O control block and a one-element array of
 * control-block pointers - presumably the form io_submit(2) expects; confirm
 * against the implementation.
 */
io_context_t ctx;
struct iocb iocb;
struct iocb *piocb[1];
/* presumably the size in bytes of the file behind fd - TODO confirm */
size_t file_size;
/* buffers used for receiving messages */
char recv_buffer[BUFSIZ];
size_t recv_len;
/* Used for sending data (headers, 404 or data populated through async IO). */
char send_buffer[BUFSIZ];
/*
 * Bookkeeping for the send buffer and the file transfer. The exact meaning
 * of each counter is defined by the implementation in aws.c - TODO document
 * once implemented.
 */
size_t send_len;
size_t send_pos;
size_t file_pos;
size_t async_read_len;
/* HTTP request path; have_path is set to 1 by the on_path parser callback */
int have_path;
char request_path[BUFSIZ];
enum resource_type res_type;
enum connection_state state;
/* HTTP_REQUEST parser */
http_parser request_parser;
};
/*
 * Public interface of aws.c. Each function is declared exactly once; the
 * original list declared connection_start_async_io() twice.
 */

/* Event handling */
void handle_client(uint32_t event, struct connection *conn);
void handle_new_connection(void);
void handle_input(struct connection *conn);
void handle_output(struct connection *conn);

/* Connection lifetime */
struct connection *connection_create(int sockfd);
void connection_remove(struct connection *conn);

/* File access and sending */
int connection_open_file(struct connection *conn);
int connection_send_dynamic(struct connection *conn);
enum connection_state connection_send_static(struct connection *conn);

/* Asynchronous file I/O */
void connection_start_async_io(struct connection *conn);
void connection_complete_async_io(struct connection *conn);

/* Request reception and parsing */
int parse_header(struct connection *conn);
void receive_data(struct connection *conn);
#ifdef __cplusplus
}
#endif
#endif /* AWS_H_ */
tags
TAGS
*.o
test
test_g
Contributors must agree to the Contributor License Agreement before patches
can be accepted.
http://spreadsheets2.google.com/viewform?hl=en&formkey=dDJXOGUwbzlYaWM4cHN1MERwQS1CSnc6MQ
exclude_files=http_parser\.(c|h)
exclude_files=test\.c
exclude_files=README\.md
exclude_files=LICENSE-MIT
Copyright Joyent, Inc. and other Node contributors. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
IN THE SOFTWARE.
# Build rules for the bundled http-parser library and its test program.
# OPT_DEBUG: unoptimized build with debug info and warnings treated as errors.
OPT_DEBUG=-O0 -g -Wall -Wextra -Werror -I.
# OPT_FAST: optimized build with HTTP_PARSER_STRICT defined to 0.
OPT_FAST=-O3 -DHTTP_PARSER_STRICT=0 -I.
# Use gcc unless CC is already set.
CC?=gcc
# Build the debug test binary and run it.
test: test_g
./test_g
test_g: http_parser_g.o test_g.o
$(CC) $(OPT_DEBUG) http_parser_g.o test_g.o -o $@
test_g.o: test.c http_parser.h Makefile
$(CC) $(OPT_DEBUG) -c test.c -o $@
test.o: test.c http_parser.h Makefile
$(CC) $(OPT_FAST) -c test.c -o $@
http_parser_g.o: http_parser.c http_parser.h Makefile
$(CC) $(OPT_DEBUG) -c http_parser.c -o $@
# Run the debug test binary under valgrind.
test-valgrind: test_g
valgrind ./test_g
http_parser.o: http_parser.c http_parser.h Makefile
$(CC) $(OPT_FAST) -c http_parser.c
test_fast: http_parser.o test.c http_parser.h
$(CC) $(OPT_FAST) http_parser.o test.c -o $@
# Time the optimized test binary in an endless loop; stop it manually.
test-run-timed: test_fast
while(true) do time ./test_fast > /dev/null; done
tags: http_parser.c http_parser.h test.c
ctags $^
clean:
rm -f *.o test test_fast test_g http_parser.tar tags
# NOTE(review): `package` and `test-run` have no rule in the visible part of
# this file, and `test` is not listed as phony.
.PHONY: clean package test-run test-run-timed test-valgrind
HTTP Parser
===========
This is a parser for HTTP messages written in C. It parses both requests and
responses. The parser is designed to be used in performance HTTP
applications. It does not make any syscalls nor allocations, it does not
buffer data, it can be interrupted at anytime. Depending on your
architecture, it only requires about 40 bytes of data per message
stream (in a web server that is per connection).
Features:
* No dependencies
* Handles persistent streams (keep-alive).
* Decodes chunked encoding.
* Upgrade support
* Defends against buffer overflow attacks.
The parser extracts the following information from HTTP messages:
* Header fields and values
* Content-Length
* Request method
* Response status code
* Transfer-Encoding
* HTTP version
* Request path, query string, fragment
* Message body
Usage
-----
One `http_parser` object is used per TCP connection. Initialize the struct
using `http_parser_init()` and set the callbacks. That might look something
like this for a request parser:
http_parser_settings settings;
settings.on_path = my_path_callback;
settings.on_header_field = my_header_field_callback;
/* ... */
http_parser *parser = malloc(sizeof(http_parser));
http_parser_init(parser, HTTP_REQUEST);
parser->data = my_socket;
When data is received on the socket execute the parser and check for errors.
size_t len = 80*1024, nparsed;
char buf[len];
ssize_t recved;
recved = recv(fd, buf, len, 0);
if (recved < 0) {
/* Handle error. */
}
/* Start up / continue the parser.
* Note we pass recved==0 to signal that EOF has been received.
*/
nparsed = http_parser_execute(parser, &settings, buf, recved);
if (parser->upgrade) {
/* handle new protocol */
} else if (nparsed != recved) {
/* Handle error. Usually just close the connection. */
}
HTTP needs to know where the end of the stream is. For example, sometimes
servers send responses without Content-Length and expect the client to
consume input (for the body) until EOF. To tell http_parser about EOF, give
`0` as the fourth parameter to `http_parser_execute()`. Callbacks and errors
can still be encountered during an EOF, so one must still be prepared
to receive them.
Scalar valued message information such as `status_code`, `method`, and the
HTTP version are stored in the parser structure. This data is only
temporarily stored in `http_parser` and gets reset on each new message. If
this information is needed later, copy it out of the structure during the
`headers_complete` callback.
The parser decodes the transfer-encoding for both requests and responses
transparently. That is, a chunked encoding is decoded before being sent to
the on_body callback.
The Special Problem of Upgrade
------------------------------
HTTP supports upgrading the connection to a different protocol. An
increasingly common example of this is the Web Socket protocol which sends
a request like
GET /demo HTTP/1.1
Upgrade: WebSocket
Connection: Upgrade
Host: example.com
Origin: http://example.com
WebSocket-Protocol: sample
followed by non-HTTP data.
(See http://tools.ietf.org/html/draft-hixie-thewebsocketprotocol-75 for more
information on the Web Socket protocol.)
To support this, the parser will treat this as a normal HTTP message without a
body. Issuing both on_headers_complete and on_message_complete callbacks. However
http_parser_execute() will stop parsing at the end of the headers and return.
The user is expected to check if `parser->upgrade` has been set to 1 after
`http_parser_execute()` returns. Non-HTTP data begins at the buffer supplied
offset by the return value of `http_parser_execute()`.
Callbacks
---------
During the `http_parser_execute()` call, the callbacks set in
`http_parser_settings` will be executed. The parser maintains state and
never looks behind, so buffering the data is not necessary. If you need to
save certain data for later usage, you can do that from the callbacks.
There are two types of callbacks:
* notification `typedef int (*http_cb) (http_parser*);`
Callbacks: on_message_begin, on_headers_complete, on_message_complete.
* data `typedef int (*http_data_cb) (http_parser*, const char *at, size_t length);`
Callbacks: (requests only) on_path, on_query_string, on_url, on_fragment,
(common) on_header_field, on_header_value, on_body;
Callbacks must return 0 on success. Returning a non-zero value indicates
error to the parser, making it exit immediately.
In case you parse an HTTP message in chunks (i.e. `read()` the request line
from the socket, parse, read half of the headers, parse, etc.) your data callbacks
may be called more than once. Http-parser guarantees that the data pointer is only
valid for the lifetime of the callback. You can also `read()` into a heap-allocated
buffer to avoid copying memory around if this fits your application.
Reading headers may be a tricky task if you read/parse headers partially.
Basically, you need to remember whether last header callback was field or value
and apply following logic:
(on_header_field and on_header_value shortened to on_h_*)
------------------------ ------------ --------------------------------------------
| State (prev. callback) | Callback | Description/action |
------------------------ ------------ --------------------------------------------
| nothing (first call) | on_h_field | Allocate new buffer and copy callback data |
| | | into it |
------------------------ ------------ --------------------------------------------
| value | on_h_field | New header started. |
| | | Copy current name,value buffers to headers |
| | | list and allocate new buffer for new name |
------------------------ ------------ --------------------------------------------
| field | on_h_field | Previous name continues. Reallocate name |
| | | buffer and append callback data to it |
------------------------ ------------ --------------------------------------------
| field | on_h_value | Value for current header started. Allocate |
| | | new buffer and copy callback data to it |
------------------------ ------------ --------------------------------------------
| value | on_h_value | Value continues. Reallocate value buffer |
| | | and append callback data to it |
------------------------ ------------ --------------------------------------------
See examples of reading in headers:
* [partial example](http://gist.github.com/155877) in C
* [from http-parser tests](http://github.com/ry/http-parser/blob/37a0ff8928fb0d83cec0d0d8909c5a4abcd221af/test.c#L403) in C
* [from Node library](http://github.com/ry/node/blob/842eaf446d2fdcb33b296c67c911c32a0dabc747/src/http.js#L284) in Javascript
This diff is collapsed.
/* Copyright Joyent, Inc. and other Node contributors. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef http_parser_h
#define http_parser_h
#ifdef __cplusplus
extern "C" {
#endif
#include <sys/types.h>
/* Fixed-width integer names.
 * On Windows builds that are not MinGW the names are declared by hand from
 * the compiler's sized __intN types (presumably because <stdint.h> was not
 * available there -- TODO confirm); everywhere else <stdint.h> is used.
 */
#if defined(_WIN32) && !defined(__MINGW32__)
typedef __int8 int8_t;
typedef unsigned __int8 uint8_t;
typedef __int16 int16_t;
typedef unsigned __int16 uint16_t;
typedef __int32 int32_t;
typedef unsigned __int32 uint32_t;
typedef __int64 int64_t;
typedef unsigned __int64 uint64_t;
/* NOTE(review): size_t and ssize_t are declared from plain (unsigned) int
 * here; on a 64-bit Windows target the native size_t is wider -- confirm
 * this branch is only ever used for 32-bit builds.
 */
typedef unsigned int size_t;
typedef int ssize_t;
#else
#include <stdint.h>
#endif
/* Strict mode is enabled by default. Compile with -DHTTP_PARSER_STRICT=0
 * to make fewer checks, but run faster.
 *
 * Only the default is supplied here: a value given on the command line is
 * kept as-is rather than being redefined to 0.
 */
#ifndef HTTP_PARSER_STRICT
# define HTTP_PARSER_STRICT 1
#endif
/* Maximum header size allowed (80 * 1024 = 81920) */
#define HTTP_MAX_HEADER_SIZE (80 * 1024)
typedef struct http_parser_settings http_parser_settings;
typedef struct http_parser http_parser;

/* Callback types.
 *
 * A callback returns 0 on success; a non-zero return signals an error and
 * makes the parser halt execution.
 *
 * on_headers_complete is the one exception: in a HTTP_RESPONSE parser,
 * returning '1' from it tells the parser not to expect a body. This is
 * used when receiving a response to a HEAD request, which may carry
 * 'Content-Length' or 'Transfer-Encoding: chunked' headers that would
 * otherwise indicate the presence of a body.
 *
 * http_cb is a plain notification. http_data_cb hands over a piece of
 * data as `at`/`length`, but not necessarily a whole string at once: it
 * may be called arbitrarily many times for each string. E.g. you might get
 * 10 callbacks for "on_path", each providing just a few more characters.
 */
typedef int (*http_cb)(http_parser *parser);
typedef int (*http_data_cb)(http_parser *parser, const char *at, size_t length);
/* Request methods. Values are consecutive, starting at 0, in the order
 * listed here.
 */
enum http_method {
  HTTP_DELETE = 0,
  HTTP_GET,
  HTTP_HEAD,
  HTTP_POST,
  HTTP_PUT,

  /* pathological */
  HTTP_CONNECT,
  HTTP_OPTIONS,
  HTTP_TRACE,

  /* webdav */
  HTTP_COPY,
  HTTP_LOCK,
  HTTP_MKCOL,
  HTTP_MOVE,
  HTTP_PROPFIND,
  HTTP_PROPPATCH,
  HTTP_UNLOCK,

  /* subversion */
  HTTP_REPORT,
  HTTP_MKACTIVITY,
  HTTP_CHECKOUT,
  HTTP_MERGE,

  /* upnp */
  HTTP_MSEARCH,
  HTTP_NOTIFY,
  HTTP_SUBSCRIBE,
  HTTP_UNSUBSCRIBE
};
/* Parser kind, given to http_parser_init(). */
enum http_parser_type {
  HTTP_REQUEST,
  HTTP_RESPONSE,
  HTTP_BOTH
};
/* State of one parser instance.
 * The members are grouped, per the markers below, into parser-internal
 * state, values the caller may read but not modify, and one member that is
 * left entirely to the caller.
 */
struct http_parser {
/** PRIVATE **/
/* NOTE(review): presumably holds an enum http_parser_type value -- confirm. */
unsigned char type : 2;
unsigned char flags : 6;
unsigned char state;
unsigned char header_state;
unsigned char index;
/* NOTE(review): looks like a running count of bytes read, possibly the
 * quantity limited by HTTP_MAX_HEADER_SIZE -- confirm in http_parser.c.
 */
uint32_t nread;
size_t content_length;
/** READ-ONLY **/
unsigned short http_major;
unsigned short http_minor;
unsigned short status_code; /* responses only */
unsigned char method; /* requests only */
/* 1 = Upgrade header was present and the parser has exited because of that.
 * 0 = No upgrade header present.
 * Should be checked when http_parser_execute() returns in addition to
 * error checking.
 */
char upgrade;
/** PUBLIC **/
void *data; /* A pointer to get hook to the "connection" or "socket" object */
};
/* Set of user callbacks handed to http_parser_execute().
 * http_cb members are notifications that receive only the parser;
 * http_data_cb members additionally receive a pointer/length pair.
 */
struct http_parser_settings {
http_cb on_message_begin;
/* NOTE(review): the project README describes the next four as
 * request-only callbacks -- confirm. */
http_data_cb on_path;
http_data_cb on_query_string;
http_data_cb on_url;
http_data_cb on_fragment;
/* Header and body data callbacks; each may be invoked several times for
 * one string (see the http_data_cb typedef). */
http_data_cb on_header_field;
http_data_cb on_header_value;
http_cb on_headers_complete;
http_data_cb on_body;
http_cb on_message_complete;
};
/* Initialize `parser` for messages of the given `type`
 * (one of enum http_parser_type).
 * NOTE(review): whether the public `data` member is preserved or reset by
 * this call is not visible from the header -- confirm in http_parser.c.
 */
void http_parser_init(http_parser *parser, enum http_parser_type type);
/* Parse `len` bytes starting at `data`, invoking the callbacks set in
 * `settings` while the call runs.
 *
 * For an upgrade request, parsing stops at the end of the headers; check
 * `parser->upgrade` after the call returns. The non-HTTP data then begins
 * at `data` offset by the return value.
 * NOTE(review): the return value is presumably the number of bytes
 * consumed from `data` -- confirm against http_parser.c.
 */
size_t http_parser_execute(http_parser *parser,
const http_parser_settings *settings,
const char *data,
size_t len);
/* If http_should_keep_alive() in the on_headers_complete or
 * on_message_complete callback returns 0, then this should be
 * the last message on the connection.
 * If you are the server, respond with the "Connection: close" header.
 * If you are the client, close the connection.
 */
int http_should_keep_alive(http_parser *parser);
/* Returns the HTTP method `m` in string form. */
const char *http_method_str(enum http_method m);
#ifdef __cplusplus
}
#endif
#endif
/test_get_request_path
/test_get_request_path.exe
/*.bat
exclude_files=test_get_request_path\.c
# Builds the test_get_request_path program against the parser object that
# lives in the parent directory.
CFLAGS = -Wall -g -I..

.PHONY: all clean

all: test_get_request_path

# No recipe: linked from the two objects by make's built-in rule.
test_get_request_path: test_get_request_path.o ../http_parser.o

# The parser object is built by the parent directory's makefile. $(MAKE) is
# used instead of a literal `make` so that flags such as -j, -n and -k are
# passed on to the sub-make.
../http_parser.o: ../http_parser.c ../http_parser.h
	$(MAKE) -C .. http_parser.o

clean:
	-rm -f *~
	-rm -f *.o
	-rm -f test_get_request_path