diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..8d05dc2
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,9 @@
+# Bundle generated folders
+.bundle
+vendor/bundle
+
+# Jekyll generated folders
+_site
+.sass-cache/
+.jekyll-cache/
+.jekyll-metadata
diff --git a/Gemfile b/Gemfile
new file mode 100644
index 0000000..82d3bda
--- /dev/null
+++ b/Gemfile
@@ -0,0 +1,9 @@
+# frozen_string_literal: true
+
+source "https://rubygems.org"
+
+# gem "rails"
+
+gem "jekyll", "~> 4.3"
+
+gem "minima", "~> 2.5"
diff --git a/Gemfile.lock b/Gemfile.lock
new file mode 100644
index 0000000..9c3a50c
--- /dev/null
+++ b/Gemfile.lock
@@ -0,0 +1,79 @@
+GEM
+  remote: https://rubygems.org/
+  specs:
+    addressable (2.8.4)
+      public_suffix (>= 2.0.2, < 6.0)
+    colorator (1.1.0)
+    concurrent-ruby (1.2.2)
+    em-websocket (0.5.3)
+      eventmachine (>= 0.12.9)
+      http_parser.rb (~> 0)
+    eventmachine (1.2.7)
+    ffi (1.15.5)
+    forwardable-extended (2.6.0)
+    google-protobuf (3.23.2-x86_64-darwin)
+    http_parser.rb (0.8.0)
+    i18n (1.14.1)
+      concurrent-ruby (~> 1.0)
+    jekyll (4.3.2)
+      addressable (~> 2.4)
+      colorator (~> 1.0)
+      em-websocket (~> 0.5)
+      i18n (~> 1.0)
+      jekyll-sass-converter (>= 2.0, < 4.0)
+      jekyll-watch (~> 2.0)
+      kramdown (~> 2.3, >= 2.3.1)
+      kramdown-parser-gfm (~> 1.0)
+      liquid (~> 4.0)
+      mercenary (>= 0.3.6, < 0.5)
+      pathutil (~> 0.9)
+      rouge (>= 3.0, < 5.0)
+      safe_yaml (~> 1.0)
+      terminal-table (>= 1.8, < 4.0)
+      webrick (~> 1.7)
+    jekyll-feed (0.17.0)
+      jekyll (>= 3.7, < 5.0)
+    jekyll-sass-converter (3.0.0)
+      sass-embedded (~> 1.54)
+    jekyll-seo-tag (2.8.0)
+      jekyll (>= 3.8, < 5.0)
+    jekyll-watch (2.2.1)
+      listen (~> 3.0)
+    kramdown (2.4.0)
+      rexml
+    kramdown-parser-gfm (1.1.0)
+      kramdown (~> 2.0)
+    liquid (4.0.4)
+    listen (3.8.0)
+      rb-fsevent (~> 0.10, >= 0.10.3)
+      rb-inotify (~> 0.9, >= 0.9.10)
+    mercenary (0.4.0)
+    minima (2.5.1)
+      jekyll (>= 3.5, < 5.0)
+      jekyll-feed (~> 0.9)
+      jekyll-seo-tag (~> 2.1)
+    pathutil (0.16.2)
+      forwardable-extended (~> 2.6)
+    public_suffix (5.0.1)
+    rb-fsevent (0.11.2)
+    rb-inotify (0.10.1)
+      ffi (~> 1.0)
+    rexml (3.2.5)
+    rouge (4.1.2)
+    safe_yaml (1.0.5)
+    sass-embedded (1.63.3-x86_64-darwin)
+      google-protobuf (~> 3.23)
+    terminal-table (3.0.2)
+      unicode-display_width (>= 1.1.1, < 3)
+    unicode-display_width (2.4.2)
+    webrick (1.8.1)
+
+PLATFORMS
+  x86_64-darwin-22
+
+DEPENDENCIES
+  jekyll (~> 4.3)
+  minima (~> 2.5)
+
+BUNDLED WITH
+   2.3.26
diff --git a/README.md b/README.md
index 65c4d09..271ecb6 100644
--- a/README.md
+++ b/README.md
@@ -2,3 +2,28 @@
 Documentation for the JSON Lines text file format

 Visit https://jsonlines.org
+
+## Development
+
+### Jekyll installation
+
+Follow steps:
+- https://jekyllrb.com/docs/installation/
+- https://jekyllrb.com/docs
+
+### Bundle installation
+
+```shell
+bundle config set --local path 'vendor/bundle'
+bundle install
+```
+
+### Build and serve the site
+
+```shell
+# just build the website
+bundle exec jekyll build
+
+# build and serve the website on default port
+bundle exec jekyll serve
+```
\ No newline at end of file
diff --git a/_config.yml b/_config.yml
new file mode 100644
index 0000000..95b2013
--- /dev/null
+++ b/_config.yml
@@ -0,0 +1,43 @@
+# Site settings
+# These are used to personalize your new site. If you look in the HTML files,
+# you will see them accessed via {{ site.title }}, {{ site.description }}, and so on.
+#
+# You can create any custom variable you would like, and they will be accessible
+# in the templates via {{ site.myvariable }}.
+
+title: JSON Lines
+description: >-
+  This page describes the JSON Lines text format, also called newline-delimited JSON.
+  JSON Lines is a convenient format for storing structured data that may be processed
+  one record at a time. It works well with unix-style text processing tools and shell
+  pipelines. It's a great format for log files. It's also a flexible format for passing
+  messages between cooperating processes.
+baseurl: "" # the subpath of your site, e.g. /blog
+url: "https://jsonlines.org" # the base hostname & protocol for your site, e.g. http://example.com
+
+# Build settings
+theme: minima
+
+# Disable warning related to https://sass-lang.com/d/slash-div
+sass:
+  quiet_deps: true
+
+# Exclude from processing.
+# The following items will not be processed, by default.
+# Any item listed under the `exclude:` key here will be automatically added to
+# the internal "default list".
+#
+# Excluded items can be processed by explicitly listing the directories or
+# their entries' file path in the `include:` list.
+#
+# exclude:
+#   - .sass-cache/
+#   - .jekyll-cache/
+#   - gemfiles/
+#   - Gemfile
+#   - Gemfile.lock
+#   - node_modules/
+#   - vendor/bundle/
+#   - vendor/cache/
+#   - vendor/gems/
+#   - vendor/ruby/
diff --git a/_data/menu.yml b/_data/menu.yml
new file mode 100644
index 0000000..e110bf2
--- /dev/null
+++ b/_data/menu.yml
@@ -0,0 +1,11 @@
+items:
+  - label: Home
+    url: '/'
+  - label: Examples
+    url: '/examples/'
+  - label: 'Validator'
+    url: '/validator/'
+  - label: 'On the web'
+    url: '/on_the_web/'
+  - label: 'json.org'
+    url: 'https://json.org'
diff --git a/_includes/footer.html b/_includes/footer.html
new file mode 100644
index 0000000..100ba4a
--- /dev/null
+++ b/_includes/footer.html
@@ -0,0 +1,7 @@
diff --git a/_includes/head.html b/_includes/head.html
new file mode 100644
index 0000000..a98ffdf
--- /dev/null
+++ b/_includes/head.html
@@ -0,0 +1,18 @@
+ {% for css in page.custom-css-list %}
+ {% endfor %}
+ {% for javascript in page.custom-javascript-list %}
+ {% endfor %}
["Name", "Session", "Score", "Completed"]
@@ -43,13 +13,13 @@
["Deloise", "2012A", 19, true]
-CSV seems so easy that many programmers have written code to generate it themselves, and almost every implementation is different. Handling broken CSV files is a common and frustrating task. CSV has no standard encoding, no standard column separator and multiple character escaping standards. String is the only type supported for cell values, so some programs attempt to guess the correct types.
+CSV seems so easy that many programmers have written code to generate it themselves, and almost every implementation is different. Handling broken CSV files is a common and frustrating task. CSV has no standard encoding, no standard column separator and multiple character escaping standards. String is the only type supported for cell values, so some programs attempt to guess the correct types.
-JSON Lines handles tabular data cleanly and without ambiguity. Cells may use the standard JSON types.
+JSON Lines handles tabular data cleanly and without ambiguity. Cells may use the standard JSON types.
-The biggest missing piece is an import/export filter for popular spreadsheet programs so that non-programmers can use this format.
+The biggest missing piece is an import/export filter for popular spreadsheet programs so that non-programmers can use this format.
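For illustration only, here is a minimal sketch of reading the row-oriented example above one record at a time; the file name `scores.jsonl` and the treatment of the first line as a header row are assumptions, not part of the site:

```python
import json

# Read the table shown above: one JSON value (an array) per line.
# The first line is assumed to hold the column names.
with open("scores.jsonl", encoding="utf-8") as f:
    header = json.loads(next(f))           # ["Name", "Session", "Score", "Completed"]
    for line in f:
        if not line.strip():               # tolerate a trailing blank line
            continue
        row = dict(zip(header, json.loads(line)))
        # values keep their JSON types (str, int, bool) with no guessing
        print(row["Name"], row["Score"], row["Completed"])
```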
-{"name": "Gilbert", "wins": [["straight", "7♣"], ["one pair", "10♥"]]}
@@ -58,11 +28,10 @@
{"name": "Deloise", "wins": [["three of a kind", "5♣"]]}
-JSON Lines' biggest strength is in handling lots of similar nested data structures. One .jsonl
-file is easier to work with than a directory full of XML files.
+JSON Lines' biggest strength is in handling lots of similar nested data structures. One .jsonl
+file is easier to work with than a directory full of XML files.
-If you have large nested structures then reading the JSON Lines text directly isn't recommended. Use the "jq" tool to make viewing large structures easier:
+If you have large nested structures then reading the JSON Lines text directly isn't recommended. Use the "jq" tool to make viewing large structures easier:
grep pair winning_hands.jsonl | jq .
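The same record-at-a-time filtering can be sketched in a few lines of Python; this is only an approximation of the grep/jq pipeline above, using the winning_hands.jsonl file from the example:

```python
import json

# Roughly equivalent to: grep pair winning_hands.jsonl | jq .
with open("winning_hands.jsonl", encoding="utf-8") as f:
    for line in f:
        record = json.loads(line)
        # keep records where any winning hand mentions "pair"
        if any("pair" in hand for hand, card in record["wins"]):
            print(json.dumps(record, indent=2, ensure_ascii=False))
```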
- This page describes the JSON Lines text format, also called newline-delimited JSON.
- JSON Lines is a convenient format for storing structured data that may be processed
- one record at a time. It works well with unix-style text processing tools and shell
- pipelines. It's a great format for log files. It's also a flexible format for passing
- messages between cooperating processes.
- The JSON Lines format has three requirements:
- JSON allows encoding Unicode strings with only ASCII escape sequences, however those
- escapes will be hard to read when viewed in a text editor. The author of the JSON Lines
- file may choose to escape characters to work with plain ASCII files.
- Encodings other than UTF-8 are very unlikely to be valid when decoded as UTF-8 so the chance
- of accidentally misinterpreting characters
- in JSON Lines files is low.
- The most common values will be objects or arrays, but any JSON value is permitted.
- See json.org for more information about JSON values.
-'\n'
- This means '\r\n' is also supported because surrounding white space is
- implicitly ignored when parsing JSON values.
- The last character in the file may be a line separator, and it will be treated
- the same as if there was no line separator present.
- JSON Lines files may be saved with the file extension .jsonl.
- Stream compressors like gzip or bzip2 are recommended for
- saving space, resulting in .jsonl.gz or .jsonl.bz2 files.
- MIME type may be application/jsonl, but this is not yet standardized; any help
- writing the RFC would be greatly appreciated (see issue).
- Text editing programs call the first line of a text file "line 1". The first value in a
- JSON Lines file should also be called "value 1".
+ This page describes the JSON Lines text format, also called newline-delimited JSON.
+ JSON Lines is a convenient format for storing structured data that may be processed
+ one record at a time. It works well with unix-style text processing tools and shell
+ pipelines. It's a great format for log files. It's also a flexible format for passing
+ messages between cooperating processes.
+ The JSON Lines format has three requirements:
+ 1. UTF-8 Encoding
+ JSON allows encoding Unicode strings with only ASCII escape sequences; however, those
+ escapes will be hard to read when viewed in a text editor. The author of the JSON Lines
+ file may choose to escape characters to work with plain ASCII files.
+ Encodings other than UTF-8 are very unlikely to be valid when decoded as UTF-8, so the chance
+ of accidentally misinterpreting characters
+ in JSON Lines files is low.
+ 2. Each Line is a Valid JSON Value
+ The most common values will be objects or arrays, but any JSON value is permitted.
+ See json.org for more information about JSON values.
+ 3. Line Separator is '\n'
+ This means '\r\n' is also supported because surrounding white space is
+ implicitly ignored when parsing JSON values.
+ The last character in the file may be a line separator, and it will be treated
+ the same as if there was no line separator present.
+ JSON Lines files may be saved with the file extension .jsonl.
+ Stream compressors like gzip or bzip2 are recommended for
+ saving space, resulting in .jsonl.gz or .jsonl.bz2 files.
+ The MIME type may be application/jsonl, but this is not yet standardized; any help
+ writing the RFC would be greatly appreciated (see issue).
+ Text editing programs call the first line of a text file "line 1". The first value in a
+ JSON Lines file should also be called "value 1".
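A minimal sketch of a writer and reader that follow the three requirements above (UTF-8 encoding, one valid JSON value per line, '\n' separators) and the compression convention; the file names and sample records are assumptions for illustration:

```python
import gzip
import json

records = [
    {"name": "Gilbert", "session": "2013", "score": 24, "completed": True},
    {"name": "May", "session": "2012B", "score": 14, "completed": False},
]

# Write: UTF-8 text, one JSON value per line, '\n' as the line separator.
with open("scores.jsonl", "w", encoding="utf-8", newline="\n") as f:
    for record in records:
        f.write(json.dumps(record, ensure_ascii=False) + "\n")

# Read: a trailing line separator or '\r\n' endings are harmless, because
# surrounding whitespace is ignored when each line is parsed as JSON.
with open("scores.jsonl", encoding="utf-8") as f:
    for line in f:
        if line.strip():
            print(json.loads(line))

# The compression convention: write the same data as a .jsonl.gz stream.
with gzip.open("scores.jsonl.gz", "wt", encoding="utf-8", newline="\n") as f:
    for record in records:
        f.write(json.dumps(record, ensure_ascii=False) + "\n")
```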
diff --git a/on_the_web/index.html b/on_the_web/index.md
similarity index 71%
rename from on_the_web/index.html
rename to on_the_web/index.md
index d6d5c42..42334d4 100644
--- a/on_the_web/index.html
+++ b/on_the_web/index.md
@@ -1,41 +1,10 @@
- +NDJSON is a similar format that also allows blank lines
Bubbles supports JSON Lines datastores
@@ -47,17 +16,17 @@Graylog GELF is format for log messages, their stream is de-facto JSON lines.
Scrapy is a framework for web scraping & crawling, it supports and recommends JSON lines since long -- it might've even coined the term.
- +ClickHouse is an open source column-oriented DBMS. It supports JSON lines as JSONEachRow format for input and output.
-Dataflow kit is a web scraping open source framework written in Go. JSON Lines is one of the supported formats for storing results.
+Dataflow kit is a web scraping open source framework written in Go. JSON Lines is one of the supported formats for storing results.
dart uses JSON Lines as one of the possible reporters when running tests.
Apache Spark uses JSONL for reading and writing JSON data.
ArangoDB is an open source multi-model database. The JSON lines format allows to import huge amounts of documents sequentially (via arangoimport).
- +Rumble is a JSONiq engine that runs on top of Spark. It can process datasets in the JSON lines format that have billions of objects and more.
Neo4j the open-source graph database supports JSONL export and import via its standard library procedures apoc.export/import.json
to allow stream processing of nodes and relationships.
CSS HTML Validator for Windows v22.0211+ now supports JSON Lines syntax checking.
-Miller supports JSON Lines format as input.
-Miller supports JSON Lines format as input.
+Miller supports JSON Lines format as input.
diff --git a/stylesheets/stylesheet.css b/stylesheets/stylesheet.css
index 4e68f60..6744299 100644
--- a/stylesheets/stylesheet.css
+++ b/stylesheets/stylesheet.css
@@ -90,7 +90,7 @@ header h2 {
 }

 .inner {
-  width: 620px;
+  width: 700px;
   margin: 0 auto;
 }
diff --git a/validator/index.html b/validator/index.html
deleted file mode 100644
index 240b6ab..0000000
--- a/validator/index.html
+++ /dev/null
@@ -1,52 +0,0 @@