diff --git a/CMakeLists.txt b/CMakeLists.txt index f5a8a9c..37c71b9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,10 +1,16 @@ cmake_minimum_required(VERSION 3.1.0) -project(labelbuddy VERSION 0.0.1 LANGUAGES CXX) +project(labelbuddy VERSION 0.0.3 LANGUAGES CXX) set(CMAKE_CXX_STANDARD 11) set(CMAKE_CXX_STANDARD_REQUIRED ON) +# if (MSVC) +# add_compile_options(/W4 /WX) +# else() +# add_compile_options(-Wall -Wextra -pedantic -Werror) +# endif() + set(CMAKE_AUTOMOC ON) set(CMAKE_AUTORCC ON) set(CMAKE_AUTOUIC ON) @@ -39,13 +45,19 @@ set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -s") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -s") set(CPACK_PACKAGE_NAME "labelbuddy") -set(CPACK_PACKAGE_VERSION "0.0.1") +set(CPACK_PACKAGE_VERSION "0.0.3") set(CPACK_PACKAGE_VERSION_MAJOR 0) set(CPACK_PACKAGE_VERSION_MINOR 0) -set(CPACK_PACKAGE_VERSION_PATCH 1) +set(CPACK_PACKAGE_VERSION_PATCH 3) set(CPACK_PACKAGE_CHECKSUM "MD5") -set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "gui tool for annotating documents") -set(CPACK_PACKAGE_DESCRIPTION_FILE "${CMAKE_SOURCE_DIR}/docs/short-readme.txt") +set(CPACK_DEBIAN_PACKAGE_HOMEPAGE "https://github.com/jeromedockes/labelbuddy") +if(CMAKE_VERSION VERSION_LESS "3.14.0") + file(READ "${CMAKE_SOURCE_DIR}/docs/Description" DESCR) + string(REGEX REPLACE "\n$" "" CPACK_DEBIAN_PACKAGE_DESCRIPTION "${DESCR}") +else() + set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "GUI tool for annotating documents") + set(CPACK_PACKAGE_DESCRIPTION_FILE "${CMAKE_SOURCE_DIR}/docs/extended-description") +endif() set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_SOURCE_DIR}/LICENSE.txt") set(CPACK_SOURCE_GENERATOR "TGZ") @@ -86,7 +98,7 @@ set(CPACK_SOURCE_IGNORE_FILES set(CPACK_GENERATOR "DEB") set(CPACK_DEBIAN_FILE_NAME "DEB-DEFAULT") -set(CPACK_DEBIAN_PACKAGE_VERSION "0.0.1-1") +set(CPACK_DEBIAN_PACKAGE_VERSION "0.0.3-1") set(CPACK_DEBIAN_PACKAGE_SECTION "text") set(CPACK_DEBIAN_PACKAGE_MAINTAINER "Jerome Dockes ") set(CPACK_DEBIAN_PACKAGE_SHLIBDEPS ON) diff --git 
a/data/VERSION.txt b/data/VERSION.txt index 8acdd82..bcab45a 100644 --- a/data/VERSION.txt +++ b/data/VERSION.txt @@ -1 +1 @@ -0.0.1 +0.0.3 diff --git a/docs/Description b/docs/Description new file mode 100644 index 0000000..a883b20 --- /dev/null +++ b/docs/Description @@ -0,0 +1,6 @@ +GUI tool for annotating documents + This is an application for annotating parts of documents with labels. + labelbuddy can be used for Part Of Speech tagging, + Named Entity Recognition, + sentiment analysis and document classification, etc. + It depends on Qt5. \ No newline at end of file diff --git a/docs/Makefile b/docs/Makefile index 0686c64..616b1c8 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -7,19 +7,24 @@ examples := $(wildcard example_data/*.txt) all: $(html) $(webhtml) example_data/example_documents.json -%.html: %.adoc +%.html: %.adoc ../data/VERSION.txt asciidoctor -b xhtml -a lbversion="$$(cat ../data/VERSION.txt)" $< gh-pages-site/%.html: %.html mkdir -p gh-pages-site xsltproc add_nav.xsl $< > $@ -documentation.html: README.adoc +documentation.html: README.adoc ../data/VERSION.txt asciidoctor -b xhtml -a lbversion="$$(cat ../data/VERSION.txt)" $< -o $@ +# example_data/documentation.txt: documentation.html +# lynx -dump -nolist -nonumbers -width=79 $< | \ +# sed 's/\xe2\x80\x8b//g;/^ *Last updated[-: 0-9]*$$/d' > $@ + example_data/documentation.txt: documentation.html - lynx -dump -nolist -nonumbers -width=79 $< | \ - sed 's/\xe2\x80\x8b//g;/^ *Last updated[-: 0-9]*$$/d' > $@ + pandoc $< -t plain | \ + sed '1s/^/\nThis doc is better read with a fixed width font: Preferences > monospace, or Help > documentation for html version\n\n/;s/\xe2\x80\x8b//g;/^ *Last updated[-: 0-9]*$$/d' > $@ + example_data/example_documents.json: $(examples) example_data/documentation.txt example_data/make_example_docs.py python3 example_data/make_example_docs.py diff --git a/docs/README.adoc b/docs/README.adoc index a64fc21..862d94b 100644 --- a/docs/README.adoc +++ b/docs/README.adoc @@ 
-59,7 +59,7 @@ labelbuddy --demo .... (You can also start {lb} without any options and then select menu:File[Demo] in the GUI.) -You can play around with {lb}’s features in this temporary project. +You can play around with {lb}’s features in this temporary database. If you decide to start creating annotations that you want to keep, open a new database and import your <> and <>. NOTE: a <> is actually just a regular file on your disk (an {sqlitehome}[SQLite] database). @@ -69,6 +69,7 @@ NOTE: a <> is actually just a regular fil Documents and labels can be imported into a {lb} database from various formats. Once this is done, you can annotate the documents and finally export your annotations. It is also possible to import annotations exported from {lb} or {doca}. +Importing and exporting data can be done from the graphical or the <> interface. Documents and labels that are already in the database are skipped if you try to import them again. @@ -143,14 +144,15 @@ Other attributes cannot be specified. When importing a `.json` file the whole file is read into memory before inserting the documents in the database. To read documents one by one and reduce memory usage, you can use https://jsonlines.org/[JSON Lines]. -It is almost the same as the JSON format, but instead of having one JSON array, the file must contain one JSON document per line. +It is similar to the JSON format, but instead of having one JSON array, the file must contain one JSON document per line. For example: [source,json] ---- {"text": "text of first doc", "meta": {"author": "me", "DOI": "123"}} {"text": "text of second doc", "short_title": "doc456"} ---- -(Note the outer brackets are removed and the documents are not separated by commas.) +Note the outer brackets are removed and the documents are not separated by commas. +The object representing each document must occupy exactly one line, unlike in `.json` where whitespace is not important. As for `.json`, `.jsonl` also allows <>. 
@@ -185,10 +187,13 @@ The same format can be used to <>. === Importing labels To import labels, click btn:[Import labels] in the {ietab}. -Labels have two attributes: a mandatory `text` (label name), and an optional `color`. -For compatibility with {doca}, `color` can also be specified as `background_color`. +Labels have three attributes: a mandatory `text` (label name), and an optional `color` and `shortcut_key`. +The `shortcut_key` is a lower-case ASCII letter (a-z) that helps quickly <> with that label. + +For compatibility with {doca}, `color` can also be specified as `background_color` and `shortcut_key` can be specified as `suffix_key`. + As for documents, the format is deduced from the filename extension when importing labels. -The label color can be changed from within the GUI application . +The label color and shortcut key can be changed from within the GUI application . ==== From `.txt` @@ -200,14 +205,13 @@ Verb Adjective .... -You can specify a color for each label (or labels that contain newlines) by using the `.json` format. +To specify a color or shortcut key (or to use labels that contain newlines), use the `.json` format. ==== From `.json` The file must contain a JSON array containing one JSON object per label. -Each label's object must have the key `text` and optionally `color` or the synonym `background_color`. -(If both `color` and `background_color` are provided `color` is used.) +Each label's object must have the key `text` and optionally `color` and `shortcut_key` (or their synonyms `background_color` and `suffix_key`, which have lower precedence). 
For example: @@ -215,7 +219,7 @@ For example: ---- [ {"text": "Noun", "color": "#ff0000"}, -{"text": "Verb", "color": "yellow"}, +{"text": "Verb", "color": "yellow", "shortcut_key": "v"}, {"text": "Adjective"} ] ---- @@ -237,37 +241,117 @@ In this case the first (mandatory) element of the array is the text (label name) === Annotating documents Once you have imported labels and documents you can see them in the {dstab}. -You can delete labels or documents and change the color associated with each label. +You can delete labels or documents and change the color and shortcut associated with each label. You then go to the {annotab}. (If you double-click a document it will be opened in the {annotab}.) To annotate a document, select the region you want to label with the mouse and click on the appropriate label. +It is also possible to do the same thing with the keyboard. +Search for the term you want to annotate and the first match will be selected. +The selection can be adjusted with the keyboard using the bindings described <>. +Then press the shortcut key associated with the label you want to set. Once you have created annotations, you can select any of them by clicking it. It becomes bold and underlined and you can change its label by clicking on a different one or remove the annotation by clicking btn:[Remove]. - -TIP: If showing the selection in bold is annoying (depending on the font it can slightly change its size) you can disable it in menu:Preferences[Bold selected region]. +You can also do this with the keyboard: jump to the next annotation with the kbd:[Space] key and change its label with a label shortcut or remove it with kbd:[Backspace]. If you create a new annotation that overlaps with a previously existing one, the previously existing one is automatically removed. +TIP: If showing the selection in bold is annoying (depending on the font it can slightly change its size) you can disable it in menu:Preferences[Bold selected region].
+ ==== Summary of key bindings in the {annotab} -* kbd:[Ctrl] and scroll the mouse: zoom or dezoom the text -* kbd:[Ctrl+F]: search -* kbd:[Enter]: next search match -* kbd:[Shift+Enter]: previous search match -* kbd:[Ctrl+J], kbd:[Ctrl+N], kbd:[Down]: scroll down one line -* kbd:[Ctrl+K], kbd:[Ctrl+P], kbd:[Up]: scroll up one line -* kbd:[Ctrl+D]: scroll down one page -* kbd:[Ctrl+U]: scroll up one page -* kbd:[\]]: move the *end* of the selection by one *word* to the *right* -* kbd:[[]: move the *end* of the selection by one *word* to the *left* -* kbd:[}]: move the *beginning* of the selection by one *word* to the *right* -* kbd:[{]: move the *beginning* of the selection by one *word* to the *left* -* kbd:[Ctrl+\]]: move the *end* of the selection by one *character* to the *right* -* kbd:[Ctrl+[]: move the *end* of the selection by one *character* to the *left* -* kbd:[Ctrl+}]: move the *beginning* of the selection by one *character* to the *right* -* kbd:[Ctrl+{]: move the *beginning* of the selection by one *character* to the *left* +[cols="1,2"] +|=== +2+| Searching and navigation + +| kbd:[Ctrl] and scroll the mouse +| zoom or dezoom the text + +| kbd:[Ctrl+F], kbd:[/] +| search + +| kbd:[Enter] +| next search match + +| kbd:[Shift+Enter] +| previous search match + +| kbd:[Ctrl+J], kbd:[Ctrl+N], kbd:[Down] +| scroll down one line + +| kbd:[Ctrl+K], kbd:[Ctrl+P], kbd:[Up] +| scroll up one line + +| kbd:[Ctrl+D] +| scroll down one page + +| kbd:[Ctrl+U] +| scroll up one page + +| kbd:[Ctrl+L] +| cycle between placing the cursor at the center, top and bottom of the window +|=== + +[cols="1,2"] +|=== +2+| Manipulating annotations + +| kbd:[a-z] (label's `shortcut_key`) +| set corresponding label for the currently selected region or annotation + +| kbd:[Backspace] +| remove selected annotation + +| kbd:[Space] +| jump to next annotation and select it + +| kbd:[Shift+Space] +| jump to previous annotation and select it + +| kbd:[Esc] +| un-select selected 
annotation +|=== + +[cols="1,2"] +|=== +2+| Manipulating the text selection + +| kbd:[\]] +| move the *end* of the selection by one *word* to the *right* + +| kbd:[[] +| move the *end* of the selection by one *word* to the *left* + +| kbd:[}] +| move the *beginning* of the selection by one *word* to the *right* + +| kbd:[{] +| move the *beginning* of the selection by one *word* to the *left* + +| kbd:[Ctrl+\]] +| move the *end* of the selection by one *character* to the *right* + +| kbd:[Ctrl+[] +| move the *end* of the selection by one *character* to the *left* + +| kbd:[Ctrl+}] +| move the *beginning* of the selection by one *character* to the *right* + +| kbd:[Ctrl+{] +| move the *beginning* of the selection by one *character* to the *left* +|=== + +[cols="1,2"] +|=== +2+| Navigating documents + +| kbd:[>] +| go to next document + +| kbd:[<] +| go to previous document +|=== === Exporting annotations @@ -284,7 +368,7 @@ When clicking btn:[Export docs & annotations] you are asked to select a file and The export format is the same as the import format. Exported documents and annotations can thus be imported back into a {lb} database. -However for exported documents: +Compared to the previous description of the import format, in exported documents: * `text` is optional (you can choose not to export it to save space, in this case documents can be identified from their MD5 checksum or from the user metadata). * `document_md5_checksum` (containing the hex representation of the MD5 checksum of the text) and `labels` (containing the document’s annotations) are added. @@ -299,6 +383,8 @@ The optional attribute `annotation_approver` can also be added. For example if the text starts with "`hello`" and you highlighted exactly that word, and labelled it with `label_1`, the associated annotation will be `[0, 5, "label_1"]`. +NOTE: Documents are exported in the same order that they were imported.
+ ==== Exporting to `.json` JSON exported annotations might look like: @@ -311,7 +397,8 @@ JSON exported annotations might look like: ] ---- -Each document will always be on one separate line. +Each document will always be on one separate line; this makes it easy to parse the file incrementally. +Moreover as the documents are always in the same order, it gives line-oriented tools such as *diff* or *git* a better chance of producing useful output. ==== Exporting to `.jsonl` @@ -372,6 +459,10 @@ So it may look like: === Exporting labels You can also export labels by clicking "`Export labels`" and selecting a JSON file. +The resulting file will contain an array of JSON objects; the same format as the <>. +Unlike documents, each label is not on a single line. +Both keys `color` and `background_color` are set to the label's color. +Both keys `shortcut_key` and `suffix_key` are set to the label's shortcut key if it has one. === Importing annotations @@ -403,11 +494,11 @@ Moreover, {doca} allows duplicate documents so if the documents were already in === Managing projects -Each {lb} project (a set of documents, labels and annotations) is an {sqlitehome}[SQLite] database. +Each {lb} database (containing documents, labels and annotations) is an {sqlitehome}[SQLite] database. That is a single binary file on your disk that you can copy, backup, or share, like any other file. -TIP: Advanced users can also open a connection directly to the database to query it or even modify it. -Back it up before and set `PRAGMA foreign_keys = ON` +TIP: Using SQLite you can also open a connection directly to the database to query it or even modify it. +If you do so, set `PRAGMA foreign_keys = ON`. When you first start {lb} it creates a new database in `~/labelbuddy_data.sqlite3`. You can switch to a different one by selecting menu:File[Open] or menu:File[New]. @@ -418,13 +509,15 @@ The next time you start {lb}, it will open the last database that you opened.
The database to open can also be specified when invoking {lb} from the command line: .... -labelbuddy /path/to/my_project.sqlite3 +labelbuddy /path/to/my_annotations.sqlite3 .... If you just want to give {lb} a try and don’t have documents or labels yet, you can also select menu:File[Demo] to open a temporary database pre-loaded with a few examples. As it is easy to create and delete databases (an empty {lb} database is just 48K), and to copy documents, labels and annotations from one to another, you have some freedom in the organization of annotation work. -In particular, it is possible to work with several databases rather than a monolithic one – as an example we could create a new database to annotate a fresh batch of documents, then export and merge into a main database once that batch is finished. +In particular, it is possible to work with several databases rather than a monolithic one. +You can break down the annotations into several files to reflect the structure of your project. +As an example you could also create a new database to annotate a fresh batch of documents, then export and merge into a main database once that batch is finished. === Command-line interface @@ -444,7 +537,7 @@ Options: --import-docs Docs & annotations file to import in database. --export-labels Labels file to export to. - --export-annotations Docs & annotations file to export + --export-docs Docs & annotations file to export to. --labelled-only Export only labelled documents --include-text Include doc text with exported @@ -461,6 +554,13 @@ Arguments: If any of the `import-` or `export-` options are used, {lb} doesn’t start a GUI but performs the required import or export operations and exits. It is possible to specify these options several times. To use these options, the database path must be provided explicitly. +Labels are imported first, then documents, then export operations are performed. 
+ +As an example, to convert a previously exported file `docs.xml` to JSON and strip the documents' text, you could run: + +.... +tmpdb=$(mktemp) && labelbuddy $tmpdb --import-docs docs.xml --export-docs docs.json; rm $tmpdb +.... Regarding `vacuum`: when data is deleted from an `sqlite3` database, the file doesn’t shrink. The freed up space is not lost; it is kept and reused when new data is added to the database. diff --git a/docs/changelog b/docs/changelog index fa4b8a4..f52fb38 100644 --- a/docs/changelog +++ b/docs/changelog @@ -1,3 +1,17 @@ +labelbuddy (0.0.3) unstable; urgency=medium + + * minor packaging detail + + -- jerome dockes Sun, 28 Feb 2021 13:05:21 -0500 + +labelbuddy (0.0.2) unstable; urgency=medium + + * Add label shortcuts and misc keybindings + * Improve CLI + * Improve documentation + + -- jerome dockes Sat, 27 Feb 2021 17:37:28 -0500 + labelbuddy (0.0.1) unstable; urgency=low * Initial release. diff --git a/docs/example_data/documentation.txt b/docs/example_data/documentation.txt index d28d2b1..f3741d7 100644 --- a/docs/example_data/documentation.txt +++ b/docs/example_data/documentation.txt @@ -1,542 +1,697 @@ -labelbuddy documentation - - Jérôme Dockès - jerome@dockes.org - Table of Contents - * 1. Introduction - + 1.1. labelbuddy vs other annotation tools - + 1.2. Quick start - * 2. Using labelbuddy - + 2.1. Importing documents - + 2.2. Importing labels - + 2.3. Annotating documents - + 2.4. Exporting annotations - + 2.5. Exporting labels - + 2.6. Importing annotations - + 2.7. Managing projects - + 2.8. Command-line interface - * 3. Conclusion - - This document describes labelbuddy version 0.0.1. -1. Introduction +This doc is better read with a fixed width font: Preferences > monospace, or Help > documentation for html version - labelbuddy is an open-source desktop GUI application for annotating - documents. 
It can be used for example for Part Of Speech tagging, - Named Entity Recognition, sentiment analysis and document - classification … +labelbuddy documentation - It aims to be easy to install and use, and can efficiently handle many - documents, labels and annotations. +Jérôme Dockès +jerome@dockes.org -1.1. labelbuddy vs other annotation tools +Table of Contents - There exist several tools for annotating documents. Most of them, such - as doccano and labelstudio are meant to run on a web server and be - used online. If you are crowdsourcing annotations and want many users - to contribute annotations to a central database without installing - anything on their machine you should turn to one of these tools. +- 1. Introduction + - 1.1. labelbuddy vs other annotation tools + - 1.2. Quick start +- 2. Using labelbuddy + - 2.1. Importing documents + - 2.2. Importing labels + - 2.3. Annotating documents + - 2.4. Exporting annotations + - 2.5. Exporting labels + - 2.6. Importing annotations + - 2.7. Managing projects + - 2.8. Command-line interface +- 3. Conclusion - However if you do not plan to host such a tool on a server, it may not - be convenient for each annotator to install one of these rather - complex programs and run a local server and database management system - on their own machine in order to annotate documents. In this case, it - may be easier to rely on a desktop application such as labelbuddy, - which is a more lightweight solution. +This document describes labelbuddy version 0.0.3. - labelbuddy supports the input and output formats of doccano so it is - possible to switch from one to the other or to combine the work of - annotators that use either. +1. Introduction -1.2. Quick start +labelbuddy is an open-source desktop GUI application for annotating +documents. It can be used for example for Part Of Speech tagging, Named +Entity Recognition, sentiment analysis and document classification … - Start by installing labelbuddy. 
Then to give it a try, you can start - labelbuddy and open a (temporary) demo database by invoking it as: -labelbuddy --demo +It aims to be easy to install and use, and can efficiently handle many +documents, labels and annotations. - (You can also start labelbuddy without any options and then select - File › Demo in the GUI.) You can play around with labelbuddy’s - features in this temporary project. If you decide to start creating - annotations that you want to keep, open a new database and import your - documents and labels. - Note - a labelbuddy database is actually just a regular file on your disk (an - SQLite database). +1.1. labelbuddy vs other annotation tools -2. Using labelbuddy +There exist several tools for annotating documents. Most of them, such +as doccano and labelstudio are meant to run on a web server and be used +online. If you are crowdsourcing annotations and want many users to +contribute annotations to a central database without installing anything +on their machine you should turn to one of these tools. - Documents and labels can be imported into a labelbuddy database from - various formats. Once this is done, you can annotate the documents and - finally export your annotations. It is also possible to import - annotations exported from labelbuddy or doccano. +However if you do not plan to host such a tool on a server, it may not +be convenient for each annotator to install one of these rather complex +programs and run a local server and database management system on their +own machine in order to annotate documents. In this case, it may be +easier to rely on a desktop application such as labelbuddy, which is a +more lightweight solution. - Documents and labels that are already in the database are skipped if - you try to import them again. +labelbuddy supports the input and output formats of doccano so it is +possible to switch from one to the other or to combine the work of +annotators that use either. -2.1. Importing documents +1.2. 
Quick start - In the “Import / Export” tab, click Import docs & annotations and - select a file. +Start by installing labelbuddy. Then to give it a try, you can start +labelbuddy and open a (temporary) demo database by invoking it as: - When importing a new document into labelbuddy, several attributes can - be specified (exactly how will depend on the format used as described - in the following sections): - text + labelbuddy --demo - the content of the document – mandatory. +(You can also start labelbuddy without any options and then select +File › Demo in the GUI.) You can play around with labelbuddy’s features +in this temporary database. If you decide to start creating annotations +that you want to keep, open a new database and import your documents and +labels. - All other attributes are optional: - meta ++-----------------------------------+-----------------------------------+ +| Note | a labelbuddy database is actually | +| | just a regular file on your disk | +| | (an SQLite database). | ++-----------------------------------+-----------------------------------+ - a mapping of user-defined metadata. You can use it to associate some - information with the document, for example an identifier, DOI, - author… This data is not used by labelbuddy. It is stored and bundled - with the document when you export it. - short_title +2. Using labelbuddy - displayed in the “Annotate” tab when annotating the document. - long_title +Documents and labels can be imported into a labelbuddy database from +various formats. Once this is done, you can annotate the documents and +finally export your annotations. It is also possible to import +annotations exported from labelbuddy or doccano. Importing and exporting +data can be done from the graphical or the command line interface. - displayed in the document list in the “Dataset” tab - title - * displayed in the “Annotate” tab if short_title is missing (if both - are missing nothing is displayed). 
- * displayed in the “Dataset” tab if long_title is missing (if both - are missing the beginning of text is displayed). +Documents and labels that are already in the database are skipped if you +try to import them again. - Tip - You can use the short_title to display essential metadata or short - instructions specific to a document. It can contain links by using an - html tag. +2.1. Importing documents - This information can be provided in several plain text formats. The - format is deduced from the filename extension. +In the “Import / Export” tab, click Import docs & annotations and select +a file. + +When importing a new document into labelbuddy, several attributes can be +specified (exactly how will depend on the format used as described in +the following sections): + + ------ ------------------------------------------ + text the content of the document – mandatory. + ------ ------------------------------------------ + +All other attributes are optional: + ++-----------------------------------+-----------------------------------+ +| meta | a mapping of user-defined | +| | metadata. You can use it to | +| | associate some information with | +| | the document, for example an | +| | identifier, DOI, author… This | +| | data is not used by labelbuddy. | +| | It is stored and bundled with the | +| | document when you export it. | ++-----------------------------------+-----------------------------------+ +| short_title | displayed in the “Annotate” tab | +| | when annotating the document. | ++-----------------------------------+-----------------------------------+ +| long_title | displayed in the document list in | +| | the “Dataset” tab | ++-----------------------------------+-----------------------------------+ +| title | - displayed in the “Annotate” | +| | tab if short_title is missing | +| | (if both are missing nothing | +| | is displayed). 
| +| | | +| | - displayed in the “Dataset” | +| | tab if long_title is missing | +| | (if both are missing the | +| | beginning of text is | +| | displayed). | ++-----------------------------------+-----------------------------------+ + ++-----------------------------------+-----------------------------------+ +| Tip | You can use the short_title to | +| | display essential metadata or | +| | short instructions specific to a | +| | document. It can contain links by | +| | using an html tag. | ++-----------------------------------+-----------------------------------+ + +This information can be provided in several plain text formats. The +format is deduced from the filename extension. 2.1.1. From .txt - The simplest format you can use is a .txt. In this case, the file must - contain the text of one document per line. The newlines that separate - documents are not considered part of the document and are discarded. +The simplest format you can use is a .txt. In this case, the file must +contain the text of one document per line. The newlines that separate +documents are not considered part of the document and are discarded. - While convenient, this format has some limitations: you cannot specify - any other document attributes than the text, and the documents cannot - contain newlines. Moreover, the file’s encoding will be interpreted - based on your locale settings. The other import formats share none of - these limitations. +While convenient, this format has some limitations: you cannot specify +any other document attributes than the text, and the documents cannot +contain newlines. Moreover, the file’s encoding will be interpreted +based on your locale settings. The other import formats share none of +these limitations. 2.1.2. From .json - The file must be a JSON file containing one JSON array. Each element - of the array represents one document. These elements are JSON objects - containing at least the key text, and any of the optional attributes. 
- If provided, meta must be a JSON object containing user data about the - document. - - Therefore an imported JSON file might look like: -[ -{"text": "text of first doc", "meta": {"author": "me", "DOI": "123"}}, -{"text": "text of second doc", "short_title": "doc456"} -] - - Moreover, it is also possible to import annotations together with a - new document, or for a document already in the database. - Note - - Besides of the object format above, labelbuddy also accepts another - format, providing one JSON array per document. In this case the first - (mandatory) element of the array is the text, and the second - (optional) one is meta. Other attributes cannot be specified. -[ -["text of first doc", {"title": "doc 1", "DOI": "123"}], -["text of second doc"] -] +The file must be a JSON file containing one JSON array. Each element of +the array represents one document. These elements are JSON objects +containing at least the key text, and any of the optional attributes. If +provided, meta must be a JSON object containing user data about the +document. + +Therefore an imported JSON file might look like: + + [ + {"text": "text of first doc", "meta": {"author": "me", "DOI": "123"}}, + {"text": "text of second doc", "short_title": "doc456"} + ] + +Moreover, it is also possible to import annotations together with a new +document, or for a document already in the database. + ++-----------------------------------+-----------------------------------+ +| Note | Besides of the object format | +| | above, labelbuddy also accepts | +| | another format, providing one | +| | JSON array per document. In this | +| | case the first (mandatory) | +| | element of the array is the text, | +| | and the second (optional) one is | +| | meta. Other attributes cannot be | +| | specified. 
| +| | | +| | [ | +| | ["text of first doc", { | +| | "title": "doc 1", "DOI": "123"}], | +| | ["text of second doc"] | +| | ] | ++-----------------------------------+-----------------------------------+ 2.1.3. From .jsonl - When importing a .json file the whole file is read into memory before - inserting the documents in the database. To read documents one by one - and reduce memory usage, you can use JSON Lines. It is almost the same - as the JSON format, but instead of having one JSON array, the file - must contain one JSON document per line. For example: -{"text": "text of first doc", "meta": {"author": "me", "DOI": "123"}} -{"text": "text of second doc", "short_title": "doc456"} +When importing a .json file the whole file is read into memory before +inserting the documents in the database. To read documents one by one +and reduce memory usage, you can use JSON Lines. It is similar to the +JSON format, but instead of having one JSON array, the file must contain +one JSON document per line. For example: - (Note the outer brackets are removed and the documents are not - separated by commas.) + {"text": "text of first doc", "meta": {"author": "me", "DOI": "123"}} + {"text": "text of second doc", "short_title": "doc456"} - As for .json, .jsonl also allows importing annotations. +Note the outer brackets are removed and the documents are not separated +by commas. The object representing each document must occupy exactly one +line, unlike in .json where whitespace is not important. -2.1.4. From .xml +As for .json, .jsonl also allows importing annotations. - You can also use a simple XML format. In this case as well, the - documents are read one by one. The root element must be document_set - and contain any number of document elements. Each document contains - the text and any additional information. User metadata is provided in - the attributes of an element named meta. A document’s children can - appear in any order. 
- - For example: - - - - text of first doc - - - - text of second doc - doc456 - - - - The same format can be used to import annotations. +2.1.4. From .xml -2.2. Importing labels +You can also use a simple XML format. In this case as well, the +documents are read one by one. The root element must be document_set and +contain any number of document elements. Each document contains the text +and any additional information. User metadata is provided in the +attributes of an element named meta. A document’s children can appear in +any order. - To import labels, click Import labels in the “Import / Export” tab. - Labels have two attributes: a mandatory text (label name), and an - optional color. For compatibility with doccano, color can also be - specified as background_color. As for documents, the format is deduced - from the filename extension when importing labels. The label color can - be changed from within the GUI application . +For example: -2.2.1. From .txt + + + + text of first doc + + + + text of second doc + doc456 + + - The text file contains one label per line. For example: -Noun -Verb -Adjective +The same format can be used to import annotations. - You can specify a color for each label (or labels that contain - newlines) by using the .json format. +2.2. Importing labels -2.2.2. From .json +To import labels, click Import labels in the “Import / Export” tab. +Labels have three attributes: a mandatory text (label name), and an +optional color and shortcut_key. The shortcut_key is a lower-case ASCII +letter (a-z) that helps quickly annotating text with that label. - The file must contain a JSON array containing one JSON object per - label. +For compatibility with doccano, color can also be specified as +background_color and shortcut_key can be specified as suffix_key. - Each label’s object must have the key text and optionally color or the - synonym background_color. (If both color and background_color are - provided color is used.) 
+As for documents, the format is deduced from the filename extension when +importing labels. The label color and shortcut key can be changed from +within the GUI application . - For example: -[ -{"text": "Noun", "color": "#ff0000"}, -{"text": "Verb", "color": "yellow"}, -{"text": "Adjective"} -] +2.2.1. From .txt - Note +The text file contains one label per line. For example: - Besides of the object format above, labelbuddy also accepts another - format, providing one JSON array per label. In this case the first - (mandatory) element of the array is the text (label name), and the - second (optional) one is the color. -[ -["Noun", "#ff0000"], -["Verb", "yellow"], -["Adjective"] -] + Noun + Verb + Adjective -2.3. Annotating documents +To specify a color or shortcut key (or to use labels that contain +newlines), use the .json format. - Once you have imported labels and documents you can see them in the - “Dataset” tab. You can delete labels or documents and change the color - associated with each label. You then go to the “Annotate” tab. (If you - double-click a document it will be opened in the “Annotate” tab.) +2.2.2. From .json - To annotate a document, select the region you want to label with the - mouse and click on the appropriate label. +The file must contain a JSON array containing one JSON object per label. + +Each label’s object must have the key text and optionally color and +shortcut_key (or their synonyms background_color and suffix_key, which +have lower precedence). + +For example: + + [ + {"text": "Noun", "color": "#ff0000"}, + {"text": "Verb", "color": "yellow", "shortcut_key": "v"}, + {"text": "Adjective"} + ] + ++-----------------------------------+-----------------------------------+ +| Note | Besides of the object format | +| | above, labelbuddy also accepts | +| | another format, providing one | +| | JSON array per label. 
In this | +| | case the first (mandatory) | +| | element of the array is the text | +| | (label name), and the second | +| | (optional) one is the color. | +| | | +| | [ | +| | ["Noun", "#ff0000"], | +| | ["Verb", "yellow"], | +| | ["Adjective"] | +| | ] | ++-----------------------------------+-----------------------------------+ - Once you have created annotations, you can select any of them by - clicking it. It becomes bold and underlined and you can change its - label by clicking on a different one or remove the annotation by - clicking Remove. - Tip - If showing the selection in bold is annoying (depending on the font it - can slightly change its size) you can disable it in Preferences › Bold - selected region. +2.3. Annotating documents - If you create a new annotation that overlaps with a previously - existing one, the previously existing one is automatically removed. +Once you have imported labels and documents you can see them in the +“Dataset” tab. You can delete labels or documents and change the color +and shortcut associated with each label. You then go to the “Annotate” +tab. (If you double-click a document it will be opened in the “Annotate” +tab.) + +To annotate a document, select the region you want to label with the +mouse and click on the appropriate label. It is also possible to do the +same thing with the keyboard. Search for the term you want to annotate +and the first match will be selected. The selection can be adjusted with +the keyboard using the bindings described below. Then press the shortcut +key associated with the label you want to set. + +Once you have created annotations, you can select any of them by +clicking it. It becomes bold and underlined and you can change its label +by clicking on a different one or remove the annotation by clicking +Remove. You can also do this with the keyboard: jump to the next +annotation with the Space key and change its label with a label shortcut +or remove it with Backspace.
+ +If you create a new annotation that overlaps with a previously existing +one, the previously existing one is automatically removed. + ++-----------------------------------+-----------------------------------+ +| Tip | If showing the selection in bold | +| | is annoying (depending on the | +| | font it can slightly change its | +| | size) you can disable it in | +| | Preferences › Bold selected | +| | region. | ++-----------------------------------+-----------------------------------+ 2.3.1. Summary of key bindings in the “Annotate” tab - * Ctrl and scroll the mouse: zoom or dezoom the text - * Ctrl+F: search - * Enter: next search match - * Shift+Enter: previous search match - * Ctrl+J, Ctrl+N, Down: scroll down one line - * Ctrl+K, Ctrl+P, Up: scroll up one line - * Ctrl+D: scroll down one page - * Ctrl+U: scroll up one page - * ]: move the end of the selection by one word to the right - * [: move the end of the selection by one word to the left - * }: move the beginning of the selection by one word to the right - * {: move the beginning of the selection by one word to the left - * Ctrl+]: move the end of the selection by one character to the - right - * Ctrl+[: move the end of the selection by one character to the left - * Ctrl+}: move the beginning of the selection by one character to - the right - * Ctrl+{: move the beginning of the selection by one character to - the left ++----------------------+-----------------------------------------------+ +| Searching and | | +| navigation | | ++======================+===============================================+ +| Ctrl and scroll the | zoom or dezoom the text | +| mouse | | ++----------------------+-----------------------------------------------+ +| Ctrl+F, / | search | ++----------------------+-----------------------------------------------+ +| Enter | next search match | ++----------------------+-----------------------------------------------+ +| Shift+Enter | previous search match | 
++----------------------+-----------------------------------------------+ +| Ctrl+J, Ctrl+N, Down | scroll down one line | ++----------------------+-----------------------------------------------+ +| Ctrl+K, Ctrl+P, Up | scroll up one line | ++----------------------+-----------------------------------------------+ +| Ctrl+D | scroll down one page | ++----------------------+-----------------------------------------------+ +| Ctrl+U | scroll up one page | ++----------------------+-----------------------------------------------+ +| Ctrl+L | cycle between placing the cursor at the | +| | center, top and bottom of the window | ++----------------------+-----------------------------------------------+ + ++----------------------+-----------------------------------------------+ +| Manipulating | | +| annotations | | ++======================+===============================================+ +| a-z (label’s | set corresponding label for the currently | +| shortcut_key) | selected region or annotation | ++----------------------+-----------------------------------------------+ +| Backspace | remove selected annotation | ++----------------------+-----------------------------------------------+ +| Space | jump to next annotation and select it | ++----------------------+-----------------------------------------------+ +| Shift+Space | jump to previous annotation and select it | ++----------------------+-----------------------------------------------+ +| Esc | un-select selected annotation | ++----------------------+-----------------------------------------------+ + ++----------------------+-----------------------------------------------+ +| Manipulating the | | +| text selection | | ++======================+===============================================+ +| ] | move the end of the selection by one word to | +| | the right | ++----------------------+-----------------------------------------------+ +| [ | move the end of the selection by one word to | +| | the left | 
++----------------------+-----------------------------------------------+ +| } | move the beginning of the selection by one | +| | word to the right | ++----------------------+-----------------------------------------------+ +| { | move the beginning of the selection by one | +| | word to the left | ++----------------------+-----------------------------------------------+ +| Ctrl+] | move the end of the selection by one | +| | character to the right | ++----------------------+-----------------------------------------------+ +| Ctrl+[ | move the end of the selection by one | +| | character to the left | ++----------------------+-----------------------------------------------+ +| Ctrl+} | move the beginning of the selection by one | +| | character to the right | ++----------------------+-----------------------------------------------+ +| Ctrl+{ | move the beginning of the selection by one | +| | character to the left | ++----------------------+-----------------------------------------------+ + ++----------------------+-----------------------------------------------+ +| Navigating documents | | ++======================+===============================================+ +| > | go to next document | ++----------------------+-----------------------------------------------+ +| < | go to previous document | ++----------------------+-----------------------------------------------+ 2.4. Exporting annotations - Once you are satisfied with your annotations you can export them to an - .json, .jsonl or .xml file to share them or use them in other - applications. - - Back in the “Import / Export” tab, click Export docs & annotations. - You can choose to export all documents or only those that have - annotations. You can choose to export the text of the documents or - not. If you don’t export the text, the documents can be identified - from metadata you may have associated with them, or by the MD5 - checksum of the text that is always exported. 
You can also provide an - “annotation approver” (user name), that will be exported as the - annotation_approver (used by doccano). - - When clicking Export docs & annotations you are asked to select a file - and the resulting format will depend on the filename extension. The - export format is the same as the import format. Exported documents and - annotations can thus be imported back into a labelbuddy database. - - However for exported documents: - * text is optional (you can choose not to export it to save space, - in this case documents can be identified from their MD5 checksum - or from the user metadata). - * document_md5_checksum (containing the hex representation of the - MD5 checksum of the text) and labels (containing the document’s - annotations) are added. The optional attribute annotation_approver - can also be added. - - labels is a list of annotations, each represented by a triplet of: - start_char - - the position of the first character (starting from 0 at the begining - of the text) - end_char - - the position of one past the last character - label - - the label name. - - For example if the text starts with “hello” and you highlighted - exactly that word, and labelled it with label_1, the associated - annotation will be [0, 5, "label_1"]. +Once you are satisfied with your annotations you can export them to an +.json, .jsonl or .xml file to share them or use them in other +applications. + +Back in the “Import / Export” tab, click Export docs & annotations. You +can choose to export all documents or only those that have annotations. +You can choose to export the text of the documents or not. If you don’t +export the text, the documents can be identified from metadata you may +have associated with them, or by the MD5 checksum of the text that is +always exported. You can also provide an “annotation approver” (user +name), that will be exported as the annotation_approver (used by +doccano). 
+ +When clicking Export docs & annotations you are asked to select a file +and the resulting format will depend on the filename extension. The +export format is the same as the import format. Exported documents and +annotations can thus be imported back into a labelbuddy database. + +Compared to the previous description of the import format, in exported +documents: + +- text is optional (you can choose not to export it to save space, in + this case documents can be identified from their MD5 checksum or + from the user metadata). + +- document_md5_checksum (containing the hex representation of the MD5 + checksum of the text) and labels (containing the document’s + annotations) are added. The optional attribute annotation_approver + can also be added. + +labels is a list of annotations, each represented by a triplet of: + + ------------ ----------------------------------------------------------------------------------- + start_char the position of the first character (starting from 0 at the beginning of the text) + end_char the position of one past the last character + label the label name. + ------------ ----------------------------------------------------------------------------------- + +For example if the text starts with “hello” and you highlighted exactly +that word, and labelled it with label_1, the associated annotation will +be [0, 5, "label_1"]. + ++-----------------------------------+-----------------------------------+ +| Note | Documents are exported in the | +| | same order that they were | +| | imported. | ++-----------------------------------+-----------------------------------+ 2.4.1.
Exporting to .json - JSON exported annotations might look like: -[ -{"annotation_approver":"jerome","document_md5_checksum":"f5a42de39848dbdadf79aa -de46135b7a","labels":[[0,4,"Noun"]],"meta":{"DOI":"123","author":"me"},"text":" -text of first doc"}, -{"annotation_approver":"jerome","document_md5_checksum":"d5c080bd4c6033f977182e -757a0059b1","labels":[[0,4,"Verb"],[8,14,"Adjective"]],"meta":{}, "text":"text -of second doc", "short_title": "doc456"} -] +JSON exported annotations might look like: + + [ + {"annotation_approver":"jerome","document_md5_checksum":"f5a42de39848dbdadf79aade46135b7a","labels":[[0,4,"Noun"]],"meta":{"DOI":"123","author":"me"},"text":"text of first doc"}, + {"annotation_approver":"jerome","document_md5_checksum":"d5c080bd4c6033f977182e757a0059b1","labels":[[0,4,"Verb"],[8,14,"Adjective"]],"meta":{}, "text":"text of second doc", "short_title": "doc456"} + ] - Each document will always be on one separate line. +Each document will always be on one separate line; this makes it easy to +parse the file incrementally. Moreover as the documents are always in +the same order, it gives line-oriented tools such as diff or git a +better chance of producing useful output. 2.4.2. 
Exporting to .jsonl - If you choose to export to a JSON lines file, the content will be - almost the same as the JSON one, but with just one JSON object per - line and not one JSON array containing all the documents: -{"annotation_approver":"jerome","document_md5_checksum":"f5a42de39848dbdadf79aa -de46135b7a","labels":[[0,4,"Noun"]],"meta":{"DOI":"123","author":"me"},"text":" -text of first doc"} -{"annotation_approver":"jerome","document_md5_checksum":"d5c080bd4c6033f977182e -757a0059b1","labels":[[0,4,"Verb"],[8,14,"Adjective"]],"meta":{}, "text":"text -of second doc", "short_title": "doc456"} +If you choose to export to a JSON lines file, the content will be almost +the same as the JSON one, but with just one JSON object per line and not +one JSON array containing all the documents: -2.4.3. Exporting to .xml + {"annotation_approver":"jerome","document_md5_checksum":"f5a42de39848dbdadf79aade46135b7a","labels":[[0,4,"Noun"]],"meta":{"DOI":"123","author":"me"},"text":"text of first doc"} + {"annotation_approver":"jerome","document_md5_checksum":"d5c080bd4c6033f977182e757a0059b1","labels":[[0,4,"Verb"],[8,14,"Adjective"]],"meta":{}, "text":"text of second doc", "short_title": "doc456"} - If you choose a .xml file the result is a UTF-8 encoded XML document. - Again the format is the same as for importing with some additional - elements (and possibly no text). +2.4.3. Exporting to .xml - So it may look like: - - - - text of first doc - f5a42de39848dbdadf79aade46135b7a - - jerome - - - 0 - 4 - - - - - - text of second doc - d5c080bd4c6033f977182e757a0059b1 - - jerome - doc456 - - - 0 - 4 - - - - 8 - 14 - - - - - +If you choose a .xml file the result is a UTF-8 encoded XML document. +Again the format is the same as for importing with some additional +elements (and possibly no text). 
+ +So it may look like: + + + + + text of first doc + f5a42de39848dbdadf79aade46135b7a + + jerome + + + 0 + 4 + + + + + + text of second doc + d5c080bd4c6033f977182e757a0059b1 + + jerome + doc456 + + + 0 + 4 + + + + 8 + 14 + + + + + 2.5. Exporting labels - You can also export labels by clicking “Export labels” and selecting a - JSON file. +You can also export labels by clicking “Export labels” and selecting a +JSON file. The resulting file will contain an array of json objects; the +same format as the input format. Unlike documents each label is not on a +single line. Both keys color and background_color are set to the label’s +colors. Both keys shortcut_key and suffix_key are set to the label’s +shortcut key if it has one. 2.6. Importing annotations - Exported annotations can be imported back into the same or another - labelbuddy database. Simply use the Import docs & annotations button - and select the exported file. Labels used in the annotations that are - not in the database will be added (with an arbitrary color that can be - changed in the application). +Exported annotations can be imported back into the same or another +labelbuddy database. Simply use the Import docs & annotations button and +select the exported file. Labels used in the annotations that are not in +the database will be added (with an arbitrary color that can be changed +in the application). - For documents already in the database, annotations will be imported - whether the document’s text was exported together with the annotations - or not. If the text is not present in the exported file, the MD5 - checksum will be used to associate the annotations with the correct - document. +For documents already in the database, annotations will be imported +whether the document’s text was exported together with the annotations +or not. If the text is not present in the exported file, the MD5 +checksum will be used to associate the annotations with the correct +document. 
- To avoid mixing annotations from different sources, if the document - already contains annotations in the database, the new annotations will - not be added. +To avoid mixing annotations from different sources, if the document +already contains annotations in the database, the new annotations will +not be added. - For documents that are not in the database, their text must have been - exported together with the annotations and in this case both the - document and the annotations will be added to the database. +For documents that are not in the database, their text must have been +exported together with the annotations and in this case both the +document and the annotations will be added to the database. 2.6.1. Copying annotations to and from doccano - Documents and annotations exported from doccano can also be imported - into a labelbuddy database. To do so, when exporting from doccano - select the format “jsonl (text label)”. Make sure to save them in a - file with the .jsonl extension (not .json) otherwise labelbuddy will - try to parse it as JSON and JSON Lines is not valid JSON. - Caution - doccano strips leading and trailing whitespace from documents when - importing them. Therefore if you import the result into a labelbuddy - database that already contains the original documents, it may not be - recognized as being the same (labelbuddy doesn’t modify the imported - documents) and you might end up with (near) duplicate documents in the - database. - - Annotations exported from labelbuddy in the .jsonl format together - with the document’s text can also be imported into doccano (selecting - the “jsonl” import format). - Caution - if the original document contained leading whitespace, labelbuddy - annotations will appear shifted when doccano removes the whitespace. 
- Moreover, doccano allows duplicate documents so if the documents were - already in the doccano database, they will appear as new (duplicate) - documents rather than new annotations for existing documents. +Documents and annotations exported from doccano can also be imported +into a labelbuddy database. To do so, when exporting from doccano select +the format “jsonl (text label)”. Make sure to save them in a file with +the .jsonl extension (not .json) otherwise labelbuddy will try to parse +it as JSON and JSON Lines is not valid JSON. + ++-----------------------------------+-----------------------------------+ +| Caution | doccano strips leading and | +| | trailing whitespace from | +| | documents when importing them. | +| | Therefore if you import the | +| | result into a labelbuddy database | +| | that already contains the | +| | original documents, it may not be | +| | recognized as being the same | +| | (labelbuddy doesn’t modify the | +| | imported documents) and you might | +| | end up with (near) duplicate | +| | documents in the database. | ++-----------------------------------+-----------------------------------+ + +Annotations exported from labelbuddy in the .jsonl format together with +the document’s text can also be imported into doccano (selecting the +“jsonl” import format). + ++-----------------------------------+-----------------------------------+ +| Caution | if the original document | +| | contained leading whitespace, | +| | labelbuddy annotations will | +| | appear shifted when doccano | +| | removes the whitespace. Moreover, | +| | doccano allows duplicate | +| | documents so if the documents | +| | were already in the doccano | +| | database, they will appear as new | +| | (duplicate) documents rather than | +| | new annotations for existing | +| | documents. | ++-----------------------------------+-----------------------------------+ 2.7. 
Managing projects - Each labelbuddy project (a set of documents, labels and annotations) - is an SQLite database. That is a single binary file on your disk that - you can copy, backup, or share, like any other file. - Tip - Advanced users can also open a connection directly to the database to - query it or even modify it. Back it up before and set PRAGMA - foreign_keys = ON - - When you first start labelbuddy it creates a new database in - ~/labelbuddy_data.sqlite3. You can switch to a different one by - selecting File › Open or File › New. The path to the current database - is displayed in the “Import / Export” tab. - - The next time you start labelbuddy, it will open the last database - that you opened. - - The database to open can also be specified when invoking labelbuddy - from the command line: -labelbuddy /path/to/my_project.sqlite3 - - If you just want to give labelbuddy a try and don’t have documents or - labels yet, you can also select File › Demo to open a temporary - database pre-loaded with a few examples. - - As it is easy to create and delete databases (an empty labelbuddy - database is just 48K), and to copy documents, labels and annotations - from one to another, you have some freedom in the organization of - annotation work. In particular, it is possible to work with several - databases rather than a monolithic one – as an example we could create - a new database to annotate a fresh batch of documents, then export and - merge into a main database once that batch is finished. +Each labelbuddy database (containing documents, labels and annotations) +is an SQLite database. That is a single binary file on your disk that +you can copy, backup, or share, like any other file. + ++-----------------------------------+-----------------------------------+ +| Tip | Using SQLite you can also open a | +| | connection directly to the | +| | database to query it or even | +| | modify it. If you do so, set | +| | PRAGMA foreign_keys = ON. 
| ++-----------------------------------+-----------------------------------+ + +When you first start labelbuddy it creates a new database in +~/labelbuddy_data.sqlite3. You can switch to a different one by +selecting File › Open or File › New. The path to the current database is +displayed in the “Import / Export” tab. + +The next time you start labelbuddy, it will open the last database that +you opened. + +The database to open can also be specified when invoking labelbuddy from +the command line: + + labelbuddy /path/to/my_annotations.sqlite3 + +If you just want to give labelbuddy a try and don’t have documents or +labels yet, you can also select File › Demo to open a temporary database +pre-loaded with a few examples. + +As it is easy to create and delete databases (an empty labelbuddy +database is just 48K), and to copy documents, labels and annotations +from one to another, you have some freedom in the organization of +annotation work. In particular, it is possible to work with several +databases rather than a monolithic one. You can break down the +annotations into several files to reflect the structure of your project. +As an example you could also create a new database to annotate a fresh +batch of documents, then export and merge into a main database once that +batch is finished. 2.8. Command-line interface - labelbuddy can also be used from the command line to create databases, - import and export documents, labels and annotations without opening - the GUI. See the labelbuddy(1) man page, or labelbuddy -h for a short - list of options reproduced here: -Usage: ./labelbuddy [options] database -Annotate documents. - -Options: - -h, --help Displays this help. - -v, --version Displays version information. - --demo Open a temporary demo database - with pre-loaded docs - --import-labels Labels file to import in database. - --import-docs Docs & annotations file to import - in database. - --export-labels Labels file to export to. 
- --export-annotations Docs & annotations file to export - to. - --labelled-only Export only labelled documents - --include-text Include doc text with exported - annotations - --approver User or 'annotations approver' - name - --vacuum Repack database into minimal - amount of disk space. - -Arguments: - database Database to open. - - If any of the import- or export- options are used, labelbuddy doesn’t - start a GUI but performs the required import or export operations and - exits. It is possible to specify these options several times. To use - these options, the database path must be provided explicitly. - - Regarding vacuum: when data is deleted from an sqlite3 database, the - file doesn’t shrink. The freed up space is not lost; it is kept and - reused when new data is added to the database. To shrink the database - to occupy a minimal amount of disk space, we can use: -labelbuddy --vacuum /path/to/db.sqlite3 - - or equivalently: -sqlite3 /path/to/db.sqlite3 'VACUUM;' - - See more details here. When the vacuum option is used, other options - are ignored and labelbuddy shrinks the database then exits without - starting the GUI. +labelbuddy can also be used from the command line to create databases, +import and export documents, labels and annotations without opening the +GUI. See the labelbuddy(1) man page, or labelbuddy -h for a short list +of options reproduced here: + + Usage: ./labelbuddy [options] database + Annotate documents. + + Options: + -h, --help Displays this help. + -v, --version Displays version information. + --demo Open a temporary demo database + with pre-loaded docs + --import-labels Labels file to import in database. + --import-docs Docs & annotations file to import + in database. + --export-labels Labels file to export to. + --export-docs Docs & annotations file to export + to. 
+ --labelled-only Export only labelled documents + --include-text Include doc text with exported + annotations + --approver User or 'annotations approver' + name + --vacuum Repack database into minimal + amount of disk space. + + Arguments: + database Database to open. + +If any of the import- or export- options are used, labelbuddy doesn’t +start a GUI but performs the required import or export operations and +exits. It is possible to specify these options several times. To use +these options, the database path must be provided explicitly. Labels are +imported first, then documents, then export operations are performed. + +As an example, to convert a previously exported file docs.xml to JSON +and strip the documents' text, you could run: + + tmpdb=$(mktemp) && labelbuddy $tmpdb --import-docs docs.xml --export-docs docs.json; rm $tmpdb + +Regarding vacuum: when data is deleted from an sqlite3 database, the +file doesn’t shrink. The freed up space is not lost; it is kept and +reused when new data is added to the database. To shrink the database to +occupy a minimal amount of disk space, we can use: + + labelbuddy --vacuum /path/to/db.sqlite3 + +or equivalently: + + sqlite3 /path/to/db.sqlite3 'VACUUM;' + +See more details here. When the vacuum option is used, other options are +ignored and labelbuddy shrinks the database then exits without starting +the GUI. 3. Conclusion - labelbuddy was created using C++, Qt, SQLite, tools from the GNU - project, and more. +labelbuddy was created using C++, Qt, SQLite, tools from the GNU +project, and more. - If you find a bug, kindly open an issue on the labelbuddy GitHub - repository. +If you find a bug, kindly open an issue on the labelbuddy GitHub +repository. 
diff --git a/docs/example_data/example_documents.json b/docs/example_data/example_documents.json index c4e0a8b..2944f2c 100644 --- a/docs/example_data/example_documents.json +++ b/docs/example_data/example_documents.json @@ -1 +1 @@ -[{"text": "THIS IS A TEMPORARY DATABASE.\n\nIt will disappear when you close the application. This database contains a few documents and labels to allow trying out labelbuddy. You can create annotations, import more documents and labels and export annotations, but the database itself will be removed when labelbuddy exits.\n\nIf you decide to start a real project you want to keep, from the File menu select \"New\" and choose a location on your filesystem to create a persistent database. You can also close labelbuddy and open it again and by default it will open a database in your home directory: ~/labelbuddy_data.sqlite3.\n\nTime to start annotating documents! Select some text with the mouse and click on one of the labels on the left to annotate it.\n\nOnce you have made annotations you can select any of them by clicking it. 
It becomes underlined and bold and you can then change its label or remove it.\n\nIf you create a new annotation that overlaps with a previously existing one, the existing one is automatically removed.\n\nAnnotations are inserted in the database as you create them so there is no need for a \"Save\" button.\n\nHere is a summary of the keybindings available in this page:\n\nCtrl and scroll the mouse : (un-)zoom the text\nCtrl+F : search\nEnter : next search match\nShift+Enter : previous search match\nCtrl+J, Ctrl+N, Down : scroll down one line\nCtrl+K, Ctrl+P, Up : scroll up one line\nCtrl+D : scroll down one page\nCtrl+U : scroll up one page\n] : move the *end* of the selection by one *word* to the *right*\n[ : move the *end* of the selection by one *word* to the *left*\n} : move the *beginning* of the selection by one *word* to the *right*\n{ : move the *beginning* of the selection by one *word* to the *left*\nCtrl+] : move the *end* of the selection by one *character* to the *right*\nCtrl+[ : move the *end* of the selection by one *character* to the *left*\nCtrl+} : move the *beginning* of the selection by one *character* to the *right*\nCtrl+{ : move the *beginning* of the selection by one *character* to the *left*\n\nIf you prefer to read these documents with a fixed width font, select in the menu bar \"Preferences\" > \"monospace font\". You can prevent the text becoming bold by deselecting \"Preferences\" > \"Bold selected region\".\n\nOnce you are done annotating this document, you can click \"Next\" above to move on to another one.\n\nThe next document is the labelbuddy documentation, and then a few extracts from Wikipedia (https://en.wikipedia.org/wiki/Main_Page) to illustrate using labelbuddy with different languages and writing systems. 
The documentation in HTML format can be seen by clicking \"Help\" > \"Documentation\".\n\nTo see the list of documents, remove documents or labels, or change the labels' colors, go to the \"Dataset\" tab.\n\nTo export your annotations or import more documents or labels, go to the \"Import / Export\" tab.\n", "meta": {"title": "hello_annotations.txt", "md5": "9175772992c5fd6f67e0b004e13d723f"}, "short_title": "Welcome to labelbuddy demo!", "long_title": "Welcome to labelbuddy demo!"}, {"text": "\n J\u00e9r\u00f4me Dock\u00e8s\n jerome@dockes.org\n Table of Contents\n * 1. Introduction\n + 1.1. labelbuddy vs other annotation tools\n + 1.2. Quick start\n * 2. Using labelbuddy\n + 2.1. Importing documents\n + 2.2. Importing labels\n + 2.3. Annotating documents\n + 2.4. Exporting annotations\n + 2.5. Exporting labels\n + 2.6. Importing annotations\n + 2.7. Managing projects\n + 2.8. Command-line interface\n * 3. Conclusion\n\n This document describes labelbuddy version 0.0.1.\n\n1. Introduction\n\n labelbuddy is an open-source desktop GUI application for annotating\n documents. It can be used for example for Part Of Speech tagging,\n Named Entity Recognition, sentiment analysis and document\n classification \u2026\n\n It aims to be easy to install and use, and can efficiently handle many\n documents, labels and annotations.\n\n1.1. labelbuddy vs other annotation tools\n\n There exist several tools for annotating documents. Most of them, such\n as doccano and labelstudio are meant to run on a web server and be\n used online. 
If you are crowdsourcing annotations and want many users\n to contribute annotations to a central database without installing\n anything on their machine you should turn to one of these tools.\n\n However if you do not plan to host such a tool on a server, it may not\n be convenient for each annotator to install one of these rather\n complex programs and run a local server and database management system\n on their own machine in order to annotate documents. In this case, it\n may be easier to rely on a desktop application such as labelbuddy,\n which is a more lightweight solution.\n\n labelbuddy supports the input and output formats of doccano so it is\n possible to switch from one to the other or to combine the work of\n annotators that use either.\n\n1.2. Quick start\n\n Start by installing labelbuddy. Then to give it a try, you can start\n labelbuddy and open a (temporary) demo database by invoking it as:\nlabelbuddy --demo\n\n (You can also start labelbuddy without any options and then select\n File \u203a Demo in the GUI.) You can play around with labelbuddy\u2019s\n features in this temporary project. If you decide to start creating\n annotations that you want to keep, open a new database and import your\n documents and labels.\n Note\n a labelbuddy database is actually just a regular file on your disk (an\n SQLite database).\n\n2. Using labelbuddy\n\n Documents and labels can be imported into a labelbuddy database from\n various formats. Once this is done, you can annotate the documents and\n finally export your annotations. It is also possible to import\n annotations exported from labelbuddy or doccano.\n\n Documents and labels that are already in the database are skipped if\n you try to import them again.\n\n2.1. 
Importing documents\n\n In the \u201cImport / Export\u201d tab, click Import docs & annotations and\n select a file.\n\n When importing a new document into labelbuddy, several attributes can\n be specified (exactly how will depend on the format used as described\n in the following sections):\n text\n\n the content of the document \u2013 mandatory.\n\n All other attributes are optional:\n meta\n\n a mapping of user-defined metadata. You can use it to associate some\n information with the document, for example an identifier, DOI,\n author\u2026 This data is not used by labelbuddy. It is stored and bundled\n with the document when you export it.\n short_title\n\n displayed in the \u201cAnnotate\u201d tab when annotating the document.\n long_title\n\n displayed in the document list in the \u201cDataset\u201d tab\n title\n * displayed in the \u201cAnnotate\u201d tab if short_title is missing (if both\n are missing nothing is displayed).\n * displayed in the \u201cDataset\u201d tab if long_title is missing (if both\n are missing the beginning of text is displayed).\n\n Tip\n You can use the short_title to display essential metadata or short\n instructions specific to a document. It can contain links by using an\n html tag.\n\n This information can be provided in several plain text formats. The\n format is deduced from the filename extension.\n\n2.1.1. From .txt\n\n The simplest format you can use is a .txt. In this case, the file must\n contain the text of one document per line. The newlines that separate\n documents are not considered part of the document and are discarded.\n\n While convenient, this format has some limitations: you cannot specify\n any other document attributes than the text, and the documents cannot\n contain newlines. Moreover, the file\u2019s encoding will be interpreted\n based on your locale settings. The other import formats share none of\n these limitations.\n\n2.1.2. From .json\n\n The file must be a JSON file containing one JSON array. 
Each element\n of the array represents one document. These elements are JSON objects\n containing at least the key text, and any of the optional attributes.\n If provided, meta must be a JSON object containing user data about the\n document.\n\n Therefore an imported JSON file might look like:\n[\n{\"text\": \"text of first doc\", \"meta\": {\"author\": \"me\", \"DOI\": \"123\"}},\n{\"text\": \"text of second doc\", \"short_title\": \"doc456\"}\n]\n\n Moreover, it is also possible to import annotations together with a\n new document, or for a document already in the database.\n Note\n\n Besides of the object format above, labelbuddy also accepts another\n format, providing one JSON array per document. In this case the first\n (mandatory) element of the array is the text, and the second\n (optional) one is meta. Other attributes cannot be specified.\n[\n[\"text of first doc\", {\"title\": \"doc 1\", \"DOI\": \"123\"}],\n[\"text of second doc\"]\n]\n\n2.1.3. From .jsonl\n\n When importing a .json file the whole file is read into memory before\n inserting the documents in the database. To read documents one by one\n and reduce memory usage, you can use JSON Lines. It is almost the same\n as the JSON format, but instead of having one JSON array, the file\n must contain one JSON document per line. For example:\n{\"text\": \"text of first doc\", \"meta\": {\"author\": \"me\", \"DOI\": \"123\"}}\n{\"text\": \"text of second doc\", \"short_title\": \"doc456\"}\n\n (Note the outer brackets are removed and the documents are not\n separated by commas.)\n\n As for .json, .jsonl also allows importing annotations.\n\n2.1.4. From .xml\n\n You can also use a simple XML format. In this case as well, the\n documents are read one by one. The root element must be document_set\n and contain any number of document elements. Each document contains\n the text and any additional information. User metadata is provided in\n the attributes of an element named meta. 
A document\u2019s children can\n appear in any order.\n\n For example:\n\n\n \n text of first doc\n \n \n \n text of second doc\n doc456\n \n\n\n The same format can be used to import annotations.\n\n2.2. Importing labels\n\n To import labels, click Import labels in the \u201cImport / Export\u201d tab.\n Labels have two attributes: a mandatory text (label name), and an\n optional color. For compatibility with doccano, color can also be\n specified as background_color. As for documents, the format is deduced\n from the filename extension when importing labels. The label color can\n be changed from within the GUI application .\n\n2.2.1. From .txt\n\n The text file contains one label per line. For example:\nNoun\nVerb\nAdjective\n\n You can specify a color for each label (or labels that contain\n newlines) by using the .json format.\n\n2.2.2. From .json\n\n The file must contain a JSON array containing one JSON object per\n label.\n\n Each label\u2019s object must have the key text and optionally color or the\n synonym background_color. (If both color and background_color are\n provided color is used.)\n\n For example:\n[\n{\"text\": \"Noun\", \"color\": \"#ff0000\"},\n{\"text\": \"Verb\", \"color\": \"yellow\"},\n{\"text\": \"Adjective\"}\n]\n\n Note\n\n Besides of the object format above, labelbuddy also accepts another\n format, providing one JSON array per label. In this case the first\n (mandatory) element of the array is the text (label name), and the\n second (optional) one is the color.\n[\n[\"Noun\", \"#ff0000\"],\n[\"Verb\", \"yellow\"],\n[\"Adjective\"]\n]\n\n2.3. Annotating documents\n\n Once you have imported labels and documents you can see them in the\n \u201cDataset\u201d tab. You can delete labels or documents and change the color\n associated with each label. You then go to the \u201cAnnotate\u201d tab. 
(If you\n double-click a document it will be opened in the \u201cAnnotate\u201d tab.)\n\n To annotate a document, select the region you want to label with the\n mouse and click on the appropriate label.\n\n Once you have created annotations, you can select any of them by\n clicking it. It becomes bold and underlined and you can change its\n label by clicking on a different one or remove the annotation by\n clicking Remove.\n Tip\n If showing the selection in bold is annoying (depending on the font it\n can slightly change its size) you can disable it in Preferences \u203a Bold\n selected region.\n\n If you create a new annotation that overlaps with a previously\n existing one, the previously existing one is automatically removed.\n\n2.3.1. Summary of key bindings in the \u201cAnnotate\u201d tab\n\n * Ctrl and scroll the mouse: zoom or dezoom the text\n * Ctrl+F: search\n * Enter: next search match\n * Shift+Enter: previous search match\n * Ctrl+J, Ctrl+N, Down: scroll down one line\n * Ctrl+K, Ctrl+P, Up: scroll up one line\n * Ctrl+D: scroll down one page\n * Ctrl+U: scroll up one page\n * ]: move the end of the selection by one word to the right\n * [: move the end of the selection by one word to the left\n * }: move the beginning of the selection by one word to the right\n * {: move the beginning of the selection by one word to the left\n * Ctrl+]: move the end of the selection by one character to the\n right\n * Ctrl+[: move the end of the selection by one character to the left\n * Ctrl+}: move the beginning of the selection by one character to\n the right\n * Ctrl+{: move the beginning of the selection by one character to\n the left\n\n2.4. 
Exporting annotations\n\n Once you are satisfied with your annotations you can export them to an\n .json, .jsonl or .xml file to share them or use them in other\n applications.\n\n Back in the \u201cImport / Export\u201d tab, click Export docs & annotations.\n You can choose to export all documents or only those that have\n annotations. You can choose to export the text of the documents or\n not. If you don\u2019t export the text, the documents can be identified\n from metadata you may have associated with them, or by the MD5\n checksum of the text that is always exported. You can also provide an\n \u201cannotation approver\u201d (user name), that will be exported as the\n annotation_approver (used by doccano).\n\n When clicking Export docs & annotations you are asked to select a file\n and the resulting format will depend on the filename extension. The\n export format is the same as the import format. Exported documents and\n annotations can thus be imported back into a labelbuddy database.\n\n However for exported documents:\n * text is optional (you can choose not to export it to save space,\n in this case documents can be identified from their MD5 checksum\n or from the user metadata).\n * document_md5_checksum (containing the hex representation of the\n MD5 checksum of the text) and labels (containing the document\u2019s\n annotations) are added. The optional attribute annotation_approver\n can also be added.\n\n labels is a list of annotations, each represented by a triplet of:\n start_char\n\n the position of the first character (starting from 0 at the begining\n of the text)\n end_char\n\n the position of one past the last character\n label\n\n the label name.\n\n For example if the text starts with \u201chello\u201d and you highlighted\n exactly that word, and labelled it with label_1, the associated\n annotation will be [0, 5, \"label_1\"].\n\n2.4.1. 
Exporting to .json\n\n JSON exported annotations might look like:\n[\n{\"annotation_approver\":\"jerome\",\"document_md5_checksum\":\"f5a42de39848dbdadf79aa\nde46135b7a\",\"labels\":[[0,4,\"Noun\"]],\"meta\":{\"DOI\":\"123\",\"author\":\"me\"},\"text\":\"\ntext of first doc\"},\n{\"annotation_approver\":\"jerome\",\"document_md5_checksum\":\"d5c080bd4c6033f977182e\n757a0059b1\",\"labels\":[[0,4,\"Verb\"],[8,14,\"Adjective\"]],\"meta\":{}, \"text\":\"text\nof second doc\", \"short_title\": \"doc456\"}\n]\n\n Each document will always be on one separate line.\n\n2.4.2. Exporting to .jsonl\n\n If you choose to export to a JSON lines file, the content will be\n almost the same as the JSON one, but with just one JSON object per\n line and not one JSON array containing all the documents:\n{\"annotation_approver\":\"jerome\",\"document_md5_checksum\":\"f5a42de39848dbdadf79aa\nde46135b7a\",\"labels\":[[0,4,\"Noun\"]],\"meta\":{\"DOI\":\"123\",\"author\":\"me\"},\"text\":\"\ntext of first doc\"}\n{\"annotation_approver\":\"jerome\",\"document_md5_checksum\":\"d5c080bd4c6033f977182e\n757a0059b1\",\"labels\":[[0,4,\"Verb\"],[8,14,\"Adjective\"]],\"meta\":{}, \"text\":\"text\nof second doc\", \"short_title\": \"doc456\"}\n\n2.4.3. Exporting to .xml\n\n If you choose a .xml file the result is a UTF-8 encoded XML document.\n Again the format is the same as for importing with some additional\n elements (and possibly no text).\n\n So it may look like:\n\n\n \n text of first doc\n f5a42de39848dbdadf79aade46135b7a\n \n jerome\n \n \n 0\n 4\n \n \n \n \n \n text of second doc\n d5c080bd4c6033f977182e757a0059b1\n \n jerome\n doc456\n \n \n 0\n 4\n \n \n \n 8\n 14\n \n \n \n \n\n\n2.5. Exporting labels\n\n You can also export labels by clicking \u201cExport labels\u201d and selecting a\n JSON file.\n\n2.6. Importing annotations\n\n Exported annotations can be imported back into the same or another\n labelbuddy database. 
Simply use the Import docs & annotations button\n and select the exported file. Labels used in the annotations that are\n not in the database will be added (with an arbitrary color that can be\n changed in the application).\n\n For documents already in the database, annotations will be imported\n whether the document\u2019s text was exported together with the annotations\n or not. If the text is not present in the exported file, the MD5\n checksum will be used to associate the annotations with the correct\n document.\n\n To avoid mixing annotations from different sources, if the document\n already contains annotations in the database, the new annotations will\n not be added.\n\n For documents that are not in the database, their text must have been\n exported together with the annotations and in this case both the\n document and the annotations will be added to the database.\n\n2.6.1. Copying annotations to and from doccano\n\n Documents and annotations exported from doccano can also be imported\n into a labelbuddy database. To do so, when exporting from doccano\n select the format \u201cjsonl (text label)\u201d. Make sure to save them in a\n file with the .jsonl extension (not .json) otherwise labelbuddy will\n try to parse it as JSON and JSON Lines is not valid JSON.\n Caution\n doccano strips leading and trailing whitespace from documents when\n importing them. 
Therefore if you import the result into a labelbuddy\n database that already contains the original documents, it may not be\n recognized as being the same (labelbuddy doesn\u2019t modify the imported\n documents) and you might end up with (near) duplicate documents in the\n database.\n\n Annotations exported from labelbuddy in the .jsonl format together\n with the document\u2019s text can also be imported into doccano (selecting\n the \u201cjsonl\u201d import format).\n Caution\n if the original document contained leading whitespace, labelbuddy\n annotations will appear shifted when doccano removes the whitespace.\n Moreover, doccano allows duplicate documents so if the documents were\n already in the doccano database, they will appear as new (duplicate)\n documents rather than new annotations for existing documents.\n\n2.7. Managing projects\n\n Each labelbuddy project (a set of documents, labels and annotations)\n is an SQLite database. That is a single binary file on your disk that\n you can copy, backup, or share, like any other file.\n Tip\n Advanced users can also open a connection directly to the database to\n query it or even modify it. Back it up before and set PRAGMA\n foreign_keys = ON\n\n When you first start labelbuddy it creates a new database in\n ~/labelbuddy_data.sqlite3. You can switch to a different one by\n selecting File \u203a Open or File \u203a New. 
The path to the current database\n is displayed in the \u201cImport / Export\u201d tab.\n\n The next time you start labelbuddy, it will open the last database\n that you opened.\n\n The database to open can also be specified when invoking labelbuddy\n from the command line:\nlabelbuddy /path/to/my_project.sqlite3\n\n If you just want to give labelbuddy a try and don\u2019t have documents or\n labels yet, you can also select File \u203a Demo to open a temporary\n database pre-loaded with a few examples.\n\n As it is easy to create and delete databases (an empty labelbuddy\n database is just 48K), and to copy documents, labels and annotations\n from one to another, you have some freedom in the organization of\n annotation work. In particular, it is possible to work with several\n databases rather than a monolithic one \u2013 as an example we could create\n a new database to annotate a fresh batch of documents, then export and\n merge into a main database once that batch is finished.\n\n2.8. Command-line interface\n\n labelbuddy can also be used from the command line to create databases,\n import and export documents, labels and annotations without opening\n the GUI. 
See the labelbuddy(1) man page, or labelbuddy -h for a short\n list of options reproduced here:\nUsage: ./labelbuddy [options] database\nAnnotate documents.\n\nOptions:\n -h, --help Displays this help.\n -v, --version Displays version information.\n --demo Open a temporary demo database\n with pre-loaded docs\n --import-labels Labels file to import in database.\n --import-docs Docs & annotations file to import\n in database.\n --export-labels Labels file to export to.\n --export-annotations Docs & annotations file to export\n to.\n --labelled-only Export only labelled documents\n --include-text Include doc text with exported\n annotations\n --approver User or 'annotations approver'\n name\n --vacuum Repack database into minimal\n amount of disk space.\n\nArguments:\n database Database to open.\n\n If any of the import- or export- options are used, labelbuddy doesn\u2019t\n start a GUI but performs the required import or export operations and\n exits. It is possible to specify these options several times. To use\n these options, the database path must be provided explicitly.\n\n Regarding vacuum: when data is deleted from an sqlite3 database, the\n file doesn\u2019t shrink. The freed up space is not lost; it is kept and\n reused when new data is added to the database. To shrink the database\n to occupy a minimal amount of disk space, we can use:\nlabelbuddy --vacuum /path/to/db.sqlite3\n\n or equivalently:\nsqlite3 /path/to/db.sqlite3 'VACUUM;'\n\n See more details here. When the vacuum option is used, other options\n are ignored and labelbuddy shrinks the database then exits without\n starting the GUI.\n\n3. 
Conclusion\n\n labelbuddy was created using C++, Qt, SQLite, tools from the GNU\n project, and more.\n\n If you find a bug, kindly open an issue on the labelbuddy GitHub\n repository.\n\n", "meta": {"title": "documentation.txt", "md5": "1d9cf27270643211aa29509b877e2234"}, "short_title": "labelbuddy documentation \u2014 online version", "long_title": "labelbuddy documentation"}, {"text": "\nExtract from Wikipedia:\nhttps://ar.wikipedia.org/wiki/%D9%84%D8%BA%D8%A9\n\n\u0627\u0644\u0644\u063a\u0629 \u0647\u064a \u0646\u0633\u0642 \u0639\u0644\u0649 \u0645\u0646 \u0627\u0644\u0625\u0634\u0627\u0631\u0627\u062a \u0648\u0627\u0644\u0631\u0645\u0648\u0632\u060c \u062a\u0634\u0643\u0644 \u0623\u062f\u0627\u0629 \u0645\u0646 \u0623\u062f\u0648\u0627\u062a \u0627\u0644\u0645\u0639\u0631\u0641\u0629\u060c \u0648\u062a\u0639\u062a\u0628\u0631 \u0627\u0644\u0644\u063a\u0629 \u0623\u0647\u0645 \u0648\u0633\u0627\u0626\u0644 \u0627\u0644\u062a\u0641\u0627\u0647\u0645 \u0648\u0627\u0644\u0627\u062d\u062a\u0643\u0627\u0643 \u0628\u064a\u0646 \u0623\u0641\u0631\u0627\u062f \u0627\u0644\u0645\u062c\u062a\u0645\u0639 \u0641\u064a \u062c\u0645\u064a\u0639 \u0645\u064a\u0627\u062f\u064a\u0646 \u0627\u0644\u062d\u064a\u0627\u0629.\n\n\u0648\u0628\u062f\u0648\u0646 \u0627\u0644\u0644\u063a\u0629 \u064a\u062a\u0639\u0630\u0631 \u0646\u0634\u0627\u0637 \u0627\u0644\u0646\u0627\u0633 \u0627\u0644\u0645\u0639\u0631\u0641\u064a. \u062a\u0631\u062a\u0628\u0637 \u0627\u0644\u0644\u063a\u0629 \u0628\u0627\u0644\u062a\u0641\u0643\u064a\u0631 \u0627\u0631\u062a\u0628\u0627\u0637\u064b\u0627 \u0648\u062b\u064a\u0642\u064b\u0627\u061b \u0641\u0623\u0641\u0643\u0627\u0631 \u0627\u0644\u0625\u0646\u0633\u0627\u0646 \u062a\u0635\u0627\u063a \u062f\u0648\u0645\u064b\u0627 \u0641\u064a \u0642\u0627\u0644\u0628 \u0644\u063a\u0648\u064a\u060c \u062d\u062a\u0649 \u0641\u064a \u062d\u0627\u0644 \u062a\u0641\u0643\u064a\u0631\u0647 \u0627\u0644\u0628\u0627\u0637\u0646\u064a. 
\u0648\u0645\u0646 \u062e\u0644\u0627\u0644 \u0627\u0644\u0644\u063a\u0629 \u062a\u062d\u0635\u0644 \u0627\u0644\u0641\u0643\u0631\u0629 \u0641\u0642\u0637 \u0639\u0644\u0649 \u0648\u062c\u0648\u062f\u0647\u0627 \u0627\u0644\u0648\u0627\u0642\u0639\u064a. \u0643\u0645\u0627 \u062a\u0631\u0645\u0632 \u0627\u0644\u0644\u063a\u0629 \u0625\u0644\u0649 \u0627\u0644\u0623\u0634\u064a\u0627\u0621 \u0627\u0644\u0645\u0646\u0639\u0643\u0633\u0629 \u0641\u064a\u0647\u0627\u060c \u0641\u0627\u0644\u0644\u063a\u0629 \u0647\u064a \u0627\u0644\u0642\u062f\u0631\u0629 \u0639\u0644\u0649 \u0627\u0643\u062a\u0633\u0627\u0628 \u0648\u0627\u0633\u062a\u062e\u062f\u0627\u0645 \u0646\u0638\u0627\u0645 \u0645\u0639\u0642\u062f \u0644\u0644\u062a\u0648\u0627\u0635\u0644 \u0648\u062e\u0627\u0635\u0629 \u0642\u062f\u0631\u0629 \u0627\u0644\u0625\u0646\u0633\u0627\u0646 \u0639\u0644\u0649 \u0627\u0644\u0642\u064a\u0627\u0645 \u0628\u0630\u0644\u0643\u060c \u0648\u0627\u0644\u0644\u063a\u0629 \u0647\u064a \u0623\u062d\u062f \u0627\u0644\u0623\u0645\u062b\u0644\u0629 \u0627\u0644\u0645\u062d\u062f\u062f\u0629 \u0645\u0646 \u0647\u0630\u0627 \u0627\u0644\u0646\u0638\u0627\u0645\u060c \u0648\u062a\u0633\u0645\u0649 \u0627\u0644\u062f\u0631\u0627\u0633\u0629 \u0627\u0644\u0639\u0644\u0645\u064a\u0629 \u0644\u0644\u063a\u0629 \u0628\u0639\u0644\u0645 \u0627\u0644\u0644\u063a\u0648\u064a\u0627\u062a.\n\n\u0647\u0646\u0627\u0643 \u062a\u0633\u0627\u0624\u0644\u0627\u062a \u062d\u0648\u0644 \u0641\u0644\u0633\u0641\u0629 \u0627\u0644\u0644\u063a\u0629 \u0646\u0648\u0642\u0634\u062a \u0645\u0646 \u0642\u0628\u0644 \u062c\u0648\u0631\u062c\u064a\u0627\u0633 \u0648\u0628\u0644\u0627\u062a\u0648 \u0641\u064a \u0627\u0644\u064a\u0648\u0646\u0627\u0646 \u0627\u0644\u0642\u062f\u064a\u0645\u0629 \u0645\u062b\u0644 \u0645\u0627 \u0625\u0630\u0627 \u0643\u0627\u0646 \u0644\u0644\u0643\u0644\u0645\u0627\u062a \u064a\u0645\u0643\u0646 \u0623\u0646 \u062a\u0639\u0628\u0631 \u0639\u0646 \u062e\u0628\u0631\u0629 
\u0645\u0627\u060c \u0641\u064a\u0642\u0648\u0644 \u0628\u0639\u0636 \u0645\u0646 \u0627\u0644\u0645\u0641\u0643\u0631\u064a\u0646 \u0645\u062b\u0644 \u0631\u0648\u0633\u0648 \u0623\u0646 \u0627\u0644\u0644\u063a\u0629 \u0646\u0634\u0626\u062a \u0645\u0646 \u0627\u0644\u0639\u0648\u0627\u0637\u0641\u060c \u0628\u064a\u0646\u0645\u0627 \u0622\u062e\u0631\u0648\u0646 \u0645\u062b\u0644 \u0643\u0627\u0646\u062a \u064a\u0631\u0649 \u0623\u0646\u0647\u0627 \u0646\u0634\u0626\u062a \u0645\u0646 \u0627\u0644\u062a\u0641\u0643\u064a\u0631 \u0627\u0644\u0639\u0642\u0644\u0627\u0646\u064a \u0648\u0627\u0644\u0645\u0646\u0637\u0642\u064a\u060c \u0648\u0645\u0646 \u0641\u0644\u0627\u0633\u0641\u0629 \u0627\u0644\u0642\u0631\u0646 \u0627\u0644\u0639\u0634\u0631\u064a\u0646 \u0645\u062b\u0644 \u0648\u064a\u062a\u064a\u0646\u0633\u062a\u0627\u064a\u0646 \u0642\u062f \u0642\u0627\u0644 \u0628\u0623\u0646 \u0627\u0644\u0641\u0644\u0633\u0641\u0629 \u0647\u064a \u062d\u0642\u0627\u064b \u062f\u0631\u0627\u0633\u0629 \u0627\u0644\u0644\u063a\u0629\u060c \u0648\u062a\u0634\u0645\u0644 \u0627\u0644\u0634\u062e\u0635\u064a\u0627\u062a \u0627\u0644\u0631\u0626\u064a\u0633\u064a\u0629 \u0641\u064a \u0639\u0644\u0645 \u0627\u0644\u0644\u063a\u0648\u064a\u0627\u062a \u0641\u0631\u062f\u064a\u0646\u0627\u0646\u062f \u062f\u064a \u0633\u0648\u0633\u064a\u0631\u060c \u0648\u0646\u0648\u0645 \u0646\u0634\u0648\u0645\u0633\u0643\u064a\u060c \u0648\u0648\u064a\u0644\u064a\u0627\u0645 \u0633\u062a\u0648\u0643\u064a\u0648.\n\n\u064a\u062a\u0641\u0627\u0648\u062a \u062a\u0642\u062f\u064a\u0631 \u0639\u062f\u062f \u0627\u0644\u0644\u063a\u0627\u062a \u0641\u064a \u0627\u0644\u0639\u0627\u0644\u0645 \u0628\u064a\u0646 5000 \u06487000 \u0644\u063a\u0629\u060c \u0648\u0645\u0639 \u0630\u0644\u0643 \u0641\u0625\u0646 \u0623\u064a \u062a\u0642\u062f\u064a\u0631 \u062f\u0642\u064a\u0642 \u064a\u0639\u062a\u0645\u062f \u062c\u0632\u0626\u064a\u0627 \u0639\u0644\u0649 
\u0627\u0644\u062a\u0645\u064a\u064a\u0632 \u0627\u0644\u062a\u0639\u0633\u0641\u064a \u0628\u064a\u0646 \u0627\u0644\u0644\u063a\u0627\u062a \u0648\u0627\u0644\u0644\u0647\u062c\u0627\u062a\u060c \u0641\u0627\u0644\u0644\u063a\u0627\u062a \u0627\u0644\u0637\u0628\u064a\u0639\u064a\u0629 \u062a\u0643\u0648\u0646 \u0625\u0645\u0627 \u0644\u063a\u0629 \u0645\u0646\u0637\u0648\u0642\u0629 \u0623\u0648 \u0644\u063a\u0629 \u0627\u0644\u0625\u0634\u0627\u0631\u0629\u060c \u0648\u0644\u0643\u0646 \u064a\u0643\u0646 \u062a\u0631\u0645\u064a\u0632 \u0623\u064a \u0644\u063a\u0629 \u0625\u0644\u0649 \u0648\u0633\u0627\u0626\u0644 \u0627\u0644\u0625\u0639\u0644\u0627\u0645 \u0627\u0644\u062b\u0627\u0646\u0648\u064a\u0629 \u0628\u0627\u0633\u062a\u062e\u062f\u0627\u0645 \u0627\u0644\u0645\u0646\u0628\u0647\u0627\u062a \u0627\u0644\u0633\u0645\u0639\u064a\u0629 \u0648\u0627\u0644\u0628\u0635\u0631\u064a\u0629 \u0623\u0648 \u0627\u0644\u0644\u0645\u0633\u064a\u0629 \u0639\u0644\u0649 \u0633\u0628\u064a\u0644 \u0627\u0644\u0645\u062b\u0627\u0644\u060c \u0641\u064a \u0627\u0644\u0643\u062a\u0627\u0628\u0629 \u0627\u0644\u062a\u0635\u0648\u064a\u0631\u064a\u0629\u060c \u0623\u0648 \u0637\u0631\u064a\u0642\u0629 \u0628\u0631\u064a\u0644 \u0644\u0644\u0645\u0643\u0641\u0648\u0641\u064a\u0646\u060c \u0623\u0648 \u0627\u0644\u0635\u0641\u064a\u0631\u060c \u0648\u0647\u0630\u0627 \u0644\u0623\u0646 \u0644\u063a\u0629 \u0627\u0644\u0625\u0646\u0633\u0627\u0646 \u0647\u064a \u0644\u063a\u0629 \u0645\u0633\u062a\u0642\u0644\u0629. 
\u0648\u064a\u0645\u0643\u0646 \u0644\u0645\u0635\u0637\u0644\u062d \"\u0627\u0644\u0644\u063a\u0629\" \u0639\u0646\u062f\u0645\u0627 \u064a\u0633\u062a\u062e\u062f\u0645 \u0639\u0644\u0649 \u0627\u0644\u0645\u0641\u0647\u0648\u0645 \u0627\u0644\u0639\u0627\u0645 \u0623\u0646 \u064a\u0631\u062c\u0639 \u0625\u0644\u0649 \u0627\u0644\u0642\u062f\u0631\u0629 \u0627\u0644\u0625\u062f\u0631\u0627\u0643\u064a\u0629 \u0644\u062a\u0639\u0644\u064a\u0645 \u0648\u0627\u0633\u062a\u062e\u062f\u0627\u0645 \u0646\u0638\u0627\u0645 \u0627\u0644\u062a\u0648\u0627\u0635\u0644 \u0627\u0644\u0645\u0639\u0642\u062f\u060c \u0623\u0648 \u0625\u0644\u0649 \u0648\u0635\u0641 \u0645\u062c\u0645\u0648\u0639\u0629 \u0645\u0646 \u0627\u0644\u0642\u0648\u0627\u0639\u062f \u0627\u0644\u062a\u064a \u062a\u0634\u0643\u0644 \u0647\u0630\u0647 \u0627\u0644\u0623\u0646\u0638\u0645\u0629\u060c \u0623\u0648 \u0645\u062c\u0645\u0648\u0639\u0629 \u0645\u0646 \u0627\u0644\u062a\u0635\u0631\u064a\u062d\u0627\u062a \u0627\u0644\u062a\u064a \u064a\u0645\u0643\u0646 \u0623\u0646 \u062a\u0646\u062a\u062c \u0645\u0646 \u062a\u0644\u0643 \u0627\u0644\u0642\u0648\u0627\u0639\u062f \u0627\u0639\u062a\u0645\u0627\u062f\u0627 \u0639\u0644\u0649 \u0648\u062c\u0647\u0627\u062a \u0627\u0644\u0646\u0638\u0631 \u0627\u0644\u0641\u0644\u0633\u0641\u064a\u0629 \u0627\u0644\u0645\u062a\u0639\u0644\u0642\u0629 \u0628\u062a\u0639\u0631\u064a\u0641 \u0627\u0644\u0644\u063a\u0629 \u0648\u0627\u0644\u0645\u0639\u0646\u0649\u060c \u0641\u062a\u0639\u062a\u0645\u062f \u062c\u0645\u064a\u0639 \u0627\u0644\u0644\u063a\u0627\u062a \u0639\u0644\u0649 \u0639\u0645\u0644\u064a\u0629 \u0635\u064a\u0631\u0648\u0631\u0629 \u0627\u0644\u0639\u0644\u0627\u0645\u0627\u062a \u0644\u062a\u0634\u064a\u0631 \u0625\u0644\u0649 \u0645\u0639\u0627\u0646\u064a \u0645\u0639\u064a\u0646\u0629 \u0641\u062a\u062d\u062a\u0648\u064a \u0627\u0644\u0644\u063a\u0629 \u0627\u0644\u0645\u0634\u0644\u0641\u0647\u0629 \u0623\u0648 
\u0627\u0644\u0645\u0646\u0637\u0648\u0642\u0629 \u0648\u0644\u063a\u0629 \u0627\u0644\u0625\u0634\u0627\u0631\u0629 \u0639\u0644\u0649 \u0646\u0638\u0627\u0645 \u0635\u0648\u062a\u064a \u064a\u062a\u062d\u0643\u0645 \u0628\u0643\u064a\u0641\u064a\u0629 \u0627\u0633\u062a\u062e\u062f\u0627\u0645 \u0627\u0644\u0631\u0645\u0648\u0632 \u0644\u062a\u0634\u0643\u064a\u0644 \u0633\u0644\u0627\u0633\u0644 \u062a\u0639\u0631\u0641 \u0628\u0627\u0633\u0645 \u0627\u0644\u0643\u0644\u0645\u0627\u062a \u0623\u0648 \u0627\u0644\u0635\u0631\u0641\u064a\u0629\u060c \u0648\u0646\u0638\u0627\u0645 \u0646\u062d\u0648\u064a \u0648\u0627\u0644\u0630\u064a \u064a\u062a\u062d\u0643\u0645 \u0628\u0643\u064a\u0641\u064a\u0629 \u0627\u0644\u062c\u0645\u0639 \u0628\u064a\u0646 \u0627\u0644\u0643\u0644\u0645\u0627\u062a \u0648\u0627\u0644\u0635\u0631\u0641 \u0644\u062a\u0634\u0643\u0644 \u0627\u0644\u0639\u0628\u0627\u0631\u0627\u062a \u0648\u0627\u0644\u0623\u0642\u0648\u0627\u0644.\n\n\u0644\u062f\u0649 \u0644\u063a\u0629 \u0627\u0644\u0625\u0646\u0633\u0627\u0646 \u062e\u0635\u0627\u0626\u0635 \u0639\u062f\u064a\u062f\u0629 \u0643\u0627\u0644\u0625\u0646\u062a\u0627\u062c\u064a\u0629\u060c \u0648\u0627\u0644\u062a\u0643\u0631\u0627\u0631\u060c \u0648\u0627\u0644\u0625\u0632\u0627\u062d\u0629\u060c \u0648\u0627\u0644\u062a\u064a \u062a\u0639\u062a\u0645\u062f \u0639\u0644\u0649 \u0627\u0644\u0627\u062a\u0641\u0627\u0642\u064a\u0629 \u0627\u0644\u0627\u062c\u062a\u0645\u0627\u0639\u064a\u0629 \u0648\u0627\u0644\u062a\u0639\u0644\u0645. 
\u0641\u0647\u064a \u0628\u0646\u064a\u0629 \u0645\u0639\u0642\u062f\u0629 \u062a\u062a\u064a\u062d \u0627\u0644\u062d\u0635\u0648\u0644 \u0639\u0644\u0649 \u0645\u062c\u0645\u0648\u0639\u0629 \u0623\u0648\u0633\u0639 \u0628\u0643\u062b\u064a\u0631 \u0645\u0646 \u0627\u0644\u0639\u0628\u0627\u0631\u0627\u062a \u0645\u0646 \u0623\u064a \u0646\u0638\u0627\u0645 \u0645\u0639\u0631\u0648\u0641 \u0645\u0646 \u0627\u0644\u062a\u0648\u0627\u0635\u0644 \u0627\u0644\u062d\u064a\u0648\u0627\u0646\u064a\u060c \u0648\u064a\u0639\u062a\u0642\u062f \u0623\u0646 \u0627\u0644\u0644\u063a\u0629 \u0642\u062f \u0646\u0634\u0623\u062a \u0639\u0646\u062f\u0645\u0627 \u0628\u062f\u0623\u062a \u0643\u0627\u0626\u0646\u0627\u062a \u0634\u0628\u064a\u0647\u0629 \u0628\u0627\u0644\u0625\u0646\u0633\u0627\u0646 \u0641\u064a \u0648\u0642\u062a \u0645\u0628\u0643\u0631 \u0628\u0627\u0644\u062a\u063a\u064a\u064a\u0631 \u062a\u062f\u0631\u064a\u062c\u064a\u0627 \u0641\u064a \u0646\u0638\u0645 \u0627\u0644\u062a\u0648\u0627\u0635\u0644 \u0627\u0644\u0631\u0626\u064a\u0633\u064a\u0629\u060c \u0648\u0627\u0643\u062a\u0633\u0627\u0628 \u0627\u0644\u0642\u062f\u0631\u0629 \u0639\u0644\u0649 \u062a\u0634\u0643\u064a\u0644 \u0646\u0638\u0631\u064a\u0629 \u0627\u0644\u0639\u0642\u0648\u0644 \u0627\u0644\u0623\u062e\u0631\u0649 \u0648\u0627\u0644\u0642\u0635\u062f \u0627\u0644\u0645\u0634\u062a\u0631\u0643\u060c \u0648\u064a\u0639\u062a\u0642\u062f \u0623\u0646 \u0647\u0630\u0627 \u0627\u0644\u062a\u0637\u0648\u0631 \u0641\u064a \u0628\u0639\u0636 \u0627\u0644\u0623\u062d\u064a\u0627\u0646 \u0642\u062f \u062a\u0632\u0627\u0645\u0646 \u0645\u0639 \u0632\u064a\u0627\u062f\u0629 \u0641\u064a \u062d\u062c\u0645 \u0627\u0644\u0645\u062e\u060c \u0648\u064a\u0631\u0649 \u0627\u0644\u0643\u062b\u064a\u0631 \u0645\u0646 \u0627\u0644\u0644\u063a\u0648\u064a\u064a\u0646 \u0647\u064a\u0627\u0643\u0644 \u0627\u0644\u0644\u063a\u0629 \u0628\u0623\u0646\u0647\u0627 \u062a\u0637\u0648\u0631\u062a 
\u0644\u062a\u062e\u062f\u0645 \u0648\u0638\u0627\u0626\u0641 \u062a\u0648\u0627\u0635\u0644\u064a\u0629 \u0648\u0627\u062c\u062a\u0645\u0627\u0639\u064a\u0629 \u0645\u062d\u062f\u062f\u0629\u060c \u0641\u062a\u062a\u0645 \u0645\u0639\u0627\u0644\u062c\u0629 \u0627\u0644\u0644\u063a\u0629 \u0641\u064a \u0627\u0644\u0639\u062f\u064a\u062f \u0645\u0646 \u0627\u0644\u0645\u0648\u0627\u0642\u0639 \u0627\u0644\u0645\u062e\u062a\u0644\u0641\u0629 \u0641\u064a \u0627\u0644\u062f\u0645\u0627\u063a \u0627\u0644\u0628\u0634\u0631\u064a\u060c \u0648\u0644\u0643\u0646 \u062e\u0635\u0648\u0635\u0627 \u0641\u064a \u0645\u0646\u0627\u0637\u0642 \u0628\u0631\u0648\u0643\u0627 \u0648\u0641\u064a\u0631\u0646\u064a\u0643\u060c \u0648\u064a\u0643\u062a\u0633\u0628 \u0627\u0644\u0628\u0634\u0631 \u0627\u0644\u0644\u063a\u0629 \u0645\u0646 \u062e\u0644\u0627\u0644 \u0627\u0644\u062a\u0641\u0627\u0639\u0644 \u0627\u0644\u0627\u062c\u062a\u0645\u0627\u0639\u064a \u0641\u064a \u0645\u0631\u062d\u0644\u0629 \u0627\u0644\u0637\u0641\u0648\u0644\u0629 \u0627\u0644\u0645\u0628\u0643\u0631\u0629\u060c \u0648\u064a\u062a\u062d\u062f\u062b \u0627\u0644\u0623\u0637\u0641\u0627\u0644 \u0639\u0645\u0648\u0645\u0627 \u0628\u0637\u0644\u0627\u0642\u0629 \u0639\u0646\u062f\u0645\u0627 \u064a\u0628\u0644\u063a\u0648\u0646 \u0645\u0627 \u064a\u0642\u0631\u0628 \u0627\u0644\u062b\u0644\u0627\u062b \u0633\u0646\u0648\u0627\u062a \u0645\u0646 \u0627\u0644\u0639\u0645\u0631\u060c \u0648\u0627\u0633\u062a\u062e\u062f\u0627\u0645 \u0627\u0644\u0644\u063a\u0629 \u0645\u062a\u0623\u0635\u0644 \u0641\u064a \u062b\u0642\u0627\u0641\u0629 \u0627\u0644\u0625\u0646\u0633\u0627\u0646\u060c \u0628\u0627\u0644\u0625\u0636\u0627\u0641\u0629 \u0625\u0644\u0649 \u0627\u0633\u062a\u062e\u062f\u0627\u0645\u0627\u062a\u0647 \u0644\u0644\u062a\u0648\u0627\u0635\u0644 \u0628\u0634\u0643\u0644 \u0635\u0627\u0631\u0645\u060c \u0648\u0623\u064a\u0636\u0627 \u0644\u0644\u063a\u0647 \u0627\u0644\u0639\u062f\u064a\u062f \u0645\u0646 
\u0627\u0644\u0627\u0633\u062a\u062e\u062f\u0627\u0645\u0627\u062a \u0627\u0644\u0627\u062c\u062a\u0645\u0627\u0639\u064a\u0629 \u0648\u0627\u0644\u062b\u0642\u0627\u0641\u064a\u0629\u060c \u0645\u062b\u0644 \u0627\u0644\u062f\u0644\u0627\u0644\u0629 \u0639\u0644\u0649 \u0647\u0648\u064a\u0629 \u0627\u0644\u0645\u062c\u0645\u0648\u0639\u0629\u060c \u0648\u0627\u0644\u0637\u0628\u0642\u0627\u062a \u0627\u0644\u0627\u062c\u062a\u0645\u0627\u0639\u064a\u0629\u060c \u0648\u0643\u0630\u0644\u0643 \u0627\u0644\u0627\u0633\u062a\u0645\u0627\u0644\u0629 \u0627\u0644\u0627\u062c\u062a\u0645\u0627\u0639\u064a\u0629 \u0648\u0627\u0644\u062a\u0631\u0641\u064a\u0647\u064a\u0629.\n\n\u062a\u062a\u0637\u0648\u0631\u0627\u0644\u0644\u063a\u0627\u062a \u0648\u062a\u062a\u0646\u0648\u0639 \u0645\u0639 \u0645\u0631\u0648\u0631 \u0627\u0644\u0648\u0642\u062a\u060c \u0648\u064a\u0645\u0643\u0646 \u0625\u0639\u0627\u062f\u0629 \u062a\u0627\u0631\u064a\u062e \u062a\u0637\u0648\u0631\u0647\u0627 \u0648\u0628\u0646\u0627\u0626\u0647\u0627 \u0645\u0646 \u062e\u0644\u0627\u0644 \u0645\u0642\u0627\u0631\u0646\u0629 \u0627\u0644\u0644\u063a\u0627\u062a \u0627\u0644\u062d\u062f\u064a\u062b\u0629 \u0644\u062a\u062d\u062f\u064a\u062f \u0633\u0645\u0627\u062a \u0644\u063a\u0627\u062a \u0623\u062c\u062f\u0627\u062f\u0647\u0645 \u0627\u0644\u062a\u064a \u064a\u062c\u0628 \u0623\u0646 \u062a\u0643\u0648\u0646 \u0645\u0646 \u0623\u062c\u0644 \u0627\u0644\u0645\u0631\u0627\u062d\u0644 \u0627\u0644\u062a\u0646\u0645\u0648\u064a\u0629 \u0627\u0644\u062a\u064a \u064a\u0645\u0643\u0646 \u0627\u0646 \u062a\u062d\u062f\u062b \u0641\u064a \u0648\u0642\u062a \u0644\u0627\u062d\u0642\u060c \u0648\u0645\u0646 \u0627\u0644\u0645\u0639\u0631\u0648\u0641 \u0623\u0646 \u0645\u062c\u0645\u0648\u0639\u0629 \u0645\u0646 \u0627\u0644\u0644\u063a\u0627\u062a \u0627\u0644\u062a\u064a \u062a\u0646\u062d\u062f\u0631 \u0645\u0646 \u0633\u0644\u0641 \u0645\u0634\u062a\u0631\u0643 \u062a\u0646\u062f\u0631\u062c 
\u0643\u0644\u063a\u0629 \u0627\u0644\u0623\u0633\u0631\u0629 \u0645\u062b\u0644 \u0639\u0627\u0626\u0644\u0629 \u0627\u0644\u0647\u0646\u062f\u0648 \u0623\u0648\u0631\u0648\u0628\u064a\u0629 \u0648\u0647\u064a \u0627\u0644\u0623\u0643\u062b\u0631 \u0627\u0646\u062a\u0634\u0627\u0631\u0627 \u0648\u0627\u0644\u062a\u064a \u062a\u0634\u0645\u0644 \u0644\u063a\u0627\u062a \u0639\u062f\u0629 \u0645\u062b\u0644 \u0627\u0644\u0625\u0646\u062c\u0644\u064a\u0632\u064a\u0629 \u0648\u0627\u0644\u0631\u0648\u0633\u064a\u0629 \u0648\u0627\u0644\u0647\u0646\u062f\u064a\u0629\u060c \u0648\u0627\u0644\u0623\u0633\u0631\u0629 \u0628\u064a\u0646 \u0627\u0644\u0635\u064a\u0646 \u0648\u0627\u0644\u062a\u0628\u062a\u060c \u0648\u0627\u0644\u0630\u064a \u062a\u062a\u0636\u0645\u0646 \u0627\u0644\u0641\u0635\u062d\u0649 \u0648\u0627\u0644\u0644\u063a\u0627\u062a \u0627\u0644\u0635\u064a\u0646\u064a\u0629 \u0627\u0644\u0623\u062e\u0631\u0649\u060c \u0648\u0627\u0644\u062a\u0628\u062a\u060c \u0648\u0639\u0627\u0626\u0644\u0629 \u0627\u0644\u0623\u0641\u0631\u0648 \u0622\u0633\u064a\u0648\u064a\u0629\u060c \u0648\u0627\u0644\u062a\u064a \u062a\u0634\u0645\u0644 \u0627\u0644\u0639\u0631\u0628\u064a\u0629 \u0648\u0627\u0644\u0635\u0648\u0645\u0627\u0644\u064a\u0629\u060c \u0648\u0627\u0644\u0639\u0628\u0631\u064a\u0629. \u0623\u064a\u0636\u0627 \u0644\u063a\u0627\u062a \u0627\u0644\u0628\u0627\u0646\u062a\u0648 \u0648\u0627\u0644\u062a\u064a \u062a\u0634\u0645\u0644 \u0627\u0644\u0633\u0648\u0627\u062d\u0644\u064a\u0629\u060c \u0648\u0627\u0644\u0632\u0648\u0644\u0648\u060c \u0648\u0645\u0626\u0627\u062a \u0645\u0646 \u0627\u0644\u0644\u063a\u0627\u062a \u0627\u0644\u0623\u062e\u0631\u0649 \u0627\u0644\u0645\u0633\u062a\u062e\u062f\u0645\u0629 \u0641\u064a \u062c\u0645\u064a\u0639 \u0623\u0646\u062d\u0627\u0621 \u0623\u0641\u0631\u064a\u0642\u064a\u0627. 
\u0648\u0644\u063a\u0627\u062a \u0645\u0627\u0644\u0627\u064a\u0648-\u0627\u0644\u0628\u0648\u0644\u064a\u0646\u064a\u0632\u064a\u0629\u060c \u0648\u0627\u0644\u062a\u064a \u062a\u0634\u0645\u0644 \u0627\u0644\u0625\u0646\u062f\u0648\u0646\u064a\u0633\u064a\u0629 \u0648\u0627\u0644\u0645\u0627\u0644\u064a\u0632\u064a\u0629 \u0648\u0627\u0644\u062a\u063a\u0627\u0644\u0648\u063a\u060c \u0648\u0645\u0626\u0627\u062a \u0645\u0646 \u0627\u0644\u0644\u063a\u0627\u062a \u0627\u0644\u0623\u062e\u0631\u0649 \u0627\u0644\u0645\u0633\u062a\u062e\u062f\u0645\u0629 \u0641\u064a \u062c\u0645\u064a\u0639 \u0623\u0646\u062d\u0627\u0621 \u0627\u0644\u0645\u062d\u064a\u0637 \u0627\u0644\u0647\u0627\u062f\u064a. \u0648 \u0644\u063a\u0627\u062a \u0623\u0633\u0631\u0629 \u062f\u0631\u0627\u0641\u064a\u062f\u064a\u0648\u0646 \u0627\u0644\u062a\u064a \u063a\u0627\u0644\u0628\u0627 \u064a\u062a\u062d\u062f\u062b \u0628\u0647\u0627 \u062c\u0646\u0648\u0628 \u0627\u0644\u0647\u0646\u062f \u0648\u0627\u0644\u062a\u064a \u062a\u0634\u0645\u0644 \u0644\u063a\u0629 \u0627\u0644\u062a\u0627\u0645\u064a\u0644 \u0648\u0627\u0644\u062a\u064a\u0644\u062c\u0648. 
\u064a\u0630\u0647\u0628 \u0627\u0644\u062a\u0648\u0627\u0641\u0642 \u0627\u0644\u062f\u0631\u0627\u0633\u064a \u0625\u0644\u0649 \u0623\u0646 \u0645\u0627 \u0628\u064a\u0646 50\u066a \u0648 90\u066a \u0645\u0646 \u0627\u0644\u0644\u063a\u0627\u062a \u0627\u0644\u062a\u064a \u064a\u062a\u062d\u062f\u062b \u0628\u0647\u0627 \u0641\u064a \u0628\u062f\u0627\u064a\u0629 \u0627\u0644\u0642\u0631\u0646 \u0627\u064421 \u0639\u0644\u0649 \u0648\u0634\u0643 \u0623\u0646 \u062a\u0646\u0642\u0631\u0636 \u0628\u062d\u0644\u0648\u0644 \u0639\u0627\u0645 2100.\n", "meta": {"title": "wikipedia_language_ar.txt", "md5": "2912510120a2fccfdcb04e47d26a5d82"}, "short_title": "\u0644\u063a\u0629", "long_title": "\u0644\u063a\u0629"}, {"text": "\nExtract from Wikipedia:\nhttps://el.wikipedia.org/wiki/%CE%93%CE%BB%CF%8E%CF%83%CF%83%CE%B1\n\n\u039f \u03cc\u03c1\u03bf\u03c2 \u03b3\u03bb\u03ce\u03c3\u03c3\u03b1 \u03c7\u03c1\u03b7\u03c3\u03b9\u03bc\u03bf\u03c0\u03bf\u03b9\u03b5\u03af\u03c4\u03b1\u03b9 \u03ba\u03c5\u03c1\u03af\u03c9\u03c2 \u03b3\u03b9\u03b1 \u03bd\u03b1 \u03b1\u03bd\u03b1\u03c6\u03b5\u03c1\u03b8\u03bf\u03cd\u03bc\u03b5 \u03c3\u03c4\u03bf\u03bd \u03c4\u03c1\u03cc\u03c0\u03bf \u03b5\u03c0\u03b9\u03ba\u03bf\u03b9\u03bd\u03c9\u03bd\u03af\u03b1\u03c2, \u03b9\u03b4\u03af\u03c9\u03c2 \u03c4\u03bf\u03c5 \u03b1\u03bd\u03b8\u03c1\u03ce\u03c0\u03bf\u03c5, \u03b1\u03bb\u03bb\u03ac \u03bc\u03c0\u03bf\u03c1\u03b5\u03af \u03bd\u03b1 \u03b1\u03bd\u03b1\u03c6\u03ad\u03c1\u03b5\u03c4\u03b1\u03b9 \u03b5\u03c0\u03af\u03c3\u03b7\u03c2 \u03ba\u03b1\u03b9 \u03c3\u03b5 \u03c0\u03b5\u03c1\u03b9\u03c0\u03c4\u03ce\u03c3\u03b5\u03b9\u03c2 \u03c4\u03b5\u03c7\u03bd\u03b7\u03c4\u03ce\u03bd \u03ba\u03b1\u03b9 \u03bc\u03b7 \u03b1\u03bd\u03b8\u03c1\u03ce\u03c0\u03b9\u03bd\u03c9\u03bd \u03c3\u03b7\u03bc\u03b5\u03b9\u03b1\u03ba\u03ce\u03bd \u03c3\u03c5\u03c3\u03c4\u03b7\u03bc\u03ac\u03c4\u03c9\u03bd (\u03bc\u03b9\u03bc\u03cc\u03b3\u03bb\u03c9\u03c3\u03c3\u03b1, \u03c4\u03c5\u03c0\u03b9\u03ba\u03ad\u03c2 
\u03b3\u03bb\u03ce\u03c3\u03c3\u03b5\u03c2 \u03c4\u03c9\u03bd \u03bc\u03b1\u03b8\u03b7\u03bc\u03b1\u03c4\u03b9\u03ba\u03ce\u03bd \u03ba\u03b1\u03b9 \u03c4\u03b7\u03c2 \u03c0\u03bb\u03b7\u03c1\u03bf\u03c6\u03bf\u03c1\u03b9\u03ba\u03ae, \u03b3\u03bb\u03ce\u03c3\u03c3\u03b5\u03c2 \u03c0\u03c1\u03bf\u03b3\u03c1\u03b1\u03bc\u03bc\u03b1\u03c4\u03b9\u03c3\u03bc\u03bf\u03cd, \u03c3\u03c5\u03c3\u03c4\u03ae\u03bc\u03b1\u03c4\u03b1 \u03b5\u03c0\u03b9\u03ba\u03bf\u03b9\u03bd\u03c9\u03bd\u03af\u03b1\u03c2 \u03b6\u03ce\u03c9\u03bd \u03ba.\u03bb\u03c0.).\n\n\u0397 \u03c6\u03c5\u03c3\u03b9\u03ba\u03ae \u03b4\u03b9\u03b1\u03b4\u03b9\u03ba\u03b1\u03c3\u03af\u03b1 \u03b1\u03c0\u03cc\u03ba\u03c4\u03b7\u03c3\u03b7\u03c2 \u03bc\u03b9\u03b1\u03c2 \u03b3\u03bb\u03ce\u03c3\u03c3\u03b1\u03c2, \u03c0\u03bf\u03c5 \u03c3\u03c5\u03bc\u03b2\u03b1\u03af\u03bd\u03b5\u03b9 \u03ba\u03b1\u03c4\u03ac \u03c4\u03b1 4-5 \u03c0\u03c1\u03ce\u03c4\u03b1 \u03c7\u03c1\u03cc\u03bd\u03b9\u03b1 \u03b6\u03c9\u03ae\u03c2 \u03c4\u03bf\u03c5 \u03b1\u03bd\u03b8\u03c1\u03ce\u03c0\u03bf\u03c5, \u03b1\u03bd\u03b1\u03c6\u03ad\u03c1\u03b5\u03c4\u03b1\u03b9 \u03c9\u03c2 \u03bc\u03b7\u03c4\u03c1\u03b9\u03ba\u03ae \u03ba\u03b1\u03b9 \u03b5\u03af\u03bd\u03b1\u03b9 \u03bc\u03ad\u03c1\u03bf\u03c2 \u03c4\u03b7\u03c2 \u03c6\u03c5\u03c3\u03b9\u03ba\u03ae\u03c2 \u03b5\u03be\u03ad\u03bb\u03b9\u03be\u03b7\u03c2 \u03c4\u03bf\u03c5 \u03b1\u03bd\u03b8\u03c1\u03ce\u03c0\u03bf\u03c5. \u0397 \u03b3\u03bb\u03ce\u03c3\u03c3\u03b1 \u03b3\u03af\u03bd\u03b5\u03c4\u03b1\u03b9 \u03c4\u03bf \u03b1\u03c0\u03b1\u03c1\u03b1\u03af\u03c4\u03b7\u03c4\u03bf \u03bc\u03ad\u03c3\u03bf \u03bc\u03b5 \u03c4\u03bf \u03bf\u03c0\u03bf\u03af\u03bf \u03bf \u03ac\u03bd\u03b8\u03c1\u03c9\u03c0\u03bf\u03c2 \u03b5\u03c0\u03b9\u03ba\u03bf\u03b9\u03bd\u03c9\u03bd\u03b5\u03af \u03ba\u03b1\u03b9 \u03b3\u03bd\u03c9\u03c1\u03af\u03b6\u03b5\u03b9 \u03c4\u03bf \u03c0\u03b5\u03c1\u03b9\u03b2\u03ac\u03bb\u03bb\u03bf\u03bd \u03c4\u03bf\u03c5. 
\u039c\u03b5 \u03bc\u03b9\u03b1 \u03bf\u03bb\u03bf\u03ba\u03bb\u03b7\u03c1\u03c9\u03bc\u03ad\u03bd\u03b7 \u03b3\u03bb\u03c9\u03c3\u03c3\u03b9\u03ba\u03ae \u03b1\u03b3\u03c9\u03b3\u03ae, \u03bf \u03bf\u03bc\u03b9\u03bb\u03b7\u03c4\u03ae\u03c2 \u03ba\u03ac\u03b8\u03b5 \u03b3\u03bb\u03ce\u03c3\u03c3\u03b1\u03c2 \u03b5\u03af\u03bd\u03b1\u03b9 \u03c3\u03b5 \u03b8\u03ad\u03c3\u03b7 \u03bd\u03b1 \u03c3\u03c5\u03bd\u03b4\u03c5\u03ac\u03b6\u03b5\u03b9 \u03c4\u03b7 \u03b3\u03bd\u03ce\u03c3\u03b7 \u03c4\u03bf\u03c5 \u03c3\u03c5\u03c3\u03c4\u03ae\u03bc\u03b1\u03c4\u03bf\u03c2 \u03c4\u03b7\u03c2 \u03b3\u03bb\u03ce\u03c3\u03c3\u03b1\u03c2 \u03b3\u03b9\u03b1 \u03c4\u03b7\u03bd \u03c0\u03b1\u03c1\u03b1\u03b3\u03c9\u03b3\u03ae \u03ba\u03b1\u03b9 \u03c4\u03b7\u03bd \u03c0\u03c1\u03cc\u03c3\u03bb\u03b7\u03c8\u03b7 \u03bc\u03b7\u03bd\u03c5\u03bc\u03ac\u03c4\u03c9\u03bd.\n\n\u03a0\u03b9\u03c3\u03c4\u03b5\u03cd\u03b5\u03c4\u03b1\u03b9 \u03cc\u03c4\u03b9 \u03b4\u03b5\u03bd \u03c5\u03c0\u03ac\u03c1\u03c7\u03b5\u03b9 \u03c3\u03b1\u03c6\u03ae\u03c2 \u03b4\u03b9\u03b1\u03c7\u03c9\u03c1\u03b9\u03c3\u03c4\u03b9\u03ba\u03ae \u03b3\u03c1\u03b1\u03bc\u03bc\u03ae \u03b1\u03bd\u03ac\u03bc\u03b5\u03c3\u03b1 \u03c3\u03c4\u03bf\u03bd \u03cc\u03c1\u03bf \u03b3\u03bb\u03ce\u03c3\u03c3\u03b1, \u03bc\u03b5 \u03c4\u03b7 \u03c3\u03b7\u03bc\u03b1\u03c3\u03af\u03b1 \u03c4\u03b7\u03c2 \u03c3\u03c5\u03b3\u03ba\u03b5\u03ba\u03c1\u03b9\u03bc\u03ad\u03bd\u03b7\u03c2 \u03b5\u03b8\u03bd\u03b9\u03ba\u03ae\u03c2 \u03b3\u03bb\u03ce\u03c3\u03c3\u03b1\u03c2, \u03ba\u03b1\u03b9 \u03c3\u03c4\u03bf\u03bd \u03cc\u03c1\u03bf \u03b4\u03b9\u03ac\u03bb\u03b5\u03ba\u03c4\u03bf\u03c2. 
\u03a3\u03cd\u03bc\u03c6\u03c9\u03bd\u03b1 \u03bc\u03b5 \u03c4\u03b7 \u03b4\u03b9\u03b1\u03c4\u03cd\u03c0\u03c9\u03c3\u03b7 \u03c4\u03bf\u03c5 Max Weinrich, \u00ab\u03b3\u03bb\u03ce\u03c3\u03c3\u03b1 \u03b5\u03af\u03bd\u03b1\u03b9 \u03bc\u03b9\u03b1 \u03b4\u03b9\u03ac\u03bb\u03b5\u03ba\u03c4\u03bf\u03c2 \u03bc\u03b5 \u03c3\u03c4\u03c1\u03b1\u03c4\u03cc \u03ba\u03b1\u03b9 \u03c3\u03c4\u03cc\u03bb\u03bf\u00bb, \u03b5\u03bd\u03ce \u2014\u03c3\u03cd\u03bc\u03c6\u03c9\u03bd\u03b1 \u03bc\u03b5 \u03ac\u03bb\u03bb\u03b7 \u03b4\u03b9\u03b1\u03c4\u03cd\u03c0\u03c9\u03c3\u03b7\u2014 \u00ab\u03b4\u03b9\u03ac\u03bb\u03b5\u03ba\u03c4\u03bf\u03c2 \u03b5\u03af\u03bd\u03b1\u03b9 \u03bc\u03b9\u03b1 \u03b7\u03c4\u03c4\u03b7\u03bc\u03ad\u03bd\u03b7 \u03b3\u03bb\u03ce\u03c3\u03c3\u03b1\". \u03a0\u03b9\u03b8\u03b1\u03bd\u03cc\u03c4\u03b1\u03c4\u03b1, \u00ab\u03cc\u03bb\u03b5\u03c2 \u03bf\u03b9 \u03b3\u03bb\u03ce\u03c3\u03c3\u03b5\u03c2 \u03c4\u03bf\u03c5 \u03ba\u03cc\u03c3\u03bc\u03bf\u03c5, \u03c3\u03b5 \u03cc\u03bb\u03bf\u03c5\u03c2 \u03c4\u03bf\u03c5\u03c2 \u03ba\u03b1\u03b9\u03c1\u03bf\u03cd\u03c2, \u03b5\u03af\u03bd\u03b1\u03b9 \u03b4\u03b9\u03ac\u03bb\u03b5\u03ba\u03c4\u03bf\u03b9 \u03bc\u03b9\u03b1\u03c2 \u03c0\u03c1\u03ce\u03c4\u03b7\u03c2 \u03ba\u03bf\u03b9\u03bd\u03ae\u03c2 \u03b3\u03bb\u03ce\u03c3\u03c3\u03b1\u03c2 \u03c0\u03bf\u03c5 \u03c0\u03c1\u03b9\u03bd \u03c7\u03b9\u03bb\u03b9\u03ac\u03b4\u03b5\u03c2 \u03c7\u03c1\u03cc\u03bd\u03b9\u03b1 \u03ae\u03c4\u03b1\u03bd \u03c0\u03b1\u03b3\u03ba\u03cc\u03c3\u03bc\u03b9\u03b1 \u03b3\u03bb\u03ce\u03c3\u03c3\u03b1\u00bb\n", "meta": {"title": "wikipedia_language_el.txt", "md5": "26e477ca3f55320204f109bb07e79198"}, "short_title": "\u0393\u03bb\u03ce\u03c3\u03c3\u03b1", "long_title": "\u0393\u03bb\u03ce\u03c3\u03c3\u03b1"}, {"text": "\nExtract from 
Wikipedia:\nhttps://zh.wikipedia.org/wiki/%E8%AA%9E%E8%A8%80\n\n\u8bed\u8a00\u662f\u4e00\u7c7b\u590d\u5408\u4ea4\u6d41\u7cfb\u7edf\uff0c\u4e3b\u8981\u5305\u542b\u5176\u5f62\u6210\u3001\u4e60\u5f97\u3001\u7ef4\u62a4\u53ca\u5e94\u7528\uff0c\u7279\u522b\u662f\u76f8\u5e94\u7684\u4eba\u7c7b\u80fd\u529b\u3002\u67d0\u4e00\u95e8\u8bed\u8a00\u5219\u662f\u8fd9\u7c7b\u7cfb\u7edf\u7684\u5177\u4f53\u4f8b\u5b50\u3002\u9664\u4e86\u4ea4\u6d41\u6e9d\u901a\u5916\uff0c\u8a9e\u8a00\u4e5f\u662f\u4e00\u500b\u4eba\u7684\u8eab\u5206\u8a8d\u540c\u4e2d\u4e3b\u8981\u7684\u69cb\u6210\u90e8\u5206\uff0c\u4e5f\u662f\u4e00\u500b\u6587\u5316\u7684\u4e3b\u8981\u6210\u5206\u4e4b\u4e00\u3002\n\n\u8bed\u8a00\u5b66\u662f\u5bf9\u8bed\u8a00\u7684\u79d1\u5b66\u7814\u7a76\u3002\u8bed\u8a00\u54f2\u5b66\u65b9\u9762\u76f8\u5173\u7684\u8fa9\u8bba\uff0c\u6bd4\u5982\u8bcd\u6c47\u80fd\u5426\u80fd\u8868\u8ff0\u7ecf\u5386\uff0c\u81f3\u5c11\u53ef\u8ffd\u6eaf\u81f3\u53e4\u5e0c\u814a\u7684\u9ad8\u723e\u5409\u4e9e\u4e0e\u67cf\u62c9\u56fe\u3002\u4ee5\u5362\u68ad\u4e3a\u4ee3\u8868\u7684\u4e00\u4e9b\u601d\u60f3\u5bb6\u8a8d\u70ba\u8a9e\u8a00\u6e90\u81ea\u60c5\u7eea\uff0c\u800c\u5eb7\u5fb7\u8ba4\u4e3a\u5176\u6e90\u4e8e\u7406\u6027\u548c\u903b\u8f91\u7684\u601d\u8fa8\u3002\u8bf8\u5982\u7ef4\u7279\u6839\u65af\u5766\u7b4920\u4e16\u7d00\u7684\u54f2\u5b78\u5bb6\u8bba\u8bc1\u8bf4\u54f2\u5b78\u5b9e\u8d28\u4e0a\u5c31\u662f\u5bf9\u8a9e\u8a00\u7684\u7814\u7a76\u3002\u8457\u540d\u7684\u8bed\u8a00\u5b66\u5bb6\u6709\u5f17\u8fea\u5357\u00b7\u5fb7\u00b7\u7d22\u7dd2\u723e\u3001\u8bfa\u59c6\u00b7\u4e54\u59c6\u65af\u57fa\u7b49\u3002\n\n\u4eba\u7c7b\u8bed\u8a00\u636e\u4f30\u7b97\u7ea6\u67095000\u52307000\u79cd\uff0c\u7531\u4e8e\u8bed\u8a00\u4e0e\u65b9\u8a00\u7684\u5dee\u522b\u96be\u4ee5\u5b9a\u4e49\uff0c\u96be\u4ee5\u6709\u51c6\u786e\u7684\u7edf\u8ba1\u3002\u81ea\u7136\u8bed\u8a00\u5e38\u4ee5\u53e3\u8ff0\u6216\u624b\u8a9e\u65b9\u5f0f\u8868\u9054\uff0c\u4f46\u4efb\u4f55\u8a9e\u8a00\u90fd\u53ef\u4ee5\u7528\u8072\u97f3\uff08\u5982\u53e3\u54e8\u8a
9e\uff09\u3001\u8996\u89ba\uff08\u5982\u624b\u8a9e\uff09\u6216\u662f\u89f8\u89ba\u524c\u6fc0\uff08\u5982\u76f2\u6587\uff09\u6765\u8868\u73b0\u3002\u7b26\u865f\u5b78\u4e2d\u5c06\u8fd9\u4e00\u73b0\u8c61\u63cf\u8ff0\u4e3a\u6a21\u6001\u65e0\u5173\uff08modality-independent\uff09\u3002\u4f9d\u7167\u8bed\u8a00\u54f2\u5b66\u5c0d\u8bed\u8a00\u4e0e\u610f\u4e49\u7684\u5b9a\u4e49\uff0c\u5e7f\u4e49\u4e0a\uff0c\u201c\u8bed\u8a00\u201d\u53ef\u4ee5\u6307\u4ee3\u5b66\u4e60\u5e76\u4f7f\u7528\u8be5\u590d\u6742\u4ea4\u6d41\u7cfb\u7edf\u7684\u8ba4\u77e5\u80fd\u529b\uff0c\u4e5f\u53ef\u4ee5\u63cf\u8ff0\u6784\u6210\u8fd9\u4e00\u7cfb\u7edf\u7684\u89c4\u5219\u96c6\u5408\uff0c\u8fd8\u53ef\u4ee5\u6307\u7531\u8bed\u8a00\u89c4\u5219\u751f\u6210\uff08produce\uff09\u7684\u8bcd\u53e5\u7684\u96c6\u5408\u3002\u6240\u6709\u7684\u8a9e\u8a00\u90fd\u4f9d\u9760\u7b26\u53f7\u8fc7\u7a0b\u4f86\u5c07\u7279\u5b9a\u7684\u8bb0\u53f7\u548c\u610f\u4e49\u76f8\u9023\u7d50\u3002\u53e3\u8bed\u3001\u624b\u8a9e\u53ca\u89f8\u89ba\u8a9e\u8a00\u90fd\u6709\u97f3\u4f4d\u5b78\u7cfb\u7d71\u4f86\u5c07\u7b26\u865f\u7d44\u5408\u6210\u8a5e\u6216\u662f\u8a9e\u7d20\u7684\u7cfb\u7d71\uff0c\u4e5f\u6709\u8bed\u6cd5\u5b66\u7cfb\u7d71\u4f86\u5c07\u8a5e\u53ca\u8a9e\u7d20\u7d44\u5408\u6210\u77ed\u8bed\u548c\u8bdd\u8bed\u3002\n\n\u4eba\u985e\u8a9e\u8a00\u5177\u6709\u521b\u9020\u6027\u548c\u79fb\u4f4d\u6027\uff0c\u5b8c\u5168\u9760\u793e\u6703\u7fd2\u4fd7\u53ca\u5b78\u7fd2\u800c\u4f86\u3002\u8a9e\u8a00\u7684\u8907\u96dc\u7d50\u69cb\u4f7f\u5f97\u5176\u53ef\u8868\u9054\u7684\u7bc4\u570d\u6bd4\u4efb\u4f55\u5df2\u77e5\u7684\u52d5\u7269\u4ea4\u6d41\u7cfb\u7d71\u90fd\u8981\u5ee3\u3002\u4f9d\u5fc3\u7406\u5b78\u7684\u89c0\u9ede\uff0c\u8a9e\u8a00\u662f\u8d77\u6e90\u65bc\u4eba\u65cf\u6709\u4e86\u5f62\u6210\u5fc3\u667a\u7406\u8ad6\u7684\u80fd\u529b\uff0c\u4ee5\u53ca\u6709\u5206\u4eab\u7684\u610f\u5411\uff0c\u4e4b\u5f8c\u6f38\u6f38\u7531\u5176\u539f\u59cb\u7684\u6e9d\u901a\u7cfb\u7d71\u6f14\u8b8a\u800c\u4f86[1][2]\u3002\u9019\u500b\u767c\u5c55\u51fa\u73f
e\u7684\u6642\u671f\u4e5f\u548c\u4eba\u985e\u8166\u5bb9\u91cf\u64f4\u5c55\u7684\u6642\u671f\u5927\u81f4\u76f8\u540c\uff0c\u8a31\u591a\u8a9e\u8a00\u5b78\u5bb6\u8a8d\u70ba\u8a9e\u8a00\u7684\u7d50\u69cb\u6709\u56e0\u70ba\u4e00\u4e9b\u7279\u5b9a\u7684\u6e9d\u901a\u53ca\u793e\u6703\u6a5f\u80fd\u800c\u6f14\u8b8a\u3002\u4eba\u8166\u6709\u8a31\u591a\u90e8\u4efd\u6703\u8655\u7406\u8a9e\u8a00\uff0c\u4f46\u4e3b\u8981\u662f\u5728\u5e03\u82e5\u5361\u6c0f\u533a\u53ca\u97e6\u5c3c\u514b\u533a\u3002\u4eba\u985e\u7684\u8bed\u8a00\u4e60\u5f97\u662f\u5728\u7ae5\u5e74\u65e9\u671f\u7684\u793e\u6703\u4e92\u52d5\u4e2d\u5b78\u7fd2\u7684\uff0c\u5c0f\u5b69\u5927\u7d04\u4e09\u6b72\u5c31\u53ef\u4ee5\u6d41\u5229\u7684\u8aaa\u8a71\u4e86\u3002\u8a9e\u8a00\u7684\u4f7f\u7528\u5df2\u6df1\u6df1\u7d2e\u6839\u65bc\u4eba\u985e\u6587\u5316\u4e4b\u4e2d\u3002\u56e0\u6b64\u8a9e\u8a00\u9664\u4e86\u7528\u5728\u6e9d\u901a\u4e0a\uff0c\u4e5f\u6709\u8a31\u591a\u793e\u6703\u53ca\u6587\u5316\u4e0a\u7684\u7528\u9014\uff0c\u4f8b\u5982\u5f37\u5316\u7fa4\u9ad4\u8eab\u4efd\u8ba4\u540c\u53ca\u793e\u6703\u968e\u5c64\uff0c\u4e5f\u7528\u4f86\u793e\u6703\u6027\u68b3\u7406\u53ca\u5a1b\u6a02\u3002\n\n\u8a9e\u8a00\u6703\u96a8\u6642\u9593\u6f14\u8fdb\u4e0e\u5206\u5316\u3002\u5176\u6f14\u5316\u6b77\u53f2\u53ef\u4ee5\u901a\u8fc7\u4e0e\u73b0\u4ee3\u8bed\u8a00\u7684\u6bd4\u8f03\u4f86\u6784\u62df\uff1a\u4ece\u73b0\u4ee3\u8bed\u8a00\u4e2d\u786e\u5b9a\u54ea\u4e9b\u7279\u8d28\u662f\u7956\u8bed\u5b58\u5728\u7684\uff0c\u7531\u6b64\u5728\u8bed\u8a00\u7684\u540e\u7eed\u53d1\u5c55\u9636\u6bb5\u5f97\u4ee5\u5b58\u7eed\u3002\u6709\u5171\u540c\u7956\u8bed\u7684\u4e00\u7ec4\u8a9e\u8a00\u7a31\u70ba\u8bed\u7cfb\u3002\u5370\u6b27\u8bed\u7cfb\u7684\u8bed\u8a00\u5728\u4eca\u5929\u4e3a\u4f7f\u7528\u4eba\u6570\u4e4b\u6700\uff0c\u5176\u4e3b\u8981\u6210\u5458\u6709\u82f1\u8bed\u3001\u4fc4\u8bed\u548c\u5370\u5730\u8bed\u3002\u6c49\u85cf\u8bed\u7cfb\u5305\u62ec\u6c49\u8bed\u8bf8\u65b9\u8a00[\u8a3b 
1]\u3001\u85cf\u8bed\u3001\u535a\u591a\u8bed\u7b49\u3002\u4e9a\u975e\u8bed\u7cfb\u5305\u62ec\u963f\u62c9\u4f2f\u8bed\u3001\u7d22\u99ac\u91cc\u8a9e\u53ca\u5e0c\u4f2f\u6765\u8bed\u3002\u73ed\u56fe\u8bed\u652f\u4e2d\u6709\u65af\u74e6\u5e0c\u91cc\u8bed\u3001\u7956\u9c81\u8bed\u548c\u5176\u4ed6\u6578\u767e\u7a2e\u975e\u6d32\u8a9e\u8a00\u3002\u9a6c\u6765\uff0d\u6ce2\u5229\u5c3c\u897f\u4e9a\u8bed\u65cf\u5305\u62ec\u5370\u5c3c\u8bed\u3001\u9a6c\u6765\u8bed\u3001\u4ed6\u52a0\u797f\u8a9e\u53ca\u6570\u767e\u79cd\u904d\u5e03\u592a\u5e73\u6d0b\u7684\u8a9e\u8a00\u3002\u8fbe\u7f57\u6bd7\u837c\u8bed\u7cfb\u4e3b\u8981\u5206\u5e03\u4e8e\u5370\u5ea6\u5357\u90e8\uff0c\u5176\u4e2d\u6709\u6cf0\u7c73\u5c14\u8bed\u548c\u6cf0\u5362\u56fa\u8bed\u3002\u5b78\u8853\u754c\u8ba4\u4e3a\u73b0\u5b58\u53e3\u8bed\u4e2d\u670950%\u81f390%\u6703\u57282100\u5e74\u524d\u6ec5\u7d55[3]\u3002\n", "meta": {"title": "wikipedia_language_zh.txt", "md5": "5be6d87531d4a1b4a2e70fe4ae0ed111"}, "short_title": "\u8a9e\u8a00", "long_title": "\u8a9e\u8a00"}] \ No newline at end of file +[{"text": "THIS IS A TEMPORARY DATABASE.\n\nIt will disappear when you close the application. This database contains a few documents and labels to allow trying out labelbuddy. You can create annotations, import more documents and labels and export annotations, but the database itself will be removed when labelbuddy exits.\n\n(You don't have to actually read this text, try annotating it instead!)\n\nIf you decide to start a real project you want to keep, from the File menu select \"New\" and choose a location on your filesystem to create a persistent database. You can also close labelbuddy and open it again and by default it will open a database in your home directory: ~/labelbuddy_data.sqlite3.\n\nTime to start annotating documents! Select some text with the mouse and click on one of the labels on the left to annotate it.\n\nOnce you have made annotations you can select any of them by clicking it. 
It becomes underlined and bold and you can then change its label or remove it.\n\nIf you create a new annotation that overlaps with a previously existing one, the existing one is automatically removed.\n\nAnnotations are inserted in the database as you create them so there is no need for a \"Save\" button.\n\nOnce you are done annotating this document, you can click \"Next\" above to move on to another one.\n\nThe next document is the labelbuddy documentation, and then a few extracts from Wikipedia (https://en.wikipedia.org/wiki/Main_Page) to illustrate using labelbuddy with different languages and writing systems. The documentation in HTML format can be seen by clicking \"Help\" > \"Documentation\".\n\nTo see the list of documents, remove documents or labels, or change the labels' colors, go to the \"Dataset\" tab.\n\nTo export your annotations or import more documents or labels, go to the \"Import / Export\" tab.\n\n\nHere is a summary of the keybindings available in this page:\n\nCtrl and scroll the mouse: zoom or dezoom the text\nCtrl+F, /: search\nEnter: next search match\nShift+Enter: previous search match\nCtrl+J, Ctrl+N, Down: scroll down one line\nCtrl+K, Ctrl+P, Up: scroll up one line\nCtrl+D: scroll down one page\nCtrl+U: scroll up one page\nCtrl+L: cycle between placing the cursor at the center, top and bottom of the\na-z (label\u2019s shortcut_key): set corresponding label for the currently selected region or annotation\nBackspace: remove selected annotation\nSpace: jump to next annotation and select it\nShift+Space: jump to previous annotation and select it\nEsc: un-select selected annotation\n]: move the end of the selection by one word to the right\n[: move the end of the selection by one word to the left\n}: move the beginning of the selection by one word to the right\n{: move the beginning of the selection by one word to the left\nCtrl+]: move the end of the selection by one character to the right\nCtrl+[: move the end of the selection by one 
character to the left\nCtrl+}: move the beginning of the selection by one character to the right\nCtrl+{: move the beginning of the selection by one character to the left\n>: go to next document\n<: go to previous document\n", "meta": {"title": "hello_annotations.txt", "md5": "688b2b588edebecfb567a703453de2e3"}, "short_title": "Welcome to labelbuddy demo!", "long_title": "Welcome to labelbuddy demo!"}, {"text": "\nThis doc is better read with a fixed width font: Preferences > monospace, or Help > documentation for html version\n\nlabelbuddy documentation\n\nJ\u00e9r\u00f4me Dock\u00e8s\njerome@dockes.org\n\nTable of Contents\n\n- 1. Introduction\n - 1.1. labelbuddy vs other annotation tools\n - 1.2. Quick start\n- 2. Using labelbuddy\n - 2.1. Importing documents\n - 2.2. Importing labels\n - 2.3. Annotating documents\n - 2.4. Exporting annotations\n - 2.5. Exporting labels\n - 2.6. Importing annotations\n - 2.7. Managing projects\n - 2.8. Command-line interface\n- 3. Conclusion\n\nThis document describes labelbuddy version 0.0.3.\n\n1. Introduction\n\nlabelbuddy is an open-source desktop GUI application for annotating\ndocuments. It can be used for example for Part Of Speech tagging, Named\nEntity Recognition, sentiment analysis and document classification \u2026\n\nIt aims to be easy to install and use, and can efficiently handle many\ndocuments, labels and annotations.\n\n1.1. labelbuddy vs other annotation tools\n\nThere exist several tools for annotating documents. Most of them, such\nas doccano and labelstudio are meant to run on a web server and be used\nonline. 
If you are crowdsourcing annotations and want many users to\ncontribute annotations to a central database without installing anything\non their machine you should turn to one of these tools.\n\nHowever if you do not plan to host such a tool on a server, it may not\nbe convenient for each annotator to install one of these rather complex\nprograms and run a local server and database management system on their\nown machine in order to annotate documents. In this case, it may be\neasier to rely on a desktop application such as labelbuddy, which is a\nmore lightweight solution.\n\nlabelbuddy supports the input and output formats of doccano so it is\npossible to switch from one to the other or to combine the work of\nannotators that use either.\n\n1.2. Quick start\n\nStart by installing labelbuddy. Then to give it a try, you can start\nlabelbuddy and open a (temporary) demo database by invoking it as:\n\n labelbuddy --demo\n\n(You can also start labelbuddy without any options and then select\nFile\u00a0\u203a Demo in the GUI.) You can play around with labelbuddy\u2019s features\nin this temporary database. If you decide to start creating annotations\nthat you want to keep, open a new database and import your documents and\nlabels.\n\n+-----------------------------------+-----------------------------------+\n| Note | a labelbuddy database is actually |\n| | just a regular file on your disk |\n| | (an SQLite database). |\n+-----------------------------------+-----------------------------------+\n\n2. Using labelbuddy\n\nDocuments and labels can be imported into a labelbuddy database from\nvarious formats. Once this is done, you can annotate the documents and\nfinally export your annotations. It is also possible to import\nannotations exported from labelbuddy or doccano. Importing and exporting\ndata can be done from the graphical or the command line interface.\n\nDocuments and labels that are already in the database are skipped if you\ntry to import them again.\n\n2.1. 
Importing documents\n\nIn the \u201cImport / Export\u201d tab, click Import docs & annotations and select\na file.\n\nWhen importing a new document into labelbuddy, several attributes can be\nspecified (exactly how will depend on the format used as described in\nthe following sections):\n\n ------ ------------------------------------------\n text the content of the document \u2013 mandatory.\n ------ ------------------------------------------\n\nAll other attributes are optional:\n\n+-----------------------------------+-----------------------------------+\n| meta | a mapping of user-defined |\n| | metadata. You can use it to |\n| | associate some information with |\n| | the document, for example an |\n| | identifier, DOI, author\u2026 This |\n| | data is not used by labelbuddy. |\n| | It is stored and bundled with the |\n| | document when you export it. |\n+-----------------------------------+-----------------------------------+\n| short_title | displayed in the \u201cAnnotate\u201d tab |\n| | when annotating the document. |\n+-----------------------------------+-----------------------------------+\n| long_title | displayed in the document list in |\n| | the \u201cDataset\u201d tab |\n+-----------------------------------+-----------------------------------+\n| title | - displayed in the \u201cAnnotate\u201d |\n| | tab if short_title is missing |\n| | (if both are missing nothing |\n| | is displayed). |\n| | |\n| | - displayed in the \u201cDataset\u201d |\n| | tab if long_title is missing |\n| | (if both are missing the |\n| | beginning of text is |\n| | displayed). |\n+-----------------------------------+-----------------------------------+\n\n+-----------------------------------+-----------------------------------+\n| Tip | You can use the short_title to |\n| | display essential metadata or |\n| | short instructions specific to a |\n| | document. It can contain links by |\n| | using an html tag. 
|\n+-----------------------------------+-----------------------------------+\n\nThis information can be provided in several plain text formats. The\nformat is deduced from the filename extension.\n\n2.1.1. From .txt\n\nThe simplest format you can use is a .txt. In this case, the file must\ncontain the text of one document per line. The newlines that separate\ndocuments are not considered part of the document and are discarded.\n\nWhile convenient, this format has some limitations: you cannot specify\nany other document attributes than the text, and the documents cannot\ncontain newlines. Moreover, the file\u2019s encoding will be interpreted\nbased on your locale settings. The other import formats share none of\nthese limitations.\n\n2.1.2. From .json\n\nThe file must be a JSON file containing one JSON array. Each element of\nthe array represents one document. These elements are JSON objects\ncontaining at least the key text, and any of the optional attributes. If\nprovided, meta must be a JSON object containing user data about the\ndocument.\n\nTherefore an imported JSON file might look like:\n\n [\n {\"text\": \"text of first doc\", \"meta\": {\"author\": \"me\", \"DOI\": \"123\"}},\n {\"text\": \"text of second doc\", \"short_title\": \"doc456\"}\n ]\n\nMoreover, it is also possible to import annotations together with a new\ndocument, or for a document already in the database.\n\n+-----------------------------------+-----------------------------------+\n| Note | Besides of the object format |\n| | above, labelbuddy also accepts |\n| | another format, providing one |\n| | JSON array per document. In this |\n| | case the first (mandatory) |\n| | element of the array is the text, |\n| | and the second (optional) one is |\n| | meta. Other attributes cannot be |\n| | specified. 
|\n| | |\n| | [ |\n| | [\"text of first doc\", { |\n| | \"title\": \"doc 1\", \"DOI\": \"123\"}], |\n| | [\"text of second doc\"] |\n| | ] |\n+-----------------------------------+-----------------------------------+\n\n2.1.3. From .jsonl\n\nWhen importing a .json file the whole file is read into memory before\ninserting the documents in the database. To read documents one by one\nand reduce memory usage, you can use JSON Lines. It is similar to the\nJSON format, but instead of having one JSON array, the file must contain\none JSON document per line. For example:\n\n {\"text\": \"text of first doc\", \"meta\": {\"author\": \"me\", \"DOI\": \"123\"}}\n {\"text\": \"text of second doc\", \"short_title\": \"doc456\"}\n\nNote the outer brackets are removed and the documents are not separated\nby commas. The object representing each document must occupy exactly one\nline, unlike in .json where whitespace is not important.\n\nAs for .json, .jsonl also allows importing annotations.\n\n2.1.4. From .xml\n\nYou can also use a simple XML format. In this case as well, the\ndocuments are read one by one. The root element must be document_set and\ncontain any number of document elements. Each document contains the text\nand any additional information. User metadata is provided in the\nattributes of an element named meta. A document\u2019s children can appear in\nany order.\n\nFor example:\n\n \n \n \n text of first doc\n \n \n \n text of second doc\n doc456\n \n \n\nThe same format can be used to import annotations.\n\n2.2. Importing labels\n\nTo import labels, click Import labels in the \u201cImport / Export\u201d tab.\nLabels have three attributes: a mandatory text (label name), and an\noptional color and shortcut_key. 
The shortcut_key is a lower-case ASCII\nletter (a-z) that helps quickly annotating text with that label.\n\nFor compatibility with doccano, color can also be specified as\nbackground_color and shortcut_key can be specified as suffix_key.\n\nAs for documents, the format is deduced from the filename extension when\nimporting labels. The label color and shortcut key can be changed from\nwithin the GUI application .\n\n2.2.1. From .txt\n\nThe text file contains one label per line. For example:\n\n Noun\n Verb\n Adjective\n\nTo specify a color or shortcut key (or to use labels that contain\nnewlines), use the .json format.\n\n2.2.2. From .json\n\nThe file must contain a JSON array containing one JSON object per label.\n\nEach label\u2019s object must have the key text and optionally color and\nshortcut_key (or their synonyms background_color and suffix_key, which\nhave lower precedence).\n\nFor example:\n\n [\n {\"text\": \"Noun\", \"color\": \"#ff0000\"},\n {\"text\": \"Verb\", \"color\": \"yellow\", \"shortcut_key\": \"v\"},\n {\"text\": \"Adjective\"}\n ]\n\n+-----------------------------------+-----------------------------------+\n| Note | Besides of the object format |\n| | above, labelbuddy also accepts |\n| | another format, providing one |\n| | JSON array per label. In this |\n| | case the first (mandatory) |\n| | element of the array is the text |\n| | (label name), and the second |\n| | (optional) one is the color. |\n| | |\n| | [ |\n| | [\"Noun\", \"#ff0000\"], |\n| | [\"Verb\", \"yellow\"], |\n| | [\"Adjective\"] |\n| | ] |\n+-----------------------------------+-----------------------------------+\n\n2.3. Annotating documents\n\nOnce you have imported labels and documents you can see them in the\n\u201cDataset\u201d tab. You can delete labels or documents and change the color\nand shortcut associated with each label. You then go to the \u201cAnnotate\u201d\ntab. 
(If you double-click a document it will be opened in the \u201cAnnotate\u201d\ntab.)\n\nTo annotate a document, select the region you want to label with the\nmouse and click on the appropriate label. It is also possible to do the\nsame thing with the keyboard. Search for the term you want to annotate\nand the first match will be selected. The selection can be adusted with\nthe keyboard using the bindings described below. Then press the shortcut\nkey associated with the label you want to set.\n\nOnce you have created annotations, you can select any of them by\nclicking it. It becomes bold and underlined and you can change its label\nby clicking on a different one or remove the annotation by clicking\nRemove. You can also do this with the keyboard: jump to the next\nannotation with the Space key and change its label with a label shortcut\nor remove it with Backspace.\n\nIf you create a new annotation that overlaps with a previously existing\none, the previously existing one is automatically removed.\n\n+-----------------------------------+-----------------------------------+\n| Tip | If showing the selection in bold |\n| | is annoying (depending on the |\n| | font it can slightly change its |\n| | size) you can disable it in |\n| | Preferences\u00a0\u203a Bold selected |\n| | region. |\n+-----------------------------------+-----------------------------------+\n\n2.3.1. 
Summary of key bindings in the \u201cAnnotate\u201d tab\n\n+----------------------+-----------------------------------------------+\n| Searching and | |\n| navigation | |\n+======================+===============================================+\n| Ctrl and scroll the | zoom or dezoom the text |\n| mouse | |\n+----------------------+-----------------------------------------------+\n| Ctrl+F, / | search |\n+----------------------+-----------------------------------------------+\n| Enter | next search match |\n+----------------------+-----------------------------------------------+\n| Shift+Enter | previous search match |\n+----------------------+-----------------------------------------------+\n| Ctrl+J, Ctrl+N, Down | scroll down one line |\n+----------------------+-----------------------------------------------+\n| Ctrl+K, Ctrl+P, Up | scroll up one line |\n+----------------------+-----------------------------------------------+\n| Ctrl+D | scroll down one page |\n+----------------------+-----------------------------------------------+\n| Ctrl+U | scroll up one page |\n+----------------------+-----------------------------------------------+\n| Ctrl+L | cycle between placing the cursor at the |\n| | center, top and bottom of the window |\n+----------------------+-----------------------------------------------+\n\n+----------------------+-----------------------------------------------+\n| Manipulating | |\n| annotations | |\n+======================+===============================================+\n| a-z (label\u2019s | set corresponding label for the currently |\n| shortcut_key) | selected region or annotation |\n+----------------------+-----------------------------------------------+\n| Backspace | remove selected annotation |\n+----------------------+-----------------------------------------------+\n| Space | jump to next annotation and select it |\n+----------------------+-----------------------------------------------+\n| Shift+Space | jump to previous annotation 
and select it |\n+----------------------+-----------------------------------------------+\n| Esc | un-select selected annotation |\n+----------------------+-----------------------------------------------+\n\n+----------------------+-----------------------------------------------+\n| Manipulating the | |\n| text selection | |\n+======================+===============================================+\n| ] | move the end of the selection by one word to |\n| | the right |\n+----------------------+-----------------------------------------------+\n| [ | move the end of the selection by one word to |\n| | the left |\n+----------------------+-----------------------------------------------+\n| } | move the beginning of the selection by one |\n| | word to the right |\n+----------------------+-----------------------------------------------+\n| { | move the beginning of the selection by one |\n| | word to the left |\n+----------------------+-----------------------------------------------+\n| Ctrl+] | move the end of the selection by one |\n| | character to the right |\n+----------------------+-----------------------------------------------+\n| Ctrl+[ | move the end of the selection by one |\n| | character to the left |\n+----------------------+-----------------------------------------------+\n| Ctrl+} | move the beginning of the selection by one |\n| | character to the right |\n+----------------------+-----------------------------------------------+\n| Ctrl+{ | move the beginning of the selection by one |\n| | character to the left |\n+----------------------+-----------------------------------------------+\n\n+----------------------+-----------------------------------------------+\n| Navigating documents | |\n+======================+===============================================+\n| > | go to next document |\n+----------------------+-----------------------------------------------+\n| < | go to previous document 
|\n+----------------------+-----------------------------------------------+\n\n2.4. Exporting annotations\n\nOnce you are satisfied with your annotations you can export them to an\n.json, .jsonl or .xml file to share them or use them in other\napplications.\n\nBack in the \u201cImport / Export\u201d tab, click Export docs & annotations. You\ncan choose to export all documents or only those that have annotations.\nYou can choose to export the text of the documents or not. If you don\u2019t\nexport the text, the documents can be identified from metadata you may\nhave associated with them, or by the MD5 checksum of the text that is\nalways exported. You can also provide an \u201cannotation approver\u201d (user\nname), that will be exported as the annotation_approver (used by\ndoccano).\n\nWhen clicking Export docs & annotations you are asked to select a file\nand the resulting format will depend on the filename extension. The\nexport format is the same as the import format. Exported documents and\nannotations can thus be imported back into a labelbuddy database.\n\nCompared to previous description of the import format, in exported\ndocuments:\n\n- text is optional (you can choose not to export it to save space, in\n this case documents can be identified from their MD5 checksum or\n from the user metadata).\n\n- document_md5_checksum (containing the hex representation of the MD5\n checksum of the text) and labels (containing the document\u2019s\n annotations) are added. 
The optional attribute annotation_approver\n can also be added.\n\nlabels is a list of annotations, each represented by a triplet of:\n\n ------------ -----------------------------------------------------------------------------------\n start_char the position of the first character (starting from 0 at the begining of the text)\n end_char the position of one past the last character\n label the label name.\n ------------ -----------------------------------------------------------------------------------\n\nFor example if the text starts with \u201chello\u201d and you highlighted exactly\nthat word, and labelled it with label_1, the associated annotation will\nbe [0, 5, \"label_1\"].\n\n+-----------------------------------+-----------------------------------+\n| Note | Documents are exported in the |\n| | same order that they were |\n| | imported. |\n+-----------------------------------+-----------------------------------+\n\n2.4.1. Exporting to .json\n\nJSON exported annotations might look like:\n\n [\n {\"annotation_approver\":\"jerome\",\"document_md5_checksum\":\"f5a42de39848dbdadf79aade46135b7a\",\"labels\":[[0,4,\"Noun\"]],\"meta\":{\"DOI\":\"123\",\"author\":\"me\"},\"text\":\"text of first doc\"},\n {\"annotation_approver\":\"jerome\",\"document_md5_checksum\":\"d5c080bd4c6033f977182e757a0059b1\",\"labels\":[[0,4,\"Verb\"],[8,14,\"Adjective\"]],\"meta\":{}, \"text\":\"text of second doc\", \"short_title\": \"doc456\"}\n ]\n\nEach document will always be on one separate line; this makes it easy to\nparse the file incrementally. Moreover as the documents are always in\nthe same order, it gives line-oriented tools such as diff or git a\nbetter chance of producing useful output.\n\n2.4.2. 
Exporting to .jsonl\n\nIf you choose to export to a JSON lines file, the content will be almost\nthe same as the JSON one, but with just one JSON object per line and not\none JSON array containing all the documents:\n\n {\"annotation_approver\":\"jerome\",\"document_md5_checksum\":\"f5a42de39848dbdadf79aade46135b7a\",\"labels\":[[0,4,\"Noun\"]],\"meta\":{\"DOI\":\"123\",\"author\":\"me\"},\"text\":\"text of first doc\"}\n {\"annotation_approver\":\"jerome\",\"document_md5_checksum\":\"d5c080bd4c6033f977182e757a0059b1\",\"labels\":[[0,4,\"Verb\"],[8,14,\"Adjective\"]],\"meta\":{}, \"text\":\"text of second doc\", \"short_title\": \"doc456\"}\n\n2.4.3. Exporting to .xml\n\nIf you choose a .xml file the result is a UTF-8 encoded XML document.\nAgain the format is the same as for importing with some additional\nelements (and possibly no text).\n\nSo it may look like:\n\n \n \n \n text of first doc\n f5a42de39848dbdadf79aade46135b7a\n \n jerome\n \n \n 0\n 4\n \n \n \n \n \n text of second doc\n d5c080bd4c6033f977182e757a0059b1\n \n jerome\n doc456\n \n \n 0\n 4\n \n \n \n 8\n 14\n \n \n \n \n \n\n2.5. Exporting labels\n\nYou can also export labels by clicking \u201cExport labels\u201d and selecting a\nJSON file. The resulting file will contain an array of json objects; the\nsame format as the input format. Unlike documents each label is not on a\nsingle line. Both keys color and background_color are set to the label\u2019s\ncolors. Both keys shortcut_key and suffix_key are set to the label\u2019s\nshortcut key if it has one.\n\n2.6. Importing annotations\n\nExported annotations can be imported back into the same or another\nlabelbuddy database. Simply use the Import docs & annotations button and\nselect the exported file. 
Labels used in the annotations that are not in\nthe database will be added (with an arbitrary color that can be changed\nin the application).\n\nFor documents already in the database, annotations will be imported\nwhether the document\u2019s text was exported together with the annotations\nor not. If the text is not present in the exported file, the MD5\nchecksum will be used to associate the annotations with the correct\ndocument.\n\nTo avoid mixing annotations from different sources, if the document\nalready contains annotations in the database, the new annotations will\nnot be added.\n\nFor documents that are not in the database, their text must have been\nexported together with the annotations and in this case both the\ndocument and the annotations will be added to the database.\n\n2.6.1. Copying annotations to and from doccano\n\nDocuments and annotations exported from doccano can also be imported\ninto a labelbuddy database. To do so, when exporting from doccano select\nthe format \u201cjsonl (text label)\u201d. Make sure to save them in a file with\nthe .jsonl extension (not .json) otherwise labelbuddy will try to parse\nit as JSON and JSON Lines is not valid JSON.\n\n+-----------------------------------+-----------------------------------+\n| Caution | doccano strips leading and |\n| | trailing whitespace from |\n| | documents when importing them. |\n| | Therefore if you import the |\n| | result into a labelbuddy database |\n| | that already contains the |\n| | original documents, it may not be |\n| | recognized as being the same |\n| | (labelbuddy doesn\u2019t modify the |\n| | imported documents) and you might |\n| | end up with (near) duplicate |\n| | documents in the database. 
|\n+-----------------------------------+-----------------------------------+\n\nAnnotations exported from labelbuddy in the .jsonl format together with\nthe document\u2019s text can also be imported into doccano (selecting the\n\u201cjsonl\u201d import format).\n\n+-----------------------------------+-----------------------------------+\n| Caution | if the original document |\n| | contained leading whitespace, |\n| | labelbuddy annotations will |\n| | appear shifted when doccano |\n| | removes the whitespace. Moreover, |\n| | doccano allows duplicate |\n| | documents so if the documents |\n| | were already in the doccano |\n| | database, they will appear as new |\n| | (duplicate) documents rather than |\n| | new annotations for existing |\n| | documents. |\n+-----------------------------------+-----------------------------------+\n\n2.7. Managing projects\n\nEach labelbuddy database (containing documents, labels and annotations)\nis an SQLite database. That is a single binary file on your disk that\nyou can copy, backup, or share, like any other file.\n\n+-----------------------------------+-----------------------------------+\n| Tip | Using SQLite you can also open a |\n| | connection directly to the |\n| | database to query it or even |\n| | modify it. If you do so, set |\n| | PRAGMA foreign_keys = ON. |\n+-----------------------------------+-----------------------------------+\n\nWhen you first start labelbuddy it creates a new database in\n~/labelbuddy_data.sqlite3. You can switch to a different one by\nselecting File\u00a0\u203a Open or File\u00a0\u203a New. 
The path to the current database is\ndisplayed in the \u201cImport / Export\u201d tab.\n\nThe next time you start labelbuddy, it will open the last database that\nyou opened.\n\nThe database to open can also be specified when invoking labelbuddy from\nthe command line:\n\n labelbuddy /path/to/my_annotations.sqlite3\n\nIf you just want to give labelbuddy a try and don\u2019t have documents or\nlabels yet, you can also select File\u00a0\u203a Demo to open a temporary database\npre-loaded with a few examples.\n\nAs it is easy to create and delete databases (an empty labelbuddy\ndatabase is just 48K), and to copy documents, labels and annotations\nfrom one to another, you have some freedom in the organization of\nannotation work. In particular, it is possible to work with several\ndatabases rather than a monolithic one. You can break down the\nannotations into several files to reflect the structure of your project.\nAs an example you could also create a new database to annotate a fresh\nbatch of documents, then export and merge into a main database once that\nbatch is finished.\n\n2.8. Command-line interface\n\nlabelbuddy can also be used from the command line to create databases,\nimport and export documents, labels and annotations without opening the\nGUI. 
See the labelbuddy(1) man page, or labelbuddy -h for a short list\nof options reproduced here:\n\n Usage: ./labelbuddy [options] database\n Annotate documents.\n\n Options:\n -h, --help Displays this help.\n -v, --version Displays version information.\n --demo Open a temporary demo database\n with pre-loaded docs\n --import-labels Labels file to import in database.\n --import-docs Docs & annotations file to import\n in database.\n --export-labels Labels file to export to.\n --export-docs Docs & annotations file to export\n to.\n --labelled-only Export only labelled documents\n --include-text Include doc text with exported\n annotations\n --approver User or 'annotations approver'\n name\n --vacuum Repack database into minimal\n amount of disk space.\n\n Arguments:\n database Database to open.\n\nIf any of the import- or export- options are used, labelbuddy doesn\u2019t\nstart a GUI but performs the required import or export operations and\nexits. It is possible to specify these options several times. To use\nthese options, the database path must be provided explicitly. Labels are\nimported first, then documents, then export operations are performed.\n\nAs an example, to convert a previously exported file docs.xml to JSON\nand strip the documents' text, you could run:\n\n tmpdb=$(mktemp) && labelbuddy $tmpdb --import-docs docs.xml --export-docs docs.json; rm $tmpdb\n\nRegarding vacuum: when data is deleted from an sqlite3 database, the\nfile doesn\u2019t shrink. The freed up space is not lost; it is kept and\nreused when new data is added to the database. To shrink the database to\noccupy a minimal amount of disk space, we can use:\n\n labelbuddy --vacuum /path/to/db.sqlite3\n\nor equivalently:\n\n sqlite3 /path/to/db.sqlite3 'VACUUM;'\n\nSee more details here. When the vacuum option is used, other options are\nignored and labelbuddy shrinks the database then exits without starting\nthe GUI.\n\n3. 
Conclusion\n\nlabelbuddy was created using C++, Qt, SQLite, tools from the GNU\nproject, and more.\n\nIf you find a bug, kindly open an issue on the labelbuddy GitHub\nrepository.\n\n", "meta": {"title": "documentation.txt", "md5": "70c140cd8530ac34414edf16f042360f"}, "short_title": "labelbuddy documentation \u2014 online version", "long_title": "labelbuddy documentation"}, {"text": "\nExtract from Wikipedia:\nhttps://ar.wikipedia.org/wiki/%D9%84%D8%BA%D8%A9\n\n\u0627\u0644\u0644\u063a\u0629 \u0647\u064a \u0646\u0633\u0642 \u0639\u0644\u0649 \u0645\u0646 \u0627\u0644\u0625\u0634\u0627\u0631\u0627\u062a \u0648\u0627\u0644\u0631\u0645\u0648\u0632\u060c \u062a\u0634\u0643\u0644 \u0623\u062f\u0627\u0629 \u0645\u0646 \u0623\u062f\u0648\u0627\u062a \u0627\u0644\u0645\u0639\u0631\u0641\u0629\u060c \u0648\u062a\u0639\u062a\u0628\u0631 \u0627\u0644\u0644\u063a\u0629 \u0623\u0647\u0645 \u0648\u0633\u0627\u0626\u0644 \u0627\u0644\u062a\u0641\u0627\u0647\u0645 \u0648\u0627\u0644\u0627\u062d\u062a\u0643\u0627\u0643 \u0628\u064a\u0646 \u0623\u0641\u0631\u0627\u062f \u0627\u0644\u0645\u062c\u062a\u0645\u0639 \u0641\u064a \u062c\u0645\u064a\u0639 \u0645\u064a\u0627\u062f\u064a\u0646 \u0627\u0644\u062d\u064a\u0627\u0629.\n\n\u0648\u0628\u062f\u0648\u0646 \u0627\u0644\u0644\u063a\u0629 \u064a\u062a\u0639\u0630\u0631 \u0646\u0634\u0627\u0637 \u0627\u0644\u0646\u0627\u0633 \u0627\u0644\u0645\u0639\u0631\u0641\u064a. \u062a\u0631\u062a\u0628\u0637 \u0627\u0644\u0644\u063a\u0629 \u0628\u0627\u0644\u062a\u0641\u0643\u064a\u0631 \u0627\u0631\u062a\u0628\u0627\u0637\u064b\u0627 \u0648\u062b\u064a\u0642\u064b\u0627\u061b \u0641\u0623\u0641\u0643\u0627\u0631 \u0627\u0644\u0625\u0646\u0633\u0627\u0646 \u062a\u0635\u0627\u063a \u062f\u0648\u0645\u064b\u0627 \u0641\u064a \u0642\u0627\u0644\u0628 \u0644\u063a\u0648\u064a\u060c \u062d\u062a\u0649 \u0641\u064a \u062d\u0627\u0644 \u062a\u0641\u0643\u064a\u0631\u0647 \u0627\u0644\u0628\u0627\u0637\u0646\u064a. 
\u0648\u0645\u0646 \u062e\u0644\u0627\u0644 \u0627\u0644\u0644\u063a\u0629 \u062a\u062d\u0635\u0644 \u0627\u0644\u0641\u0643\u0631\u0629 \u0641\u0642\u0637 \u0639\u0644\u0649 \u0648\u062c\u0648\u062f\u0647\u0627 \u0627\u0644\u0648\u0627\u0642\u0639\u064a. \u0643\u0645\u0627 \u062a\u0631\u0645\u0632 \u0627\u0644\u0644\u063a\u0629 \u0625\u0644\u0649 \u0627\u0644\u0623\u0634\u064a\u0627\u0621 \u0627\u0644\u0645\u0646\u0639\u0643\u0633\u0629 \u0641\u064a\u0647\u0627\u060c \u0641\u0627\u0644\u0644\u063a\u0629 \u0647\u064a \u0627\u0644\u0642\u062f\u0631\u0629 \u0639\u0644\u0649 \u0627\u0643\u062a\u0633\u0627\u0628 \u0648\u0627\u0633\u062a\u062e\u062f\u0627\u0645 \u0646\u0638\u0627\u0645 \u0645\u0639\u0642\u062f \u0644\u0644\u062a\u0648\u0627\u0635\u0644 \u0648\u062e\u0627\u0635\u0629 \u0642\u062f\u0631\u0629 \u0627\u0644\u0625\u0646\u0633\u0627\u0646 \u0639\u0644\u0649 \u0627\u0644\u0642\u064a\u0627\u0645 \u0628\u0630\u0644\u0643\u060c \u0648\u0627\u0644\u0644\u063a\u0629 \u0647\u064a \u0623\u062d\u062f \u0627\u0644\u0623\u0645\u062b\u0644\u0629 \u0627\u0644\u0645\u062d\u062f\u062f\u0629 \u0645\u0646 \u0647\u0630\u0627 \u0627\u0644\u0646\u0638\u0627\u0645\u060c \u0648\u062a\u0633\u0645\u0649 \u0627\u0644\u062f\u0631\u0627\u0633\u0629 \u0627\u0644\u0639\u0644\u0645\u064a\u0629 \u0644\u0644\u063a\u0629 \u0628\u0639\u0644\u0645 \u0627\u0644\u0644\u063a\u0648\u064a\u0627\u062a.\n\n\u0647\u0646\u0627\u0643 \u062a\u0633\u0627\u0624\u0644\u0627\u062a \u062d\u0648\u0644 \u0641\u0644\u0633\u0641\u0629 \u0627\u0644\u0644\u063a\u0629 \u0646\u0648\u0642\u0634\u062a \u0645\u0646 \u0642\u0628\u0644 \u062c\u0648\u0631\u062c\u064a\u0627\u0633 \u0648\u0628\u0644\u0627\u062a\u0648 \u0641\u064a \u0627\u0644\u064a\u0648\u0646\u0627\u0646 \u0627\u0644\u0642\u062f\u064a\u0645\u0629 \u0645\u062b\u0644 \u0645\u0627 \u0625\u0630\u0627 \u0643\u0627\u0646 \u0644\u0644\u0643\u0644\u0645\u0627\u062a \u064a\u0645\u0643\u0646 \u0623\u0646 \u062a\u0639\u0628\u0631 \u0639\u0646 \u062e\u0628\u0631\u0629 
\u0645\u0627\u060c \u0641\u064a\u0642\u0648\u0644 \u0628\u0639\u0636 \u0645\u0646 \u0627\u0644\u0645\u0641\u0643\u0631\u064a\u0646 \u0645\u062b\u0644 \u0631\u0648\u0633\u0648 \u0623\u0646 \u0627\u0644\u0644\u063a\u0629 \u0646\u0634\u0626\u062a \u0645\u0646 \u0627\u0644\u0639\u0648\u0627\u0637\u0641\u060c \u0628\u064a\u0646\u0645\u0627 \u0622\u062e\u0631\u0648\u0646 \u0645\u062b\u0644 \u0643\u0627\u0646\u062a \u064a\u0631\u0649 \u0623\u0646\u0647\u0627 \u0646\u0634\u0626\u062a \u0645\u0646 \u0627\u0644\u062a\u0641\u0643\u064a\u0631 \u0627\u0644\u0639\u0642\u0644\u0627\u0646\u064a \u0648\u0627\u0644\u0645\u0646\u0637\u0642\u064a\u060c \u0648\u0645\u0646 \u0641\u0644\u0627\u0633\u0641\u0629 \u0627\u0644\u0642\u0631\u0646 \u0627\u0644\u0639\u0634\u0631\u064a\u0646 \u0645\u062b\u0644 \u0648\u064a\u062a\u064a\u0646\u0633\u062a\u0627\u064a\u0646 \u0642\u062f \u0642\u0627\u0644 \u0628\u0623\u0646 \u0627\u0644\u0641\u0644\u0633\u0641\u0629 \u0647\u064a \u062d\u0642\u0627\u064b \u062f\u0631\u0627\u0633\u0629 \u0627\u0644\u0644\u063a\u0629\u060c \u0648\u062a\u0634\u0645\u0644 \u0627\u0644\u0634\u062e\u0635\u064a\u0627\u062a \u0627\u0644\u0631\u0626\u064a\u0633\u064a\u0629 \u0641\u064a \u0639\u0644\u0645 \u0627\u0644\u0644\u063a\u0648\u064a\u0627\u062a \u0641\u0631\u062f\u064a\u0646\u0627\u0646\u062f \u062f\u064a \u0633\u0648\u0633\u064a\u0631\u060c \u0648\u0646\u0648\u0645 \u0646\u0634\u0648\u0645\u0633\u0643\u064a\u060c \u0648\u0648\u064a\u0644\u064a\u0627\u0645 \u0633\u062a\u0648\u0643\u064a\u0648.\n\n\u064a\u062a\u0641\u0627\u0648\u062a \u062a\u0642\u062f\u064a\u0631 \u0639\u062f\u062f \u0627\u0644\u0644\u063a\u0627\u062a \u0641\u064a \u0627\u0644\u0639\u0627\u0644\u0645 \u0628\u064a\u0646 5000 \u06487000 \u0644\u063a\u0629\u060c \u0648\u0645\u0639 \u0630\u0644\u0643 \u0641\u0625\u0646 \u0623\u064a \u062a\u0642\u062f\u064a\u0631 \u062f\u0642\u064a\u0642 \u064a\u0639\u062a\u0645\u062f \u062c\u0632\u0626\u064a\u0627 \u0639\u0644\u0649 
\u0627\u0644\u062a\u0645\u064a\u064a\u0632 \u0627\u0644\u062a\u0639\u0633\u0641\u064a \u0628\u064a\u0646 \u0627\u0644\u0644\u063a\u0627\u062a \u0648\u0627\u0644\u0644\u0647\u062c\u0627\u062a\u060c \u0641\u0627\u0644\u0644\u063a\u0627\u062a \u0627\u0644\u0637\u0628\u064a\u0639\u064a\u0629 \u062a\u0643\u0648\u0646 \u0625\u0645\u0627 \u0644\u063a\u0629 \u0645\u0646\u0637\u0648\u0642\u0629 \u0623\u0648 \u0644\u063a\u0629 \u0627\u0644\u0625\u0634\u0627\u0631\u0629\u060c \u0648\u0644\u0643\u0646 \u064a\u0643\u0646 \u062a\u0631\u0645\u064a\u0632 \u0623\u064a \u0644\u063a\u0629 \u0625\u0644\u0649 \u0648\u0633\u0627\u0626\u0644 \u0627\u0644\u0625\u0639\u0644\u0627\u0645 \u0627\u0644\u062b\u0627\u0646\u0648\u064a\u0629 \u0628\u0627\u0633\u062a\u062e\u062f\u0627\u0645 \u0627\u0644\u0645\u0646\u0628\u0647\u0627\u062a \u0627\u0644\u0633\u0645\u0639\u064a\u0629 \u0648\u0627\u0644\u0628\u0635\u0631\u064a\u0629 \u0623\u0648 \u0627\u0644\u0644\u0645\u0633\u064a\u0629 \u0639\u0644\u0649 \u0633\u0628\u064a\u0644 \u0627\u0644\u0645\u062b\u0627\u0644\u060c \u0641\u064a \u0627\u0644\u0643\u062a\u0627\u0628\u0629 \u0627\u0644\u062a\u0635\u0648\u064a\u0631\u064a\u0629\u060c \u0623\u0648 \u0637\u0631\u064a\u0642\u0629 \u0628\u0631\u064a\u0644 \u0644\u0644\u0645\u0643\u0641\u0648\u0641\u064a\u0646\u060c \u0623\u0648 \u0627\u0644\u0635\u0641\u064a\u0631\u060c \u0648\u0647\u0630\u0627 \u0644\u0623\u0646 \u0644\u063a\u0629 \u0627\u0644\u0625\u0646\u0633\u0627\u0646 \u0647\u064a \u0644\u063a\u0629 \u0645\u0633\u062a\u0642\u0644\u0629. 
\u0648\u064a\u0645\u0643\u0646 \u0644\u0645\u0635\u0637\u0644\u062d \"\u0627\u0644\u0644\u063a\u0629\" \u0639\u0646\u062f\u0645\u0627 \u064a\u0633\u062a\u062e\u062f\u0645 \u0639\u0644\u0649 \u0627\u0644\u0645\u0641\u0647\u0648\u0645 \u0627\u0644\u0639\u0627\u0645 \u0623\u0646 \u064a\u0631\u062c\u0639 \u0625\u0644\u0649 \u0627\u0644\u0642\u062f\u0631\u0629 \u0627\u0644\u0625\u062f\u0631\u0627\u0643\u064a\u0629 \u0644\u062a\u0639\u0644\u064a\u0645 \u0648\u0627\u0633\u062a\u062e\u062f\u0627\u0645 \u0646\u0638\u0627\u0645 \u0627\u0644\u062a\u0648\u0627\u0635\u0644 \u0627\u0644\u0645\u0639\u0642\u062f\u060c \u0623\u0648 \u0625\u0644\u0649 \u0648\u0635\u0641 \u0645\u062c\u0645\u0648\u0639\u0629 \u0645\u0646 \u0627\u0644\u0642\u0648\u0627\u0639\u062f \u0627\u0644\u062a\u064a \u062a\u0634\u0643\u0644 \u0647\u0630\u0647 \u0627\u0644\u0623\u0646\u0638\u0645\u0629\u060c \u0623\u0648 \u0645\u062c\u0645\u0648\u0639\u0629 \u0645\u0646 \u0627\u0644\u062a\u0635\u0631\u064a\u062d\u0627\u062a \u0627\u0644\u062a\u064a \u064a\u0645\u0643\u0646 \u0623\u0646 \u062a\u0646\u062a\u062c \u0645\u0646 \u062a\u0644\u0643 \u0627\u0644\u0642\u0648\u0627\u0639\u062f \u0627\u0639\u062a\u0645\u0627\u062f\u0627 \u0639\u0644\u0649 \u0648\u062c\u0647\u0627\u062a \u0627\u0644\u0646\u0638\u0631 \u0627\u0644\u0641\u0644\u0633\u0641\u064a\u0629 \u0627\u0644\u0645\u062a\u0639\u0644\u0642\u0629 \u0628\u062a\u0639\u0631\u064a\u0641 \u0627\u0644\u0644\u063a\u0629 \u0648\u0627\u0644\u0645\u0639\u0646\u0649\u060c \u0641\u062a\u0639\u062a\u0645\u062f \u062c\u0645\u064a\u0639 \u0627\u0644\u0644\u063a\u0627\u062a \u0639\u0644\u0649 \u0639\u0645\u0644\u064a\u0629 \u0635\u064a\u0631\u0648\u0631\u0629 \u0627\u0644\u0639\u0644\u0627\u0645\u0627\u062a \u0644\u062a\u0634\u064a\u0631 \u0625\u0644\u0649 \u0645\u0639\u0627\u0646\u064a \u0645\u0639\u064a\u0646\u0629 \u0641\u062a\u062d\u062a\u0648\u064a \u0627\u0644\u0644\u063a\u0629 \u0627\u0644\u0645\u0634\u0644\u0641\u0647\u0629 \u0623\u0648 
\u0627\u0644\u0645\u0646\u0637\u0648\u0642\u0629 \u0648\u0644\u063a\u0629 \u0627\u0644\u0625\u0634\u0627\u0631\u0629 \u0639\u0644\u0649 \u0646\u0638\u0627\u0645 \u0635\u0648\u062a\u064a \u064a\u062a\u062d\u0643\u0645 \u0628\u0643\u064a\u0641\u064a\u0629 \u0627\u0633\u062a\u062e\u062f\u0627\u0645 \u0627\u0644\u0631\u0645\u0648\u0632 \u0644\u062a\u0634\u0643\u064a\u0644 \u0633\u0644\u0627\u0633\u0644 \u062a\u0639\u0631\u0641 \u0628\u0627\u0633\u0645 \u0627\u0644\u0643\u0644\u0645\u0627\u062a \u0623\u0648 \u0627\u0644\u0635\u0631\u0641\u064a\u0629\u060c \u0648\u0646\u0638\u0627\u0645 \u0646\u062d\u0648\u064a \u0648\u0627\u0644\u0630\u064a \u064a\u062a\u062d\u0643\u0645 \u0628\u0643\u064a\u0641\u064a\u0629 \u0627\u0644\u062c\u0645\u0639 \u0628\u064a\u0646 \u0627\u0644\u0643\u0644\u0645\u0627\u062a \u0648\u0627\u0644\u0635\u0631\u0641 \u0644\u062a\u0634\u0643\u0644 \u0627\u0644\u0639\u0628\u0627\u0631\u0627\u062a \u0648\u0627\u0644\u0623\u0642\u0648\u0627\u0644.\n\n\u0644\u062f\u0649 \u0644\u063a\u0629 \u0627\u0644\u0625\u0646\u0633\u0627\u0646 \u062e\u0635\u0627\u0626\u0635 \u0639\u062f\u064a\u062f\u0629 \u0643\u0627\u0644\u0625\u0646\u062a\u0627\u062c\u064a\u0629\u060c \u0648\u0627\u0644\u062a\u0643\u0631\u0627\u0631\u060c \u0648\u0627\u0644\u0625\u0632\u0627\u062d\u0629\u060c \u0648\u0627\u0644\u062a\u064a \u062a\u0639\u062a\u0645\u062f \u0639\u0644\u0649 \u0627\u0644\u0627\u062a\u0641\u0627\u0642\u064a\u0629 \u0627\u0644\u0627\u062c\u062a\u0645\u0627\u0639\u064a\u0629 \u0648\u0627\u0644\u062a\u0639\u0644\u0645. 
\u0641\u0647\u064a \u0628\u0646\u064a\u0629 \u0645\u0639\u0642\u062f\u0629 \u062a\u062a\u064a\u062d \u0627\u0644\u062d\u0635\u0648\u0644 \u0639\u0644\u0649 \u0645\u062c\u0645\u0648\u0639\u0629 \u0623\u0648\u0633\u0639 \u0628\u0643\u062b\u064a\u0631 \u0645\u0646 \u0627\u0644\u0639\u0628\u0627\u0631\u0627\u062a \u0645\u0646 \u0623\u064a \u0646\u0638\u0627\u0645 \u0645\u0639\u0631\u0648\u0641 \u0645\u0646 \u0627\u0644\u062a\u0648\u0627\u0635\u0644 \u0627\u0644\u062d\u064a\u0648\u0627\u0646\u064a\u060c \u0648\u064a\u0639\u062a\u0642\u062f \u0623\u0646 \u0627\u0644\u0644\u063a\u0629 \u0642\u062f \u0646\u0634\u0623\u062a \u0639\u0646\u062f\u0645\u0627 \u0628\u062f\u0623\u062a \u0643\u0627\u0626\u0646\u0627\u062a \u0634\u0628\u064a\u0647\u0629 \u0628\u0627\u0644\u0625\u0646\u0633\u0627\u0646 \u0641\u064a \u0648\u0642\u062a \u0645\u0628\u0643\u0631 \u0628\u0627\u0644\u062a\u063a\u064a\u064a\u0631 \u062a\u062f\u0631\u064a\u062c\u064a\u0627 \u0641\u064a \u0646\u0638\u0645 \u0627\u0644\u062a\u0648\u0627\u0635\u0644 \u0627\u0644\u0631\u0626\u064a\u0633\u064a\u0629\u060c \u0648\u0627\u0643\u062a\u0633\u0627\u0628 \u0627\u0644\u0642\u062f\u0631\u0629 \u0639\u0644\u0649 \u062a\u0634\u0643\u064a\u0644 \u0646\u0638\u0631\u064a\u0629 \u0627\u0644\u0639\u0642\u0648\u0644 \u0627\u0644\u0623\u062e\u0631\u0649 \u0648\u0627\u0644\u0642\u0635\u062f \u0627\u0644\u0645\u0634\u062a\u0631\u0643\u060c \u0648\u064a\u0639\u062a\u0642\u062f \u0623\u0646 \u0647\u0630\u0627 \u0627\u0644\u062a\u0637\u0648\u0631 \u0641\u064a \u0628\u0639\u0636 \u0627\u0644\u0623\u062d\u064a\u0627\u0646 \u0642\u062f \u062a\u0632\u0627\u0645\u0646 \u0645\u0639 \u0632\u064a\u0627\u062f\u0629 \u0641\u064a \u062d\u062c\u0645 \u0627\u0644\u0645\u062e\u060c \u0648\u064a\u0631\u0649 \u0627\u0644\u0643\u062b\u064a\u0631 \u0645\u0646 \u0627\u0644\u0644\u063a\u0648\u064a\u064a\u0646 \u0647\u064a\u0627\u0643\u0644 \u0627\u0644\u0644\u063a\u0629 \u0628\u0623\u0646\u0647\u0627 \u062a\u0637\u0648\u0631\u062a 
\u0644\u062a\u062e\u062f\u0645 \u0648\u0638\u0627\u0626\u0641 \u062a\u0648\u0627\u0635\u0644\u064a\u0629 \u0648\u0627\u062c\u062a\u0645\u0627\u0639\u064a\u0629 \u0645\u062d\u062f\u062f\u0629\u060c \u0641\u062a\u062a\u0645 \u0645\u0639\u0627\u0644\u062c\u0629 \u0627\u0644\u0644\u063a\u0629 \u0641\u064a \u0627\u0644\u0639\u062f\u064a\u062f \u0645\u0646 \u0627\u0644\u0645\u0648\u0627\u0642\u0639 \u0627\u0644\u0645\u062e\u062a\u0644\u0641\u0629 \u0641\u064a \u0627\u0644\u062f\u0645\u0627\u063a \u0627\u0644\u0628\u0634\u0631\u064a\u060c \u0648\u0644\u0643\u0646 \u062e\u0635\u0648\u0635\u0627 \u0641\u064a \u0645\u0646\u0627\u0637\u0642 \u0628\u0631\u0648\u0643\u0627 \u0648\u0641\u064a\u0631\u0646\u064a\u0643\u060c \u0648\u064a\u0643\u062a\u0633\u0628 \u0627\u0644\u0628\u0634\u0631 \u0627\u0644\u0644\u063a\u0629 \u0645\u0646 \u062e\u0644\u0627\u0644 \u0627\u0644\u062a\u0641\u0627\u0639\u0644 \u0627\u0644\u0627\u062c\u062a\u0645\u0627\u0639\u064a \u0641\u064a \u0645\u0631\u062d\u0644\u0629 \u0627\u0644\u0637\u0641\u0648\u0644\u0629 \u0627\u0644\u0645\u0628\u0643\u0631\u0629\u060c \u0648\u064a\u062a\u062d\u062f\u062b \u0627\u0644\u0623\u0637\u0641\u0627\u0644 \u0639\u0645\u0648\u0645\u0627 \u0628\u0637\u0644\u0627\u0642\u0629 \u0639\u0646\u062f\u0645\u0627 \u064a\u0628\u0644\u063a\u0648\u0646 \u0645\u0627 \u064a\u0642\u0631\u0628 \u0627\u0644\u062b\u0644\u0627\u062b \u0633\u0646\u0648\u0627\u062a \u0645\u0646 \u0627\u0644\u0639\u0645\u0631\u060c \u0648\u0627\u0633\u062a\u062e\u062f\u0627\u0645 \u0627\u0644\u0644\u063a\u0629 \u0645\u062a\u0623\u0635\u0644 \u0641\u064a \u062b\u0642\u0627\u0641\u0629 \u0627\u0644\u0625\u0646\u0633\u0627\u0646\u060c \u0628\u0627\u0644\u0625\u0636\u0627\u0641\u0629 \u0625\u0644\u0649 \u0627\u0633\u062a\u062e\u062f\u0627\u0645\u0627\u062a\u0647 \u0644\u0644\u062a\u0648\u0627\u0635\u0644 \u0628\u0634\u0643\u0644 \u0635\u0627\u0631\u0645\u060c \u0648\u0623\u064a\u0636\u0627 \u0644\u0644\u063a\u0647 \u0627\u0644\u0639\u062f\u064a\u062f \u0645\u0646 
\u0627\u0644\u0627\u0633\u062a\u062e\u062f\u0627\u0645\u0627\u062a \u0627\u0644\u0627\u062c\u062a\u0645\u0627\u0639\u064a\u0629 \u0648\u0627\u0644\u062b\u0642\u0627\u0641\u064a\u0629\u060c \u0645\u062b\u0644 \u0627\u0644\u062f\u0644\u0627\u0644\u0629 \u0639\u0644\u0649 \u0647\u0648\u064a\u0629 \u0627\u0644\u0645\u062c\u0645\u0648\u0639\u0629\u060c \u0648\u0627\u0644\u0637\u0628\u0642\u0627\u062a \u0627\u0644\u0627\u062c\u062a\u0645\u0627\u0639\u064a\u0629\u060c \u0648\u0643\u0630\u0644\u0643 \u0627\u0644\u0627\u0633\u062a\u0645\u0627\u0644\u0629 \u0627\u0644\u0627\u062c\u062a\u0645\u0627\u0639\u064a\u0629 \u0648\u0627\u0644\u062a\u0631\u0641\u064a\u0647\u064a\u0629.\n\n\u062a\u062a\u0637\u0648\u0631\u0627\u0644\u0644\u063a\u0627\u062a \u0648\u062a\u062a\u0646\u0648\u0639 \u0645\u0639 \u0645\u0631\u0648\u0631 \u0627\u0644\u0648\u0642\u062a\u060c \u0648\u064a\u0645\u0643\u0646 \u0625\u0639\u0627\u062f\u0629 \u062a\u0627\u0631\u064a\u062e \u062a\u0637\u0648\u0631\u0647\u0627 \u0648\u0628\u0646\u0627\u0626\u0647\u0627 \u0645\u0646 \u062e\u0644\u0627\u0644 \u0645\u0642\u0627\u0631\u0646\u0629 \u0627\u0644\u0644\u063a\u0627\u062a \u0627\u0644\u062d\u062f\u064a\u062b\u0629 \u0644\u062a\u062d\u062f\u064a\u062f \u0633\u0645\u0627\u062a \u0644\u063a\u0627\u062a \u0623\u062c\u062f\u0627\u062f\u0647\u0645 \u0627\u0644\u062a\u064a \u064a\u062c\u0628 \u0623\u0646 \u062a\u0643\u0648\u0646 \u0645\u0646 \u0623\u062c\u0644 \u0627\u0644\u0645\u0631\u0627\u062d\u0644 \u0627\u0644\u062a\u0646\u0645\u0648\u064a\u0629 \u0627\u0644\u062a\u064a \u064a\u0645\u0643\u0646 \u0627\u0646 \u062a\u062d\u062f\u062b \u0641\u064a \u0648\u0642\u062a \u0644\u0627\u062d\u0642\u060c \u0648\u0645\u0646 \u0627\u0644\u0645\u0639\u0631\u0648\u0641 \u0623\u0646 \u0645\u062c\u0645\u0648\u0639\u0629 \u0645\u0646 \u0627\u0644\u0644\u063a\u0627\u062a \u0627\u0644\u062a\u064a \u062a\u0646\u062d\u062f\u0631 \u0645\u0646 \u0633\u0644\u0641 \u0645\u0634\u062a\u0631\u0643 \u062a\u0646\u062f\u0631\u062c 
\u0643\u0644\u063a\u0629 \u0627\u0644\u0623\u0633\u0631\u0629 \u0645\u062b\u0644 \u0639\u0627\u0626\u0644\u0629 \u0627\u0644\u0647\u0646\u062f\u0648 \u0623\u0648\u0631\u0648\u0628\u064a\u0629 \u0648\u0647\u064a \u0627\u0644\u0623\u0643\u062b\u0631 \u0627\u0646\u062a\u0634\u0627\u0631\u0627 \u0648\u0627\u0644\u062a\u064a \u062a\u0634\u0645\u0644 \u0644\u063a\u0627\u062a \u0639\u062f\u0629 \u0645\u062b\u0644 \u0627\u0644\u0625\u0646\u062c\u0644\u064a\u0632\u064a\u0629 \u0648\u0627\u0644\u0631\u0648\u0633\u064a\u0629 \u0648\u0627\u0644\u0647\u0646\u062f\u064a\u0629\u060c \u0648\u0627\u0644\u0623\u0633\u0631\u0629 \u0628\u064a\u0646 \u0627\u0644\u0635\u064a\u0646 \u0648\u0627\u0644\u062a\u0628\u062a\u060c \u0648\u0627\u0644\u0630\u064a \u062a\u062a\u0636\u0645\u0646 \u0627\u0644\u0641\u0635\u062d\u0649 \u0648\u0627\u0644\u0644\u063a\u0627\u062a \u0627\u0644\u0635\u064a\u0646\u064a\u0629 \u0627\u0644\u0623\u062e\u0631\u0649\u060c \u0648\u0627\u0644\u062a\u0628\u062a\u060c \u0648\u0639\u0627\u0626\u0644\u0629 \u0627\u0644\u0623\u0641\u0631\u0648 \u0622\u0633\u064a\u0648\u064a\u0629\u060c \u0648\u0627\u0644\u062a\u064a \u062a\u0634\u0645\u0644 \u0627\u0644\u0639\u0631\u0628\u064a\u0629 \u0648\u0627\u0644\u0635\u0648\u0645\u0627\u0644\u064a\u0629\u060c \u0648\u0627\u0644\u0639\u0628\u0631\u064a\u0629. \u0623\u064a\u0636\u0627 \u0644\u063a\u0627\u062a \u0627\u0644\u0628\u0627\u0646\u062a\u0648 \u0648\u0627\u0644\u062a\u064a \u062a\u0634\u0645\u0644 \u0627\u0644\u0633\u0648\u0627\u062d\u0644\u064a\u0629\u060c \u0648\u0627\u0644\u0632\u0648\u0644\u0648\u060c \u0648\u0645\u0626\u0627\u062a \u0645\u0646 \u0627\u0644\u0644\u063a\u0627\u062a \u0627\u0644\u0623\u062e\u0631\u0649 \u0627\u0644\u0645\u0633\u062a\u062e\u062f\u0645\u0629 \u0641\u064a \u062c\u0645\u064a\u0639 \u0623\u0646\u062d\u0627\u0621 \u0623\u0641\u0631\u064a\u0642\u064a\u0627. 
\u0648\u0644\u063a\u0627\u062a \u0645\u0627\u0644\u0627\u064a\u0648-\u0627\u0644\u0628\u0648\u0644\u064a\u0646\u064a\u0632\u064a\u0629\u060c \u0648\u0627\u0644\u062a\u064a \u062a\u0634\u0645\u0644 \u0627\u0644\u0625\u0646\u062f\u0648\u0646\u064a\u0633\u064a\u0629 \u0648\u0627\u0644\u0645\u0627\u0644\u064a\u0632\u064a\u0629 \u0648\u0627\u0644\u062a\u063a\u0627\u0644\u0648\u063a\u060c \u0648\u0645\u0626\u0627\u062a \u0645\u0646 \u0627\u0644\u0644\u063a\u0627\u062a \u0627\u0644\u0623\u062e\u0631\u0649 \u0627\u0644\u0645\u0633\u062a\u062e\u062f\u0645\u0629 \u0641\u064a \u062c\u0645\u064a\u0639 \u0623\u0646\u062d\u0627\u0621 \u0627\u0644\u0645\u062d\u064a\u0637 \u0627\u0644\u0647\u0627\u062f\u064a. \u0648 \u0644\u063a\u0627\u062a \u0623\u0633\u0631\u0629 \u062f\u0631\u0627\u0641\u064a\u062f\u064a\u0648\u0646 \u0627\u0644\u062a\u064a \u063a\u0627\u0644\u0628\u0627 \u064a\u062a\u062d\u062f\u062b \u0628\u0647\u0627 \u062c\u0646\u0648\u0628 \u0627\u0644\u0647\u0646\u062f \u0648\u0627\u0644\u062a\u064a \u062a\u0634\u0645\u0644 \u0644\u063a\u0629 \u0627\u0644\u062a\u0627\u0645\u064a\u0644 \u0648\u0627\u0644\u062a\u064a\u0644\u062c\u0648. 
\u064a\u0630\u0647\u0628 \u0627\u0644\u062a\u0648\u0627\u0641\u0642 \u0627\u0644\u062f\u0631\u0627\u0633\u064a \u0625\u0644\u0649 \u0623\u0646 \u0645\u0627 \u0628\u064a\u0646 50\u066a \u0648 90\u066a \u0645\u0646 \u0627\u0644\u0644\u063a\u0627\u062a \u0627\u0644\u062a\u064a \u064a\u062a\u062d\u062f\u062b \u0628\u0647\u0627 \u0641\u064a \u0628\u062f\u0627\u064a\u0629 \u0627\u0644\u0642\u0631\u0646 \u0627\u064421 \u0639\u0644\u0649 \u0648\u0634\u0643 \u0623\u0646 \u062a\u0646\u0642\u0631\u0636 \u0628\u062d\u0644\u0648\u0644 \u0639\u0627\u0645 2100.\n", "meta": {"title": "wikipedia_language_ar.txt", "md5": "2912510120a2fccfdcb04e47d26a5d82"}, "short_title": "\u0644\u063a\u0629", "long_title": "\u0644\u063a\u0629"}, {"text": "\nExtract from Wikipedia:\nhttps://el.wikipedia.org/wiki/%CE%93%CE%BB%CF%8E%CF%83%CF%83%CE%B1\n\n\u039f \u03cc\u03c1\u03bf\u03c2 \u03b3\u03bb\u03ce\u03c3\u03c3\u03b1 \u03c7\u03c1\u03b7\u03c3\u03b9\u03bc\u03bf\u03c0\u03bf\u03b9\u03b5\u03af\u03c4\u03b1\u03b9 \u03ba\u03c5\u03c1\u03af\u03c9\u03c2 \u03b3\u03b9\u03b1 \u03bd\u03b1 \u03b1\u03bd\u03b1\u03c6\u03b5\u03c1\u03b8\u03bf\u03cd\u03bc\u03b5 \u03c3\u03c4\u03bf\u03bd \u03c4\u03c1\u03cc\u03c0\u03bf \u03b5\u03c0\u03b9\u03ba\u03bf\u03b9\u03bd\u03c9\u03bd\u03af\u03b1\u03c2, \u03b9\u03b4\u03af\u03c9\u03c2 \u03c4\u03bf\u03c5 \u03b1\u03bd\u03b8\u03c1\u03ce\u03c0\u03bf\u03c5, \u03b1\u03bb\u03bb\u03ac \u03bc\u03c0\u03bf\u03c1\u03b5\u03af \u03bd\u03b1 \u03b1\u03bd\u03b1\u03c6\u03ad\u03c1\u03b5\u03c4\u03b1\u03b9 \u03b5\u03c0\u03af\u03c3\u03b7\u03c2 \u03ba\u03b1\u03b9 \u03c3\u03b5 \u03c0\u03b5\u03c1\u03b9\u03c0\u03c4\u03ce\u03c3\u03b5\u03b9\u03c2 \u03c4\u03b5\u03c7\u03bd\u03b7\u03c4\u03ce\u03bd \u03ba\u03b1\u03b9 \u03bc\u03b7 \u03b1\u03bd\u03b8\u03c1\u03ce\u03c0\u03b9\u03bd\u03c9\u03bd \u03c3\u03b7\u03bc\u03b5\u03b9\u03b1\u03ba\u03ce\u03bd \u03c3\u03c5\u03c3\u03c4\u03b7\u03bc\u03ac\u03c4\u03c9\u03bd (\u03bc\u03b9\u03bc\u03cc\u03b3\u03bb\u03c9\u03c3\u03c3\u03b1, \u03c4\u03c5\u03c0\u03b9\u03ba\u03ad\u03c2 
\u03b3\u03bb\u03ce\u03c3\u03c3\u03b5\u03c2 \u03c4\u03c9\u03bd \u03bc\u03b1\u03b8\u03b7\u03bc\u03b1\u03c4\u03b9\u03ba\u03ce\u03bd \u03ba\u03b1\u03b9 \u03c4\u03b7\u03c2 \u03c0\u03bb\u03b7\u03c1\u03bf\u03c6\u03bf\u03c1\u03b9\u03ba\u03ae, \u03b3\u03bb\u03ce\u03c3\u03c3\u03b5\u03c2 \u03c0\u03c1\u03bf\u03b3\u03c1\u03b1\u03bc\u03bc\u03b1\u03c4\u03b9\u03c3\u03bc\u03bf\u03cd, \u03c3\u03c5\u03c3\u03c4\u03ae\u03bc\u03b1\u03c4\u03b1 \u03b5\u03c0\u03b9\u03ba\u03bf\u03b9\u03bd\u03c9\u03bd\u03af\u03b1\u03c2 \u03b6\u03ce\u03c9\u03bd \u03ba.\u03bb\u03c0.).\n\n\u0397 \u03c6\u03c5\u03c3\u03b9\u03ba\u03ae \u03b4\u03b9\u03b1\u03b4\u03b9\u03ba\u03b1\u03c3\u03af\u03b1 \u03b1\u03c0\u03cc\u03ba\u03c4\u03b7\u03c3\u03b7\u03c2 \u03bc\u03b9\u03b1\u03c2 \u03b3\u03bb\u03ce\u03c3\u03c3\u03b1\u03c2, \u03c0\u03bf\u03c5 \u03c3\u03c5\u03bc\u03b2\u03b1\u03af\u03bd\u03b5\u03b9 \u03ba\u03b1\u03c4\u03ac \u03c4\u03b1 4-5 \u03c0\u03c1\u03ce\u03c4\u03b1 \u03c7\u03c1\u03cc\u03bd\u03b9\u03b1 \u03b6\u03c9\u03ae\u03c2 \u03c4\u03bf\u03c5 \u03b1\u03bd\u03b8\u03c1\u03ce\u03c0\u03bf\u03c5, \u03b1\u03bd\u03b1\u03c6\u03ad\u03c1\u03b5\u03c4\u03b1\u03b9 \u03c9\u03c2 \u03bc\u03b7\u03c4\u03c1\u03b9\u03ba\u03ae \u03ba\u03b1\u03b9 \u03b5\u03af\u03bd\u03b1\u03b9 \u03bc\u03ad\u03c1\u03bf\u03c2 \u03c4\u03b7\u03c2 \u03c6\u03c5\u03c3\u03b9\u03ba\u03ae\u03c2 \u03b5\u03be\u03ad\u03bb\u03b9\u03be\u03b7\u03c2 \u03c4\u03bf\u03c5 \u03b1\u03bd\u03b8\u03c1\u03ce\u03c0\u03bf\u03c5. \u0397 \u03b3\u03bb\u03ce\u03c3\u03c3\u03b1 \u03b3\u03af\u03bd\u03b5\u03c4\u03b1\u03b9 \u03c4\u03bf \u03b1\u03c0\u03b1\u03c1\u03b1\u03af\u03c4\u03b7\u03c4\u03bf \u03bc\u03ad\u03c3\u03bf \u03bc\u03b5 \u03c4\u03bf \u03bf\u03c0\u03bf\u03af\u03bf \u03bf \u03ac\u03bd\u03b8\u03c1\u03c9\u03c0\u03bf\u03c2 \u03b5\u03c0\u03b9\u03ba\u03bf\u03b9\u03bd\u03c9\u03bd\u03b5\u03af \u03ba\u03b1\u03b9 \u03b3\u03bd\u03c9\u03c1\u03af\u03b6\u03b5\u03b9 \u03c4\u03bf \u03c0\u03b5\u03c1\u03b9\u03b2\u03ac\u03bb\u03bb\u03bf\u03bd \u03c4\u03bf\u03c5. 
\u039c\u03b5 \u03bc\u03b9\u03b1 \u03bf\u03bb\u03bf\u03ba\u03bb\u03b7\u03c1\u03c9\u03bc\u03ad\u03bd\u03b7 \u03b3\u03bb\u03c9\u03c3\u03c3\u03b9\u03ba\u03ae \u03b1\u03b3\u03c9\u03b3\u03ae, \u03bf \u03bf\u03bc\u03b9\u03bb\u03b7\u03c4\u03ae\u03c2 \u03ba\u03ac\u03b8\u03b5 \u03b3\u03bb\u03ce\u03c3\u03c3\u03b1\u03c2 \u03b5\u03af\u03bd\u03b1\u03b9 \u03c3\u03b5 \u03b8\u03ad\u03c3\u03b7 \u03bd\u03b1 \u03c3\u03c5\u03bd\u03b4\u03c5\u03ac\u03b6\u03b5\u03b9 \u03c4\u03b7 \u03b3\u03bd\u03ce\u03c3\u03b7 \u03c4\u03bf\u03c5 \u03c3\u03c5\u03c3\u03c4\u03ae\u03bc\u03b1\u03c4\u03bf\u03c2 \u03c4\u03b7\u03c2 \u03b3\u03bb\u03ce\u03c3\u03c3\u03b1\u03c2 \u03b3\u03b9\u03b1 \u03c4\u03b7\u03bd \u03c0\u03b1\u03c1\u03b1\u03b3\u03c9\u03b3\u03ae \u03ba\u03b1\u03b9 \u03c4\u03b7\u03bd \u03c0\u03c1\u03cc\u03c3\u03bb\u03b7\u03c8\u03b7 \u03bc\u03b7\u03bd\u03c5\u03bc\u03ac\u03c4\u03c9\u03bd.\n\n\u03a0\u03b9\u03c3\u03c4\u03b5\u03cd\u03b5\u03c4\u03b1\u03b9 \u03cc\u03c4\u03b9 \u03b4\u03b5\u03bd \u03c5\u03c0\u03ac\u03c1\u03c7\u03b5\u03b9 \u03c3\u03b1\u03c6\u03ae\u03c2 \u03b4\u03b9\u03b1\u03c7\u03c9\u03c1\u03b9\u03c3\u03c4\u03b9\u03ba\u03ae \u03b3\u03c1\u03b1\u03bc\u03bc\u03ae \u03b1\u03bd\u03ac\u03bc\u03b5\u03c3\u03b1 \u03c3\u03c4\u03bf\u03bd \u03cc\u03c1\u03bf \u03b3\u03bb\u03ce\u03c3\u03c3\u03b1, \u03bc\u03b5 \u03c4\u03b7 \u03c3\u03b7\u03bc\u03b1\u03c3\u03af\u03b1 \u03c4\u03b7\u03c2 \u03c3\u03c5\u03b3\u03ba\u03b5\u03ba\u03c1\u03b9\u03bc\u03ad\u03bd\u03b7\u03c2 \u03b5\u03b8\u03bd\u03b9\u03ba\u03ae\u03c2 \u03b3\u03bb\u03ce\u03c3\u03c3\u03b1\u03c2, \u03ba\u03b1\u03b9 \u03c3\u03c4\u03bf\u03bd \u03cc\u03c1\u03bf \u03b4\u03b9\u03ac\u03bb\u03b5\u03ba\u03c4\u03bf\u03c2. 
\u03a3\u03cd\u03bc\u03c6\u03c9\u03bd\u03b1 \u03bc\u03b5 \u03c4\u03b7 \u03b4\u03b9\u03b1\u03c4\u03cd\u03c0\u03c9\u03c3\u03b7 \u03c4\u03bf\u03c5 Max Weinrich, \u00ab\u03b3\u03bb\u03ce\u03c3\u03c3\u03b1 \u03b5\u03af\u03bd\u03b1\u03b9 \u03bc\u03b9\u03b1 \u03b4\u03b9\u03ac\u03bb\u03b5\u03ba\u03c4\u03bf\u03c2 \u03bc\u03b5 \u03c3\u03c4\u03c1\u03b1\u03c4\u03cc \u03ba\u03b1\u03b9 \u03c3\u03c4\u03cc\u03bb\u03bf\u00bb, \u03b5\u03bd\u03ce \u2014\u03c3\u03cd\u03bc\u03c6\u03c9\u03bd\u03b1 \u03bc\u03b5 \u03ac\u03bb\u03bb\u03b7 \u03b4\u03b9\u03b1\u03c4\u03cd\u03c0\u03c9\u03c3\u03b7\u2014 \u00ab\u03b4\u03b9\u03ac\u03bb\u03b5\u03ba\u03c4\u03bf\u03c2 \u03b5\u03af\u03bd\u03b1\u03b9 \u03bc\u03b9\u03b1 \u03b7\u03c4\u03c4\u03b7\u03bc\u03ad\u03bd\u03b7 \u03b3\u03bb\u03ce\u03c3\u03c3\u03b1\". \u03a0\u03b9\u03b8\u03b1\u03bd\u03cc\u03c4\u03b1\u03c4\u03b1, \u00ab\u03cc\u03bb\u03b5\u03c2 \u03bf\u03b9 \u03b3\u03bb\u03ce\u03c3\u03c3\u03b5\u03c2 \u03c4\u03bf\u03c5 \u03ba\u03cc\u03c3\u03bc\u03bf\u03c5, \u03c3\u03b5 \u03cc\u03bb\u03bf\u03c5\u03c2 \u03c4\u03bf\u03c5\u03c2 \u03ba\u03b1\u03b9\u03c1\u03bf\u03cd\u03c2, \u03b5\u03af\u03bd\u03b1\u03b9 \u03b4\u03b9\u03ac\u03bb\u03b5\u03ba\u03c4\u03bf\u03b9 \u03bc\u03b9\u03b1\u03c2 \u03c0\u03c1\u03ce\u03c4\u03b7\u03c2 \u03ba\u03bf\u03b9\u03bd\u03ae\u03c2 \u03b3\u03bb\u03ce\u03c3\u03c3\u03b1\u03c2 \u03c0\u03bf\u03c5 \u03c0\u03c1\u03b9\u03bd \u03c7\u03b9\u03bb\u03b9\u03ac\u03b4\u03b5\u03c2 \u03c7\u03c1\u03cc\u03bd\u03b9\u03b1 \u03ae\u03c4\u03b1\u03bd \u03c0\u03b1\u03b3\u03ba\u03cc\u03c3\u03bc\u03b9\u03b1 \u03b3\u03bb\u03ce\u03c3\u03c3\u03b1\u00bb\n", "meta": {"title": "wikipedia_language_el.txt", "md5": "26e477ca3f55320204f109bb07e79198"}, "short_title": "\u0393\u03bb\u03ce\u03c3\u03c3\u03b1", "long_title": "\u0393\u03bb\u03ce\u03c3\u03c3\u03b1"}, {"text": "\nExtract from 
Wikipedia:\nhttps://zh.wikipedia.org/wiki/%E8%AA%9E%E8%A8%80\n\n\u8bed\u8a00\u662f\u4e00\u7c7b\u590d\u5408\u4ea4\u6d41\u7cfb\u7edf\uff0c\u4e3b\u8981\u5305\u542b\u5176\u5f62\u6210\u3001\u4e60\u5f97\u3001\u7ef4\u62a4\u53ca\u5e94\u7528\uff0c\u7279\u522b\u662f\u76f8\u5e94\u7684\u4eba\u7c7b\u80fd\u529b\u3002\u67d0\u4e00\u95e8\u8bed\u8a00\u5219\u662f\u8fd9\u7c7b\u7cfb\u7edf\u7684\u5177\u4f53\u4f8b\u5b50\u3002\u9664\u4e86\u4ea4\u6d41\u6e9d\u901a\u5916\uff0c\u8a9e\u8a00\u4e5f\u662f\u4e00\u500b\u4eba\u7684\u8eab\u5206\u8a8d\u540c\u4e2d\u4e3b\u8981\u7684\u69cb\u6210\u90e8\u5206\uff0c\u4e5f\u662f\u4e00\u500b\u6587\u5316\u7684\u4e3b\u8981\u6210\u5206\u4e4b\u4e00\u3002\n\n\u8bed\u8a00\u5b66\u662f\u5bf9\u8bed\u8a00\u7684\u79d1\u5b66\u7814\u7a76\u3002\u8bed\u8a00\u54f2\u5b66\u65b9\u9762\u76f8\u5173\u7684\u8fa9\u8bba\uff0c\u6bd4\u5982\u8bcd\u6c47\u80fd\u5426\u80fd\u8868\u8ff0\u7ecf\u5386\uff0c\u81f3\u5c11\u53ef\u8ffd\u6eaf\u81f3\u53e4\u5e0c\u814a\u7684\u9ad8\u723e\u5409\u4e9e\u4e0e\u67cf\u62c9\u56fe\u3002\u4ee5\u5362\u68ad\u4e3a\u4ee3\u8868\u7684\u4e00\u4e9b\u601d\u60f3\u5bb6\u8a8d\u70ba\u8a9e\u8a00\u6e90\u81ea\u60c5\u7eea\uff0c\u800c\u5eb7\u5fb7\u8ba4\u4e3a\u5176\u6e90\u4e8e\u7406\u6027\u548c\u903b\u8f91\u7684\u601d\u8fa8\u3002\u8bf8\u5982\u7ef4\u7279\u6839\u65af\u5766\u7b4920\u4e16\u7d00\u7684\u54f2\u5b78\u5bb6\u8bba\u8bc1\u8bf4\u54f2\u5b78\u5b9e\u8d28\u4e0a\u5c31\u662f\u5bf9\u8a9e\u8a00\u7684\u7814\u7a76\u3002\u8457\u540d\u7684\u8bed\u8a00\u5b66\u5bb6\u6709\u5f17\u8fea\u5357\u00b7\u5fb7\u00b7\u7d22\u7dd2\u723e\u3001\u8bfa\u59c6\u00b7\u4e54\u59c6\u65af\u57fa\u7b49\u3002\n\n\u4eba\u7c7b\u8bed\u8a00\u636e\u4f30\u7b97\u7ea6\u67095000\u52307000\u79cd\uff0c\u7531\u4e8e\u8bed\u8a00\u4e0e\u65b9\u8a00\u7684\u5dee\u522b\u96be\u4ee5\u5b9a\u4e49\uff0c\u96be\u4ee5\u6709\u51c6\u786e\u7684\u7edf\u8ba1\u3002\u81ea\u7136\u8bed\u8a00\u5e38\u4ee5\u53e3\u8ff0\u6216\u624b\u8a9e\u65b9\u5f0f\u8868\u9054\uff0c\u4f46\u4efb\u4f55\u8a9e\u8a00\u90fd\u53ef\u4ee5\u7528\u8072\u97f3\uff08\u5982\u53e3\u54e8\u8a
9e\uff09\u3001\u8996\u89ba\uff08\u5982\u624b\u8a9e\uff09\u6216\u662f\u89f8\u89ba\u524c\u6fc0\uff08\u5982\u76f2\u6587\uff09\u6765\u8868\u73b0\u3002\u7b26\u865f\u5b78\u4e2d\u5c06\u8fd9\u4e00\u73b0\u8c61\u63cf\u8ff0\u4e3a\u6a21\u6001\u65e0\u5173\uff08modality-independent\uff09\u3002\u4f9d\u7167\u8bed\u8a00\u54f2\u5b66\u5c0d\u8bed\u8a00\u4e0e\u610f\u4e49\u7684\u5b9a\u4e49\uff0c\u5e7f\u4e49\u4e0a\uff0c\u201c\u8bed\u8a00\u201d\u53ef\u4ee5\u6307\u4ee3\u5b66\u4e60\u5e76\u4f7f\u7528\u8be5\u590d\u6742\u4ea4\u6d41\u7cfb\u7edf\u7684\u8ba4\u77e5\u80fd\u529b\uff0c\u4e5f\u53ef\u4ee5\u63cf\u8ff0\u6784\u6210\u8fd9\u4e00\u7cfb\u7edf\u7684\u89c4\u5219\u96c6\u5408\uff0c\u8fd8\u53ef\u4ee5\u6307\u7531\u8bed\u8a00\u89c4\u5219\u751f\u6210\uff08produce\uff09\u7684\u8bcd\u53e5\u7684\u96c6\u5408\u3002\u6240\u6709\u7684\u8a9e\u8a00\u90fd\u4f9d\u9760\u7b26\u53f7\u8fc7\u7a0b\u4f86\u5c07\u7279\u5b9a\u7684\u8bb0\u53f7\u548c\u610f\u4e49\u76f8\u9023\u7d50\u3002\u53e3\u8bed\u3001\u624b\u8a9e\u53ca\u89f8\u89ba\u8a9e\u8a00\u90fd\u6709\u97f3\u4f4d\u5b78\u7cfb\u7d71\u4f86\u5c07\u7b26\u865f\u7d44\u5408\u6210\u8a5e\u6216\u662f\u8a9e\u7d20\u7684\u7cfb\u7d71\uff0c\u4e5f\u6709\u8bed\u6cd5\u5b66\u7cfb\u7d71\u4f86\u5c07\u8a5e\u53ca\u8a9e\u7d20\u7d44\u5408\u6210\u77ed\u8bed\u548c\u8bdd\u8bed\u3002\n\n\u4eba\u985e\u8a9e\u8a00\u5177\u6709\u521b\u9020\u6027\u548c\u79fb\u4f4d\u6027\uff0c\u5b8c\u5168\u9760\u793e\u6703\u7fd2\u4fd7\u53ca\u5b78\u7fd2\u800c\u4f86\u3002\u8a9e\u8a00\u7684\u8907\u96dc\u7d50\u69cb\u4f7f\u5f97\u5176\u53ef\u8868\u9054\u7684\u7bc4\u570d\u6bd4\u4efb\u4f55\u5df2\u77e5\u7684\u52d5\u7269\u4ea4\u6d41\u7cfb\u7d71\u90fd\u8981\u5ee3\u3002\u4f9d\u5fc3\u7406\u5b78\u7684\u89c0\u9ede\uff0c\u8a9e\u8a00\u662f\u8d77\u6e90\u65bc\u4eba\u65cf\u6709\u4e86\u5f62\u6210\u5fc3\u667a\u7406\u8ad6\u7684\u80fd\u529b\uff0c\u4ee5\u53ca\u6709\u5206\u4eab\u7684\u610f\u5411\uff0c\u4e4b\u5f8c\u6f38\u6f38\u7531\u5176\u539f\u59cb\u7684\u6e9d\u901a\u7cfb\u7d71\u6f14\u8b8a\u800c\u4f86[1][2]\u3002\u9019\u500b\u767c\u5c55\u51fa\u73f
e\u7684\u6642\u671f\u4e5f\u548c\u4eba\u985e\u8166\u5bb9\u91cf\u64f4\u5c55\u7684\u6642\u671f\u5927\u81f4\u76f8\u540c\uff0c\u8a31\u591a\u8a9e\u8a00\u5b78\u5bb6\u8a8d\u70ba\u8a9e\u8a00\u7684\u7d50\u69cb\u6709\u56e0\u70ba\u4e00\u4e9b\u7279\u5b9a\u7684\u6e9d\u901a\u53ca\u793e\u6703\u6a5f\u80fd\u800c\u6f14\u8b8a\u3002\u4eba\u8166\u6709\u8a31\u591a\u90e8\u4efd\u6703\u8655\u7406\u8a9e\u8a00\uff0c\u4f46\u4e3b\u8981\u662f\u5728\u5e03\u82e5\u5361\u6c0f\u533a\u53ca\u97e6\u5c3c\u514b\u533a\u3002\u4eba\u985e\u7684\u8bed\u8a00\u4e60\u5f97\u662f\u5728\u7ae5\u5e74\u65e9\u671f\u7684\u793e\u6703\u4e92\u52d5\u4e2d\u5b78\u7fd2\u7684\uff0c\u5c0f\u5b69\u5927\u7d04\u4e09\u6b72\u5c31\u53ef\u4ee5\u6d41\u5229\u7684\u8aaa\u8a71\u4e86\u3002\u8a9e\u8a00\u7684\u4f7f\u7528\u5df2\u6df1\u6df1\u7d2e\u6839\u65bc\u4eba\u985e\u6587\u5316\u4e4b\u4e2d\u3002\u56e0\u6b64\u8a9e\u8a00\u9664\u4e86\u7528\u5728\u6e9d\u901a\u4e0a\uff0c\u4e5f\u6709\u8a31\u591a\u793e\u6703\u53ca\u6587\u5316\u4e0a\u7684\u7528\u9014\uff0c\u4f8b\u5982\u5f37\u5316\u7fa4\u9ad4\u8eab\u4efd\u8ba4\u540c\u53ca\u793e\u6703\u968e\u5c64\uff0c\u4e5f\u7528\u4f86\u793e\u6703\u6027\u68b3\u7406\u53ca\u5a1b\u6a02\u3002\n\n\u8a9e\u8a00\u6703\u96a8\u6642\u9593\u6f14\u8fdb\u4e0e\u5206\u5316\u3002\u5176\u6f14\u5316\u6b77\u53f2\u53ef\u4ee5\u901a\u8fc7\u4e0e\u73b0\u4ee3\u8bed\u8a00\u7684\u6bd4\u8f03\u4f86\u6784\u62df\uff1a\u4ece\u73b0\u4ee3\u8bed\u8a00\u4e2d\u786e\u5b9a\u54ea\u4e9b\u7279\u8d28\u662f\u7956\u8bed\u5b58\u5728\u7684\uff0c\u7531\u6b64\u5728\u8bed\u8a00\u7684\u540e\u7eed\u53d1\u5c55\u9636\u6bb5\u5f97\u4ee5\u5b58\u7eed\u3002\u6709\u5171\u540c\u7956\u8bed\u7684\u4e00\u7ec4\u8a9e\u8a00\u7a31\u70ba\u8bed\u7cfb\u3002\u5370\u6b27\u8bed\u7cfb\u7684\u8bed\u8a00\u5728\u4eca\u5929\u4e3a\u4f7f\u7528\u4eba\u6570\u4e4b\u6700\uff0c\u5176\u4e3b\u8981\u6210\u5458\u6709\u82f1\u8bed\u3001\u4fc4\u8bed\u548c\u5370\u5730\u8bed\u3002\u6c49\u85cf\u8bed\u7cfb\u5305\u62ec\u6c49\u8bed\u8bf8\u65b9\u8a00[\u8a3b 
1]\u3001\u85cf\u8bed\u3001\u535a\u591a\u8bed\u7b49\u3002\u4e9a\u975e\u8bed\u7cfb\u5305\u62ec\u963f\u62c9\u4f2f\u8bed\u3001\u7d22\u99ac\u91cc\u8a9e\u53ca\u5e0c\u4f2f\u6765\u8bed\u3002\u73ed\u56fe\u8bed\u652f\u4e2d\u6709\u65af\u74e6\u5e0c\u91cc\u8bed\u3001\u7956\u9c81\u8bed\u548c\u5176\u4ed6\u6578\u767e\u7a2e\u975e\u6d32\u8a9e\u8a00\u3002\u9a6c\u6765\uff0d\u6ce2\u5229\u5c3c\u897f\u4e9a\u8bed\u65cf\u5305\u62ec\u5370\u5c3c\u8bed\u3001\u9a6c\u6765\u8bed\u3001\u4ed6\u52a0\u797f\u8a9e\u53ca\u6570\u767e\u79cd\u904d\u5e03\u592a\u5e73\u6d0b\u7684\u8a9e\u8a00\u3002\u8fbe\u7f57\u6bd7\u837c\u8bed\u7cfb\u4e3b\u8981\u5206\u5e03\u4e8e\u5370\u5ea6\u5357\u90e8\uff0c\u5176\u4e2d\u6709\u6cf0\u7c73\u5c14\u8bed\u548c\u6cf0\u5362\u56fa\u8bed\u3002\u5b78\u8853\u754c\u8ba4\u4e3a\u73b0\u5b58\u53e3\u8bed\u4e2d\u670950%\u81f390%\u6703\u57282100\u5e74\u524d\u6ec5\u7d55[3]\u3002\n", "meta": {"title": "wikipedia_language_zh.txt", "md5": "5be6d87531d4a1b4a2e70fe4ae0ed111"}, "short_title": "\u8a9e\u8a00", "long_title": "\u8a9e\u8a00"}] \ No newline at end of file diff --git a/docs/example_data/example_labels.json b/docs/example_data/example_labels.json new file mode 100644 index 0000000..f5cffb5 --- /dev/null +++ b/docs/example_data/example_labels.json @@ -0,0 +1,34 @@ +[ + { + "text": "adjective", + "shortcut_key": "a" + }, + { + "text": "adverb", + "shortcut_key": "b" + }, + { + "text": "conjunction", + "shortcut_key": "c" + }, + { + "text": "interjection", + "shortcut_key": "i" + }, + { + "text": "noun", + "shortcut_key": "n" + }, + { + "text": "preposition", + "shortcut_key": "p" + }, + { + "text": "pronoun", + "shortcut_key": "q" + }, + { + "text": "verb", + "shortcut_key": "v" + } +] diff --git a/docs/example_data/example_labels.txt b/docs/example_data/example_labels.txt deleted file mode 100644 index cd4f712..0000000 --- a/docs/example_data/example_labels.txt +++ /dev/null @@ -1,8 +0,0 @@ -adjective -adverb -conjunction -interjection -noun -preposition -pronoun -verb diff --git 
a/docs/example_data/hello_annotations.txt b/docs/example_data/hello_annotations.txt index 3b82dff..5e04e44 100644 --- a/docs/example_data/hello_annotations.txt +++ b/docs/example_data/hello_annotations.txt @@ -3,6 +3,8 @@ THIS IS A TEMPORARY DATABASE. It will disappear when you close the application. This database contains a few documents and labels to allow trying out labelbuddy. You can create annotations, import more documents and labels and export annotations, but the database itself will be removed when labelbuddy exits. +(You don't have to actually read this text, try annotating it instead!) + If you decide to start a real project you want to keep, from the File menu select "New" and choose a location on your filesystem to create a persistent database. You can also close labelbuddy and open it again and by default it will open a database in your home directory: ~/labelbuddy_data.sqlite3. Time to start annotating documents! Select some text with the mouse and click on one of the labels on the left to annotate it. @@ -13,27 +15,6 @@ If you create a new annotation that overlaps with a previously existing one, the Annotations are inserted in the database as you create them so there is no need for a "Save" button. 
-Here is a summary of the keybindings available in this page: - -Ctrl and scroll the mouse : (un-)zoom the text -Ctrl+F : search -Enter : next search match -Shift+Enter : previous search match -Ctrl+J, Ctrl+N, Down : scroll down one line -Ctrl+K, Ctrl+P, Up : scroll up one line -Ctrl+D : scroll down one page -Ctrl+U : scroll up one page -] : move the *end* of the selection by one *word* to the *right* -[ : move the *end* of the selection by one *word* to the *left* -} : move the *beginning* of the selection by one *word* to the *right* -{ : move the *beginning* of the selection by one *word* to the *left* -Ctrl+] : move the *end* of the selection by one *character* to the *right* -Ctrl+[ : move the *end* of the selection by one *character* to the *left* -Ctrl+} : move the *beginning* of the selection by one *character* to the *right* -Ctrl+{ : move the *beginning* of the selection by one *character* to the *left* - -If you prefer to read these documents with a fixed width font, select in the menu bar "Preferences" > "monospace font". You can prevent the text becoming bold by deselecting "Preferences" > "Bold selected region". - Once you are done annotating this document, you can click "Next" above to move on to another one. The next document is the labelbuddy documentation, and then a few extracts from Wikipedia (https://en.wikipedia.org/wiki/Main_Page) to illustrate using labelbuddy with different languages and writing systems. The documentation in HTML format can be seen by clicking "Help" > "Documentation". @@ -41,3 +22,31 @@ The next document is the labelbuddy documentation, and then a few extracts from To see the list of documents, remove documents or labels, or change the labels' colors, go to the "Dataset" tab. To export your annotations or import more documents or labels, go to the "Import / Export" tab. 
+ + +Here is a summary of the keybindings available in this page: + +Ctrl and scroll the mouse: zoom in or out of the text +Ctrl+F, /: search +Enter: next search match +Shift+Enter: previous search match +Ctrl+J, Ctrl+N, Down: scroll down one line +Ctrl+K, Ctrl+P, Up: scroll up one line +Ctrl+D: scroll down one page +Ctrl+U: scroll up one page +Ctrl+L: cycle between placing the cursor at the center, top and bottom of the window +a-z (label’s shortcut_key): set corresponding label for the currently selected region or annotation +Backspace: remove selected annotation +Space: jump to next annotation and select it +Shift+Space: jump to previous annotation and select it +Esc: un-select selected annotation +]: move the end of the selection by one word to the right +[: move the end of the selection by one word to the left +}: move the beginning of the selection by one word to the right +{: move the beginning of the selection by one word to the left +Ctrl+]: move the end of the selection by one character to the right +Ctrl+[: move the end of the selection by one character to the left +Ctrl+}: move the beginning of the selection by one character to the right +Ctrl+{: move the beginning of the selection by one character to the left +>: go to next document +<: go to previous document diff --git a/docs/example_data/make_example_docs.py b/docs/example_data/make_example_docs.py index c6c64a6..12623d6 100755 --- a/docs/example_data/make_example_docs.py +++ b/docs/example_data/make_example_docs.py @@ -25,7 +25,9 @@ "documentation.html'>online version" ) long_title = "labelbuddy documentation" - body = "\n".join(lines[1:]) + body = doc + else: + body = "\n".join(lines[1:]) meta = { "title": doc_name, "md5": hashlib.md5(body.encode("utf-8")).hexdigest(), diff --git a/docs/extended-description b/docs/extended-description new file mode 100644 index 0000000..f36487b --- /dev/null +++ b/docs/extended-description @@ -0,0 +1,5 @@ +This is an application for annotating parts of documents with 
labels. +labelbuddy can be used for Part Of Speech tagging, +Named Entity Recognition, +sentiment analysis and document classification, etc. +It depends on Qt5. diff --git a/docs/installation.adoc b/docs/installation.adoc index a77d3f4..8b67913 100644 --- a/docs/installation.adoc +++ b/docs/installation.adoc @@ -40,17 +40,26 @@ Then the latest release of {lb} (and any future releases) can be installed with: sudo apt-get install labelbuddy .... -If you don't want to add the PPA to your sources list you can manually choose and download one of the {lbppapackages}[published packages], then install it with `apt-get`. -For example: +If you don't want to add the PPA to your sources list you can manually choose and download one of the {lbppapackages}[published packages], then install it with `apt-get`: .... -sudo apt install ./labelbuddy_0.0.1-1~groovy1_amd64.deb +sudo apt install ./labelbuddy_<version>.deb +.... + +=== On Debian + +Binary packages can be downloaded from {ghreleases}[the releases page on GitHub]. +Once you have downloaded the package, you can install it with `apt-get`: +.... +sudo apt install ./labelbuddy_<version>.deb .... === On Windows -Select the latest release from {ghreleases}[the releases page on GitHub], and download and run `labelbuddy_windows_installer.exe` (ignore Windows’ security warnings). -An alternative to the installer is to download `labelbuddy_windows.zip`, unzip it anywhere, then execute the file `labelbuddy.exe` that it contains to start {lb}. +Select the latest release from {ghreleases}[here], and download and run `labelbuddy_<version>_windows_installer.exe` (ignore Windows’ security warnings). +{lb} can be uninstalled from the Windows applications menu, or by running the `maintenancetool.exe` executable found in the {lb} installation folder. + +An alternative to using the installer is to download `labelbuddy_windows.zip`, unzip it anywhere, then execute the file `labelbuddy.exe` that it contains to start {lb}. 
== Building from source diff --git a/docs/man/labelbuddy.1 b/docs/man/labelbuddy.1 index 4c21136..59a04b7 100644 --- a/docs/man/labelbuddy.1 +++ b/docs/man/labelbuddy.1 @@ -1,4 +1,4 @@ -.TH labelbuddy 1 "2021-02-23" "version 0.0.1" +.TH labelbuddy 1 "2021-02-27" "version 0.0.3" .SH NAME labelbuddy \- application for annotating text documents .SH SYNOPSIS @@ -61,19 +61,19 @@ into the database. Can be used several times. Export labels in the database to the .json file .IR labelsfile . .TP -.BI \-\-export\-annotations " docsfile" +.BI \-\-export\-docs " docsfile" Export documents and annotations in the database to the (.json, .jsonl or .xml) file .IR docsfile . Some options described below control what is exported. .TP .B \-\-labelled\-only When using the -.B \-\-export\-annotations +.B \-\-export\-docs option, only export documents that contain at least one annotation. .TP .B \-\-include\-text When using the -.B \-\-export\-annotations +.B \-\-export\-docs option, include the document's text in the output. By default, it is not included (and the documents can be identified from the MD5 checksum found in the output, or from any user metadata that was imported with the documents). diff --git a/docs/short-readme.txt b/docs/short-readme.txt deleted file mode 100644 index 35952a5..0000000 --- a/docs/short-readme.txt +++ /dev/null @@ -1,8 +0,0 @@ -labelbuddy is a GUI tool for annotating documents (for example for Part Of -Speech tagging, Named Entity Recognition, etc.) 
- -See more information and the documentation: - - https://github.com/jeromedockes/labelbuddy - -(License: GNU GENERAL PUBLIC LICENSE Version 3) diff --git a/resources.qrc b/resources.qrc index 3a02b5e..35bfd05 100644 --- a/resources.qrc +++ b/resources.qrc @@ -3,6 +3,6 @@ data/LB.png data/VERSION.txt docs/example_data/example_documents.json - docs/example_data/example_labels.txt + docs/example_data/example_labels.json diff --git a/src/annotations_model.cpp b/src/annotations_model.cpp index eb1572f..8ee0892 100644 --- a/src/annotations_model.cpp +++ b/src/annotations_model.cpp @@ -270,4 +270,14 @@ bool AnnotationsModel::has_prev_unlabelled() const { return (first != -1) && (current_doc_id > first); } +int AnnotationsModel::shortcut_to_id(const QString& shortcut) const { + auto query = get_query(); + query.prepare("select id from label where shortcut_key = :key;"); + query.bindValue(":key", shortcut); + query.exec(); + if (!query.next()) { + return -1; + } + return query.value(0).toInt(); +} } // namespace labelbuddy diff --git a/src/annotations_model.h b/src/annotations_model.h index eea249d..c734f4e 100644 --- a/src/annotations_model.h +++ b/src/annotations_model.h @@ -46,6 +46,7 @@ class AnnotationsModel : public QObject { bool has_prev_labelled() const; bool has_next_unlabelled() const; bool has_prev_unlabelled() const; + int shortcut_to_id(const QString& shortcut) const; public slots: diff --git a/src/annotator.cpp b/src/annotator.cpp index c7b8345..00219ef 100644 --- a/src/annotator.cpp +++ b/src/annotator.cpp @@ -82,6 +82,9 @@ void LabelChoices::enable_delete() { delete_button->setEnabled(true); } void LabelChoices::disable_delete() { delete_button->setDisabled(true); } void LabelChoices::enable_label_choice() { labels_view->setEnabled(true); } void LabelChoices::disable_label_choice() { labels_view->setDisabled(true); } +bool LabelChoices::is_label_choice_enabled() const { + return labels_view->isEnabled(); +} Annotator::Annotator(QWidget* parent) : 
QSplitter(parent) { label_choices = new LabelChoices(); @@ -100,6 +103,7 @@ Annotator::Annotator(QWidget* parent) : QSplitter(parent) { title_label->setWordWrap(true); text = new SearchableText(); text_layout->addWidget(text); + text->get_text_edit()->installEventFilter(this); default_weight = text->get_text_edit()->fontWeight(); text->fill(""); text->setSizePolicy(QSizePolicy::Expanding, QSizePolicy::Expanding); @@ -253,7 +257,8 @@ void Annotator::delete_annotations(QList annotation_ids) { if (active_annotation == id) { deactivate_active_annotation(); } - annotations.remove(id); + auto anno = annotations.take(id); + pos_to_anno.remove(anno.start_char); to_delete << id; } if (!to_delete.length()) { @@ -320,6 +325,7 @@ void Annotator::add_annotation(int label_id, int start_char, int end_char) { annotation_cursor.setPosition(end_char, QTextCursor::KeepAnchor); annotations[annotation_id] = AnnotationCursor{ annotation_id, label_id, start_char, end_char, annotation_cursor}; + pos_to_anno[start_char] = annotation_id; auto new_cursor = text->get_text_edit()->textCursor(); new_cursor.clearSelection(); text->get_text_edit()->setTextCursor(new_cursor); @@ -342,7 +348,8 @@ void Annotator::fetch_annotations_info() { int prev_active{active_annotation}; deactivate_active_annotation(); auto annotation_positions = annotations_model->get_annotations_info(); - annotations = QMap{}; + annotations.clear(); + pos_to_anno.clear(); for (auto i = annotation_positions.constBegin(); i != annotation_positions.constEnd(); ++i) { auto start = i.value().start_char; @@ -352,12 +359,58 @@ void Annotator::fetch_annotations_info() { cursor.setPosition(end, QTextCursor::KeepAnchor); annotations[i.value().id] = AnnotationCursor{i.value().id, i.value().label_id, start, end, cursor}; + pos_to_anno[start] = i.value().id; } if (annotations.contains(prev_active)) { active_annotation = prev_active; } } +int Annotator::find_next_annotation(int pos, bool forward) const { + if (annotations.size() == 0) { + 
return -1; + } + if (forward) { + QMap::const_iterator i = pos_to_anno.lowerBound(pos); + if (i != pos_to_anno.constEnd()) { + return *i; + } + return *pos_to_anno.constBegin(); + } + QMap::const_iterator i = pos_to_anno.lowerBound(pos); + if (i == pos_to_anno.constEnd()) { + --i; + } + while (i != pos_to_anno.constBegin() - 1 && i.key() > pos) { + --i; + } + if (i != pos_to_anno.constBegin() - 1) { + return *i; + } + return *(pos_to_anno.constEnd() - 1); +} + +void Annotator::select_next_annotation(bool forward) { + int pos{}; + if (active_annotation != -1) { + int offset = forward ? 1 : -1; + pos = annotations[active_annotation].start_char + offset; + } else { + pos = text->get_text_edit()->textCursor().position(); + } + auto next_anno = find_next_annotation(pos, forward); + if (next_anno == -1) { + return; + } + auto cursor = text->get_text_edit()->textCursor(); + cursor.setPosition(annotations[next_anno].start_char); + text->get_text_edit()->setTextCursor(cursor); + text->get_text_edit()->ensureCursorVisible(); + deactivate_active_annotation(); + active_annotation = next_anno; + emit active_annotation_changed(); +} + void Annotator::paint_annotations() { QList new_selections{}; for (auto& anno : annotations) { @@ -375,6 +428,51 @@ void Annotator::paint_annotations() { text->get_text_edit()->setExtraSelections(new_selections); } +bool Annotator::eventFilter(QObject* object, QEvent* event) { + if (object == text->get_text_edit()) { + if (event->type() == QEvent::KeyPress) { + auto key_event = static_cast(event); + if (key_event->key() == Qt::Key_Space) { + keyPressEvent(key_event); + return true; + } + } + } + return QWidget::eventFilter(object, event); +} + +void Annotator::keyPressEvent(QKeyEvent* event) { + if (event->text() == ">") { + annotations_model->visit_next(); + return; + } + if (event->text() == "<") { + annotations_model->visit_prev(); + return; + } + if (event->key() == Qt::Key_Escape) { + deactivate_active_annotation(); + emit 
active_annotation_changed(); + } + if (event->key() == Qt::Key_Space) { + bool backward{event->modifiers() & Qt::ShiftModifier}; + select_next_annotation(!backward); + return; + } + if (!label_choices->is_label_choice_enabled()) { + return; + } + auto id = annotations_model->shortcut_to_id(event->text()); + if (id != -1) { + label_choices->set_selected_label_id(id); + return; + } + if (event->key() == Qt::Key_Backspace) { + delete_active_annotation(); + return; + } +} + AnnotationsNavButtons::AnnotationsNavButtons(QWidget* parent) : QWidget(parent) { auto layout = new QHBoxLayout(); diff --git a/src/annotator.h b/src/annotator.h index a8e9f30..5f32201 100644 --- a/src/annotator.h +++ b/src/annotator.h @@ -36,6 +36,7 @@ public slots: void disable_delete(); void enable_label_choice(); void disable_label_choice(); + bool is_label_choice_enabled() const; private: QPushButton* delete_button; @@ -87,6 +88,7 @@ class Annotator : public QSplitter { public: Annotator(QWidget* parent = nullptr); + void keyPressEvent(QKeyEvent*) override; void set_annotations_model(AnnotationsModel*); void set_label_list_model(LabelListModel*); int annotation_at_pos(int) const; @@ -110,6 +112,10 @@ public slots: void store_state(); void set_monospace_font(bool monospace = true); void set_use_bold_font(bool use_bold = true); + void select_next_annotation(bool forward = true); + +protected: + bool eventFilter(QObject* object, QEvent* event) override; private: void fetch_labels_info(); @@ -121,9 +127,11 @@ public slots: void delete_annotations(QList); void delete_annotation(int); void deactivate_active_annotation(); + int find_next_annotation(int pos, bool forward = true) const; int active_annotation = -1; QMap annotations{}; QMap labels{}; + QMap pos_to_anno{}; QLabel* title_label; SearchableText* text; diff --git a/src/database.cpp b/src/database.cpp index 6868fca..6e57a73 100644 --- a/src/database.cpp +++ b/src/database.cpp @@ -442,6 +442,11 @@ LabelRecord json_to_label_record(const 
QJsonValue& json) { } else { record.color = json_obj["background_color"].toString(); } + if (json_obj.contains("shortcut_key")) { + record.shortcut_key = json_obj["shortcut_key"].toString(); + } else { + record.shortcut_key = json_obj["suffix_key"].toString(); + } } else { record.name = json.toArray()[0].toString(); record.color = json.toArray()[1].toString(); @@ -500,7 +505,7 @@ QString DatabaseCatalog::open_temp_database() { open_database(file_name, false); if (is_new) { import_documents(":docs/example_data/example_documents.json"); - import_labels(":docs/example_data/example_labels.txt"); + import_labels(":docs/example_data/example_labels.json"); } return file_name; } @@ -626,8 +631,20 @@ int DatabaseCatalog::insert_doc_record(const DocRecord& record, } void DatabaseCatalog::insert_label(QSqlQuery& query, const QString& label_name, - const QString& color) { - query.prepare("insert into label (name, color) values (:name, :color)"); + const QString& color, + const QString& shortcut_key) { + auto re = shortcut_key_pattern(); + bool valid_shortcut = re.match(shortcut_key).hasMatch(); + if (valid_shortcut) { + query.prepare("select id from label where keyboard_shortcut = :shortcut;"); + query.bindValue(":shortcut", shortcut_key); + query.exec(); + if (query.next()) { + valid_shortcut = false; + } + } + query.prepare("insert into label (name, color, shortcut_key) values (:name, " + ":color, :shortcut)"); query.bindValue(":name", label_name); if (QColor::isValidColor(color)) { query.bindValue(":color", QColor(color).name()); @@ -636,6 +653,8 @@ void DatabaseCatalog::insert_label(QSqlQuery& query, const QString& label_name, label_colors[color_index % label_colors.length()]); ++color_index; } + query.bindValue(":shortcut", + (valid_shortcut ? 
shortcut_key : QVariant(QVariant::String))); query.exec(); } @@ -715,7 +734,8 @@ int DatabaseCatalog::import_labels(const QString& file_path) { query.exec("begin transaction;"); for (const auto& label_info : json_array) { auto label_record = json_to_label_record(label_info); - insert_label(query, label_record.name, label_record.color); + insert_label(query, label_record.name, label_record.color, + label_record.shortcut_key); } query.exec("commit transaction;"); query.exec("select count(*) from label;"); @@ -833,11 +853,17 @@ int DatabaseCatalog::write_doc_and_annotations(AnnotationsWriter& writer, int DatabaseCatalog::export_labels(const QString& file_path) { QJsonArray labels{}; QSqlQuery query(QSqlDatabase::database(current_database)); - query.exec("select name, color from label order by id;"); + query.exec("select name, color, shortcut_key from label order by id;"); while (query.next()) { QJsonObject label_info{}; label_info["text"] = query.value(0).toString(); label_info["color"] = QColor(query.value(1).toString()).name(); + label_info["background_color"] = QColor(query.value(1).toString()).name(); + auto key = query.value(2); + if (!key.isNull()) { + label_info["shortcut_key"] = key.toString(); + label_info["suffix_key"] = key.toString(); + } labels << label_info; } QFile file(file_path); @@ -857,7 +883,7 @@ void batch_import_export(const QString& db_path, const QString& user_name, bool vacuum) { DatabaseCatalog catalog{}; catalog.open_database(db_path); - if (vacuum){ + if (vacuum) { catalog.vacuum_db(); return; } @@ -890,7 +916,8 @@ void DatabaseCatalog::create_tables() { "long_title TEXT DEFAULT NULL, short_title TEXT DEFAULT NULL);"); query.exec("CREATE TABLE IF NOT EXISTS label(id INTEGER PRIMARY KEY, name " - "TEXT UNIQUE NOT NULL, color TEXT NOT NULL DEFAULT '#FFA000'); "); + "TEXT UNIQUE NOT NULL, color TEXT NOT NULL DEFAULT '#FFA000', " + "shortcut_key TEXT UNIQUE DEFAULT NULL); "); query.exec(" CREATE TABLE IF NOT EXISTS annotation(doc_id NOT NULL " 
"REFERENCES document(id) ON DELETE CASCADE, label_id NOT NULL " diff --git a/src/database.h b/src/database.h index 963dd3e..3109140 100644 --- a/src/database.h +++ b/src/database.h @@ -4,9 +4,9 @@ #include #include -#include #include #include +#include #include #include #include @@ -191,6 +191,7 @@ class AnnotationsXmlWriter : public AnnotationsWriter { struct LabelRecord { QString name; QString color; + QString shortcut_key{}; }; LabelRecord json_to_label_record(const QJsonValue& json); @@ -231,7 +232,8 @@ class DatabaseCatalog : public QObject { QString get_default_database_path(); int insert_doc_record(const DocRecord& record, QSqlQuery& query); void insert_label(QSqlQuery& query, const QString& label_name, - const QString& color = QString()); + const QString& color = QString(), + const QString& shortcut_key = QString()); int write_doc_and_annotations(AnnotationsWriter& writer, int doc_id, bool include_document, const QString& user_name); diff --git a/src/label_list.cpp b/src/label_list.cpp index f3d4e81..2c43ff4 100644 --- a/src/label_list.cpp +++ b/src/label_list.cpp @@ -2,15 +2,18 @@ #include #include #include +#include #include #include #include #include #include +#include -#include "utils.h" #include "label_list.h" #include "label_list_model.h" +#include "user_roles.h" +#include "utils.h" namespace labelbuddy { @@ -59,28 +62,63 @@ QSize LabelDelegate::sizeHint(const QStyleOptionViewItem& option, } LabelListButtons::LabelListButtons(QWidget* parent) : QFrame(parent) { - QHBoxLayout* layout = new QHBoxLayout(); - setLayout(layout); + auto outer_layout = new QVBoxLayout(); + setLayout(outer_layout); + auto top_layout = new QHBoxLayout(); + outer_layout->addLayout(top_layout); select_all_button = new QPushButton("Select all"); - layout->addWidget(select_all_button); + top_layout->addWidget(select_all_button); delete_button = new QPushButton("Delete"); - layout->addWidget(delete_button); + top_layout->addWidget(delete_button); + + auto bottom_layout = new 
QHBoxLayout(); + outer_layout->addLayout(bottom_layout); set_color_button = new QPushButton("Set color"); - layout->addWidget(set_color_button); + bottom_layout->addWidget(set_color_button); + shortcut_label = new QLabel("Shortcut key: "); + bottom_layout->addWidget(shortcut_label); + shortcut_edit = new QLineEdit(); + bottom_layout->addWidget(shortcut_edit); + shortcut_edit->setMaxLength(1); + shortcut_edit->setValidator(&validator); + shortcut_edit->setFixedWidth(shortcut_edit->fontMetrics().maxWidth()); + bottom_layout->addStretch(1); + QObject::connect(select_all_button, &QPushButton::clicked, this, &LabelListButtons::select_all); - QObject::connect(set_color_button, &QPushButton::clicked, this, &LabelListButtons::set_label_color); - QObject::connect(delete_button, SIGNAL(clicked()), this, SIGNAL(delete_selected_rows())); + QObject::connect(shortcut_edit, &QLineEdit::returnPressed, this, + &LabelListButtons::shortcut_edit_pressed); } -void LabelListButtons::update_button_states(int n_selected, int total) { +void LabelListButtons::update_button_states(int n_selected, int total, + const QModelIndex& first_selected) { select_all_button->setEnabled(total > 0); delete_button->setEnabled(n_selected > 0); set_color_button->setEnabled(n_selected == 1); + shortcut_edit->setEnabled(n_selected == 1); + shortcut_label->setEnabled(n_selected == 1); + if (n_selected == 1 && first_selected.isValid()) { + shortcut_edit->setText(first_selected.model() + ->data(first_selected, Roles::ShortcutKeyRole) + .toString()); + } else { + shortcut_edit->setText(""); + } +} + +void LabelListButtons::set_model(LabelListModel* new_model) { + validator.setModel(new_model); +} +void LabelListButtons::set_view(QListView* new_view) { + validator.setView(new_view); +} + +void LabelListButtons::shortcut_edit_pressed() { + emit set_label_shortcut(shortcut_edit->text()); } LabelList::LabelList(QWidget* parent) : QFrame(parent) { @@ -96,6 +134,7 @@ LabelList::LabelList(QWidget* parent) : 
QFrame(parent) { labels_view->setSpacing(3); labels_view->setItemDelegate(new LabelDelegate); labels_view->setFocusPolicy(Qt::NoFocus); + buttons_frame->set_view(labels_view); labels_view->setSelectionMode(QAbstractItemView::ExtendedSelection); @@ -107,10 +146,14 @@ LabelList::LabelList(QWidget* parent) : QFrame(parent) { QObject::connect(buttons_frame, &LabelListButtons::set_label_color, this, &LabelList::set_label_color); + + QObject::connect(buttons_frame, &LabelListButtons::set_label_shortcut, this, + &LabelList::set_label_shortcut); } void LabelList::setModel(LabelListModel* new_model) { labels_view->setModel(new_model); + buttons_frame->set_model(new_model); model = new_model; QObject::connect(model, &LabelListModel::modelReset, this, &LabelList::update_button_states); @@ -159,7 +202,13 @@ void LabelList::update_button_states() { ++n_rows; } } - buttons_frame->update_button_states(n_rows, model->total_n_labels()); + auto first_selected = find_first_in_col_0(selected); + QModelIndex selected_index{}; + if (first_selected != selected.constEnd()) { + selected_index = *first_selected; + } + buttons_frame->update_button_states(n_rows, model->total_n_labels(), + selected_index); } void LabelList::set_label_color() { @@ -173,9 +222,39 @@ void LabelList::set_label_color() { model->data(*selected, Qt::BackgroundRole).value(); auto color = QColorDialog::getColor( current_color, this, QString("Set color for '%0'").arg(label_name)); - if (!color.isValid()) { + model->set_label_color(*selected, color); +} + +void LabelList::set_label_shortcut(const QString& new_shortcut) { + auto all_selected = labels_view->selectionModel()->selectedIndexes(); + auto selected = find_first_in_col_0(all_selected); + if (selected == all_selected.constEnd()) { return; } - model->set_label_color(*selected, color.name()); + model->set_label_shortcut(*selected, new_shortcut); +} + +QValidator::State ShortcutValidator::validate(QString& input, int& pos) const { + (void) pos; + if (model == 
nullptr || view == nullptr) { + return State::Invalid; + } + if (input == QString("")) { + return State::Acceptable; + } + auto all_selected = view->selectionModel()->selectedIndexes(); + QModelIndex selected{}; + auto selected_it = find_first_in_col_0(all_selected); + if (selected_it != all_selected.constEnd()) { + selected = *selected_it; + } + if (!model->is_valid_shortcut(input, selected)) { + return State::Invalid; + } + return State::Acceptable; +} +void ShortcutValidator::setModel(LabelListModel* new_model) { + model = new_model; } +void ShortcutValidator::setView(QListView* new_view) { view = new_view; } } // namespace labelbuddy diff --git a/src/label_list.h b/src/label_list.h index 01b97cc..e7441a7 100644 --- a/src/label_list.h +++ b/src/label_list.h @@ -2,13 +2,16 @@ #define LABELBUDDY_LABEL_LIST_H #include +#include #include #include #include #include +#include #include #include -#include +#include +#include #include "label_list_model.h" @@ -20,7 +23,18 @@ class LabelDelegate : public QStyledItemDelegate { void paint(QPainter* painter, const QStyleOptionViewItem& option, const QModelIndex& index) const override; QSize sizeHint(const QStyleOptionViewItem& option, - const QModelIndex& index) const override; + const QModelIndex& index) const override; +}; + +class ShortcutValidator : public QValidator { +public: + State validate(QString& input, int& pos) const override; + void setModel(LabelListModel* new_model); + void setView(QListView* new_view); + +private: + LabelListModel* model = nullptr; + QListView* view = nullptr; }; class LabelListButtons : public QFrame { @@ -28,21 +42,31 @@ class LabelListButtons : public QFrame { public: LabelListButtons(QWidget* parent = nullptr); + void set_model(LabelListModel* new_model); + void set_view(QListView* new_view); signals: void select_all(); void delete_selected_rows(); void set_label_color(); + void set_label_shortcut(const QString& new_shortcut); public slots: - void update_button_states(int n_selected, int 
total); + void update_button_states(int n_selected, int total, + const QModelIndex& first_selected); private: QPushButton* select_all_button; QPushButton* delete_button; QPushButton* set_color_button; + QLineEdit* shortcut_edit; + QLabel* shortcut_label; + ShortcutValidator validator; + +private slots: + void shortcut_edit_pressed(); }; class LabelList : public QFrame { @@ -57,12 +81,14 @@ public slots: void delete_selected_rows(); void update_button_states(); void set_label_color(); + void set_label_shortcut(const QString& new_shortcut); private: LabelListButtons* buttons_frame; QListView* labels_view; LabelListModel* model = nullptr; }; + } // namespace labelbuddy #endif diff --git a/src/label_list_model.cpp b/src/label_list_model.cpp index d98b305..171d2bc 100644 --- a/src/label_list_model.cpp +++ b/src/label_list_model.cpp @@ -10,8 +10,7 @@ LabelListModel::LabelListModel(QObject* parent) : QSqlQueryModel(parent) {} void LabelListModel::set_database(const QString& new_database_name) { database_name = new_database_name; - setQuery("select name, id from label order by id;", - QSqlDatabase::database(database_name)); + setQuery(select_query_text, QSqlDatabase::database(database_name)); } QSqlQuery LabelListModel::get_query() const { @@ -19,12 +18,29 @@ QSqlQuery LabelListModel::get_query() const { } QVariant LabelListModel::data(const QModelIndex& index, int role) const { + if (role == Qt::DisplayRole && index.column() == 0) { + auto name = QSqlQueryModel::data(index, role).toString(); + auto key = data(index, Roles::ShortcutKeyRole).toString(); + if (key != QString()) { + return QString("%0) %1").arg(key).arg(name); + } + return name; + } if (role == Roles::RowIdRole) { if (index.column() != 0) { return QVariant{}; } return QSqlQueryModel::data(index.sibling(index.row(), 1), Qt::DisplayRole); } + if (role == Roles::ShortcutKeyRole) { + auto label_id = data(index, Roles::RowIdRole).toInt(); + auto query = get_query(); + query.prepare("select shortcut_key from label 
where id = :labelid;"); + query.bindValue(":labelid", label_id); + query.exec(); + query.next(); + return query.value(0).toString(); + } if (role == Qt::BackgroundRole) { auto label_id = data(index, Roles::RowIdRole).toInt(); auto query = get_query(); @@ -85,21 +101,67 @@ int LabelListModel::delete_labels(const QModelIndexList& indices) { } void LabelListModel::refresh_current_query() { - setQuery("select name, id from label order by id;", - QSqlDatabase::database(database_name)); + setQuery(select_query_text, QSqlDatabase::database(database_name)); } void LabelListModel::set_label_color(const QModelIndex& index, - const QString& color) { + const QColor& color) { + if (!color.isValid()){ + return; + } auto label_id = data(index, Roles::RowIdRole); if (label_id == QVariant()) { return; } auto query = get_query(); query.prepare("update label set color = :col where id = :labelid;"); - query.bindValue(":col", color); + query.bindValue(":col", color.name()); + query.bindValue(":labelid", label_id.toInt()); + query.exec(); + emit dataChanged(index, index, {Qt::BackgroundRole}); + emit labels_changed(); +} + +bool LabelListModel::is_valid_shortcut(const QString& shortcut, + const QModelIndex& index) { + auto label_id_variant = data(index, Roles::RowIdRole); + int label_id = label_id_variant != QVariant() ? 
label_id_variant.toInt() : -1; + return is_valid_shortcut(shortcut, label_id); +} + +bool LabelListModel::is_valid_shortcut(const QString& shortcut, int label_id) { + if (!re.match(shortcut).hasMatch()) { + return false; + } + auto query = get_query(); + query.prepare("select id from label where shortcut_key = :shortcut " + "and id != :labelid;"); + query.bindValue(":shortcut", shortcut); + query.bindValue(":labelid", label_id); + query.exec(); + if (query.next()) { + return false; + } + return true; +} + +void LabelListModel::set_label_shortcut(const QModelIndex& index, + const QString& shortcut) { + auto label_id = data(index, Roles::RowIdRole); + if (label_id == QVariant()) { + return; + } + if (!re.match(shortcut).hasMatch()) { + return; + } + auto query = get_query(); + query.prepare( + "update label set shortcut_key = :shortcut where id = :labelid;"); + query.bindValue(":shortcut", + shortcut != "" ? shortcut : QVariant(QVariant::String)); query.bindValue(":labelid", label_id.toInt()); query.exec(); + emit dataChanged(index, index, {Qt::DisplayRole}); emit labels_changed(); } diff --git a/src/label_list_model.h b/src/label_list_model.h index b5b03ac..bca6c50 100644 --- a/src/label_list_model.h +++ b/src/label_list_model.h @@ -3,11 +3,13 @@ #include #include +#include + +#include "utils.h" namespace labelbuddy { class LabelListModel : public QSqlQueryModel { - Q_OBJECT public: @@ -21,12 +23,14 @@ class LabelListModel : public QSqlQueryModel { int total_n_labels() const; int delete_labels(const QModelIndexList& indices); + bool is_valid_shortcut(const QString& shortcut, const QModelIndex& index); public slots: void set_database(const QString& new_database_name); void refresh_current_query(); - void set_label_color(const QModelIndex& index, const QString& color); + void set_label_color(const QModelIndex& index, const QColor& color); + void set_label_shortcut(const QModelIndex& index, const QString& shortcut); signals: @@ -36,7 +40,12 @@ public slots: private: 
QSqlQuery get_query() const; + bool is_valid_shortcut(const QString& shortcut, int label_id); + QString database_name; + const QString select_query_text = ("select name, id from label order by id;"); + QRegularExpression re = shortcut_key_pattern(true); }; + } // namespace labelbuddy #endif // LABELBUDDY_DOC_LIST_MODEL_H diff --git a/src/main.cpp b/src/main.cpp index fd86e43..e8d50b5 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -24,7 +24,7 @@ int main(int argc, char* argv[]) { const QStringList labels_files = parser.values("import-labels"); const QStringList docs_files = parser.values("import-docs"); const QString export_labels_file = parser.value("export-labels"); - const QString export_docs_file = parser.value("export-annotations"); + const QString export_docs_file = parser.value("export-docs"); QString db_path = (args.length() == 0) ? QString() : args[0]; if (labels_files.length() || docs_files.length() || diff --git a/src/searchable_text.cpp b/src/searchable_text.cpp index c844e39..d7179a9 100644 --- a/src/searchable_text.cpp +++ b/src/searchable_text.cpp @@ -66,7 +66,7 @@ SearchableText::SearchableText(QWidget* parent) : QWidget(parent) { void SearchableText::fill(const QString& content) { text_edit->setPlainText(content); text_edit->setProperty("readOnly", true); - text_edit->setFocus(); + this->setFocus(); } void SearchableText::update_search_button_states() { @@ -81,6 +81,7 @@ void SearchableText::search_backward() { search(QTextDocument::FindBackward); } void SearchableText::continue_search() { search(current_search_flags); } void SearchableText::search(QTextDocument::FindFlags flags) { + this->setFocus(); auto pattern = search_box->text(); if (pattern.isEmpty()) { return; @@ -154,6 +155,47 @@ bool SearchableText::eventFilter(QObject* object, QEvent* event) { return QWidget::eventFilter(object, event); } +void SearchableText::cycle_cursor_height() { + auto top = text_edit->cursorRect().top(); + for (int i = 0; i < 3; ++i) { + cycle_cursor_height_once(); 
+ if (text_edit->cursorRect().top() != top) { + return; + } + } +} + +void SearchableText::cycle_cursor_height_once() { + auto pos = text_edit->textCursor().position(); + auto crect = text_edit->cursorRect(); + auto bottom = text_edit->rect().bottom(); + auto top = text_edit->rect().top(); + auto center = (bottom + top) / 2; + auto sb = text_edit->verticalScrollBar(); + + CursorHeight target_height; + if (pos != last_cursor_pos) { + target_height = CursorHeight::Center; + last_cursor_pos = pos; + } else { + target_height = static_cast( + (static_cast(last_cursor_height) + 1) % 3); + } + + switch (target_height) { + case CursorHeight::Center: + sb->setValue(sb->value() + crect.bottom() - center); + break; + case CursorHeight::Top: + sb->setValue(sb->value() + crect.top() - top); + break; + case CursorHeight::Bottom: + sb->setValue(sb->value() + crect.bottom() - bottom); + break; + } + last_cursor_height = target_height; +} + void SearchableText::handle_nav_event(QKeyEvent* event) { if (((event->key() == Qt::Key_J) && (event->modifiers() & Qt::ControlModifier)) || @@ -185,7 +227,11 @@ void SearchableText::handle_nav_event(QKeyEvent* event) { QAbstractSlider::SliderPageStepSub); return; } - + if ((event->key() == Qt::Key_L) && + (event->modifiers() & Qt::ControlModifier)) { + cycle_cursor_height(); + return; + } if ((event->key() == Qt::Key_BracketRight) && (event->modifiers() & Qt::ControlModifier)) { extend_selection(QTextCursor::NextCharacter, Side::Right); @@ -227,7 +273,7 @@ void SearchableText::handle_nav_event(QKeyEvent* event) { } void SearchableText::keyPressEvent(QKeyEvent* event) { - if (event->matches(QKeySequence::Find)) { + if (event->matches(QKeySequence::Find) || event->key() == Qt::Key_Slash) { search_box->setFocus(); search_box->selectAll(); return; @@ -247,6 +293,7 @@ QTextCursor SearchableText::textCursor() const { return text_edit->textCursor(); } QTextEdit* SearchableText::get_text_edit() { return text_edit; } +QLineEdit* 
SearchableText::get_search_box() { return search_box; } QList SearchableText::current_selection() const { QTextCursor cursor = text_edit->textCursor(); diff --git a/src/searchable_text.h b/src/searchable_text.h index bdec657..37a044d 100644 --- a/src/searchable_text.h +++ b/src/searchable_text.h @@ -19,10 +19,11 @@ class SearchableText : public QWidget { SearchableText(QWidget* parent = nullptr); void fill(const QString& content); - void keyPressEvent(QKeyEvent*); + void keyPressEvent(QKeyEvent*) override; QList current_selection() const; QTextCursor textCursor() const; QTextEdit* get_text_edit(); + QLineEdit* get_search_box(); public slots: @@ -47,11 +48,16 @@ public slots: void swap_pos_anchor(QTextCursor& cursor) const; void handle_nav_event(QKeyEvent* event); + void cycle_cursor_height(); + void cycle_cursor_height_once(); const QList nav_keys{Qt::Key_K, Qt::Key_J, Qt::Key_N, Qt::Key_P, Qt::Key_U, Qt::Key_D}; const QList nav_keys_nomodif{Qt::Key_BracketLeft, Qt::Key_BracketRight, Qt::Key_BraceLeft, Qt::Key_BraceRight}; enum class Side { Right, Left }; + enum class CursorHeight {Center, Top, Bottom}; + CursorHeight last_cursor_height{}; + int last_cursor_pos{}; private slots: diff --git a/src/user_roles.h b/src/user_roles.h index 210d492..d119f2d 100644 --- a/src/user_roles.h +++ b/src/user_roles.h @@ -5,7 +5,7 @@ namespace labelbuddy { - enum Roles { RowIdRole = Qt::UserRole}; + enum Roles { RowIdRole = Qt::UserRole, ShortcutKeyRole}; } #endif diff --git a/src/utils.cpp b/src/utils.cpp index a8b202c..81bf4b6 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -54,7 +54,7 @@ void prepare_parser(QCommandLineParser& parser) { "docs file"}); parser.addOption( {"export-labels", "Labels file to export to.", "exported labels file"}); - parser.addOption({"export-annotations", + parser.addOption({"export-docs", "Docs & annotations file to export to.", "exported docs file"}); parser.addOption({"labelled-only", "Export only labelled documents"}); @@ -66,4 +66,8 @@ void 
prepare_parser(QCommandLineParser& parser) { {"vacuum", "Repack database into minimal amount of disk space."}); } +QRegularExpression shortcut_key_pattern(bool accept_empty) { + return QRegularExpression{accept_empty ? "^[a-z]?$" : "^[a-z]$"}; +} + } // namespace labelbuddy diff --git a/src/utils.h b/src/utils.h index 4b10a76..2ff2a54 100644 --- a/src/utils.h +++ b/src/utils.h @@ -1,10 +1,11 @@ #ifndef LABELBUDDY_UTILS_H #define LABELBUDDY_UTILS_H +#include #include +#include #include #include -#include namespace labelbuddy { @@ -17,5 +18,7 @@ void prepare_parser(QCommandLineParser& parser); QModelIndexList::const_iterator find_first_in_col_0(const QModelIndexList& indices); +QRegularExpression shortcut_key_pattern(bool accept_empty = false); + } // namespace labelbuddy #endif diff --git a/test/data/test_labels.json b/test/data/test_labels.json index 33f59fe..1f8d6dd 100644 --- a/test/data/test_labels.json +++ b/test/data/test_labels.json @@ -1,5 +1,5 @@ [ - ["label: Reinício da sessão", "#aec7e8"], + {"text": "label: Reinício da sessão","color": "#aec7e8", "shortcut_key": "p"}, ["label: Resumption of the session", "#ffbb78"], ["label: Επαvάληψη της συvσδoυ", "#98df8a"] ] diff --git a/test/test_annotator.cpp b/test/test_annotator.cpp index 4ce0053..07df090 100644 --- a/test/test_annotator.cpp +++ b/test/test_annotator.cpp @@ -34,13 +34,14 @@ void TestAnnotator::test_annotator() { QVERIFY(lv->isEnabled()); auto del = labels->findChild(); QVERIFY(!del->isEnabled()); + // set label using label list button lv->selectionModel()->select(labels_model.index(1, 0), QItemSelectionModel::SelectCurrent); QCOMPARE(annotator.active_annotation_label(), 2); QCOMPARE(annotations_model.get_annotations_info()[1].label_id, 2); QVERIFY(del->isEnabled()); - lv->selectionModel()->select(labels_model.index(0, 0), - QItemSelectionModel::SelectCurrent); + // set label using shortcut + QTest::keyClicks(&annotator, "p"); QCOMPARE(annotations_model.get_annotations_info()[1].label_id, 1); 
del->click(); QCOMPARE(annotations_model.get_annotations_info().size(), 0); @@ -58,5 +59,8 @@ void TestAnnotator::test_annotator() { nav->findChildren()[5]->click(); QCOMPARE(annotations_model.current_doc_position(), 2); QCOMPARE(annotations_model.get_annotations_info().size(), 1); + QCOMPARE(annotator.active_annotation_label(), -1); + annotator.select_next_annotation(); + QCOMPARE(annotator.active_annotation_label(), 2); } } // namespace labelbuddy diff --git a/test/test_database.cpp b/test/test_database.cpp index e1a18b2..497bcd9 100644 --- a/test/test_database.cpp +++ b/test/test_database.cpp @@ -124,10 +124,11 @@ void TestDatabase::test_import_export_labels() { } void TestDatabase::check_db_labels(QSqlQuery& query) { - query.exec("select name, color from label;"); + query.exec("select name, color, shortcut_key from label;"); query.next(); QCOMPARE(query.value(0).toString(), QString("label: Reinício da sessão")); QCOMPARE(query.value(1).toString(), QString("#aec7e8")); + QCOMPARE(query.value(2).toString(), QString("p")); query.next(); QCOMPARE(query.value(0).toString(), QString("label: Resumption of the session")); diff --git a/test/test_label_list_model.cpp b/test/test_label_list_model.cpp index d4a8404..a87638d 100644 --- a/test/test_label_list_model.cpp +++ b/test/test_label_list_model.cpp @@ -23,4 +23,37 @@ void TestLabelListModel::test_delete_labels() { QCOMPARE(model.label_id_to_model_index(3), model.index(1, 0)); } +void TestLabelListModel::test_set_shortcut() { + QTemporaryDir tmp_dir{}; + auto db_name = prepare_db(tmp_dir); + LabelListModel model{}; + model.set_database(db_name); + QSqlQuery query(QSqlDatabase::database(db_name)); + QCOMPARE(model.rowCount(), 3); + query.exec("select shortcut_key from label where id = 1;"); + query.next(); + QCOMPARE(query.value(0).toString(), QString("p")); + auto index = model.index(1, 0); + model.set_label_shortcut(index, "z"); + query.exec("select shortcut_key from label where id = 2;"); + query.next(); + 
QCOMPARE(query.value(0).toString(), QString("z")); + model.set_label_shortcut(model.index(1, 0), ""); + query.exec("select shortcut_key from label where id = 2;"); + query.next(); + QVERIFY(query.value(0).isNull()); + model.set_label_shortcut(model.index(1, 0), "/"); + query.exec("select shortcut_key from label where id = 2;"); + query.next(); + QVERIFY(query.value(0).isNull()); + QVERIFY(query.value(0).isNull()); + model.set_label_shortcut(model.index(1, 0), "p"); + query.exec("select shortcut_key from label where id = 2;"); + query.next(); + QVERIFY(query.value(0).isNull()); + model.set_label_shortcut(model.index(1, 0), "x"); + query.exec("select shortcut_key from label where id = 2;"); + query.next(); + QCOMPARE(query.value(0).toString(), QString("x")); +} } // namespace labelbuddy diff --git a/test/test_label_list_model.h b/test/test_label_list_model.h index 278bca6..99cadfe 100644 --- a/test/test_label_list_model.h +++ b/test/test_label_list_model.h @@ -8,6 +8,7 @@ class TestLabelListModel : public QObject { Q_OBJECT private slots: void test_delete_labels(); + void test_set_shortcut(); }; } // namespace labelbuddy #endif