From 5240532e36ad71379ad0d4dedb08f4421928de52 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Fri, 28 Sep 2007 23:56:41 +0100 Subject: defaults & configuration of suggested manpage generating/viewing tool/display --- conf/sisu/sisurc.yml | 3 ++- data/doc/sisu/sisu_markup_samples/sisu_manual/_sisu/sisurc.yml | 3 ++- lib/sisu/v0/sysenv.rb | 6 ++++++ lib/sisu/v0/urls.rb | 3 ++- 4 files changed, 12 insertions(+), 3 deletions(-) diff --git a/conf/sisu/sisurc.yml b/conf/sisu/sisurc.yml index c5409774..3ed56c96 100644 --- a/conf/sisu/sisurc.yml +++ b/conf/sisu/sisurc.yml @@ -103,10 +103,11 @@ program_set: #program_select: # editor: 'gvim -c :R -c :S' # pdf_viewer: 'evince' -# web_browser: 'kazehakase' #'galeon' +# web_browser: 'firefox' #'iceweasel' #'epiphany' #'galeon' #'konqueror' #'kazehakase' # console_www_browser: 'links2' #'elinks' # odf_viewer: 'oowriter' # xml_viewer: 'xml-viewer' +# man: 'nroff -man' #'groff -man -Tascii' # 'nroff -man' #promo: sisu_icon, sisu, sisu_search_libre, open_society, fsf, ruby #search: # sisu: diff --git a/data/doc/sisu/sisu_markup_samples/sisu_manual/_sisu/sisurc.yml b/data/doc/sisu/sisu_markup_samples/sisu_manual/_sisu/sisurc.yml index 876c338a..c54594ca 100644 --- a/data/doc/sisu/sisu_markup_samples/sisu_manual/_sisu/sisurc.yml +++ b/data/doc/sisu/sisu_markup_samples/sisu_manual/_sisu/sisurc.yml @@ -107,10 +107,11 @@ program_set: program_select: editor: 'gvim -c :R' pdf_viewer: 'evince' - web_browser: 'kazehakase' + web_browser: 'firefox' #'iceweasel' #'epiphany' #'galeon' #'konqueror' #'kazehakase' console_web_browser: 'links2' odf_viewer: 'oowriter' xml_viewer: 'xml-viewer' + man: 'nroff -man' #'groff -man -Tascii' # 'nroff -man' #promo: sisu_icon, sisu, sisu_search_libre, open_society, fsf, ruby search: sisu: diff --git a/lib/sisu/v0/sysenv.rb b/lib/sisu/v0/sysenv.rb index 58ad1bf8..8bf805be 100644 --- a/lib/sisu/v0/sysenv.rb +++ b/lib/sisu/v0/sysenv.rb @@ -1596,6 +1596,12 @@ WOK else 'oowriter' #'odf-viewer','oowriter' end 
end + def manpage_generator + if defined? @rc['program_select']['man'] and @rc['program_select']['man'] =~/\S\S+/ + @rc['program_select']['man'] + else 'nroff -man' #'nroff -man' #'groff -man -Tascii' + end + end def file_encoding #file encoding is='' if defined? @rc['program_set']['file_encoding']; is=@rc['program_set']['encoding'] diff --git a/lib/sisu/v0/urls.rb b/lib/sisu/v0/urls.rb index 5d5d8795..7f087b95 100644 --- a/lib/sisu/v0/urls.rb +++ b/lib/sisu/v0/urls.rb @@ -95,6 +95,7 @@ module SiSU_urls @console_browser=@env.program.console_web_browser @pdf_viewer=@env.program.pdf_viewer @odf_viewer=@env.program.odf_viewer + @manpage_gen=@env.program.manpage_generator source=if @opt.fns =~/\.sst$/; @opt.fns elsif @opt.fns =~/\._sst/; "#@fnb.composite.sst" else 'not recognised file' @@ -206,7 +207,7 @@ module SiSU_urls @opt.cmd.gsub!(/d[iu]/,'') end if x=~/^i/ and @opt.cmd =~/i/ - tell=SiSU_Screen::Ansi.new(@opt.cmd,"-#{x}","nroff -man #{@path.path.manpage}/#@fnb.1 |most") + tell=SiSU_Screen::Ansi.new(@opt.cmd,"-#{x}","#@manpage_gen #{@path.path.manpage}/#@fnb.1 |most") tell.result unless @opt.cmd =~/q/ @opt.cmd.gsub!(/I/,'') end -- cgit v1.2.3 From 761f2323dd5e8d34cd5bdf4fc38719a273814f21 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Sat, 29 Sep 2007 08:36:34 +0100 Subject: reorganising documentation --- CHANGELOG | 7 +- .../sisu/sisu_markup_samples/sisu_manual/Rantfile | 4 +- .../sisu/sisu_markup_samples/sisu_manual/sisu.ssm | 6 +- .../sisu_manual/sisu_complete.sst | 4 + .../sisu_manual/sisu_help_sources.sst | 160 +++++++++++---------- .../sisu_markup_samples/sisu_manual/sisu_how.ssi | 133 +++++++++++++++++ .../sisu_manual/sisu_introduction.ssi | 133 +++++++++++++++++ .../sisu_manual/sisu_introduction.ssm | 58 ++++++++ .../sisu_manual/sisu_introduction.sst | 133 ----------------- .../sisu_manual/sisu_manual.ssm | 6 +- .../sisu_markup_samples/sisu_manual/sisu_pdf.sst | 4 + .../sisu_manual/sisu_postgresql.sst | 5 + .../sisu_manual/sisu_short_feature_summary.ssi 
| 133 +++++++++++++++++ .../sisu_manual/sisu_sqlite.sst | 5 + .../sisu_manual/sisu_summary_of_features.ssi | 133 +++++++++++++++++ .../sisu_manual/sisu_webrick.sst | 4 + .../conf/editor-syntax-etc/vim/syntax/sisu.vim | 6 +- 17 files changed, 713 insertions(+), 221 deletions(-) create mode 100644 data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_how.ssi create mode 100644 data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_introduction.ssi create mode 100644 data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_introduction.ssm delete mode 100644 data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_introduction.sst create mode 100644 data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_short_feature_summary.ssi create mode 100644 data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_summary_of_features.ssi diff --git a/CHANGELOG b/CHANGELOG index ed8f23d7..a860dc80 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -29,8 +29,11 @@ http://www.jus.uio.no/sisu/pkg/src/sisu_0.60.0.orig.tar.gz * sysenv, sisurc.yml allow use of relative output paths, expand to absolute - * param, behavior changed to take in multi-lines, affects regex matching, - watch + * param, behavior for reading headers changed to take in multi-lines, affects + regex matching, watch + + * vim syntax highlighting for sisu markup, change to take account of + possibility of multi-line headers * html segmented, bugfix: footnotes for headings levels :A,:B and :C correctly placed in first segment when batch-processing files/output diff --git a/data/doc/sisu/sisu_markup_samples/sisu_manual/Rantfile b/data/doc/sisu/sisu_markup_samples/sisu_manual/Rantfile index e9cba974..37198c7c 100644 --- a/data/doc/sisu/sisu_markup_samples/sisu_manual/Rantfile +++ b/data/doc/sisu/sisu_markup_samples/sisu_manual/Rantfile @@ -147,11 +147,11 @@ def files_all files end def filelist_docs - files='sisu_manual.ssm sisu_introduction.sst sisu_help.sst sisu_help_sources.sst sisu_commands.sst sisu_markup.sst sisu_filetypes.sst sisu_config.ssi 
sisu_skin.sst sisu_css.ssi sisu_content_directories.ssi sisu_homepages.ssi sisu_examples.sst sisu_output_overview.sst sisu_webrick.sst sisu_quickstart.sst sisu_faq.sst sisu_syntax_highlighting.sst sisu_configuration.ssm sisu_description.sst sisu_remote.sst sisu_complete.sst sisu_postgresql.sst sisu_sqlite.sst sisu_pdf.sst sisu_search.ssm sisu_vim.sst sisu_doc.sst' # sisu_outpuman/man7/sisu_doc.7t_types.ssm sisu_sql.sst sisu_hyperestraier.sst sisu_latex.sst sisu_odf.sst sisu_xml.sst sisu_concordance.sst sisu_document_digest_certificate.sst document_digest_certificate.sst sisu_markup_source.sst sisupod.sst + files='sisu_manual.ssm sisu_introduction.ssm sisu_help.sst sisu_help_sources.sst sisu_commands.sst sisu_markup.sst sisu_filetypes.sst sisu_config.ssi sisu_skin.sst sisu_css.ssi sisu_content_directories.ssi sisu_homepages.ssi sisu_examples.sst sisu_output_overview.sst sisu_webrick.sst sisu_quickstart.sst sisu_faq.sst sisu_syntax_highlighting.sst sisu_configuration.ssm sisu_description.sst sisu_remote.sst sisu_complete.sst sisu_postgresql.sst sisu_sqlite.sst sisu_pdf.sst sisu_search.ssm sisu_vim.sst sisu_doc.sst' # sisu_outpuman/man7/sisu_doc.7t_types.ssm sisu_sql.sst sisu_hyperestraier.sst sisu_latex.sst sisu_odf.sst sisu_xml.sst sisu_concordance.sst sisu_document_digest_certificate.sst document_digest_certificate.sst sisu_markup_source.sst sisupod.sst files end def filelist_manpage - files='sisu.ssm sisu_introduction.sst sisu_help.sst sisu_help_sources.sst sisu_commands.sst sisu_markup.sst sisu_filetypes.sst sisu_config.ssi sisu_skin.sst sisu_css.ssi sisu_content_directories.ssi sisu_homepages.ssi sisu_examples.ssi sisu_output_overview.sst sisu_webrick.sst sisu_download.ssi sisu_installation.ssi sisu_quickstart.sst sisu_howto.sst sisu_faq.sst sisu_syntax_highlighting.sst sisu_configuration.ssm sisu_remote.sst sisu_complete.sst sisu_postgresql.sst sisu_sqlite.sst sisu_pdf.sst sisu_search.ssm sisu_vim.sst sisu_doc.sst' # sisu_output_types.ssm sisu_sql.sst 
sisu_hyperestraier.sst sisu_latex.sst sisu_odf.sst sisu_xml.sst sisu_concordance.sst sisu_document_digest_certificate.sst document_digest_certificate.sst sisu_markup_source.sst sisupod.sst + files='sisu.ssm sisu_introduction.ssm sisu_help.sst sisu_help_sources.sst sisu_commands.sst sisu_markup.sst sisu_filetypes.sst sisu_config.ssi sisu_skin.sst sisu_css.ssi sisu_content_directories.ssi sisu_homepages.ssi sisu_examples.ssi sisu_output_overview.sst sisu_webrick.sst sisu_download.ssi sisu_installation.ssi sisu_quickstart.sst sisu_howto.sst sisu_faq.sst sisu_syntax_highlighting.sst sisu_configuration.ssm sisu_remote.sst sisu_complete.sst sisu_postgresql.sst sisu_sqlite.sst sisu_pdf.sst sisu_search.ssm sisu_vim.sst sisu_doc.sst' # sisu_output_types.ssm sisu_sql.sst sisu_hyperestraier.sst sisu_latex.sst sisu_odf.sst sisu_xml.sst sisu_concordance.sst sisu_document_digest_certificate.sst document_digest_certificate.sst sisu_markup_source.sst sisupod.sst #puts files + "\n\n" files end diff --git a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu.ssm b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu.ssm index 50bd7dc9..33fbc344 100644 --- a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu.ssm +++ b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu.ssm @@ -49,7 +49,7 @@ sisu [-CcFLSVvW] :B~ What is SiSU? 
-<< |sisu_introduction.sst|@|^| +<< |sisu_introduction.ssi|@|^| << |sisu_help.sst|@|^| @@ -99,4 +99,8 @@ sisu [-CcFLSVvW] << |sisu_syntax_highlighting.sst|@|^| +<< |sisu_how.ssi|@|^| + +<< |sisu_short_feature_summary.ssi|@|^| + << |sisu_help_sources.sst|@|^| diff --git a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_complete.sst b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_complete.sst index ac177878..75252e89 100644 --- a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_complete.sst +++ b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_complete.sst @@ -28,6 +28,10 @@ @bold: /Gnu|Debian|Ruby|SiSU/ +@man: 8; +name=sisu - documents: structuring, publishing in multiple formats, and search; +synopsis=package for the installation of the whole of sisu with all its dependencies + @links: { SiSU Manual }http://www.jus.uio.no/sisu/sisu_manual/ { Book Samples and Markup Examples }http://www.jus.uio.no/sisu/SiSU/2.html { SiSU @ Wikipedia }http://en.wikipedia.org/wiki/SiSU diff --git a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_help_sources.sst b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_help_sources.sst index 3006ef75..860b2173 100644 --- a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_help_sources.sst +++ b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_help_sources.sst @@ -87,45 +87,47 @@ _1 http://www.jus.uio.no/sisu/sisu_manual/sisu_help_sources/index.html _1 man sisu -_1 man sisu_commands +% _1 man sisu_commands _1 man 7 sisu_complete -_1 man sisu_configuration +% _1 man sisu_configuration -_1 man 8 sisu_faq +% _1 man sisu_faq -_1 man sisu_filetypes +% _1 man sisu_filetypes -_1 man sisu_help +% _1 man sisu_help -_1 man sisu_help_sources +% _1 man sisu_help_sources -_1 man 8 sisu_howto +% _1 man 8 sisu_howto -_1 man sisu_introduction +% _1 man sisu_introduction -_1 man sisu_markup +% _1 man sisu_markup -_1 man sisu_output_overview +% _1 man sisu_output_overview _1 man 7 sisu_pdf _1 man 7 sisu_postgresql -_1 man 8 sisu_quickstart 
+% _1 man 8 sisu_quickstart -_1 man 8 sisu_remote +% _1 man 8 sisu_remote -_1 man 8 sisu_search +% _1 man 8 sisu_search -_1 man sisu_skin +% _1 man sisu_skin _1 man 7 sisu_sqlite -_1 man 8 sisu_syntax_highlighting +% _1 man 8 sisu_syntax_highlighting + +_1 man sisu_termsheet -_1 man 7 sisu_vim +% _1 man 7 sisu_vim _1 man sisu_webrick @@ -143,51 +145,53 @@ file:///usr/share/doc/sisu/sisu_manual/sisu_help_sources/index.html _1 /usr/share/doc/sisu/sisu_manual/sisu/index.html -_1 /usr/share/doc/sisu/sisu_manual/sisu_commands/index.html +% _1 /usr/share/doc/sisu/sisu_manual/sisu_commands/index.html _1 /usr/share/doc/sisu/sisu_manual/sisu_complete/index.html -_1 /usr/share/doc/sisu/sisu_manual/sisu_configuration/index.html +% _1 /usr/share/doc/sisu/sisu_manual/sisu_configuration/index.html -_1 /usr/share/doc/sisu/sisu_manual/sisu_description/index.html +% _1 /usr/share/doc/sisu/sisu_manual/sisu_description/index.html -_1 /usr/share/doc/sisu/sisu_manual/sisu_examples/index.html +% _1 /usr/share/doc/sisu/sisu_manual/sisu_examples/index.html -_1 /usr/share/doc/sisu/sisu_manual/sisu_faq/index.html +% _1 /usr/share/doc/sisu/sisu_manual/sisu_faq/index.html -_1 /usr/share/doc/sisu/sisu_manual/sisu_filetypes/index.html +% _1 /usr/share/doc/sisu/sisu_manual/sisu_filetypes/index.html -_1 /usr/share/doc/sisu/sisu_manual/sisu_help/index.html +% _1 /usr/share/doc/sisu/sisu_manual/sisu_help/index.html -_1 /usr/share/doc/sisu/sisu_manual/sisu_help_sources/index.html +% _1 /usr/share/doc/sisu/sisu_manual/sisu_help_sources/index.html -_1 /usr/share/doc/sisu/sisu_manual/sisu_howto/index.html +% _1 /usr/share/doc/sisu/sisu_manual/sisu_howto/index.html -_1 /usr/share/doc/sisu/sisu_manual/sisu_introduction/index.html +% _1 /usr/share/doc/sisu/sisu_manual/sisu_introduction/index.html -_1 /usr/share/doc/sisu/sisu_manual/sisu_manual/index.html +% _1 /usr/share/doc/sisu/sisu_manual/sisu_manual/index.html -_1 /usr/share/doc/sisu/sisu_manual/sisu_markup/index.html +% _1 
/usr/share/doc/sisu/sisu_manual/sisu_markup/index.html -_1 /usr/share/doc/sisu/sisu_manual/sisu_output_overview/index.html +% _1 /usr/share/doc/sisu/sisu_manual/sisu_output_overview/index.html _1 /usr/share/doc/sisu/sisu_manual/sisu_pdf/index.html _1 /usr/share/doc/sisu/sisu_manual/sisu_postgresql/index.html -_1 /usr/share/doc/sisu/sisu_manual/sisu_quickstart/index.html +% _1 /usr/share/doc/sisu/sisu_manual/sisu_quickstart/index.html -_1 /usr/share/doc/sisu/sisu_manual/sisu_remote/index.html +% _1 /usr/share/doc/sisu/sisu_manual/sisu_remote/index.html -_1 /usr/share/doc/sisu/sisu_manual/sisu_search/index.html +% _1 /usr/share/doc/sisu/sisu_manual/sisu_search/index.html -_1 /usr/share/doc/sisu/sisu_manual/sisu_skin/index.html +% _1 /usr/share/doc/sisu/sisu_manual/sisu_skin/index.html _1 /usr/share/doc/sisu/sisu_manual/sisu_sqlite/index.html -_1 /usr/share/doc/sisu/sisu_manual/sisu_syntax_highlighting/index.html +% _1 /usr/share/doc/sisu/sisu_manual/sisu_syntax_highlighting/index.html -_1 /usr/share/doc/sisu/sisu_manual/sisu_vim/index.html +_1 /usr/share/doc/sisu/sisu_manual/sisu_termsheet/index.html + +% _1 /usr/share/doc/sisu/sisu_manual/sisu_vim/index.html _1 /usr/share/doc/sisu/sisu_manual/sisu_webrick/index.html @@ -299,11 +303,11 @@ _1 http://www.jus.uio.no/sisu/sisu_manual/sisu_vim/index.html _1 http://www.jus.uio.no/sisu/sisu_manual/sisu_webrick/index.html -3~ man2html +2~ man2html 3~ locally installed -file:///usr/share/doc/sisu/html/sisu_manual.1.html +file:///usr/share/doc/sisu/html/sisu.1.html file:///usr/share/doc/sisu/html/sisu_help.1.html @@ -311,31 +315,31 @@ file:///usr/share/doc/sisu/html/sisu_help_sources.1.html _1 /usr/share/doc/sisu/html/sisu.1.html -_1 /usr/share/doc/sisu/html/sisu_commands.1.html +% _1 /usr/share/doc/sisu/html/sisu_commands.1.html -_1 /usr/share/doc/sisu/html/sisu_complete.7.html +% _1 /usr/share/doc/sisu/html/sisu_complete.7.html -_1 /usr/share/doc/sisu/html/sisu_configuration.1.html +% _1 
/usr/share/doc/sisu/html/sisu_configuration.1.html % _1 /usr/share/doc/sisu/html/sisu_description.1.html % _1 /usr/share/doc/sisu/html/sisu_examples.1.html -_1 /usr/share/doc/sisu/html/sisu_faq.8.html +% _1 /usr/share/doc/sisu/html/sisu_faq.8.html % _1 /usr/share/doc/sisu/html/sisu_filetypes.1.html -_1 /usr/share/doc/sisu/html/sisu_help.1.html +% _1 /usr/share/doc/sisu/html/sisu_help.1.html -_1 /usr/share/doc/sisu/html/sisu_help_sources.1.html +% _1 /usr/share/doc/sisu/html/sisu_help_sources.1.html -_1 /usr/share/doc/sisu/html/sisu_howto.8.html +% _1 /usr/share/doc/sisu/html/sisu_howto.8.html % _1 /usr/share/doc/sisu/html/sisu_introduction.1.html % _1 /usr/share/doc/sisu/html/sisu_manual.1.html -_1 /usr/share/doc/sisu/html/sisu_markup.1.html +% _1 /usr/share/doc/sisu/html/sisu_markup.1.html % _1 /usr/share/doc/sisu/html/sisu_output_overview.1.html @@ -343,19 +347,19 @@ _1 /usr/share/doc/sisu/html/sisu_pdf.7.html _1 /usr/share/doc/sisu/html/sisu_postgresql.7.html -_1 /usr/share/doc/sisu/html/sisu_quickstart.8.html +% _1 /usr/share/doc/sisu/html/sisu_quickstart.8.html -_1 /usr/share/doc/sisu/html/sisu_remote.8.html +% _1 /usr/share/doc/sisu/html/sisu_remote.8.html -_1 /usr/share/doc/sisu/html/sisu_search.8.html +% _1 /usr/share/doc/sisu/html/sisu_search.8.html -_1 /usr/share/doc/sisu/html/sisu_skin.1.html +% _1 /usr/share/doc/sisu/html/sisu_skin.1.html _1 /usr/share/doc/sisu/html/sisu_sqlite.7.html -_1 /usr/share/doc/sisu/html/sisu_syntax_highlighting.8.html +% _1 /usr/share/doc/sisu/html/sisu_syntax_highlighting.8.html -_1 /usr/share/doc/sisu/html/sisu_vim.7.html +% _1 /usr/share/doc/sisu/html/sisu_vim.7.html _1 /usr/share/doc/sisu/html/sisu_webrick.1.html @@ -369,45 +373,45 @@ http:///sisudoc.org/man/sisu_help_sources.1.html _1 http://sisudoc.org/man/sisu.1.html -_1 http://sisudoc.org/man/sisu_commands.1.html +% _1 http://sisudoc.org/man/sisu_commands.1.html _1 http://sisudoc.org/man/sisu_complete.7.html -_1 http://sisudoc.org/man/sisu_configuration.1.html +% _1 
http://sisudoc.org/man/sisu_configuration.1.html -_1 http://sisudoc.org/man/sisu_faq.8.html +% _1 http://sisudoc.org/man/sisu_faq.8.html -_1 http://sisudoc.org/man/sisu_help.1.html +% _1 http://sisudoc.org/man/sisu_help.1.html -_1 http://sisudoc.org/man/sisu_help_sources.1.html +% _1 http://sisudoc.org/man/sisu_help_sources.1.html -_1 http://sisudoc.org/man/sisu_howto.8.html +% _1 http://sisudoc.org/man/sisu_howto.8.html -_1 http://sisudoc.org/man/sisu_markup.1.html +% _1 http://sisudoc.org/man/sisu_markup.1.html _1 http://sisudoc.org/man/sisu_pdf.7.html _1 http://sisudoc.org/man/sisu_postgresql.7.html -_1 http://sisudoc.org/man/sisu_quickstart.8.html +% _1 http://sisudoc.org/man/sisu_quickstart.8.html -_1 http://sisudoc.org/man/sisu_remote.8.html +% _1 http://sisudoc.org/man/sisu_remote.8.html -_1 http://sisudoc.org/man/sisu_search.8.html +% _1 http://sisudoc.org/man/sisu_search.8.html -_1 http://sisudoc.org/man/sisu_skin.1.html +% _1 http://sisudoc.org/man/sisu_skin.1.html _1 http://sisudoc.org/man/sisu_sqlite.7.html -_1 http://sisudoc.org/man/sisu_syntax_highlighting.8.html +% _1 http://sisudoc.org/man/sisu_syntax_highlighting.8.html -_1 http://sisudoc.org/man/sisu_vim.7.html +% _1 http://sisudoc.org/man/sisu_vim.7.html _1 http://sisudoc.org/man/sisu_webrick.1.html 3~ www.jus.uio.no/sisu -http://www.jus.uio.no/sisu/man/sisu_manual.1.html +http://www.jus.uio.no/sisu/man/sisu.1.html http://www.jus.uio.no/sisu/man/sisu_help.1.html @@ -415,38 +419,38 @@ http://www.jus.uio.no/sisu/man/sisu_help_sources.1.html _1 http://www.jus.uio.no/sisu/man/sisu.1.html -_1 http://www.jus.uio.no/sisu/man/sisu_commands.1.html +% _1 http://www.jus.uio.no/sisu/man/sisu_commands.1.html _1 http://www.jus.uio.no/sisu/man/sisu_complete.7.html -_1 http://www.jus.uio.no/sisu/man/sisu_configuration.1.html +% _1 http://www.jus.uio.no/sisu/man/sisu_configuration.1.html -_1 http://www.jus.uio.no/sisu/man/sisu_faq.8.html +% _1 http://www.jus.uio.no/sisu/man/sisu_faq.8.html -_1 
http://www.jus.uio.no/sisu/man/sisu_help.1.html +% _1 http://www.jus.uio.no/sisu/man/sisu_help.1.html -_1 http://www.jus.uio.no/sisu/man/sisu_help_sources.1.html +% _1 http://www.jus.uio.no/sisu/man/sisu_help_sources.1.html -_1 http://www.jus.uio.no/sisu/man/sisu_howto.8.html +% _1 http://www.jus.uio.no/sisu/man/sisu_howto.8.html -_1 http://www.jus.uio.no/sisu/man/sisu_markup.1.html +% _1 http://www.jus.uio.no/sisu/man/sisu_markup.1.html _1 http://www.jus.uio.no/sisu/man/sisu_pdf.7.html _1 http://www.jus.uio.no/sisu/man/sisu_postgresql.7.html -_1 http://www.jus.uio.no/sisu/man/sisu_quickstart.8.html +% _1 http://www.jus.uio.no/sisu/man/sisu_quickstart.8.html -_1 http://www.jus.uio.no/sisu/man/sisu_remote.8.html +% _1 http://www.jus.uio.no/sisu/man/sisu_remote.8.html -_1 http://www.jus.uio.no/sisu/man/sisu_search.8.html +% _1 http://www.jus.uio.no/sisu/man/sisu_search.8.html -_1 http://www.jus.uio.no/sisu/man/sisu_skin.1.html +% _1 http://www.jus.uio.no/sisu/man/sisu_skin.1.html _1 http://www.jus.uio.no/sisu/man/sisu_sqlite.7.html -_1 http://www.jus.uio.no/sisu/man/sisu_syntax_highlighting.8.html +% _1 http://www.jus.uio.no/sisu/man/sisu_syntax_highlighting.8.html -_1 http://www.jus.uio.no/sisu/man/sisu_vim.7.html +% _1 http://www.jus.uio.no/sisu/man/sisu_vim.7.html _1 http://www.jus.uio.no/sisu/man/sisu_webrick.1.html diff --git a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_how.ssi b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_how.ssi new file mode 100644 index 00000000..9a2e2ddd --- /dev/null +++ b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_how.ssi @@ -0,0 +1,133 @@ +% SiSU 0.58 + +@title: SiSU + +@subtitle: Commands + +@creator: Ralph Amissah + +@rights: Copyright (C) Ralph Amissah 2007, part of SiSU documentation, License GPL 3 + +@type: information + +@subject: ebook, epublishing, electronic book, electronic publishing, electronic document, electronic citation, data structure, citation systems, search + +@date.created: 2002-08-28 + 
+@date.issued: 2002-08-28 + +@date.available: 2002-08-28 + +@date.modified: 2007-09-16 + +@date: 2007-09-16 + +@level: new=C; break=1; num_top=1 + +@skin: skin_sisu_manual + +@bold: /Gnu|Debian|Ruby|SiSU/ + +@links: { SiSU Manual }http://www.jus.uio.no/sisu/sisu_manual/ +{ Book Samples and Markup Examples }http://www.jus.uio.no/sisu/SiSU/2.html +{ SiSU @ Wikipedia }http://en.wikipedia.org/wiki/SiSU +{ SiSU @ Freshmeat }http://freshmeat.net/projects/sisu/ +{ SiSU @ Ruby Application Archive }http://raa.ruby-lang.org/project/sisu/ +{ SiSU @ Debian }http://packages.qa.debian.org/s/sisu.html +{ SiSU Download }http://www.jus.uio.no/sisu/SiSU/download.html +{ SiSU Changelog }http://www.jus.uio.no/sisu/SiSU/changelog.html +{ SiSU help }http://www.jus.uio.no/sisu/sisu_manual/sisu_help/ +{ SiSU help sources }http://www.jus.uio.no/sisu/sisu_manual/sisu_help_sources/ + +:A~? @title @creator + +:B~? What is SiSU? + +:C~? Description + +1~sisu_intro Introduction - What is SiSU? + +SiSU is a system for document markup, publishing (in multiple open standard formats) and search + +SiSU~{ "SiSU information Structuring Universe" or "Structured information, Serialized Units".
also chosen for the meaning of the Finnish term "sisu". }~ is a~{ Unix command line oriented }~ framework for document structuring, publishing and search, comprising (a) a lightweight document structure and presentation markup syntax and (b) an accompanying engine for generating standard document format outputs from documents prepared in sisu markup syntax, which is able to produce multiple standard outputs that (can) share a common numbering system for the citation of text within a document. + +SiSU is developed under an open source, software libre license (GPL3). It has been developed in the context of coping with large document sets with evolving markup related technologies, for which you want multiple output formats, a common mechanism for cross-output-format citation, and search. + +SiSU both defines a markup syntax and provides an engine that produces open standards format outputs from documents prepared with SiSU markup. From a single lightly prepared document sisu custom builds several standard output formats which share a common (text object) numbering system for citation of content within a document (that also has implications for search). The sisu engine works with an abstraction of the document's structure and content from which it is possible to generate different forms of representation of the document. Significantly, SiSU markup is more sparse than html, and its outputs include html, LaTeX, landscape and portrait pdfs, and Open Document Format (ODF), all of which can be added to and updated. SiSU is also able to populate SQL type databases at an object level, which means that searches can be made with that degree of granularity. Results of objects (primarily paragraphs and headings) can be viewed directly in the database, or just the object numbers shown - your search criteria are met in these documents and at these locations within each document.
+ +Source document preparation and output generation is a two step process: (i) document source is prepared, that is, marked up in sisu markup syntax and (ii) the desired output is subsequently generated by running the sisu engine against document source. Output representations if updated (in the sisu engine) can be generated by re-running the engine against the prepared source. Using SiSU markup applied to a document, SiSU custom builds various standard open output formats including plain text, HTML, XHTML, XML, OpenDocument, LaTeX or PDF files, and populates an SQL database with objects~{ objects include: headings, paragraphs, verse, tables, images, but not footnotes/endnotes which are numbered separately and tied to the object from which they are referenced. }~ (equating generally to paragraph-sized chunks) so searches may be performed and matches returned with that degree of granularity (e.g. your search criteria are met by these documents and at these locations within each document). Document output formats share a common object numbering system for locating content. This is particularly suitable for "published" works (finalized texts as opposed to works that are frequently changed or updated) for which it provides a fixed means of reference of content. + +In preparing a SiSU document you optionally provide semantic information related to the document in a document header, and in marking up the substantive text provide information on the structure of the document, primarily indicating heading levels and footnotes. You also provide information on basic text attributes where used. The rest is automatic, sisu from this information custom builds~{ i.e. the html, pdf, odf outputs are each built individually and optimised for that form of presentation, rather than for example the html being a saved version of the odf, or the pdf being a saved version of the html. }~ the different forms of output requested.
+ +SiSU works with an abstraction of the document based on its structure which is comprised of its frame~{ the different heading levels }~ and the objects~{ units of text, primarily paragraphs and headings, also any tables, poems, code-blocks }~ it contains, which enables SiSU to represent the document in many different ways, and to take advantage of the strengths of different ways of presenting documents. The objects are numbered, and these numbers can be used to provide a common base for citing material within a document across the different output format types. This is significant as page numbers are not suited to the digital age, in web publishing, changing a browser's default font or using a different browser means that text appears on different pages; and in publishing in different formats, html, landscape and portrait pdf etc. again page numbers are of no use to cite text in a manner that is relevant against the different output types. Dealing with documents at an object level together with object numbering also has implications for search. + +One of the challenges of maintaining documents is to keep them in a format that would allow users to use them without depending on proprietary software popular at the time. Consider the ease of dealing with legacy proprietary formats today and what guarantee you have that old proprietary formats will remain (or can be read without proprietary software/equipment) in 15 years time, or the way in which html has evolved over its relatively short span of existence. SiSU provides the flexibility of outputting documents in multiple non-proprietary open formats including html, pdf~{ Specification submitted by Adobe to ISO to become a full open ISO specification
http://www.linux-watch.com/news/NS7542722606.html }~ and the ISO standard ODF.~{ ISO/IEC 26300:2006 }~ Whilst SiSU relies on software, the markup is uncomplicated and minimalistic which guarantees that future engines can be written to run against it. It is also easily converted to other formats, which means documents prepared in SiSU can be migrated to other document formats. Further security is provided by the fact that the software itself, SiSU, is available under GPL3, a license that guarantees that the source code will always be open, and free as in libre, which means that the code base can be used, updated and further developed as required under the terms of its license. Another challenge is to keep up with a moving target. SiSU permits new forms of output to be added as they become important, (Open Document Format text was added in 2006), and existing output to be updated (html has evolved and the related module has been updated repeatedly over the years, presumably when the World Wide Web Consortium (w3c) finalises html 5 which is currently under development, the html module will again be updated allowing all existing documents to be regenerated as html 5). + +The document formats are written to the file-system and available for indexing by independent indexing tools, whether off the web like Google and Yahoo or on the site like Lucene and Hyperestraier. + +SiSU also provides other features such as concordance files and document content certificates, and the working against an abstraction of document structure has further possibilities for the research and development of other document representations; the availability of objects is useful for example for topic maps and the commercial law thesaurus by Vikki Rogers and Al Kritzer, which together with the flexibility of SiSU offers great possibilities. + +SiSU is primarily for published works, which can take advantage of the citation system to reliably reference its documents.
SiSU works well in a complementary manner with such collaborative technologies as Wikis, which can take advantage of and be used to discuss the substance of content prepared in SiSU. + +http://www.jus.uio.no/sisu + +% SiSU is a way of preparing, publishing, managing and searching documents. + +1~sisu_how How does sisu work? + +SiSU markup is fairly minimalistic, it consists of: a (largely optional) document header, made up of information about the document (such as when it was published, who authored it, and granting what rights) and any processing instructions; and markup within the substantive text of the document, which is related to document structure and typeface. SiSU must be able to discern the structure of a document, (text headings and their levels in relation to each other), either from information provided in the document header or from markup within the text (or from a combination of both). Processing is done against an abstraction of the document comprising information on the document's structure and its objects, which the program serializes (providing the object numbers) and which are assigned hash sum values based on their content. This abstraction of information about document structure, objects, (and hash sums), provides considerable flexibility in representing documents in different ways and for different purposes (e.g. search, document layout, publishing, content certification, concordance etc.), and makes it possible to take advantage of some of the strengths of established ways of representing documents, (or indeed to create new ones). + +1~sisu_feature_summary Summary of features + +_* sparse/minimal markup (clean utf-8 source texts). Documents are prepared in a single UTF-8 file using a minimalistic mnemonic syntax. Typical literature, documents like "War and Peace" require almost no markup, and most of the headers are optional.
+ +_* markup is easily readable/parsable by the human eye, (basic markup is simpler and more sparse than the most basic HTML), [this may also be converted to XML representations of the same input/source document]. + +_* markup defines document structure (this may be done once in a header pattern-match description, or for heading levels individually); basic text attributes (bold, italics, underscore, strike-through etc.) as required; and semantic information related to the document (header information, extended beyond the Dublin core and easily further extended as required); the headers may also contain processing instructions. SiSU markup is primarily an abstraction of document structure and document metadata to permit taking advantage of the basic strengths of existing alternative practical standard ways of representing documents [be that browser viewing, paper publication, sql search etc.] (html, xml, odf, latex, pdf, sql) + +_* for output produces reasonably elegant output of established industry and institutionally accepted open standard formats.[3] takes advantage of the different strengths of various standard formats for representing documents, amongst the output formats currently supported are: + +_1* html - both as a single scrollable text and a segmented document + +_1* xhtml + +_1* XML - both in sax and dom style xml structures for further development as required + +_1* ODF - open document format, the iso standard for document storage + +_1* LaTeX - used to generate pdf + +_1* pdf (via LaTeX) + +_1* sql - population of an sql database, (at the same object level that is used to cite text within a document) + +Also produces: concordance files; document content certificates (md5 or sha256 digests of headings, paragraphs, images etc.) and html manifests (and sitemaps of content). (b) takes advantage of the strengths implicit in these very different output types, (e.g. 
PDFs produced using typesetting of LaTeX, databases populated with documents at an individual object/paragraph level, making possible granular search (and related possibilities)) + +_* ensuring content can be cited in a meaningful way regardless of selected output format. Online publishing (and publishing in multiple document formats) lacks a useful way of citing text internally within documents (important to academics generally and to lawyers) as page numbers are meaningless across browsers and formats. sisu seeks to provide a common way of pinpointing text within a document, (which can be utilized for citation and by search engines). The outputs share a common numbering system that is meaningful (to man and machine) across all digital outputs whether paper, screen, or database oriented, (pdf, HTML, xml, sqlite, postgresql), this numbering system can be used to reference content. + +_* Granular search within documents. SQL databases are populated at an object level (roughly headings, paragraphs, verse, tables) and become searchable with that degree of granularity, the output information provides the object/paragraph numbers which are relevant across all generated outputs; it is also possible to look at just the matching paragraphs of the documents in the database; [output indexing also works well with search indexing tools like hyperestraier]. + +_* long term maintainability of document collections in a world of changing formats, having a very sparsely marked-up source document base. There is a considerable degree of future-proofing, output representations are "upgradeable", and new document formats may be added. e.g. addition of the odf (open document text) module in 2006, and html5 output at some point in the future, without modification of existing prepared texts + +_* SQL search aside, documents are generated as required and static once generated. 
+ +_* documents produced are static files, and may be batch processed, this needs to be done only once but may be repeated for various reasons as desired (updated content, addition of new output formats, updated technology document presentations/representations) + +_* document source (plaintext utf-8) if shared on the net may be used as input and processed locally to produce the different document outputs + +_* document source may be bundled together (automatically) with associated documents (multiple language versions or master document with inclusions) and images and sent as a zip file called a sisupod, if shared on the net these too may be processed locally to produce the desired document outputs + +_* generated document outputs may automatically be posted to remote sites. + +_* for basic document generation, the only software dependencies are Ruby and a few standard Unix tools (this covers plaintext, HTML, XML, ODF, LaTeX). To use a database you of course need that, and to convert the LaTeX generated to pdf, a latex processor like tetex or texlive. + +_* as a developer's tool it is flexible and extensible + +Syntax highlighting for SiSU markup is available for a number of text editors. + +SiSU is less about document layout than about finding a way with little markup to be able to construct an abstract representation of a document that makes it possible to produce multiple representations of it which may be rather different from each other and used for different purposes, whether layout and publishing, or search of content + +i.e. to be able to take advantage from this minimal preparation starting point of some of the strengths of rather different established ways of representing documents for different purposes, whether for search (relational database, or indexed flat files generated for that purpose whether of complete documents, or say of files made up of objects), online viewing (e.g. html, xml, pdf), or paper publication (e.g. pdf)... 
+ +the solution arrived at is by extracting structural information about the document (about headings within the document) and by tracking objects (which are serialized and also given hash values) in the manner described. It makes possible representations that are quite different from those offered at present. For example objects could be saved individually and identified by their hashes, with an index of how the objects relate to each other to form a document. + diff --git a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_introduction.ssi b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_introduction.ssi new file mode 100644 index 00000000..9a2e2ddd --- /dev/null +++ b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_introduction.ssi @@ -0,0 +1,133 @@ +% SiSU 0.58 + +@title: SiSU + +@subtitle: Commands + +@creator: Ralph Amissah + +@rights: Copyright (C) Ralph Amissah 2007, part of SiSU documentation, License GPL 3 + +@type: information + +@subject: ebook, epublishing, electronic book, electronic publishing, electronic document, electronic citation, data structure, citation systems, search + +@date.created: 2002-08-28 + +@date.issued: 2002-08-28 + +@date.available: 2002-08-28 + +@date.modified: 2007-09-16 + +@date: 2007-09-16 + +@level: new=C; break=1; num_top=1 + +@skin: skin_sisu_manual + +@bold: /Gnu|Debian|Ruby|SiSU/ + +@links: { SiSU Manual }http://www.jus.uio.no/sisu/sisu_manual/ +{ Book Samples and Markup Examples }http://www.jus.uio.no/sisu/SiSU/2.html +{ SiSU @ Wikipedia }http://en.wikipedia.org/wiki/SiSU +{ SiSU @ Freshmeat }http://freshmeat.net/projects/sisu/ +{ SiSU @ Ruby Application Archive }http://raa.ruby-lang.org/project/sisu/ +{ SiSU @ Debian }http://packages.qa.debian.org/s/sisu.html +{ SiSU Download }http://www.jus.uio.no/sisu/SiSU/download.html +{ SiSU Changelog }http://www.jus.uio.no/sisu/SiSU/changelog.html +{ SiSU help }http://www.jus.uio.no/sisu/sisu_manual/sisu_help/ +{ SiSU help sources 
}http://www.jus.uio.no/sisu/sisu_manual/sisu_help_sources/ + +:A~? @title @creator + +:B~? What is SiSU? + +:C~? Description + +1~sisu_intro Introduction - What is SiSU? + +SiSU is a system for document markup, publishing (in multiple open standard formats) and search + +SiSU~{ "SiSU information Structuring Universe" or "Structured information, Serialized Units".
also chosen for the meaning of the Finnish term "sisu". }~ is a~{ Unix command line oriented }~ framework for document structuring, publishing and search, comprising of (a) a lightweight document structure and presentation markup syntax and (b) an accompanying engine for generating standard document format outputs from documents prepared in sisu markup syntax, which is able to produce multiple standard outputs that (can) share a common numbering system for the citation of text within a document. + +SiSU is developed under an open source, software libre license (GPL3). It has been developed in the context of coping with large document sets with evolving markup related technologies, for which you want multiple output formats, a common mechanism for cross-output-format citation, and search. + +SiSU both defines a markup syntax and provides an engine that produces open standards format outputs from documents prepared with SiSU markup. From a single lightly prepared document sisu custom builds several standard output formats which share a common (text object) numbering system for citation of content within a document (that also has implications for search). The sisu engine works with an abstraction of the document's structure and content from which it is possible to generate different forms of representation of the document. Significantly SiSU markup is more sparse than html and outputs which include html, LaTeX, landscape and portrait pdfs, Open Document Format (ODF), all of which can be added to and updated. SiSU is also able to populate SQL type databases at an object level, which means that searches can be made with that degree of granularity. Results of objects (primarily paragraphs and headings) can be viewed directly in the database, or just the object numbers shown - your search criteria is met in these documents and at these locations within each document. 
+ +Source document preparation and output generation is a two step process: (i) document source is prepared, that is, marked up in sisu markup syntax and (ii) the desired output subsequently generated by running the sisu engine against document source. Output representations if updated (in the sisu engine) can be generated by re-running the engine against the prepared source. Using SiSU markup applied to a document, SiSU custom builds various standard open output formats including plain text, HTML, XHTML, XML, OpenDocument, LaTeX or PDF files, and populate an SQL database with objects~{ objects include: headings, paragraphs, verse, tables, images, but not footnotes/endnotes which are numbered separately and tied to the object from which they are referenced. }~ (equating generally to paragraph-sized chunks) so searches may be performed and matches returned with that degree of granularity ( e.g. your search criteria is met by these documents and at these locations within each document). Document output formats share a common object numbering system for locating content. This is particularly suitable for "published" works (finalized texts as opposed to works that are frequently changed or updated) for which it provides a fixed means of reference of content. + +In preparing a SiSU document you optionally provide semantic information related to the document in a document header, and in marking up the substantive text provide information on the structure of the document, primarily indicating heading levels and footnotes. You also provide information on basic text attributes where used. The rest is automatic, sisu from this information custom builds~{ i.e. the html, pdf, odf outputs are each built individually and optimised for that form of presentation, rather than for example the html being a saved version of the odf, or the pdf being a saved version of the html. }~ the different forms of output requested. 
+ +SiSU works with an abstraction of the document based on its structure which is comprised of its frame~{ the different heading levels }~ and the objects~{ units of text, primarily paragraphs and headings, also any tables, poems, code-blocks }~ it contains, which enables SiSU to represent the document in many different ways, and to take advantage of the strengths of different ways of presenting documents. The objects are numbered, and these numbers can be used to provide a common base for citing material within a document across the different output format types. This is significant as page numbers are not suited to the digital age, in web publishing, changing a browser's default font or using a different browser means that text appears on different pages; and in publishing in different formats, html, landscape and portrait pdf etc. again page numbers are of no use to cite text in a manner that is relevant against the different output types. Dealing with documents at an object level together with object numbering also has implications for search. + +One of the challenges of maintaining documents is to keep them in a format that would allow users to use them without depending on proprietary software popular at the time. Consider the ease of dealing with legacy proprietary formats today and what guarantee you have that old proprietary formats will remain (or can be read without proprietary software/equipment) in 15 years' time, or the way in which html has evolved over its relatively short span of existence. SiSU provides the flexibility of outputting documents in multiple non-proprietary open formats including html, pdf~{ Specification submitted by Adobe to ISO to become a full open ISO specification
http://www.linux-watch.com/news/NS7542722606.html }~ and the ISO standard ODF.~{ ISO/IEC 26300:2006 }~ Whilst SiSU relies on software, the markup is uncomplicated and minimalistic which guarantees that future engines can be written to run against it. It is also easily converted to other formats, which means documents prepared in SiSU can be migrated to other document formats. Further security is provided by the fact that the software itself, SiSU, is available under GPL3, a licence that guarantees that the source code will always be open, and free as in libre which means that that code base can be used, updated and further developed as required under the terms of its license. Another challenge is to keep up with a moving target. SiSU permits new forms of output to be added as they become important, (Open Document Format text was added in 2006), and existing output to be updated (html has evolved and the related module has been updated repeatedly over the years, presumably when the World Wide Web Consortium (w3c) finalises html 5 which is currently under development, the html module will again be updated allowing all existing documents to be regenerated as html 5). + +The document formats are written to the file-system and available for indexing by independent indexing tools, whether off the web like Google and Yahoo or on the site like Lucene and Hyperestraier. + +SiSU also provides other features such as concordance files and document content certificates, and the working against an abstraction of document structure has further possibilities for the research and development of other document representations, the availability of objects is useful for example for topic maps and the commercial law thesaurus by Vikki Rogers and Al Kritzer, which together with the flexibility of SiSU offers great possibilities. + +SiSU is primarily for published works, which can take advantage of the citation system to reliably reference its documents. 
SiSU works well in a complementary manner with such collaborative technologies as Wikis, which can take advantage of and be used to discuss the substance of content prepared in SiSU. + +http://www.jus.uio.no/sisu + +% SiSU is a way of preparing, publishing, managing and searching documents. + +1~sisu_how How does sisu work? + +SiSU markup is fairly minimalistic, it consists of: a (largely optional) document header, made up of information about the document (such as when it was published, who authored it, and granting what rights) and any processing instructions; and markup within the substantive text of the document, which is related to document structure and typeface. SiSU must be able to discern the structure of a document, (text headings and their levels in relation to each other), either from information provided in the document header or from markup within the text (or from a combination of both). Processing is done against an abstraction of the document comprising of information on the document's structure and its objects,[2] which the program serializes (providing the object numbers) and which are assigned hash sum values based on their content. This abstraction of information about document structure, objects, (and hash sums), provides considerable flexibility in representing documents different ways and for different purposes (e.g. search, document layout, publishing, content certification, concordance etc.), and makes it possible to take advantage of some of the strengths of established ways of representing documents, (or indeed to create new ones). + +1~sisu_feature_summary Summary of features + +_* sparse/minimal markup (clean utf-8 source texts). Documents are prepared in a single UTF-8 file using a minimalistic mnemonic syntax. Typical literature, documents like "War and Peace" require almost no markup, and most of the headers are optional. 
+ +_* markup is easily readable/parsable by the human eye, (basic markup is simpler and more sparse than the most basic HTML), [this may also be converted to XML representations of the same input/source document]. + +_* markup defines document structure (this may be done once in a header pattern-match description, or for heading levels individually); basic text attributes (bold, italics, underscore, strike-through etc.) as required; and semantic information related to the document (header information, extended beyond the Dublin core and easily further extended as required); the headers may also contain processing instructions. SiSU markup is primarily an abstraction of document structure and document metadata to permit taking advantage of the basic strengths of existing alternative practical standard ways of representing documents [be that browser viewing, paper publication, sql search etc.] (html, xml, odf, latex, pdf, sql) + +_* for output produces reasonably elegant output of established industry and institutionally accepted open standard formats.[3] takes advantage of the different strengths of various standard formats for representing documents, amongst the output formats currently supported are: + +_1* html - both as a single scrollable text and a segmented document + +_1* xhtml + +_1* XML - both in sax and dom style xml structures for further development as required + +_1* ODF - open document format, the iso standard for document storage + +_1* LaTeX - used to generate pdf + +_1* pdf (via LaTeX) + +_1* sql - population of an sql database, (at the same object level that is used to cite text within a document) + +Also produces: concordance files; document content certificates (md5 or sha256 digests of headings, paragraphs, images etc.) and html manifests (and sitemaps of content). (b) takes advantage of the strengths implicit in these very different output types, (e.g. 
PDFs produced using typesetting of LaTeX, databases populated with documents at an individual object/paragraph level, making possible granular search (and related possibilities)) + +_* ensuring content can be cited in a meaningful way regardless of selected output format. Online publishing (and publishing in multiple document formats) lacks a useful way of citing text internally within documents (important to academics generally and to lawyers) as page numbers are meaningless across browsers and formats. sisu seeks to provide a common way of pinpointing text within a document, (which can be utilized for citation and by search engines). The outputs share a common numbering system that is meaningful (to man and machine) across all digital outputs whether paper, screen, or database oriented, (pdf, HTML, xml, sqlite, postgresql), this numbering system can be used to reference content. + +_* Granular search within documents. SQL databases are populated at an object level (roughly headings, paragraphs, verse, tables) and become searchable with that degree of granularity, the output information provides the object/paragraph numbers which are relevant across all generated outputs; it is also possible to look at just the matching paragraphs of the documents in the database; [output indexing also works well with search indexing tools like hyperestraier]. + +_* long term maintainability of document collections in a world of changing formats, having a very sparsely marked-up source document base. There is a considerable degree of future-proofing, output representations are "upgradeable", and new document formats may be added. e.g. addition of the odf (open document text) module in 2006, and html5 output at some point in the future, without modification of existing prepared texts + +_* SQL search aside, documents are generated as required and static once generated. 
+ +_* documents produced are static files, and may be batch processed, this needs to be done only once but may be repeated for various reasons as desired (updated content, addition of new output formats, updated technology document presentations/representations) + +_* document source (plaintext utf-8) if shared on the net may be used as input and processed locally to produce the different document outputs + +_* document source may be bundled together (automatically) with associated documents (multiple language versions or master document with inclusions) and images and sent as a zip file called a sisupod, if shared on the net these too may be processed locally to produce the desired document outputs + +_* generated document outputs may automatically be posted to remote sites. + +_* for basic document generation, the only software dependencies are Ruby and a few standard Unix tools (this covers plaintext, HTML, XML, ODF, LaTeX). To use a database you of course need that, and to convert the LaTeX generated to pdf, a latex processor like tetex or texlive. + +_* as a developer's tool it is flexible and extensible + +Syntax highlighting for SiSU markup is available for a number of text editors. + +SiSU is less about document layout than about finding a way with little markup to be able to construct an abstract representation of a document that makes it possible to produce multiple representations of it which may be rather different from each other and used for different purposes, whether layout and publishing, or search of content + +i.e. to be able to take advantage from this minimal preparation starting point of some of the strengths of rather different established ways of representing documents for different purposes, whether for search (relational database, or indexed flat files generated for that purpose whether of complete documents, or say of files made up of objects), online viewing (e.g. html, xml, pdf), or paper publication (e.g. pdf)... 
+ +the solution arrived at is by extracting structural information about the document (about headings within the document) and by tracking objects (which are serialized and also given hash values) in the manner described. It makes possible representations that are quite different from those offered at present. For example objects could be saved individually and identified by their hashes, with an index of how the objects relate to each other to form a document. + diff --git a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_introduction.ssm b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_introduction.ssm new file mode 100644 index 00000000..a0c25a78 --- /dev/null +++ b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_introduction.ssm @@ -0,0 +1,58 @@ +% SiSU 0.58 + +@title: SiSU + +@subtitle: Commands + +@creator: Ralph Amissah + +@rights: Copyright (C) Ralph Amissah 2007, part of SiSU documentation, License GPL 3 + +@type: information + +@subject: ebook, epublishing, electronic book, electronic publishing, electronic document, electronic citation, data structure, citation systems, search + +@date.created: 2002-08-28 + +@date.issued: 2002-08-28 + +@date.available: 2002-08-28 + +@date.modified: 2007-09-16 + +@date: 2007-09-16 + +@level: new=C; break=1; num_top=1 + +@skin: skin_sisu_manual + +@bold: /Gnu|Debian|Ruby|SiSU/ + +@links: { SiSU Manual }http://www.jus.uio.no/sisu/sisu_manual/ +{ Book Samples and Markup Examples }http://www.jus.uio.no/sisu/SiSU/2.html +{ SiSU @ Wikipedia }http://en.wikipedia.org/wiki/SiSU +{ SiSU @ Freshmeat }http://freshmeat.net/projects/sisu/ +{ SiSU @ Ruby Application Archive }http://raa.ruby-lang.org/project/sisu/ +{ SiSU @ Debian }http://packages.qa.debian.org/s/sisu.html +{ SiSU Download }http://www.jus.uio.no/sisu/SiSU/download.html +{ SiSU Changelog }http://www.jus.uio.no/sisu/SiSU/changelog.html +{ SiSU help }http://www.jus.uio.no/sisu/sisu_manual/sisu_help/ +{ SiSU help sources 
}http://www.jus.uio.no/sisu/sisu_manual/sisu_help_sources/ + +:A~? @title @creator + +:B~? What is SiSU? + +:C~? Description + +<< |sisu_introduction.ssi|@|^| + +<< |sisu_how.ssi|@|^| + +<< |sisu_short_feature_summary.ssi|@|^| + +<< |sisu_help.sst|@|^| + +<< |sisu_help_sources.sst|@|^| + +<< |sisu_help_sources.sst|@|^| diff --git a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_introduction.sst b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_introduction.sst deleted file mode 100644 index 9a2e2ddd..00000000 --- a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_introduction.sst +++ /dev/null @@ -1,133 +0,0 @@ -% SiSU 0.58 - -@title: SiSU - -@subtitle: Commands - -@creator: Ralph Amissah - -@rights: Copyright (C) Ralph Amissah 2007, part of SiSU documentation, License GPL 3 - -@type: information - -@subject: ebook, epublishing, electronic book, electronic publishing, electronic document, electronic citation, data structure, citation systems, search - -@date.created: 2002-08-28 - -@date.issued: 2002-08-28 - -@date.available: 2002-08-28 - -@date.modified: 2007-09-16 - -@date: 2007-09-16 - -@level: new=C; break=1; num_top=1 - -@skin: skin_sisu_manual - -@bold: /Gnu|Debian|Ruby|SiSU/ - -@links: { SiSU Manual }http://www.jus.uio.no/sisu/sisu_manual/ -{ Book Samples and Markup Examples }http://www.jus.uio.no/sisu/SiSU/2.html -{ SiSU @ Wikipedia }http://en.wikipedia.org/wiki/SiSU -{ SiSU @ Freshmeat }http://freshmeat.net/projects/sisu/ -{ SiSU @ Ruby Application Archive }http://raa.ruby-lang.org/project/sisu/ -{ SiSU @ Debian }http://packages.qa.debian.org/s/sisu.html -{ SiSU Download }http://www.jus.uio.no/sisu/SiSU/download.html -{ SiSU Changelog }http://www.jus.uio.no/sisu/SiSU/changelog.html -{ SiSU help }http://www.jus.uio.no/sisu/sisu_manual/sisu_help/ -{ SiSU help sources }http://www.jus.uio.no/sisu/sisu_manual/sisu_help_sources/ - -:A~? @title @creator - -:B~? What is SiSU? - -:C~? Description - -1~sisu_intro Introduction - What is SiSU? 
- -SiSU is a system for document markup, publishing (in multiple open standard formats) and search - -SiSU~{ "SiSU information Structuring Universe" or "Structured information, Serialized Units".
also chosen for the meaning of the Finnish term "sisu". }~ is a~{ Unix command line oriented }~ framework for document structuring, publishing and search, comprising of (a) a lightweight document structure and presentation markup syntax and (b) an accompanying engine for generating standard document format outputs from documents prepared in sisu markup syntax, which is able to produce multiple standard outputs that (can) share a common numbering system for the citation of text within a document. - -SiSU is developed under an open source, software libre license (GPL3). It has been developed in the context of coping with large document sets with evolving markup related technologies, for which you want multiple output formats, a common mechanism for cross-output-format citation, and search. - -SiSU both defines a markup syntax and provides an engine that produces open standards format outputs from documents prepared with SiSU markup. From a single lightly prepared document sisu custom builds several standard output formats which share a common (text object) numbering system for citation of content within a document (that also has implications for search). The sisu engine works with an abstraction of the document's structure and content from which it is possible to generate different forms of representation of the document. Significantly SiSU markup is more sparse than html and outputs which include html, LaTeX, landscape and portrait pdfs, Open Document Format (ODF), all of which can be added to and updated. SiSU is also able to populate SQL type databases at an object level, which means that searches can be made with that degree of granularity. Results of objects (primarily paragraphs and headings) can be viewed directly in the database, or just the object numbers shown - your search criteria is met in these documents and at these locations within each document. 
- -Source document preparation and output generation is a two step process: (i) document source is prepared, that is, marked up in sisu markup syntax and (ii) the desired output subsequently generated by running the sisu engine against document source. Output representations if updated (in the sisu engine) can be generated by re-running the engine against the prepared source. Using SiSU markup applied to a document, SiSU custom builds various standard open output formats including plain text, HTML, XHTML, XML, OpenDocument, LaTeX or PDF files, and populate an SQL database with objects~{ objects include: headings, paragraphs, verse, tables, images, but not footnotes/endnotes which are numbered separately and tied to the object from which they are referenced. }~ (equating generally to paragraph-sized chunks) so searches may be performed and matches returned with that degree of granularity ( e.g. your search criteria is met by these documents and at these locations within each document). Document output formats share a common object numbering system for locating content. This is particularly suitable for "published" works (finalized texts as opposed to works that are frequently changed or updated) for which it provides a fixed means of reference of content. - -In preparing a SiSU document you optionally provide semantic information related to the document in a document header, and in marking up the substantive text provide information on the structure of the document, primarily indicating heading levels and footnotes. You also provide information on basic text attributes where used. The rest is automatic, sisu from this information custom builds~{ i.e. the html, pdf, odf outputs are each built individually and optimised for that form of presentation, rather than for example the html being a saved version of the odf, or the pdf being a saved version of the html. }~ the different forms of output requested. 
- -SiSU works with an abstraction of the document based on its structure which is comprised of its frame~{ the different heading levels }~ and the objects~{ units of text, primarily paragraphs and headings, also any tables, poems, code-blocks }~ it contains, which enables SiSU to represent the document in many different ways, and to take advantage of the strengths of different ways of presenting documents. The objects are numbered, and these numbers can be used to provide a common base for citing material within a document across the different output format types. This is significant as page numbers are not suited to the digital age, in web publishing, changing a browser's default font or using a different browser means that text appears on different pages; and in publishing in different formats, html, landscape and portrait pdf etc. again page numbers are of no use to cite text in a manner that is relevant against the different output types. Dealing with documents at an object level together with object numbering also has implications for search. - -One of the challenges of maintaining documents is to keep them in a format that would allow users to use them without depending on a proprietary software popular at the time. Consider the ease of dealing with legacy proprietary formats today and what guarantee you have that old proprietary formats will remain (or can be read without proprietary software/equipment) in 15 years time, or the way the way in which html has evolved over its relatively short span of existence. SiSU provides the flexibility of outputing documents in multiple non-proprietary open formats including html, pdf~{ Specification submitted by Adobe to ISO to become a full open ISO specification
http://www.linux-watch.com/news/NS7542722606.html }~ and the ISO standard ODF.~{ ISO/IEC 26300:2006 }~ Whilst SiSU relies on software, the markup is uncomplicated and minimalistic which guarantees that future engines can be written to run against it. It is also easily converted to other formats, which means documents prepared in SiSU can be migrated to other document formats. Further security is provided by the fact that the software itself, SiSU is available under GPL3 a licence that guarantees that the source code will always be open, and free as in libre which means that that code base can be used updated and further developed as required under the terms of its license. Another challenge is to keep up with a moving target. SiSU permits new forms of output to be added as they become important, (Open Document Format text was added in 2006), and existing output to be updated (html has evolved and the related module has been updated repeatedly over the years, presumably when the World Wide Web Consortium (w3c) finalises html 5 which is currently under development, the html module will again be updated allowing all existing documents to be regenerated as html 5). - -The document formats are written to the file-system and available for indexing by independent indexing tools, whether off the web like Google and Yahoo or on the site like Lucene and Hyperestraier. - -SiSU also provides other features such as concordance files and document content certificates, and the working against an abstraction of document structure has further possibilities for the research and development of other document representations, the availability of objects is useful for example for topic maps and the commercial law thesaurus by Vikki Rogers and Al Krtizer, together with the flexibility of SiSU offers great possibilities. - -SiSU is primarily for published works, which can take advantage of the citation system to reliably reference its documents. 
SiSU works well in a complementary manner with such collaborative technologies as Wikis, which can take advantage of and be used to discuss the substance of content prepared in SiSU. - -http://www.jus.uio.no/sisu - -% SiSU is a way of preparing, publishing, managing and searching documents. - -1~sisu_how How does sisu work? - -SiSU markup is fairly minimalistic, it consists of: a (largely optional) document header, made up of information about the document (such as when it was published, who authored it, and granting what rights) and any processing instructions; and markup within the substantive text of the document, which is related to document structure and typeface. SiSU must be able to discern the structure of a document, (text headings and their levels in relation to each other), either from information provided in the document header or from markup within the text (or from a combination of both). Processing is done against an abstraction of the document comprising of information on the document's structure and its objects,[2] which the program serializes (providing the object numbers) and which are assigned hash sum values based on their content. This abstraction of information about document structure, objects, (and hash sums), provides considerable flexibility in representing documents different ways and for different purposes (e.g. search, document layout, publishing, content certification, concordance etc.), and makes it possible to take advantage of some of the strengths of established ways of representing documents, (or indeed to create new ones). - -1~sisu_feature_summary Summary of features - -_* sparse/minimal markup (clean utf-8 source texts). Documents are prepared in a single UTF-8 file using a minimalistic mnemonic syntax. Typical literature, documents like "War and Peace" require almost no markup, and most of the headers are optional. 
- -_* markup is easily readable/parsable by the human eye, (basic markup is simpler and more sparse than the most basic HTML), [this may also be converted to XML representations of the same input/source document]. - -_* markup defines document structure (this may be done once in a header pattern-match description, or for heading levels individually); basic text attributes (bold, italics, underscore, strike-through etc.) as required; and semantic information related to the document (header information, extended beyond the Dublin core and easily further extended as required); the headers may also contain processing instructions. SiSU markup is primarily an abstraction of document structure and document metadata to permit taking advantage of the basic strengths of existing alternative practical standard ways of representing documents [be that browser viewing, paper publication, sql search etc.] (html, xml, odf, latex, pdf, sql) - -_* for output produces reasonably elegant output of established industry and institutionally accepted open standard formats.[3] takes advantage of the different strengths of various standard formats for representing documents, amongst the output formats currently supported are: - -_1* html - both as a single scrollable text and a segmented document - -_1* xhtml - -_1* XML - both in sax and dom style xml structures for further development as required - -_1* ODF - open document format, the iso standard for document storage - -_1* LaTeX - used to generate pdf - -_1* pdf (via LaTeX) - -_1* sql - population of an sql database, (at the same object level that is used to cite text within a document) - -Also produces: concordance files; document content certificates (md5 or sha256 digests of headings, paragraphs, images etc.) and html manifests (and sitemaps of content). (b) takes advantage of the strengths implicit in these very different output types, (e.g. 
PDFs produced using typesetting of LaTeX, databases populated with documents at an individual object/paragraph level, making possible granular search (and related possibilities)) - -_* ensuring content can be cited in a meaningful way regardless of selected output format. Online publishing (and publishing in multiple document formats) lacks a useful way of citing text internally within documents (important to academics generally and to lawyers) as page numbers are meaningless across browsers and formats. sisu seeks to provide a common way of pinpoint the text within a document, (which can be utilized for citation and by search engines). The outputs share a common numbering system that is meaningful (to man and machine) across all digital outputs whether paper, screen, or database oriented, (pdf, HTML, xml, sqlite, postgresql), this numbering system can be used to reference content. - -_* Granular search within documents. SQL databases are populated at an object level (roughly headings, paragraphs, verse, tables) and become searchable with that degree of granularity, the output information provides the object/paragraph numbers which are relevant across all generated outputs; it is also possible to look at just the matching paragraphs of the documents in the database; [output indexing also work well with search indexing tools like hyperestraier]. - -_* long term maintainability of document collections in a world of changing formats, having a very sparsely marked-up source document base. there is a considerable degree of future-proofing, output representations are "upgradeable", and new document formats may be added. e.g. addition of odf (open document text) module in 2006 and in future html5 output sometime in future, without modification of existing prepared texts - -_* SQL search aside, documents are generated as required and static once generated. 
- -_* documents produced are static files, and may be batch processed, this needs to be done only once but may be repeated for various reasons as desired (updated content, addition of new output formats, updated technology document presentations/representations) - -_* document source (plaintext utf-8) if shared on the net may be used as input and processed locally to produce the different document outputs - -_* document source may be bundled together (automatically) with associated documents (multiple language versions or master document with inclusions) and images and sent as a zip file called a sisupod, if shared on the net these too may be processed locally to produce the desired document outputs - -_* generated document outputs may automatically be posted to remote sites. - -_* for basic document generation, the only software dependency is Ruby, and a few standard Unix tools (this covers plaintext, HTML, XML, ODF, LaTeX). To use a database you of course need that, and to convert the LaTeX generated to pdf, a latex processor like tetex or texlive. - -_* as a developers tool it is flexible and extensible - -Syntax highlighting for SiSU markup is available for a number of text editors. - -SiSU is less about document layout than about finding a way with little markup to be able to construct an abstract representation of a document that makes it possible to produce multiple representations of it which may be rather different from each other and used for different purposes, whether layout and publishing, or search of content - -i.e. to be able to take advantage from this minimal preparation starting point of some of the strengths of rather different established ways of representing documents for different purposes, whether for search (relational database, or indexed flat files generated for that purpose whether of complete documents, or say of files made up of objects), online viewing (e.g. html, xml, pdf), or paper publication (e.g. pdf)... 
- -the solution arrived at is by extracting structural information about the document (about headings within the document) and by tracking objects (which are serialized and also given hash values) in the manner described. It makes possible representations that are quite different from those offered at present. For example objects could be saved individually and identified by their hashes, with an index of how the objects relate to each other to form a document. - diff --git a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_manual.ssm b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_manual.ssm index 41d154a2..4a4ecef8 100644 --- a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_manual.ssm +++ b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_manual.ssm @@ -43,7 +43,11 @@ :B~ What is SiSU? -<< |sisu_introduction.sst|@|^| +<< |sisu_introduction.ssi|@|^| + +<< |sisu_how.ssi|@|^| + +<< |sisu_short_feature_summary.ssi|@|^| << |sisu_help.sst|@|^| diff --git a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_pdf.sst b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_pdf.sst index f78f470b..81e5f217 100644 --- a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_pdf.sst +++ b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_pdf.sst @@ -28,6 +28,10 @@ @bold: /Gnu|Debian|Ruby|SiSU/ +@man: 8; +name=sisu - package to install what sisu needs to generate pdf (latex to pdf dependency component) +synopsis=sisu -pv [filename/wildcard ] + @links: { SiSU Manual }http://www.jus.uio.no/sisu/sisu_manual/ { Book Samples and Markup Examples }http://www.jus.uio.no/sisu/SiSU/2.html { SiSU @ Wikipedia }http://en.wikipedia.org/wiki/SiSU diff --git a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_postgresql.sst b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_postgresql.sst index 3f61e728..09cb2785 100644 --- a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_postgresql.sst +++ b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_postgresql.sst @@ -28,6 +28,11 @@ 
@bold: /Gnu|Debian|Ruby|SiSU/ +@man: 8; +name=sisu - package to install what sisu needs to populate a postgresql database (postgresql dependency component) +synopsis=sisu -Dv [filename/wildcard ] +sisu -Dv [instruction] + @links: { SiSU Manual }http://www.jus.uio.no/sisu/sisu_manual/ { Book Samples and Markup Examples }http://www.jus.uio.no/sisu/SiSU/2.html { SiSU @ Wikipedia }http://en.wikipedia.org/wiki/SiSU diff --git a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_short_feature_summary.ssi b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_short_feature_summary.ssi new file mode 100644 index 00000000..9a2e2ddd --- /dev/null +++ b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_short_feature_summary.ssi @@ -0,0 +1,133 @@ +% SiSU 0.58 + +@title: SiSU + +@subtitle: Commands + +@creator: Ralph Amissah + +@rights: Copyright (C) Ralph Amissah 2007, part of SiSU documentation, License GPL 3 + +@type: information + +@subject: ebook, epublishing, electronic book, electronic publishing, electronic document, electronic citation, data structure, citation systems, search + +@date.created: 2002-08-28 + +@date.issued: 2002-08-28 + +@date.available: 2002-08-28 + +@date.modified: 2007-09-16 + +@date: 2007-09-16 + +@level: new=C; break=1; num_top=1 + +@skin: skin_sisu_manual + +@bold: /Gnu|Debian|Ruby|SiSU/ + +@links: { SiSU Manual }http://www.jus.uio.no/sisu/sisu_manual/ +{ Book Samples and Markup Examples }http://www.jus.uio.no/sisu/SiSU/2.html +{ SiSU @ Wikipedia }http://en.wikipedia.org/wiki/SiSU +{ SiSU @ Freshmeat }http://freshmeat.net/projects/sisu/ +{ SiSU @ Ruby Application Archive }http://raa.ruby-lang.org/project/sisu/ +{ SiSU @ Debian }http://packages.qa.debian.org/s/sisu.html +{ SiSU Download }http://www.jus.uio.no/sisu/SiSU/download.html +{ SiSU Changelog }http://www.jus.uio.no/sisu/SiSU/changelog.html +{ SiSU help }http://www.jus.uio.no/sisu/sisu_manual/sisu_help/ +{ SiSU help sources }http://www.jus.uio.no/sisu/sisu_manual/sisu_help_sources/ + +:A~? 
@title @creator + +:B~? What is SiSU? + +:C~? Description + +1~sisu_intro Introduction - What is SiSU? + +SiSU is a system for document markup, publishing (in multiple open standard formats) and search + +SiSU~{ "SiSU information Structuring Universe" or "Structured information, Serialized Units".
also chosen for the meaning of the Finnish term "sisu". }~ is a~{ Unix command line oriented }~ framework for document structuring, publishing and search, comprising of (a) a lightweight document structure and presentation markup syntax and (b) an accompanying engine for generating standard document format outputs from documents prepared in sisu markup syntax, which is able to produce multiple standard outputs that (can) share a common numbering system for the citation of text within a document. + +SiSU is developed under an open source, software libre license (GPL3). It has been developed in the context of coping with large document sets with evolving markup related technologies, for which you want multiple output formats, a common mechanism for cross-output-format citation, and search. + +SiSU both defines a markup syntax and provides an engine that produces open standards format outputs from documents prepared with SiSU markup. From a single lightly prepared document sisu custom builds several standard output formats which share a common (text object) numbering system for citation of content within a document (that also has implications for search). The sisu engine works with an abstraction of the document's structure and content from which it is possible to generate different forms of representation of the document. Significantly SiSU markup is more sparse than html and outputs which include html, LaTeX, landscape and portrait pdfs, Open Document Format (ODF), all of which can be added to and updated. SiSU is also able to populate SQL type databases at an object level, which means that searches can be made with that degree of granularity. Results of objects (primarily paragraphs and headings) can be viewed directly in the database, or just the object numbers shown - your search criteria is met in these documents and at these locations within each document. 
+ +Source document preparation and output generation is a two step process: (i) document source is prepared, that is, marked up in sisu markup syntax and (ii) the desired output subsequently generated by running the sisu engine against document source. Output representations if updated (in the sisu engine) can be generated by re-running the engine against the prepared source. Using SiSU markup applied to a document, SiSU custom builds various standard open output formats including plain text, HTML, XHTML, XML, OpenDocument, LaTeX or PDF files, and populate an SQL database with objects~{ objects include: headings, paragraphs, verse, tables, images, but not footnotes/endnotes which are numbered separately and tied to the object from which they are referenced. }~ (equating generally to paragraph-sized chunks) so searches may be performed and matches returned with that degree of granularity ( e.g. your search criteria is met by these documents and at these locations within each document). Document output formats share a common object numbering system for locating content. This is particularly suitable for "published" works (finalized texts as opposed to works that are frequently changed or updated) for which it provides a fixed means of reference of content. + +In preparing a SiSU document you optionally provide semantic information related to the document in a document header, and in marking up the substantive text provide information on the structure of the document, primarily indicating heading levels and footnotes. You also provide information on basic text attributes where used. The rest is automatic, sisu from this information custom builds~{ i.e. the html, pdf, odf outputs are each built individually and optimised for that form of presentation, rather than for example the html being a saved version of the odf, or the pdf being a saved version of the html. }~ the different forms of output requested. 
+ +SiSU works with an abstraction of the document based on its structure which is comprised of its frame~{ the different heading levels }~ and the objects~{ units of text, primarily paragraphs and headings, also any tables, poems, code-blocks }~ it contains, which enables SiSU to represent the document in many different ways, and to take advantage of the strengths of different ways of presenting documents. The objects are numbered, and these numbers can be used to provide a common base for citing material within a document across the different output format types. This is significant as page numbers are not suited to the digital age, in web publishing, changing a browser's default font or using a different browser means that text appears on different pages; and in publishing in different formats, html, landscape and portrait pdf etc. again page numbers are of no use to cite text in a manner that is relevant against the different output types. Dealing with documents at an object level together with object numbering also has implications for search. + +One of the challenges of maintaining documents is to keep them in a format that would allow users to use them without depending on a proprietary software popular at the time. Consider the ease of dealing with legacy proprietary formats today and what guarantee you have that old proprietary formats will remain (or can be read without proprietary software/equipment) in 15 years time, or the way in which html has evolved over its relatively short span of existence. SiSU provides the flexibility of outputting documents in multiple non-proprietary open formats including html, pdf~{ Specification submitted by Adobe to ISO to become a full open ISO specification
http://www.linux-watch.com/news/NS7542722606.html }~ and the ISO standard ODF.~{ ISO/IEC 26300:2006 }~ Whilst SiSU relies on software, the markup is uncomplicated and minimalistic which guarantees that future engines can be written to run against it. It is also easily converted to other formats, which means documents prepared in SiSU can be migrated to other document formats. Further security is provided by the fact that the software itself, SiSU, is available under GPL3, a licence that guarantees that the source code will always be open, and free as in libre which means that the code base can be used, updated and further developed as required under the terms of its license. Another challenge is to keep up with a moving target. SiSU permits new forms of output to be added as they become important, (Open Document Format text was added in 2006), and existing output to be updated (html has evolved and the related module has been updated repeatedly over the years, presumably when the World Wide Web Consortium (w3c) finalises html 5 which is currently under development, the html module will again be updated allowing all existing documents to be regenerated as html 5). + +The document formats are written to the file-system and available for indexing by independent indexing tools, whether off the web like Google and Yahoo or on the site like Lucene and Hyperestraier. + +SiSU also provides other features such as concordance files and document content certificates, and the working against an abstraction of document structure has further possibilities for the research and development of other document representations, the availability of objects is useful for example for topic maps and the commercial law thesaurus by Vikki Rogers and Al Kritzer, together with the flexibility of SiSU offers great possibilities. + +SiSU is primarily for published works, which can take advantage of the citation system to reliably reference its documents. 
SiSU works well in a complementary manner with such collaborative technologies as Wikis, which can take advantage of and be used to discuss the substance of content prepared in SiSU. + +http://www.jus.uio.no/sisu + +% SiSU is a way of preparing, publishing, managing and searching documents. + +1~sisu_how How does sisu work? + +SiSU markup is fairly minimalistic, it consists of: a (largely optional) document header, made up of information about the document (such as when it was published, who authored it, and granting what rights) and any processing instructions; and markup within the substantive text of the document, which is related to document structure and typeface. SiSU must be able to discern the structure of a document, (text headings and their levels in relation to each other), either from information provided in the document header or from markup within the text (or from a combination of both). Processing is done against an abstraction of the document comprising of information on the document's structure and its objects,[2] which the program serializes (providing the object numbers) and which are assigned hash sum values based on their content. This abstraction of information about document structure, objects, (and hash sums), provides considerable flexibility in representing documents different ways and for different purposes (e.g. search, document layout, publishing, content certification, concordance etc.), and makes it possible to take advantage of some of the strengths of established ways of representing documents, (or indeed to create new ones). + +1~sisu_feature_summary Summary of features + +_* sparse/minimal markup (clean utf-8 source texts). Documents are prepared in a single UTF-8 file using a minimalistic mnemonic syntax. Typical literature, documents like "War and Peace" require almost no markup, and most of the headers are optional. 
+ +_* markup is easily readable/parsable by the human eye, (basic markup is simpler and more sparse than the most basic HTML), [this may also be converted to XML representations of the same input/source document]. + +_* markup defines document structure (this may be done once in a header pattern-match description, or for heading levels individually); basic text attributes (bold, italics, underscore, strike-through etc.) as required; and semantic information related to the document (header information, extended beyond the Dublin core and easily further extended as required); the headers may also contain processing instructions. SiSU markup is primarily an abstraction of document structure and document metadata to permit taking advantage of the basic strengths of existing alternative practical standard ways of representing documents [be that browser viewing, paper publication, sql search etc.] (html, xml, odf, latex, pdf, sql) + +_* for output produces reasonably elegant output of established industry and institutionally accepted open standard formats.[3] takes advantage of the different strengths of various standard formats for representing documents, amongst the output formats currently supported are: + +_1* html - both as a single scrollable text and a segmented document + +_1* xhtml + +_1* XML - both in sax and dom style xml structures for further development as required + +_1* ODF - open document format, the iso standard for document storage + +_1* LaTeX - used to generate pdf + +_1* pdf (via LaTeX) + +_1* sql - population of an sql database, (at the same object level that is used to cite text within a document) + +Also produces: concordance files; document content certificates (md5 or sha256 digests of headings, paragraphs, images etc.) and html manifests (and sitemaps of content). (b) takes advantage of the strengths implicit in these very different output types, (e.g. 
PDFs produced using typesetting of LaTeX, databases populated with documents at an individual object/paragraph level, making possible granular search (and related possibilities)) + +_* ensuring content can be cited in a meaningful way regardless of selected output format. Online publishing (and publishing in multiple document formats) lacks a useful way of citing text internally within documents (important to academics generally and to lawyers) as page numbers are meaningless across browsers and formats. sisu seeks to provide a common way of pinpointing text within a document, (which can be utilized for citation and by search engines). The outputs share a common numbering system that is meaningful (to man and machine) across all digital outputs whether paper, screen, or database oriented, (pdf, HTML, xml, sqlite, postgresql), this numbering system can be used to reference content. + +_* Granular search within documents. SQL databases are populated at an object level (roughly headings, paragraphs, verse, tables) and become searchable with that degree of granularity, the output information provides the object/paragraph numbers which are relevant across all generated outputs; it is also possible to look at just the matching paragraphs of the documents in the database; [output indexing also works well with search indexing tools like hyperestraier]. + +_* long term maintainability of document collections in a world of changing formats, having a very sparsely marked-up source document base. there is a considerable degree of future-proofing, output representations are "upgradeable", and new document formats may be added. e.g. addition of odf (open document text) module in 2006 and html5 output sometime in future, without modification of existing prepared texts + +_* SQL search aside, documents are generated as required and static once generated. 
+ +_* documents produced are static files, and may be batch processed, this needs to be done only once but may be repeated for various reasons as desired (updated content, addition of new output formats, updated technology document presentations/representations) + +_* document source (plaintext utf-8) if shared on the net may be used as input and processed locally to produce the different document outputs + +_* document source may be bundled together (automatically) with associated documents (multiple language versions or master document with inclusions) and images and sent as a zip file called a sisupod, if shared on the net these too may be processed locally to produce the desired document outputs + +_* generated document outputs may automatically be posted to remote sites. + +_* for basic document generation, the only software dependency is Ruby, and a few standard Unix tools (this covers plaintext, HTML, XML, ODF, LaTeX). To use a database you of course need that, and to convert the LaTeX generated to pdf, a latex processor like tetex or texlive. + +_* as a developers tool it is flexible and extensible + +Syntax highlighting for SiSU markup is available for a number of text editors. + +SiSU is less about document layout than about finding a way with little markup to be able to construct an abstract representation of a document that makes it possible to produce multiple representations of it which may be rather different from each other and used for different purposes, whether layout and publishing, or search of content + +i.e. to be able to take advantage from this minimal preparation starting point of some of the strengths of rather different established ways of representing documents for different purposes, whether for search (relational database, or indexed flat files generated for that purpose whether of complete documents, or say of files made up of objects), online viewing (e.g. html, xml, pdf), or paper publication (e.g. pdf)... 
+ +the solution arrived at is by extracting structural information about the document (about headings within the document) and by tracking objects (which are serialized and also given hash values) in the manner described. It makes possible representations that are quite different from those offered at present. For example objects could be saved individually and identified by their hashes, with an index of how the objects relate to each other to form a document. + diff --git a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_sqlite.sst b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_sqlite.sst index 74f8c84f..a9252056 100644 --- a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_sqlite.sst +++ b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_sqlite.sst @@ -28,6 +28,11 @@ @bold: /Gnu|Debian|Ruby|SiSU/ +@man: 8; +name=sisu - package to install what sisu needs to populate an sqlite database (sqlite dependency component) +synopsis=sisu -dv [filename/wildcard ] +sisu -dv [instruction] + @links: { SiSU Manual }http://www.jus.uio.no/sisu/sisu_manual/ { Book Samples and Markup Examples }http://www.jus.uio.no/sisu/SiSU/2.html { SiSU @ Wikipedia }http://en.wikipedia.org/wiki/SiSU diff --git a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_summary_of_features.ssi b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_summary_of_features.ssi new file mode 100644 index 00000000..9a2e2ddd --- /dev/null +++ b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_summary_of_features.ssi @@ -0,0 +1,133 @@ +% SiSU 0.58 + +@title: SiSU + +@subtitle: Commands + +@creator: Ralph Amissah + +@rights: Copyright (C) Ralph Amissah 2007, part of SiSU documentation, License GPL 3 + +@type: information + +@subject: ebook, epublishing, electronic book, electronic publishing, electronic document, electronic citation, data structure, citation systems, search + +@date.created: 2002-08-28 + +@date.issued: 2002-08-28 + +@date.available: 2002-08-28 + +@date.modified: 2007-09-16 + 
+@date: 2007-09-16 + +@level: new=C; break=1; num_top=1 + +@skin: skin_sisu_manual + +@bold: /Gnu|Debian|Ruby|SiSU/ + +@links: { SiSU Manual }http://www.jus.uio.no/sisu/sisu_manual/ +{ Book Samples and Markup Examples }http://www.jus.uio.no/sisu/SiSU/2.html +{ SiSU @ Wikipedia }http://en.wikipedia.org/wiki/SiSU +{ SiSU @ Freshmeat }http://freshmeat.net/projects/sisu/ +{ SiSU @ Ruby Application Archive }http://raa.ruby-lang.org/project/sisu/ +{ SiSU @ Debian }http://packages.qa.debian.org/s/sisu.html +{ SiSU Download }http://www.jus.uio.no/sisu/SiSU/download.html +{ SiSU Changelog }http://www.jus.uio.no/sisu/SiSU/changelog.html +{ SiSU help }http://www.jus.uio.no/sisu/sisu_manual/sisu_help/ +{ SiSU help sources }http://www.jus.uio.no/sisu/sisu_manual/sisu_help_sources/ + +:A~? @title @creator + +:B~? What is SiSU? + +:C~? Description + +1~sisu_intro Introduction - What is SiSU? + +SiSU is a system for document markup, publishing (in multiple open standard formats) and search + +SiSU~{ "SiSU information Structuring Universe" or "Structured information, Serialized Units".
also chosen for the meaning of the Finnish term "sisu". }~ is a~{ Unix command line oriented }~ framework for document structuring, publishing and search, comprising of (a) a lightweight document structure and presentation markup syntax and (b) an accompanying engine for generating standard document format outputs from documents prepared in sisu markup syntax, which is able to produce multiple standard outputs that (can) share a common numbering system for the citation of text within a document. + +SiSU is developed under an open source, software libre license (GPL3). It has been developed in the context of coping with large document sets with evolving markup related technologies, for which you want multiple output formats, a common mechanism for cross-output-format citation, and search. + +SiSU both defines a markup syntax and provides an engine that produces open standards format outputs from documents prepared with SiSU markup. From a single lightly prepared document sisu custom builds several standard output formats which share a common (text object) numbering system for citation of content within a document (that also has implications for search). The sisu engine works with an abstraction of the document's structure and content from which it is possible to generate different forms of representation of the document. Significantly SiSU markup is more sparse than html and outputs which include html, LaTeX, landscape and portrait pdfs, Open Document Format (ODF), all of which can be added to and updated. SiSU is also able to populate SQL type databases at an object level, which means that searches can be made with that degree of granularity. Results of objects (primarily paragraphs and headings) can be viewed directly in the database, or just the object numbers shown - your search criteria is met in these documents and at these locations within each document. 
+ +Source document preparation and output generation is a two step process: (i) document source is prepared, that is, marked up in sisu markup syntax and (ii) the desired output subsequently generated by running the sisu engine against document source. Output representations if updated (in the sisu engine) can be generated by re-running the engine against the prepared source. Using SiSU markup applied to a document, SiSU custom builds various standard open output formats including plain text, HTML, XHTML, XML, OpenDocument, LaTeX or PDF files, and populate an SQL database with objects~{ objects include: headings, paragraphs, verse, tables, images, but not footnotes/endnotes which are numbered separately and tied to the object from which they are referenced. }~ (equating generally to paragraph-sized chunks) so searches may be performed and matches returned with that degree of granularity ( e.g. your search criteria is met by these documents and at these locations within each document). Document output formats share a common object numbering system for locating content. This is particularly suitable for "published" works (finalized texts as opposed to works that are frequently changed or updated) for which it provides a fixed means of reference of content. + +In preparing a SiSU document you optionally provide semantic information related to the document in a document header, and in marking up the substantive text provide information on the structure of the document, primarily indicating heading levels and footnotes. You also provide information on basic text attributes where used. The rest is automatic, sisu from this information custom builds~{ i.e. the html, pdf, odf outputs are each built individually and optimised for that form of presentation, rather than for example the html being a saved version of the odf, or the pdf being a saved version of the html. }~ the different forms of output requested. 
+ +SiSU works with an abstraction of the document based on its structure which is comprised of its frame~{ the different heading levels }~ and the objects~{ units of text, primarily paragraphs and headings, also any tables, poems, code-blocks }~ it contains, which enables SiSU to represent the document in many different ways, and to take advantage of the strengths of different ways of presenting documents. The objects are numbered, and these numbers can be used to provide a common base for citing material within a document across the different output format types. This is significant as page numbers are not suited to the digital age, in web publishing, changing a browser's default font or using a different browser means that text appears on different pages; and in publishing in different formats, html, landscape and portrait pdf etc. again page numbers are of no use to cite text in a manner that is relevant against the different output types. Dealing with documents at an object level together with object numbering also has implications for search. + +One of the challenges of maintaining documents is to keep them in a format that would allow users to use them without depending on a proprietary software popular at the time. Consider the ease of dealing with legacy proprietary formats today and what guarantee you have that old proprietary formats will remain (or can be read without proprietary software/equipment) in 15 years time, or the way in which html has evolved over its relatively short span of existence. SiSU provides the flexibility of outputting documents in multiple non-proprietary open formats including html, pdf~{ Specification submitted by Adobe to ISO to become a full open ISO specification
http://www.linux-watch.com/news/NS7542722606.html }~ and the ISO standard ODF.~{ ISO/IEC 26300:2006 }~ Whilst SiSU relies on software, the markup is uncomplicated and minimalistic which guarantees that future engines can be written to run against it. It is also easily converted to other formats, which means documents prepared in SiSU can be migrated to other document formats. Further security is provided by the fact that the software itself, SiSU is available under GPL3 a licence that guarantees that the source code will always be open, and free as in libre which means that that code base can be used, updated and further developed as required under the terms of its license. Another challenge is to keep up with a moving target. SiSU permits new forms of output to be added as they become important, (Open Document Format text was added in 2006), and existing output to be updated (html has evolved and the related module has been updated repeatedly over the years, presumably when the World Wide Web Consortium (w3c) finalises html 5 which is currently under development, the html module will again be updated allowing all existing documents to be regenerated as html 5). + +The document formats are written to the file-system and available for indexing by independent indexing tools, whether off the web like Google and Yahoo or on the site like Lucene and Hyperestraier. + +SiSU also provides other features such as concordance files and document content certificates, and the working against an abstraction of document structure has further possibilities for the research and development of other document representations, the availability of objects is useful for example for topic maps and the commercial law thesaurus by Vikki Rogers and Al Kritzer, together with the flexibility of SiSU offers great possibilities. + +SiSU is primarily for published works, which can take advantage of the citation system to reliably reference its documents.
SiSU works well in a complementary manner with such collaborative technologies as Wikis, which can take advantage of and be used to discuss the substance of content prepared in SiSU. + +http://www.jus.uio.no/sisu + +% SiSU is a way of preparing, publishing, managing and searching documents. + +1~sisu_how How does sisu work? + +SiSU markup is fairly minimalistic, it consists of: a (largely optional) document header, made up of information about the document (such as when it was published, who authored it, and granting what rights) and any processing instructions; and markup within the substantive text of the document, which is related to document structure and typeface. SiSU must be able to discern the structure of a document, (text headings and their levels in relation to each other), either from information provided in the document header or from markup within the text (or from a combination of both). Processing is done against an abstraction of the document comprising of information on the document's structure and its objects,[2] which the program serializes (providing the object numbers) and which are assigned hash sum values based on their content. This abstraction of information about document structure, objects, (and hash sums), provides considerable flexibility in representing documents different ways and for different purposes (e.g. search, document layout, publishing, content certification, concordance etc.), and makes it possible to take advantage of some of the strengths of established ways of representing documents, (or indeed to create new ones). + +1~sisu_feature_summary Summary of features + +_* sparse/minimal markup (clean utf-8 source texts). Documents are prepared in a single UTF-8 file using a minimalistic mnemonic syntax. Typical literature, documents like "War and Peace" require almost no markup, and most of the headers are optional. 
+ +_* markup is easily readable/parsable by the human eye, (basic markup is simpler and more sparse than the most basic HTML), [this may also be converted to XML representations of the same input/source document]. + +_* markup defines document structure (this may be done once in a header pattern-match description, or for heading levels individually); basic text attributes (bold, italics, underscore, strike-through etc.) as required; and semantic information related to the document (header information, extended beyond the Dublin core and easily further extended as required); the headers may also contain processing instructions. SiSU markup is primarily an abstraction of document structure and document metadata to permit taking advantage of the basic strengths of existing alternative practical standard ways of representing documents [be that browser viewing, paper publication, sql search etc.] (html, xml, odf, latex, pdf, sql) + +_* for output produces reasonably elegant output of established industry and institutionally accepted open standard formats.[3] takes advantage of the different strengths of various standard formats for representing documents, amongst the output formats currently supported are: + +_1* html - both as a single scrollable text and a segmented document + +_1* xhtml + +_1* XML - both in sax and dom style xml structures for further development as required + +_1* ODF - open document format, the iso standard for document storage + +_1* LaTeX - used to generate pdf + +_1* pdf (via LaTeX) + +_1* sql - population of an sql database, (at the same object level that is used to cite text within a document) + +Also produces: concordance files; document content certificates (md5 or sha256 digests of headings, paragraphs, images etc.) and html manifests (and sitemaps of content). (b) takes advantage of the strengths implicit in these very different output types, (e.g. 
PDFs produced using typesetting of LaTeX, databases populated with documents at an individual object/paragraph level, making possible granular search (and related possibilities)) + +_* ensuring content can be cited in a meaningful way regardless of selected output format. Online publishing (and publishing in multiple document formats) lacks a useful way of citing text internally within documents (important to academics generally and to lawyers) as page numbers are meaningless across browsers and formats. sisu seeks to provide a common way of pinpoint the text within a document, (which can be utilized for citation and by search engines). The outputs share a common numbering system that is meaningful (to man and machine) across all digital outputs whether paper, screen, or database oriented, (pdf, HTML, xml, sqlite, postgresql), this numbering system can be used to reference content. + +_* Granular search within documents. SQL databases are populated at an object level (roughly headings, paragraphs, verse, tables) and become searchable with that degree of granularity, the output information provides the object/paragraph numbers which are relevant across all generated outputs; it is also possible to look at just the matching paragraphs of the documents in the database; [output indexing also work well with search indexing tools like hyperestraier]. + +_* long term maintainability of document collections in a world of changing formats, having a very sparsely marked-up source document base. there is a considerable degree of future-proofing, output representations are "upgradeable", and new document formats may be added. e.g. addition of odf (open document text) module in 2006 and in future html5 output sometime in future, without modification of existing prepared texts + +_* SQL search aside, documents are generated as required and static once generated. 
+ +_* documents produced are static files, and may be batch processed, this needs to be done only once but may be repeated for various reasons as desired (updated content, addition of new output formats, updated technology document presentations/representations) + +_* document source (plaintext utf-8) if shared on the net may be used as input and processed locally to produce the different document outputs + +_* document source may be bundled together (automatically) with associated documents (multiple language versions or master document with inclusions) and images and sent as a zip file called a sisupod, if shared on the net these too may be processed locally to produce the desired document outputs + +_* generated document outputs may automatically be posted to remote sites. + +_* for basic document generation, the only software dependency is Ruby, and a few standard Unix tools (this covers plaintext, HTML, XML, ODF, LaTeX). To use a database you of course need that, and to convert the LaTeX generated to pdf, a latex processor like tetex or texlive. + +_* as a developers tool it is flexible and extensible + +Syntax highlighting for SiSU markup is available for a number of text editors. + +SiSU is less about document layout than about finding a way with little markup to be able to construct an abstract representation of a document that makes it possible to produce multiple representations of it which may be rather different from each other and used for different purposes, whether layout and publishing, or search of content + +i.e. to be able to take advantage from this minimal preparation starting point of some of the strengths of rather different established ways of representing documents for different purposes, whether for search (relational database, or indexed flat files generated for that purpose whether of complete documents, or say of files made up of objects), online viewing (e.g. html, xml, pdf), or paper publication (e.g. pdf)... 
+ +the solution arrived at is by extracting structural information about the document (about headings within the document) and by tracking objects (which are serialized and also given hash values) in the manner described. It makes possible representations that are quite different from those offered at present. For example objects could be saved individually and identified by their hashes, with an index of how the objects relate to each other to form a document. + diff --git a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_webrick.sst b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_webrick.sst index c7d450f2..e9454cba 100644 --- a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_webrick.sst +++ b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_webrick.sst @@ -28,6 +28,10 @@ @bold: /Gnu|Debian|Ruby|SiSU/ +@man: 8; +name=sisu - documents: structuring, publishing in multiple formats, and search; +synopsis=sisu -W + @links: { Book Samples and Markup Examples }http://www.jus.uio.no/sisu/SiSU/2.html { SiSU @ Wikipedia }http://en.wikipedia.org/wiki/SiSU { SiSU @ Freshmeat }http://freshmeat.net/projects/sisu/ diff --git a/data/sisu/conf/editor-syntax-etc/vim/syntax/sisu.vim b/data/sisu/conf/editor-syntax-etc/vim/syntax/sisu.vim index c1a44ee5..791d0c47 100644 --- a/data/sisu/conf/editor-syntax-etc/vim/syntax/sisu.vim +++ b/data/sisu/conf/editor-syntax-etc/vim/syntax/sisu.vim @@ -42,10 +42,8 @@ syn match sisu_error contains=sisu_error "]" "% url/link syn region sisu_link contains=sisu_error,sisu_error_wspace matchgroup=sisu_action start="^<<\s*|[a-zA-Z0-9^._-]\+|@|[a-zA-Z0-9^._-]\+|"rs=s+2 end="$" "% header -syn region sisu_header_content contains=sisu_error,sisu_error_wspace,sisu_content_alt,sisu_link,sisu_linked,sisu_break matchgroup=sisu_header start="^0\~\(\S\+\|[^-]\)" end="$" -syn region sisu_header_content contains=sisu_error,sisu_error_wspace,sisu_content_alt,sisu_link,sisu_linked,sisu_break matchgroup=sisu_header start="^0\~\(tags\?\|date\)\s\+"rs=e-1 
end="\n$" -syn region sisu_header_content contains=sisu_error,sisu_error_wspace,sisu_content_alt,sisu_link,sisu_linked,sisu_break matchgroup=sisu_header start="^@\S\+:[+-]\?\s"rs=e-1 end="$" -syn region sisu_header_content contains=sisu_error,sisu_error_wspace,sisu_content_alt,sisu_link,sisu_linked,sisu_break matchgroup=sisu_header start="^@\(tags\?\|date\):\s\+"rs=e-1 end="\n$" +syn region sisu_header_content contains=sisu_error,sisu_error_wspace,sisu_content_alt,sisu_link,sisu_linked,sisu_break matchgroup=sisu_header start="^0\~\(\S\+\|[^-]\)" end="\n$" +syn region sisu_header_content contains=sisu_error,sisu_error_wspace,sisu_content_alt,sisu_link,sisu_linked,sisu_break matchgroup=sisu_header start="^@\S\+:[+-]\?\s"rs=e-1 end="\n$" "% headings syn region sisu_heading contains=sisu_mark_endnote,sisu_content_endnote,sisu_marktail,sisu_strikeout,sisu_number,sisu_control,sisu_identifier,sisu_ocn,sisu_error,sisu_error_wspace matchgroup=sisu_structure start="^\([1-8]\|:\?[A-C]\)\~\(\S\+\|[^-]\)" end="$" "% grouped text -- cgit v1.2.3 From 7d9fced3aee0c451031e57ffd01086ed42d5e428 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Sat, 29 Sep 2007 10:10:13 +0100 Subject: sisu documentation related --- .../sisu/sisu_markup_samples/sisu_manual/sisu.ssm | 8 +-- .../sisu_manual/sisu_interesting_to_whom.ssi | 54 ++++++++++++++++ .../sisu_manual/sisu_introduction.ssi | 58 ----------------- .../sisu_manual/sisu_manual.ssm | 4 ++ .../sisu_manual/sisu_work_needed_and_wishlist.ssi | 75 ++++++++++++++++++++++ 5 files changed, 137 insertions(+), 62 deletions(-) create mode 100644 data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_interesting_to_whom.ssi create mode 100644 data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_work_needed_and_wishlist.ssi diff --git a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu.ssm b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu.ssm index 33fbc344..322b3620 100644 --- a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu.ssm +++ 
b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu.ssm @@ -18,9 +18,9 @@ @date.available: 2002-08-28 -@date.modified: 2007-08-30 +@date.modified: 2007-09-29 -@date: 2007-08-30 +@date: 2007-09-29 @level: new=C; break=1; num_top=1 @@ -51,12 +51,12 @@ sisu [-CcFLSVvW] << |sisu_introduction.ssi|@|^| -<< |sisu_help.sst|@|^| - % :B~? SiSU Commands << |sisu_commands.sst|@|^| +<< |sisu_help.sst|@|^| + % :B~? SiSU Markup << |sisu_markup.sst|@|^| diff --git a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_interesting_to_whom.ssi b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_interesting_to_whom.ssi new file mode 100644 index 00000000..b8e5a5d6 --- /dev/null +++ b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_interesting_to_whom.ssi @@ -0,0 +1,54 @@ +% SiSU 0.58 + +@title: SiSU + +@subtitle: Who Might Be Interested + +@creator: Ralph Amissah + +@rights: Copyright (C) Ralph Amissah 2007, part of SiSU documentation, License GPL 3 + +@type: information + +@subject: ebook, epublishing, electronic book, electronic publishing, electronic document, electronic citation, data structure, citation systems, search + +@date.created: 2002-08-28 + +@date.issued: 2002-08-28 + +@date.available: 2002-08-28 + +@date.modified: 2007-09-16 + +@date: 2007-09-16 + +@level: new=C; break=1; num_top=1 + +@skin: skin_sisu_manual + +@bold: /Gnu|Debian|Ruby|SiSU/ + +@links: { SiSU Manual }http://www.jus.uio.no/sisu/sisu_manual/ +{ Book Samples and Markup Examples }http://www.jus.uio.no/sisu/SiSU/2.html +{ SiSU @ Wikipedia }http://en.wikipedia.org/wiki/SiSU +{ SiSU @ Freshmeat }http://freshmeat.net/projects/sisu/ +{ SiSU @ Ruby Application Archive }http://raa.ruby-lang.org/project/sisu/ +{ SiSU @ Debian }http://packages.qa.debian.org/s/sisu.html +{ SiSU Download }http://www.jus.uio.no/sisu/SiSU/download.html +{ SiSU Changelog }http://www.jus.uio.no/sisu/SiSU/changelog.html +{ SiSU help }http://www.jus.uio.no/sisu/sisu_manual/sisu_help/ +{ SiSU help sources 
}http://www.jus.uio.no/sisu/sisu_manual/sisu_help_sources/ + +:A~? @title @creator + +:B~? Who might SiSU interest? + +1~sisu_interest Who might be interested in the SiSU feature set? + +SiSU is most likely to be of interest to people who are working with medium to large volumes of published texts that would like to have them presented in a uniform way that is searchable (either using sisu database integration or an appropriate indexing tool), with the possibility of multiple alternative output formats that may be added to and upgraded/updated over time. SiSU should be of interest to institutions/ organisations/ governments/ individuals with document collections and some technical knowhow that are interested in: + +_* long term maintenance and reducing downstream/future costs of maintaining those document sets for which SiSU is suited. + +_* the ability to output multiple standard format outputs for various purposes. + +_* the implications for search offered diff --git a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_introduction.ssi b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_introduction.ssi index 9a2e2ddd..53301848 100644 --- a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_introduction.ssi +++ b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_introduction.ssi @@ -73,61 +73,3 @@ http://www.jus.uio.no/sisu % SiSU is a way of preparing, publishing, managing and searching documents. -1~sisu_how How does sisu work? - -SiSU markup is fairly minimalistic, it consists of: a (largely optional) document header, made up of information about the document (such as when it was published, who authored it, and granting what rights) and any processing instructions; and markup within the substantive text of the document, which is related to document structure and typeface.
SiSU must be able to discern the structure of a document, (text headings and their levels in relation to each other), either from information provided in the document header or from markup within the text (or from a combination of both). Processing is done against an abstraction of the document comprising of information on the document's structure and its objects,[2] which the program serializes (providing the object numbers) and which are assigned hash sum values based on their content. This abstraction of information about document structure, objects, (and hash sums), provides considerable flexibility in representing documents different ways and for different purposes (e.g. search, document layout, publishing, content certification, concordance etc.), and makes it possible to take advantage of some of the strengths of established ways of representing documents, (or indeed to create new ones). - -1~sisu_feature_summary Summary of features - -_* sparse/minimal markup (clean utf-8 source texts). Documents are prepared in a single UTF-8 file using a minimalistic mnemonic syntax. Typical literature, documents like "War and Peace" require almost no markup, and most of the headers are optional. - -_* markup is easily readable/parsable by the human eye, (basic markup is simpler and more sparse than the most basic HTML), [this may also be converted to XML representations of the same input/source document]. - -_* markup defines document structure (this may be done once in a header pattern-match description, or for heading levels individually); basic text attributes (bold, italics, underscore, strike-through etc.) as required; and semantic information related to the document (header information, extended beyond the Dublin core and easily further extended as required); the headers may also contain processing instructions. 
SiSU markup is primarily an abstraction of document structure and document metadata to permit taking advantage of the basic strengths of existing alternative practical standard ways of representing documents [be that browser viewing, paper publication, sql search etc.] (html, xml, odf, latex, pdf, sql) - -_* for output produces reasonably elegant output of established industry and institutionally accepted open standard formats.[3] takes advantage of the different strengths of various standard formats for representing documents, amongst the output formats currently supported are: - -_1* html - both as a single scrollable text and a segmented document - -_1* xhtml - -_1* XML - both in sax and dom style xml structures for further development as required - -_1* ODF - open document format, the iso standard for document storage - -_1* LaTeX - used to generate pdf - -_1* pdf (via LaTeX) - -_1* sql - population of an sql database, (at the same object level that is used to cite text within a document) - -Also produces: concordance files; document content certificates (md5 or sha256 digests of headings, paragraphs, images etc.) and html manifests (and sitemaps of content). (b) takes advantage of the strengths implicit in these very different output types, (e.g. PDFs produced using typesetting of LaTeX, databases populated with documents at an individual object/paragraph level, making possible granular search (and related possibilities)) - -_* ensuring content can be cited in a meaningful way regardless of selected output format. Online publishing (and publishing in multiple document formats) lacks a useful way of citing text internally within documents (important to academics generally and to lawyers) as page numbers are meaningless across browsers and formats. sisu seeks to provide a common way of pinpoint the text within a document, (which can be utilized for citation and by search engines). 
The outputs share a common numbering system that is meaningful (to man and machine) across all digital outputs whether paper, screen, or database oriented, (pdf, HTML, xml, sqlite, postgresql), this numbering system can be used to reference content. - -_* Granular search within documents. SQL databases are populated at an object level (roughly headings, paragraphs, verse, tables) and become searchable with that degree of granularity, the output information provides the object/paragraph numbers which are relevant across all generated outputs; it is also possible to look at just the matching paragraphs of the documents in the database; [output indexing also work well with search indexing tools like hyperestraier]. - -_* long term maintainability of document collections in a world of changing formats, having a very sparsely marked-up source document base. there is a considerable degree of future-proofing, output representations are "upgradeable", and new document formats may be added. e.g. addition of odf (open document text) module in 2006 and in future html5 output sometime in future, without modification of existing prepared texts - -_* SQL search aside, documents are generated as required and static once generated. 
- -_* documents produced are static files, and may be batch processed, this needs to be done only once but may be repeated for various reasons as desired (updated content, addition of new output formats, updated technology document presentations/representations) - -_* document source (plaintext utf-8) if shared on the net may be used as input and processed locally to produce the different document outputs - -_* document source may be bundled together (automatically) with associated documents (multiple language versions or master document with inclusions) and images and sent as a zip file called a sisupod, if shared on the net these too may be processed locally to produce the desired document outputs - -_* generated document outputs may automatically be posted to remote sites. - -_* for basic document generation, the only software dependency is Ruby, and a few standard Unix tools (this covers plaintext, HTML, XML, ODF, LaTeX). To use a database you of course need that, and to convert the LaTeX generated to pdf, a latex processor like tetex or texlive. - -_* as a developers tool it is flexible and extensible - -Syntax highlighting for SiSU markup is available for a number of text editors. - -SiSU is less about document layout than about finding a way with little markup to be able to construct an abstract representation of a document that makes it possible to produce multiple representations of it which may be rather different from each other and used for different purposes, whether layout and publishing, or search of content - -i.e. to be able to take advantage from this minimal preparation starting point of some of the strengths of rather different established ways of representing documents for different purposes, whether for search (relational database, or indexed flat files generated for that purpose whether of complete documents, or say of files made up of objects), online viewing (e.g. html, xml, pdf), or paper publication (e.g. pdf)... 
- -the solution arrived at is by extracting structural information about the document (about headings within the document) and by tracking objects (which are serialized and also given hash values) in the manner described. It makes possible representations that are quite different from those offered at present. For example objects could be saved individually and identified by their hashes, with an index of how the objects relate to each other to form a document. - diff --git a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_manual.ssm b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_manual.ssm index 4a4ecef8..0aab18c8 100644 --- a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_manual.ssm +++ b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_manual.ssm @@ -109,6 +109,10 @@ << |sisu_faq.sst|@|^| +<< |sisu_interesting_to_whom.ssi|@|^| + +<< |sisu_work_needed_and_wishlist.ssi|@|^| + << |sisu_syntax_highlighting.sst|@|^| << |sisu_help_sources.sst|@|^| diff --git a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_work_needed_and_wishlist.ssi b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_work_needed_and_wishlist.ssi new file mode 100644 index 00000000..de9033e8 --- /dev/null +++ b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_work_needed_and_wishlist.ssi @@ -0,0 +1,75 @@ +% SiSU 0.58 + +@title: SiSU + +@subtitle: Work Needed and Wishlist + +@creator: Ralph Amissah + +@rights: Copyright (C) Ralph Amissah 2007, part of SiSU documentation, License GPL 3 + +@type: information + +@subject: ebook, epublishing, electronic book, electronic publishing, electronic document, electronic citation, data structure, citation systems, search + +@date.created: 2002-08-28 + +@date.issued: 2002-08-28 + +@date.available: 2002-08-28 + +@date.modified: 2007-09-16 + +@date: 2007-09-16 + +@level: new=C; break=1; num_top=1 + +@skin: skin_sisu_manual + +@bold: /Gnu|Debian|Ruby|SiSU/ + +@links: { SiSU Manual }http://www.jus.uio.no/sisu/sisu_manual/ +{ Book Samples and 
Markup Examples }http://www.jus.uio.no/sisu/SiSU/2.html +{ SiSU @ Wikipedia }http://en.wikipedia.org/wiki/SiSU +{ SiSU @ Freshmeat }http://freshmeat.net/projects/sisu/ +{ SiSU @ Ruby Application Archive }http://raa.ruby-lang.org/project/sisu/ +{ SiSU @ Debian }http://packages.qa.debian.org/s/sisu.html +{ SiSU Download }http://www.jus.uio.no/sisu/SiSU/download.html +{ SiSU Changelog }http://www.jus.uio.no/sisu/SiSU/changelog.html +{ SiSU help }http://www.jus.uio.no/sisu/sisu_manual/sisu_help/ +{ SiSU help sources }http://www.jus.uio.no/sisu/sisu_manual/sisu_help_sources/ + +:A~? @title @creator + +:B~? Work Needed and Wishlist + +1~sisu_work_needed Work Needed + +SiSU is fairly mature and for most purposes the syntax and what it is supposed to do is clear. For the most part additions and changes are minor and backward compatible, (in particular there may be things of interest that to be able to achieve will require additions to the syntax). + +_* Amongst the most requested features is a way to represent and extract bibliographies from scholarly and other writings. This involves an extension of sisu markup syntax and a new module to extract the bibliography. + +_* Integration of postgresql tsearch2 / gin indexing, (which currently needs to be done manually, and) which has been waiting for the integration of tsearch2 / gin into Postgresql main, which is supposed to occur in Postgresql 8.3 + +1~sisu_wishlist Wishlist + +SiSU provides a lot of "plumbing" and is readily usable as a tool by those comfortable with marking up documents with an editor. The syntax is fairly easy to learn, especially the subset required to start using SiSU effectively. 
+ +SiSU might also be of interest to developers interested in: + +_* experimenting with the search implications offered + +_* producing additional output formats + +_* producing conversion tools + +_* producing input interfaces, (experimenting with additional interfaces for producing sisu source documents) + +Several tools that are of interest would come under the heading interface and conversion. Amongst others, the following are of interest: + +_* Converters from various document formats, such as Open Document Text (ODF), MS Word(TM) and Word Perfect(TM), even html. The problem here is that one of the most important things for SiSU is to be able to recognise the structure of a document, and many documents prepared in other formats have not been prepared strictly with a view to representing structure, but appearance - so heading levels may be "painted" to look right rather than have the correct structural representation. Even if conversion is not perfect this may serve as a first step in assisting in conversion of documents to SiSU for those with legacy document sets that they would like to have in sisu format. (once in SiSU it is easier to get out in various other formats as this is what sisu does, within the constraints of the information that sisu uses to generate output) + +_* The possibility to save directly from various word processors, and possibly templates within them to assist in making sure the document structure is "understood" by SiSU. + +_* Web interface/front-end, a form like front end for the writing or submission of sisu documents to a server which uses SiSU to generate output. Headers could be made available as separate small entry forms with help provided to explain where they might be used. Apart from the most important headers such as title, author, date and possibly subject the remainder of the header forms could be placed after the form for substantive content.
This would offer a more Web 2.0 like approach to the use of SiSU and the possibility of using it for collaborative editing of content (possibly for documents that are to be finalised/published as the citation system is most suited to published works). [Collaborative editing is currently possible through use of a collaborative editor such as Gobby which makes use of the Obby protocol]. + -- cgit v1.2.3 From 7b09e65205a33ad453e00df27a35388e62a732fb Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Sat, 29 Sep 2007 15:26:33 +0100 Subject: prepare documentation for use ... --- .../sisu/sisu_markup_samples/sisu_manual/sisu.ssm | 3 +- .../sisu_manual/sisu_commands.sst | 3 + .../sisu_manual/sisu_complete.sst | 2 +- .../sisu_manual/sisu_config.ssi | 6 +- .../sisu_manual/sisu_download.ssi | 2 +- .../sisu_markup_samples/sisu_manual/sisu_help.sst | 20 +-- .../sisu_markup_samples/sisu_manual/sisu_how.ssi | 89 +----------- .../sisu_manual/sisu_installation.ssi | 2 +- .../sisu_manual/sisu_interesting_to_whom.ssi | 2 +- .../sisu_manual/sisu_introduction.ssi | 16 +-- .../sisu_manual/sisu_markup.sst | 150 ++++++++++++--------- .../sisu_markup_samples/sisu_manual/sisu_pdf.sst | 2 +- .../sisu_manual/sisu_postgresql.sst | 2 +- .../sisu_manual/sisu_search_cgi.ssi | 2 +- .../sisu_manual/sisu_short_feature_summary.ssi | 34 +---- .../sisu_markup_samples/sisu_manual/sisu_sql.ssi | 2 +- .../sisu_manual/sisu_sqlite.sst | 2 +- .../sisu_manual/sisu_summary_of_features.ssi | 133 ------------------ .../sisu_manual/sisu_webrick.sst | 2 +- .../sisu_manual/sisu_work_needed_and_wishlist.ssi | 4 + 20 files changed, 127 insertions(+), 351 deletions(-) delete mode 100644 data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_summary_of_features.ssi diff --git a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu.ssm b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu.ssm index 322b3620..1a71f299 100644 --- a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu.ssm +++ 
b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu.ssm @@ -26,8 +26,7 @@ @skin: skin_sisu_manual -@man: 8; -name=sisu - documents: structuring, publishing in multiple formats, and search; +@man: name=sisu - documents: markup, structuring, publishing in multiple standard formats, and search; synopsis=sisu [-abcDdFHhIiMmNnopqRrSsTtUuVvwXxYyZz0-9] [filename/wildcard ] sisu [-Ddcv] [instruction] sisu [-CcFLSVvW] diff --git a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_commands.sst b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_commands.sst index 41cac0a2..3d4d86aa 100644 --- a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_commands.sst +++ b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_commands.sst @@ -102,6 +102,9 @@ produces html (with hardlinks i.e. with name suffixes in links/local urls). html !_ -I [filename/wildcard]
produces texinfo and info file, (view with pinfo). +!_ -i [filename/wildcard]
+produces man page of file, not suitable for all outputs. + !_ -L
prints license information. diff --git a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_complete.sst b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_complete.sst index 75252e89..61019cb0 100644 --- a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_complete.sst +++ b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_complete.sst @@ -28,7 +28,7 @@ @bold: /Gnu|Debian|Ruby|SiSU/ -@man: 8; +@man: 7; name=sisu - documents: structuring, publishing in multiple formats, and search; synopsis=package for the installation of the whole of sisu with all its dependencies diff --git a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_config.ssi b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_config.ssi index 7b514430..3696f362 100644 --- a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_config.ssi +++ b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_config.ssi @@ -2,7 +2,7 @@ @title: SiSU -@subtitle: Search +@subtitle: Configuration @creator: Ralph Amissah @@ -41,9 +41,7 @@ :A~? Configuration -:B~? Configuration - -:C~? Configure Environment +:B~? 
Configure Environment 1~config Configuration diff --git a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_download.ssi b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_download.ssi index 68010c60..5f12c1ad 100644 --- a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_download.ssi +++ b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_download.ssi @@ -2,7 +2,7 @@ @title: SiSU -@subtitle: Download, Stable and Development Branches +@subtitle: Download @creator: Ralph Amissah diff --git a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_help.sst b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_help.sst index b79e2cd7..2fd78e3c 100644 --- a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_help.sst +++ b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_help.sst @@ -87,36 +87,30 @@ _1 /usr/share/doc/sisu/html/ _1 ./data/doc/sisu/html/ -The SiSU man pages can be viewed online at:~{ generated from source using rman
http://polyglotman.sourceforge.net/rman.html
With regard to SiSU man pages the formatting generated for markup syntax is not quite right, for that you might prefer the links under:
http://www.jus.uio.no/sample }~ - An online version of the sisu man page is available here: _* {~^ various sisu man pages }http://www.jus.uio.no/sisu/man/ _* {~^ sisu.1 }http://www.jus.uio.no/sisu/man/sisu.1.html -_* {~^ sisu.8 }http://www.jus.uio.no/sisu/man/sisu.8.html - -_* {~^ sisu_examples.1 }http://www.jus.uio.no/sisu/man/sisu_examples.1.html - -_* {~^ sisu_webrick.1 }http://www.jus.uio.no/sisu/man/sisu_webrick.1.html - 2~ SiSU built-in interactive help -This is particularly useful when current installation information is obtained as the interactive help is able to provide information on your sisu configuration and setup. +This is particularly useful for getting the current sisu setup/environment information: _1 sisu --help _1 sisu --help [subject] -_2 sisu --help env [for feedback on the way your system is setup with regard to sisu] - -_2 sisu -V [same as above command] - _2 sisu --help commands _2 sisu --help markup +_2 sisu --help env [for feedback on the way your system is setup with regard to sisu] + +_1 sisu -V [environment information, same as above command] + +_1 sisu (on its own provides version and some help information) + Apart from real-time information on your current configuration the SiSU manual and man pages are likely to contain more up-to-date information than the sisu interactive help (for example on commands and markup). NOTE: Running the command sisu (alone without any flags, filenames or wildcards) brings up the interactive help, as does any sisu command that is not recognised. Enter to escape. diff --git a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_how.ssi b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_how.ssi index 9a2e2ddd..58f6840c 100644 --- a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_how.ssi +++ b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_how.ssi @@ -2,7 +2,7 @@ @title: SiSU -@subtitle: Commands +@subtitle: How Does SiSU Work? @creator: Ralph Amissah @@ -41,93 +41,8 @@ :A~? @title @creator -:B~? 
What is SiSU? - -:C~? Description - -1~sisu_intro Introduction - What is SiSU? - -SiSU is a system for document markup, publishing (in multiple open standard formats) and search - -SiSU~{ "SiSU information Structuring Universe" or "Structured information, Serialized Units".
also chosen for the meaning of the Finnish term "sisu". }~ is a~{ Unix command line oriented }~ framework for document structuring, publishing and search, comprising of (a) a lightweight document structure and presentation markup syntax and (b) an accompanying engine for generating standard document format outputs from documents prepared in sisu markup syntax, which is able to produce multiple standard outputs that (can) share a common numbering system for the citation of text within a document. - -SiSU is developed under an open source, software libre license (GPL3). It has been developed in the context of coping with large document sets with evolving markup related technologies, for which you want multiple output formats, a common mechanism for cross-output-format citation, and search. - -SiSU both defines a markup syntax and provides an engine that produces open standards format outputs from documents prepared with SiSU markup. From a single lightly prepared document sisu custom builds several standard output formats which share a common (text object) numbering system for citation of content within a document (that also has implications for search). The sisu engine works with an abstraction of the document's structure and content from which it is possible to generate different forms of representation of the document. Significantly SiSU markup is more sparse than html and outputs which include html, LaTeX, landscape and portrait pdfs, Open Document Format (ODF), all of which can be added to and updated. SiSU is also able to populate SQL type databases at an object level, which means that searches can be made with that degree of granularity. Results of objects (primarily paragraphs and headings) can be viewed directly in the database, or just the object numbers shown - your search criteria is met in these documents and at these locations within each document. 
- -Source document preparation and output generation is a two step process: (i) document source is prepared, that is, marked up in sisu markup syntax and (ii) the desired output subsequently generated by running the sisu engine against document source. Output representations if updated (in the sisu engine) can be generated by re-running the engine against the prepared source. Using SiSU markup applied to a document, SiSU custom builds various standard open output formats including plain text, HTML, XHTML, XML, OpenDocument, LaTeX or PDF files, and populate an SQL database with objects~{ objects include: headings, paragraphs, verse, tables, images, but not footnotes/endnotes which are numbered separately and tied to the object from which they are referenced. }~ (equating generally to paragraph-sized chunks) so searches may be performed and matches returned with that degree of granularity ( e.g. your search criteria is met by these documents and at these locations within each document). Document output formats share a common object numbering system for locating content. This is particularly suitable for "published" works (finalized texts as opposed to works that are frequently changed or updated) for which it provides a fixed means of reference of content. - -In preparing a SiSU document you optionally provide semantic information related to the document in a document header, and in marking up the substantive text provide information on the structure of the document, primarily indicating heading levels and footnotes. You also provide information on basic text attributes where used. The rest is automatic, sisu from this information custom builds~{ i.e. the html, pdf, odf outputs are each built individually and optimised for that form of presentation, rather than for example the html being a saved version of the odf, or the pdf being a saved version of the html. }~ the different forms of output requested. 
- -SiSU works with an abstraction of the document based on its structure which is comprised of its frame~{ the different heading levels }~ and the objects~{ units of text, primarily paragraphs and headings, also any tables, poems, code-blocks }~ it contains, which enables SiSU to represent the document in many different ways, and to take advantage of the strengths of different ways of presenting documents. The objects are numbered, and these numbers can be used to provide a common base for citing material within a document across the different output format types. This is significant as page numbers are not suited to the digital age, in web publishing, changing a browser's default font or using a different browser means that text appears on different pages; and in publishing in different formats, html, landscape and portrait pdf etc. again page numbers are of no use to cite text in a manner that is relevant against the different output types. Dealing with documents at an object level together with object numbering also has implications for search. - -One of the challenges of maintaining documents is to keep them in a format that would allow users to use them without depending on a proprietary software popular at the time. Consider the ease of dealing with legacy proprietary formats today and what guarantee you have that old proprietary formats will remain (or can be read without proprietary software/equipment) in 15 years time, or the way the way in which html has evolved over its relatively short span of existence. SiSU provides the flexibility of outputing documents in multiple non-proprietary open formats including html, pdf~{ Specification submitted by Adobe to ISO to become a full open ISO specification
http://www.linux-watch.com/news/NS7542722606.html }~ and the ISO standard ODF.~{ ISO/IEC 26300:2006 }~ Whilst SiSU relies on software, the markup is uncomplicated and minimalistic which guarantees that future engines can be written to run against it. It is also easily converted to other formats, which means documents prepared in SiSU can be migrated to other document formats. Further security is provided by the fact that the software itself, SiSU is available under GPL3 a licence that guarantees that the source code will always be open, and free as in libre which means that that code base can be used updated and further developed as required under the terms of its license. Another challenge is to keep up with a moving target. SiSU permits new forms of output to be added as they become important, (Open Document Format text was added in 2006), and existing output to be updated (html has evolved and the related module has been updated repeatedly over the years, presumably when the World Wide Web Consortium (w3c) finalises html 5 which is currently under development, the html module will again be updated allowing all existing documents to be regenerated as html 5). - -The document formats are written to the file-system and available for indexing by independent indexing tools, whether off the web like Google and Yahoo or on the site like Lucene and Hyperestraier. - -SiSU also provides other features such as concordance files and document content certificates, and the working against an abstraction of document structure has further possibilities for the research and development of other document representations, the availability of objects is useful for example for topic maps and the commercial law thesaurus by Vikki Rogers and Al Krtizer, together with the flexibility of SiSU offers great possibilities. - -SiSU is primarily for published works, which can take advantage of the citation system to reliably reference its documents. 
SiSU works well in a complementary manner with such collaborative technologies as Wikis, which can take advantage of and be used to discuss the substance of content prepared in SiSU. - -http://www.jus.uio.no/sisu - -% SiSU is a way of preparing, publishing, managing and searching documents. +:B~? Description 1~sisu_how How does sisu work? SiSU markup is fairly minimalistic, it consists of: a (largely optional) document header, made up of information about the document (such as when it was published, who authored it, and granting what rights) and any processing instructions; and markup within the substantive text of the document, which is related to document structure and typeface. SiSU must be able to discern the structure of a document, (text headings and their levels in relation to each other), either from information provided in the document header or from markup within the text (or from a combination of both). Processing is done against an abstraction of the document comprising of information on the document's structure and its objects,[2] which the program serializes (providing the object numbers) and which are assigned hash sum values based on their content. This abstraction of information about document structure, objects, (and hash sums), provides considerable flexibility in representing documents different ways and for different purposes (e.g. search, document layout, publishing, content certification, concordance etc.), and makes it possible to take advantage of some of the strengths of established ways of representing documents, (or indeed to create new ones). - -1~sisu_feature_summary Summary of features - -_* sparse/minimal markup (clean utf-8 source texts). Documents are prepared in a single UTF-8 file using a minimalistic mnemonic syntax. Typical literature, documents like "War and Peace" require almost no markup, and most of the headers are optional. 
- -_* markup is easily readable/parsable by the human eye, (basic markup is simpler and more sparse than the most basic HTML), [this may also be converted to XML representations of the same input/source document]. - -_* markup defines document structure (this may be done once in a header pattern-match description, or for heading levels individually); basic text attributes (bold, italics, underscore, strike-through etc.) as required; and semantic information related to the document (header information, extended beyond the Dublin core and easily further extended as required); the headers may also contain processing instructions. SiSU markup is primarily an abstraction of document structure and document metadata to permit taking advantage of the basic strengths of existing alternative practical standard ways of representing documents [be that browser viewing, paper publication, sql search etc.] (html, xml, odf, latex, pdf, sql) - -_* for output produces reasonably elegant output of established industry and institutionally accepted open standard formats.[3] takes advantage of the different strengths of various standard formats for representing documents, amongst the output formats currently supported are: - -_1* html - both as a single scrollable text and a segmented document - -_1* xhtml - -_1* XML - both in sax and dom style xml structures for further development as required - -_1* ODF - open document format, the iso standard for document storage - -_1* LaTeX - used to generate pdf - -_1* pdf (via LaTeX) - -_1* sql - population of an sql database, (at the same object level that is used to cite text within a document) - -Also produces: concordance files; document content certificates (md5 or sha256 digests of headings, paragraphs, images etc.) and html manifests (and sitemaps of content). (b) takes advantage of the strengths implicit in these very different output types, (e.g. 
PDFs produced using typesetting of LaTeX, databases populated with documents at an individual object/paragraph level, making possible granular search (and related possibilities)) - -_* ensuring content can be cited in a meaningful way regardless of selected output format. Online publishing (and publishing in multiple document formats) lacks a useful way of citing text internally within documents (important to academics generally and to lawyers) as page numbers are meaningless across browsers and formats. sisu seeks to provide a common way of pinpoint the text within a document, (which can be utilized for citation and by search engines). The outputs share a common numbering system that is meaningful (to man and machine) across all digital outputs whether paper, screen, or database oriented, (pdf, HTML, xml, sqlite, postgresql), this numbering system can be used to reference content. - -_* Granular search within documents. SQL databases are populated at an object level (roughly headings, paragraphs, verse, tables) and become searchable with that degree of granularity, the output information provides the object/paragraph numbers which are relevant across all generated outputs; it is also possible to look at just the matching paragraphs of the documents in the database; [output indexing also work well with search indexing tools like hyperestraier]. - -_* long term maintainability of document collections in a world of changing formats, having a very sparsely marked-up source document base. there is a considerable degree of future-proofing, output representations are "upgradeable", and new document formats may be added. e.g. addition of odf (open document text) module in 2006 and in future html5 output sometime in future, without modification of existing prepared texts - -_* SQL search aside, documents are generated as required and static once generated. 
- -_* documents produced are static files, and may be batch processed, this needs to be done only once but may be repeated for various reasons as desired (updated content, addition of new output formats, updated technology document presentations/representations) - -_* document source (plaintext utf-8) if shared on the net may be used as input and processed locally to produce the different document outputs - -_* document source may be bundled together (automatically) with associated documents (multiple language versions or master document with inclusions) and images and sent as a zip file called a sisupod, if shared on the net these too may be processed locally to produce the desired document outputs - -_* generated document outputs may automatically be posted to remote sites. - -_* for basic document generation, the only software dependency is Ruby, and a few standard Unix tools (this covers plaintext, HTML, XML, ODF, LaTeX). To use a database you of course need that, and to convert the LaTeX generated to pdf, a latex processor like tetex or texlive. - -_* as a developers tool it is flexible and extensible - -Syntax highlighting for SiSU markup is available for a number of text editors. - -SiSU is less about document layout than about finding a way with little markup to be able to construct an abstract representation of a document that makes it possible to produce multiple representations of it which may be rather different from each other and used for different purposes, whether layout and publishing, or search of content - -i.e. to be able to take advantage from this minimal preparation starting point of some of the strengths of rather different established ways of representing documents for different purposes, whether for search (relational database, or indexed flat files generated for that purpose whether of complete documents, or say of files made up of objects), online viewing (e.g. html, xml, pdf), or paper publication (e.g. pdf)... 
- -the solution arrived at is by extracting structural information about the document (about headings within the document) and by tracking objects (which are serialized and also given hash values) in the manner described. It makes possible representations that are quite different from those offered at present. For example objects could be saved individually and identified by their hashes, with an index of how the objects relate to each other to form a document. - diff --git a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_installation.ssi b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_installation.ssi index 673842e0..931bb17d 100644 --- a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_installation.ssi +++ b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_installation.ssi @@ -2,7 +2,7 @@ @title: SiSU -@subtitle: Downloads, Stable and Development Branches +@subtitle: Installation @creator: Ralph Amissah diff --git a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_interesting_to_whom.ssi b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_interesting_to_whom.ssi index b8e5a5d6..1ee6551c 100644 --- a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_interesting_to_whom.ssi +++ b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_interesting_to_whom.ssi @@ -2,7 +2,7 @@ @title: SiSU -@subtitle: Who Might Be Interested +@subtitle: Who Might Be Interested? @creator: Ralph Amissah diff --git a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_introduction.ssi b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_introduction.ssi index 53301848..a9b148be 100644 --- a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_introduction.ssi +++ b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_introduction.ssi @@ -2,7 +2,7 @@ @title: SiSU -@subtitle: Commands +@subtitle: Introduction @creator: Ralph Amissah @@ -47,21 +47,19 @@ 1~sisu_intro Introduction - What is SiSU? 
-SiSU is a system for document markup, publishing (in multiple open standard formats) and search +SiSU is a framework for document structuring, publishing (in multiple open standard formats) and search, comprising: (a) a lightweight document structure and presentation markup syntax; and (b) an accompanying engine for generating standard document format outputs from documents prepared in sisu markup syntax, which is able to produce multiple standard outputs (including the population of sql databases) that (can) share a common numbering system for the citation of text within a document. -SiSU~{ "SiSU information Structuring Universe" or "Structured information, Serialized Units".
also chosen for the meaning of the Finnish term "sisu". }~ is a~{ Unix command line oriented }~ framework for document structuring, publishing and search, comprising of (a) a lightweight document structure and presentation markup syntax and (b) an accompanying engine for generating standard document format outputs from documents prepared in sisu markup syntax, which is able to produce multiple standard outputs that (can) share a common numbering system for the citation of text within a document. +SiSU is developed under an open source, software libre license (GPL3). Its use case for development is to cope with medium to large document sets with evolving markup related technologies, which should be prepared once, and for which you want multiple output formats that can be updated and a common mechanism for cross-output-format citation, and search. -SiSU is developed under an open source, software libre license (GPL3). It has been developed in the context of coping with large document sets with evolving markup related technologies, for which you want multiple output formats, a common mechanism for cross-output-format citation, and search. +SiSU both defines a markup syntax and provides an engine that produces open standards format outputs from documents prepared with SiSU markup. From a single lightly prepared document sisu custom builds several standard output formats which share a common (text object) numbering system for citation of content within a document (that also has implications for search). The sisu engine works with an abstraction of the document's structure and content from which it is possible to generate different forms of representation of the document. Significantly SiSU markup is more sparse than html and outputs which include html, LaTeX, landscape and portrait pdfs, Open Document Format (ODF), all of which can be added to and updated. 
SiSU is also able to populate SQL type databases at an object level, which means that searches can be made with that degree of granularity. -SiSU both defines a markup syntax and provides an engine that produces open standards format outputs from documents prepared with SiSU markup. From a single lightly prepared document sisu custom builds several standard output formats which share a common (text object) numbering system for citation of content within a document (that also has implications for search). The sisu engine works with an abstraction of the document's structure and content from which it is possible to generate different forms of representation of the document. Significantly SiSU markup is more sparse than html and outputs which include html, LaTeX, landscape and portrait pdfs, Open Document Format (ODF), all of which can be added to and updated. SiSU is also able to populate SQL type databases at an object level, which means that searches can be made with that degree of granularity. Results of objects (primarily paragraphs and headings) can be viewed directly in the database, or just the object numbers shown - your search criteria is met in these documents and at these locations within each document. - -Source document preparation and output generation is a two step process: (i) document source is prepared, that is, marked up in sisu markup syntax and (ii) the desired output subsequently generated by running the sisu engine against document source. Output representations if updated (in the sisu engine) can be generated by re-running the engine against the prepared source. Using SiSU markup applied to a document, SiSU custom builds various standard open output formats including plain text, HTML, XHTML, XML, OpenDocument, LaTeX or PDF files, and populate an SQL database with objects~{ objects include: headings, paragraphs, verse, tables, images, but not footnotes/endnotes which are numbered separately and tied to the object from which they are referenced. 
}~ (equating generally to paragraph-sized chunks) so searches may be performed and matches returned with that degree of granularity ( e.g. your search criteria is met by these documents and at these locations within each document). Document output formats share a common object numbering system for locating content. This is particularly suitable for "published" works (finalized texts as opposed to works that are frequently changed or updated) for which it provides a fixed means of reference of content. +Source document preparation and output generation is a two step process: (i) document source is prepared, that is, marked up in sisu markup syntax and (ii) the desired output subsequently generated by running the sisu engine against document source. Output representations if updated (in the sisu engine) can be generated by re-running the engine against the prepared source. Using SiSU markup applied to a document, SiSU custom builds (to take advantage of the strengths of different ways of representing documents) various standard open output formats including plain text, HTML, XHTML, XML, OpenDocument, LaTeX or PDF files, and populate an SQL database with objects~{ objects include: headings, paragraphs, verse, tables, images, but not footnotes/endnotes which are numbered separately and tied to the object from which they are referenced. }~ (equating generally to paragraph-sized chunks) so searches may be performed and matches returned with that degree of granularity ( e.g. your search criteria is met by these documents and at these locations within each document). Document output formats share a common object numbering system for locating content. This is particularly suitable for "published" works (finalized texts as opposed to works that are frequently changed or updated) for which it provides a fixed means of reference of content. 
In preparing a SiSU document you optionally provide semantic information related to the document in a document header, and in marking up the substantive text provide information on the structure of the document, primarily indicating heading levels and footnotes. You also provide information on basic text attributes where used. The rest is automatic, sisu from this information custom builds~{ i.e. the html, pdf, odf outputs are each built individually and optimised for that form of presentation, rather than for example the html being a saved version of the odf, or the pdf being a saved version of the html. }~ the different forms of output requested. -SiSU works with an abstraction of the document based on its structure which is comprised of its frame~{ the different heading levels }~ and the objects~{ units of text, primarily paragraphs and headings, also any tables, poems, code-blocks }~ it contains, which enables SiSU to represent the document in many different ways, and to take advantage of the strengths of different ways of presenting documents. The objects are numbered, and these numbers can be used to provide a common base for citing material within a document across the different output format types. This is significant as page numbers are not suited to the digital age, in web publishing, changing a browser's default font or using a different browser means that text appears on different pages; and in publishing in different formats, html, landscape and portrait pdf etc. again page numbers are of no use to cite text in a manner that is relevant against the different output types. Dealing with documents at an object level together with object numbering also has implications for search. 
+SiSU works with an abstraction of the document based on its structure, which is comprised of its frame (or heading structure)~{ the different heading levels }~ and the objects~{ units of text, primarily paragraphs and headings, also any tables, poems, code-blocks }~ it contains, which enables SiSU to represent the document in many different ways, and to take advantage of the strengths of different ways of presenting documents. The objects are numbered, and these numbers can be used to provide a common base for citing material within a document across the different output format types. This is significant as page numbers are not well suited to the digital age, in web publishing, changing a browser's default font or using a different browser means that text appears on different pages; and in publishing in different formats, html, landscape and portrait pdf etc. again page numbers are of no use to cite text in a manner that is relevant against the different output types. Dealing with documents at an object level together with object numbering also has implications for search. -One of the challenges of maintaining documents is to keep them in a format that would allow users to use them without depending on a proprietary software popular at the time. Consider the ease of dealing with legacy proprietary formats today and what guarantee you have that old proprietary formats will remain (or can be read without proprietary software/equipment) in 15 years time, or the way the way in which html has evolved over its relatively short span of existence. SiSU provides the flexibility of outputing documents in multiple non-proprietary open formats including html, pdf~{ Specification submitted by Adobe to ISO to become a full open ISO specification
http://www.linux-watch.com/news/NS7542722606.html }~ and the ISO standard ODF.~{ ISO/IEC 26300:2006 }~ Whilst SiSU relies on software, the markup is uncomplicated and minimalistic which guarantees that future engines can be written to run against it. It is also easily converted to other formats, which means documents prepared in SiSU can be migrated to other document formats. Further security is provided by the fact that the software itself, SiSU is available under GPL3 a licence that guarantees that the source code will always be open, and free as in libre which means that that code base can be used updated and further developed as required under the terms of its license. Another challenge is to keep up with a moving target. SiSU permits new forms of output to be added as they become important, (Open Document Format text was added in 2006), and existing output to be updated (html has evolved and the related module has been updated repeatedly over the years, presumably when the World Wide Web Consortium (w3c) finalises html 5 which is currently under development, the html module will again be updated allowing all existing documents to be regenerated as html 5). +One of the challenges of maintaining documents is to keep them in a format that would allow users to use them without depending on a proprietary software popular at the time. Consider the ease of dealing with legacy proprietary formats today and what guarantee you have that old proprietary formats will remain (or can be read without proprietary software/equipment) in 15 years time, or the way in which html has evolved over its relatively short span of existence. SiSU provides the flexibility of outputting documents in multiple non-proprietary open formats including html, pdf~{ Specification submitted by Adobe to ISO to become a full open ISO specification
http://www.linux-watch.com/news/NS7542722606.html }~ and the ISO standard ODF.~{ ISO/IEC 26300:2006 }~ Whilst SiSU relies on software, the markup is uncomplicated and minimalistic which guarantees that future engines can be written to run against it. It is also easily converted to other formats, which means documents prepared in SiSU can be migrated to other document formats. Further security is provided by the fact that the software itself, SiSU, is available under GPL3, a licence that guarantees that the source code will always be open, and free as in libre, which means that that code base can be used, updated and further developed as required under the terms of its licence. Another challenge is to keep up with a moving target. SiSU permits new forms of output to be added as they become important, (Open Document Format text was added in 2006 when it became an ISO standard for office applications and the archival of documents), and existing output to be updated (html has evolved and the related module has been updated repeatedly over the years, presumably when the World Wide Web Consortium (w3c) finalises html 5 which is currently under development, the html module will again be updated allowing all existing documents to be regenerated as html 5). The document formats are written to the file-system and available for indexing by independent indexing tools, whether off the web like Google and Yahoo or on the site like Lucene and Hyperestraier. diff --git a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_markup.sst b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_markup.sst index 13faa7db..7b2aa7be 100644 --- a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_markup.sst +++ b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_markup.sst @@ -289,6 +289,13 @@ name of taxonomy/vocabulary/wordlist to use against document skin_doc_[name_of_desired_document_skin]
skins change default settings related to the appearance of documents generated, such as the urls of the home site, and the icon/logo for the document or site. +!_ @man: 8;
+name=sisu - documents: markup, structuring, publishing in multiple standard formats, and search;
+synopsis=sisu [-abcDdFHhIiMmNnopqRrSsTtUuVvwXxYyZz0-9] [filename/wildcard ]
+sisu [-Ddcv] [instruction]
+sisu [-CcFLSVvW]
+the man page category number (default 1) and special tags used in preparing man page headings + !_ @links: \{ SiSU }http://www.jus.uio.no/sisu/;
\{ FSF }http://www.fsf.org @@ -1078,72 +1085,25 @@ r{filename} }code -% Composite documents - remote parts +:C~ Markup Syntax History -% Composite documents may be built from remote parts, by using the composite document syntax with a url. This makes sense using either sisu regular syntax (which is just a convenient way of marking up), or thlnk syntax, which also recognises remote urls, and permits hyperlinking ascii to the url location. +1~ Notes related to Files-types and Markup Syntax -% remote documents may be called with the thlnk syntax (or regular sisu syntax), e.g. +0.38 is substantially current, deprecated 0.16 supported, though file names were changed at 0.37 -% << -% -% -% .SH "DOCUMENT NAMING CONVENTION" -% .PP -% SiSU documents are named with the suffix -% .I ss -% followed by a third distinguishing letter, usually t for ordinary text files. -% .PP -% .I .sst -% is used by regular documents, and for most purposes is all you need to be aware of -% .PP -% .I .ssm -% suffix indicates a master or composite document, i.e. a document which requests other documents, which may have the file extension .sst or .ssi. See section on Composite Documents for information on how these are prepared. -% .PP -% .I .ssi -% indicates some prepared sisu markup information that is to be requested within master or composite document(s) and is not to be processed as a stand\-alone document. -% .PP -% .I ._sst -% and -% .I .\-sst -% suffix are reserved for SiSU processing, and indicate a secondary file. Such secondary files are created when a composite file is constructed, and when a url is provided, it is saved locally for processing, as a secondary processing file. Secondary files may be clobbered by SiSU at will, and are not a way of storing information. 
-% -% .I .sxs.xml -% simple xml sax, sisu markup representation -% -% .I .sxd.xml -% simple xml dom, sisu markup representation -% -% .I .sxn.xml -% simple xml node, sisu markup representation -% -% .I .sxs.xml.sst -% or -% .I .sxd.xml.sst -% or -% .I .sxn.xml.sst -% auto\-converted from a simple xml markup representation (sxs, sxd, sxn) -% .\" %% Remote Operations -% .SH "REMOTE OPERATIONS" -% .PP -% These may be of three basic types. -% .PP -% Instruction that processed files are to be copied to a remote server, using the \-r or \-R flag as part of the processing instruction. This requires previous setting up/configuration of the method to be used (eg scp assumed for \-r and rsync for \-R) and url to which these files are to be sent. * -% .PP -% The downloading of a remote file for processing using SiSU locally, which is achieved in one of two ways: -% .PP -% A processing instruction may include the url to the a remote file that is to be processed \- this will be downloaded and given a temporary file .t extension, and will be processed using SiSU locally. -% .PP -% A file may request the inclusion of a remote document within it, see comments on "Composite Documents" for the request syntax. -% .PP -% Finally SiSU may be run on a remote server, which you download marked up files to for processing. This is not really a function of the operation of SiSU, just an available possibility given that not much bandwidth is required. -% .PP -% * with regard to remote files processed locally, the \-r option, a limitation is that it is up to the user to ensure that the remote file does not have an identical filename to another, e.g. local file, that is to be processed in the same directory. So far this has not been found to happen in practice... Alternative solutions are under consideration, but it is desired that filenames be human assigned, and meaningful, so hash keys of contents for filenames are not amongst the options considered. 
+_* sisu --query=[sisu version [0.38] or 'history] -:C~ Markup Syntax History +provides a short history of changes to SiSU markup -1~ Notes related to Files-types and Markup Syntax +!_ 0.57 +(2007w34/4) +SiSU 0.57 is the same as 0.42 with the introduction of a shortcut to use the headers @title and @creator in the first heading [expanded using the contents of the headers @title: and @author:] -0.38 is substantially current, depreciated 0.16 supported, though file names were changed at 0.37 +code{ + +:A~ @title by @author + +}code !_ 0.52 (2007w14/6) @@ -1165,14 +1125,75 @@ skins changed (simplified), markup unchanged (2006w27/4) * (asterisk) type endnotes, used e.g. in relation to author +SiSU 0.42 is the same as 0.38 with the introduction of some additional endnote types, + +Introduces some variations on endnotes, in particular the use of the asterisk + +code{ + +~{* for example for describing an author }~ and ~{** for describing a second author }~ + +}code + +* for example for describing an author + +** for describing a second author + +and + +code{ + +~[* my note ]~ or ~[+ another note ]~ + +}code + +which numerically increments an asterisk and plus respectively + +*1 my note ++1 another note + !_ 0.38 (2006w15/7) introduced new/alternative notation for headers, e.g. 
@title: (instead of 0\~title), and accompanying document structure markup, :A,:B,:C,1,2,3 (maps to previous 1,2,3,4,5,6) +SiSU 0.38 introduced alternative experimental header and heading/structure markers, + +code{ + +@headername: and headers :A~ :B~ :C~ 1~ 2~ 3~ + +}code + +as the equivalent of: + +code{ + +0~headername and headers 1~ 2~ 3~ 4~ 5~ 6~ + +}code + +The internal document markup of SiSU 0.16 remains valid and standard. Though note that SiSU 0.37 introduced a new file naming convention. + +SiSU has in effect two sets of levels to be considered, using 0.38 notation A-C headings/levels, pre-ordinary paragraphs /pre-substantive text, and 1-3 headings/levels, levels which are followed by ordinary text. This may be conceptualised as levels A,B,C, 1,2,3, and using such letter number notation, in effect: A must exist, optional B and C may follow in sequence (not strict); 1 must exist, optional 2 and 3 may follow in sequence; i.e. there are two independent heading level sequences A,B,C and 1,2,3 (using the 0.16 standard notation 1,2,3 and 4,5,6). On the positive side: the 0.38 A,B,C,1,2,3 alternative makes explicit an aspect of structuring documents in SiSU that is not otherwise obvious to the newcomer (though it appears more complicated, is more in your face and likely to be understood fairly quickly); the substantive text follows levels 1,2,3 and it is 'nice' to do most work in those levels + !_ 0.37 (2006w09/7) introduced new file naming convention, .sst (text), .ssm (master), .ssi (insert), markup syntax unchanged +SiSU 0.37 introduced new file naming convention, using the file extensions .sst .ssm and .ssi to replace .s1 .s2 .s3 .r1 .r2 .r3 and .si + +this is captured by the following file 'rename' instruction: + +code{ + +rename 's/\.s[123]$/\.sst/' *.s{1,2,3} +rename 's/\.r[123]$/\.ssm/' *.r{1,2,3} +rename 's/\.si$/\.ssi/' *.si + +}code + +The internal document markup remains unchanged, from SiSU 0.16 + !_ 0.35 (2005w52/3) sisupod, zipped content file 
introduced @@ -1192,3 +1213,12 @@ header 0~links !_ 0.16 (2005w25/2) substantial changes introduced to make markup cleaner, header 0\~title type, and headings [1-6]\~ introduced, also percentage sign (%) at start of a text line as comment marker + +SiSU 0.16 (0.15 development branch) introduced the use of + +the header 0~ and headings/structure 1~ 2~ 3~ 4~ 5~ 6~ + +in place of the 0.1 header, heading/structure notation + +SiSU 0.1 headers and headings structure represented by +header 0{~ and headings/structure 1{ 2{ 3{ 4{~ 5{ 6{ diff --git a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_pdf.sst b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_pdf.sst index 81e5f217..b5c17fc6 100644 --- a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_pdf.sst +++ b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_pdf.sst @@ -28,7 +28,7 @@ @bold: /Gnu|Debian|Ruby|SiSU/ -@man: 8; +@man: 7; name=sisu - package to install what sisu needs to generate pdf (latex to pdf dependency component) synopsis=sisu -pv [filename/wildcard ] diff --git a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_postgresql.sst b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_postgresql.sst index 09cb2785..da18de01 100644 --- a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_postgresql.sst +++ b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_postgresql.sst @@ -28,7 +28,7 @@ @bold: /Gnu|Debian|Ruby|SiSU/ -@man: 8; +@man: 7; name=sisu - package to install what sisu needs to to populate a postgresql database (postgresql dependency component) synopsis=sisu -Dv [filename/wildcard ] sisu -Dv [instruction] diff --git a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_search_cgi.ssi b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_search_cgi.ssi index 308f563f..dbad2520 100644 --- a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_search_cgi.ssi +++ b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_search_cgi.ssi @@ -43,7 +43,7 @@ :B~? SiSU Search -:C~? Search +:C~? 
CGI Search Form 1~search_cgi Introduction diff --git a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_short_feature_summary.ssi b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_short_feature_summary.ssi index 9a2e2ddd..3a12db7a 100644 --- a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_short_feature_summary.ssi +++ b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_short_feature_summary.ssi @@ -2,7 +2,7 @@ @title: SiSU -@subtitle: Commands +@subtitle: Summary of Features @creator: Ralph Amissah @@ -45,38 +45,6 @@ :C~? Description -1~sisu_intro Introduction - What is SiSU? - -SiSU is a system for document markup, publishing (in multiple open standard formats) and search - -SiSU~{ "SiSU information Structuring Universe" or "Structured information, Serialized Units".
also chosen for the meaning of the Finnish term "sisu". }~ is a~{ Unix command line oriented }~ framework for document structuring, publishing and search, comprising of (a) a lightweight document structure and presentation markup syntax and (b) an accompanying engine for generating standard document format outputs from documents prepared in sisu markup syntax, which is able to produce multiple standard outputs that (can) share a common numbering system for the citation of text within a document. - -SiSU is developed under an open source, software libre license (GPL3). It has been developed in the context of coping with large document sets with evolving markup related technologies, for which you want multiple output formats, a common mechanism for cross-output-format citation, and search. - -SiSU both defines a markup syntax and provides an engine that produces open standards format outputs from documents prepared with SiSU markup. From a single lightly prepared document sisu custom builds several standard output formats which share a common (text object) numbering system for citation of content within a document (that also has implications for search). The sisu engine works with an abstraction of the document's structure and content from which it is possible to generate different forms of representation of the document. Significantly SiSU markup is more sparse than html and outputs which include html, LaTeX, landscape and portrait pdfs, Open Document Format (ODF), all of which can be added to and updated. SiSU is also able to populate SQL type databases at an object level, which means that searches can be made with that degree of granularity. Results of objects (primarily paragraphs and headings) can be viewed directly in the database, or just the object numbers shown - your search criteria is met in these documents and at these locations within each document. 
- -Source document preparation and output generation is a two step process: (i) document source is prepared, that is, marked up in sisu markup syntax and (ii) the desired output subsequently generated by running the sisu engine against document source. Output representations if updated (in the sisu engine) can be generated by re-running the engine against the prepared source. Using SiSU markup applied to a document, SiSU custom builds various standard open output formats including plain text, HTML, XHTML, XML, OpenDocument, LaTeX or PDF files, and populate an SQL database with objects~{ objects include: headings, paragraphs, verse, tables, images, but not footnotes/endnotes which are numbered separately and tied to the object from which they are referenced. }~ (equating generally to paragraph-sized chunks) so searches may be performed and matches returned with that degree of granularity ( e.g. your search criteria is met by these documents and at these locations within each document). Document output formats share a common object numbering system for locating content. This is particularly suitable for "published" works (finalized texts as opposed to works that are frequently changed or updated) for which it provides a fixed means of reference of content. - -In preparing a SiSU document you optionally provide semantic information related to the document in a document header, and in marking up the substantive text provide information on the structure of the document, primarily indicating heading levels and footnotes. You also provide information on basic text attributes where used. The rest is automatic, sisu from this information custom builds~{ i.e. the html, pdf, odf outputs are each built individually and optimised for that form of presentation, rather than for example the html being a saved version of the odf, or the pdf being a saved version of the html. }~ the different forms of output requested. 
- -SiSU works with an abstraction of the document based on its structure which is comprised of its frame~{ the different heading levels }~ and the objects~{ units of text, primarily paragraphs and headings, also any tables, poems, code-blocks }~ it contains, which enables SiSU to represent the document in many different ways, and to take advantage of the strengths of different ways of presenting documents. The objects are numbered, and these numbers can be used to provide a common base for citing material within a document across the different output format types. This is significant as page numbers are not suited to the digital age, in web publishing, changing a browser's default font or using a different browser means that text appears on different pages; and in publishing in different formats, html, landscape and portrait pdf etc. again page numbers are of no use to cite text in a manner that is relevant against the different output types. Dealing with documents at an object level together with object numbering also has implications for search. - -One of the challenges of maintaining documents is to keep them in a format that would allow users to use them without depending on a proprietary software popular at the time. Consider the ease of dealing with legacy proprietary formats today and what guarantee you have that old proprietary formats will remain (or can be read without proprietary software/equipment) in 15 years time, or the way the way in which html has evolved over its relatively short span of existence. SiSU provides the flexibility of outputing documents in multiple non-proprietary open formats including html, pdf~{ Specification submitted by Adobe to ISO to become a full open ISO specification
http://www.linux-watch.com/news/NS7542722606.html }~ and the ISO standard ODF.~{ ISO/IEC 26300:2006 }~ Whilst SiSU relies on software, the markup is uncomplicated and minimalistic which guarantees that future engines can be written to run against it. It is also easily converted to other formats, which means documents prepared in SiSU can be migrated to other document formats. Further security is provided by the fact that the software itself, SiSU is available under GPL3 a licence that guarantees that the source code will always be open, and free as in libre which means that that code base can be used updated and further developed as required under the terms of its license. Another challenge is to keep up with a moving target. SiSU permits new forms of output to be added as they become important, (Open Document Format text was added in 2006), and existing output to be updated (html has evolved and the related module has been updated repeatedly over the years, presumably when the World Wide Web Consortium (w3c) finalises html 5 which is currently under development, the html module will again be updated allowing all existing documents to be regenerated as html 5). - -The document formats are written to the file-system and available for indexing by independent indexing tools, whether off the web like Google and Yahoo or on the site like Lucene and Hyperestraier. - -SiSU also provides other features such as concordance files and document content certificates, and the working against an abstraction of document structure has further possibilities for the research and development of other document representations, the availability of objects is useful for example for topic maps and the commercial law thesaurus by Vikki Rogers and Al Krtizer, together with the flexibility of SiSU offers great possibilities. - -SiSU is primarily for published works, which can take advantage of the citation system to reliably reference its documents. 
SiSU works well in a complementary manner with such collaborative technologies as Wikis, which can take advantage of and be used to discuss the substance of content prepared in SiSU. - -http://www.jus.uio.no/sisu - -% SiSU is a way of preparing, publishing, managing and searching documents. - -1~sisu_how How does sisu work? - -SiSU markup is fairly minimalistic, it consists of: a (largely optional) document header, made up of information about the document (such as when it was published, who authored it, and granting what rights) and any processing instructions; and markup within the substantive text of the document, which is related to document structure and typeface. SiSU must be able to discern the structure of a document, (text headings and their levels in relation to each other), either from information provided in the document header or from markup within the text (or from a combination of both). Processing is done against an abstraction of the document comprising of information on the document's structure and its objects,[2] which the program serializes (providing the object numbers) and which are assigned hash sum values based on their content. This abstraction of information about document structure, objects, (and hash sums), provides considerable flexibility in representing documents different ways and for different purposes (e.g. search, document layout, publishing, content certification, concordance etc.), and makes it possible to take advantage of some of the strengths of established ways of representing documents, (or indeed to create new ones). - 1~sisu_feature_summary Summary of features _* sparse/minimal markup (clean utf-8 source texts). Documents are prepared in a single UTF-8 file using a minimalistic mnemonic syntax. Typical literature, documents like "War and Peace" require almost no markup, and most of the headers are optional. 
diff --git a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_sql.ssi b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_sql.ssi index 7a484909..fb742883 100644 --- a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_sql.ssi +++ b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_sql.ssi @@ -2,7 +2,7 @@ @title: SiSU -@subtitle: Search +@subtitle: SQL and Search @creator: Ralph Amissah diff --git a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_sqlite.sst b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_sqlite.sst index a9252056..5443c3f3 100644 --- a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_sqlite.sst +++ b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_sqlite.sst @@ -28,7 +28,7 @@ @bold: /Gnu|Debian|Ruby|SiSU/ -@man: 8; +@man: 7; name=sisu - package to install what sisu needs to to populate a postgresql database (postgresql dependency component) synopsis=sisu -dv [filename/wildcard ] sisu -dv [instruction] diff --git a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_summary_of_features.ssi b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_summary_of_features.ssi deleted file mode 100644 index 9a2e2ddd..00000000 --- a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_summary_of_features.ssi +++ /dev/null @@ -1,133 +0,0 @@ -% SiSU 0.58 - -@title: SiSU - -@subtitle: Commands - -@creator: Ralph Amissah - -@rights: Copyright (C) Ralph Amissah 2007, part of SiSU documentation, License GPL 3 - -@type: information - -@subject: ebook, epublishing, electronic book, electronic publishing, electronic document, electronic citation, data structure, citation systems, search - -@date.created: 2002-08-28 - -@date.issued: 2002-08-28 - -@date.available: 2002-08-28 - -@date.modified: 2007-09-16 - -@date: 2007-09-16 - -@level: new=C; break=1; num_top=1 - -@skin: skin_sisu_manual - -@bold: /Gnu|Debian|Ruby|SiSU/ - -@links: { SiSU Manual }http://www.jus.uio.no/sisu/sisu_manual/ -{ Book Samples and Markup Examples }http://www.jus.uio.no/sisu/SiSU/2.html 
-{ SiSU @ Wikipedia }http://en.wikipedia.org/wiki/SiSU -{ SiSU @ Freshmeat }http://freshmeat.net/projects/sisu/ -{ SiSU @ Ruby Application Archive }http://raa.ruby-lang.org/project/sisu/ -{ SiSU @ Debian }http://packages.qa.debian.org/s/sisu.html -{ SiSU Download }http://www.jus.uio.no/sisu/SiSU/download.html -{ SiSU Changelog }http://www.jus.uio.no/sisu/SiSU/changelog.html -{ SiSU help }http://www.jus.uio.no/sisu/sisu_manual/sisu_help/ -{ SiSU help sources }http://www.jus.uio.no/sisu/sisu_manual/sisu_help_sources/ - -:A~? @title @creator - -:B~? What is SiSU? - -:C~? Description - -1~sisu_intro Introduction - What is SiSU? - -SiSU is a system for document markup, publishing (in multiple open standard formats) and search - -SiSU~{ "SiSU information Structuring Universe" or "Structured information, Serialized Units".
also chosen for the meaning of the Finnish term "sisu". }~ is a~{ Unix command line oriented }~ framework for document structuring, publishing and search, comprising of (a) a lightweight document structure and presentation markup syntax and (b) an accompanying engine for generating standard document format outputs from documents prepared in sisu markup syntax, which is able to produce multiple standard outputs that (can) share a common numbering system for the citation of text within a document. - -SiSU is developed under an open source, software libre license (GPL3). It has been developed in the context of coping with large document sets with evolving markup related technologies, for which you want multiple output formats, a common mechanism for cross-output-format citation, and search. - -SiSU both defines a markup syntax and provides an engine that produces open standards format outputs from documents prepared with SiSU markup. From a single lightly prepared document sisu custom builds several standard output formats which share a common (text object) numbering system for citation of content within a document (that also has implications for search). The sisu engine works with an abstraction of the document's structure and content from which it is possible to generate different forms of representation of the document. Significantly SiSU markup is more sparse than html and outputs which include html, LaTeX, landscape and portrait pdfs, Open Document Format (ODF), all of which can be added to and updated. SiSU is also able to populate SQL type databases at an object level, which means that searches can be made with that degree of granularity. Results of objects (primarily paragraphs and headings) can be viewed directly in the database, or just the object numbers shown - your search criteria is met in these documents and at these locations within each document. 
- -Source document preparation and output generation is a two step process: (i) document source is prepared, that is, marked up in sisu markup syntax and (ii) the desired output subsequently generated by running the sisu engine against document source. Output representations if updated (in the sisu engine) can be generated by re-running the engine against the prepared source. Using SiSU markup applied to a document, SiSU custom builds various standard open output formats including plain text, HTML, XHTML, XML, OpenDocument, LaTeX or PDF files, and populate an SQL database with objects~{ objects include: headings, paragraphs, verse, tables, images, but not footnotes/endnotes which are numbered separately and tied to the object from which they are referenced. }~ (equating generally to paragraph-sized chunks) so searches may be performed and matches returned with that degree of granularity ( e.g. your search criteria is met by these documents and at these locations within each document). Document output formats share a common object numbering system for locating content. This is particularly suitable for "published" works (finalized texts as opposed to works that are frequently changed or updated) for which it provides a fixed means of reference of content. - -In preparing a SiSU document you optionally provide semantic information related to the document in a document header, and in marking up the substantive text provide information on the structure of the document, primarily indicating heading levels and footnotes. You also provide information on basic text attributes where used. The rest is automatic, sisu from this information custom builds~{ i.e. the html, pdf, odf outputs are each built individually and optimised for that form of presentation, rather than for example the html being a saved version of the odf, or the pdf being a saved version of the html. }~ the different forms of output requested. 
- -SiSU works with an abstraction of the document based on its structure which is comprised of its frame~{ the different heading levels }~ and the objects~{ units of text, primarily paragraphs and headings, also any tables, poems, code-blocks }~ it contains, which enables SiSU to represent the document in many different ways, and to take advantage of the strengths of different ways of presenting documents. The objects are numbered, and these numbers can be used to provide a common base for citing material within a document across the different output format types. This is significant as page numbers are not suited to the digital age, in web publishing, changing a browser's default font or using a different browser means that text appears on different pages; and in publishing in different formats, html, landscape and portrait pdf etc. again page numbers are of no use to cite text in a manner that is relevant against the different output types. Dealing with documents at an object level together with object numbering also has implications for search. - -One of the challenges of maintaining documents is to keep them in a format that would allow users to use them without depending on a proprietary software popular at the time. Consider the ease of dealing with legacy proprietary formats today and what guarantee you have that old proprietary formats will remain (or can be read without proprietary software/equipment) in 15 years time, or the way the way in which html has evolved over its relatively short span of existence. SiSU provides the flexibility of outputing documents in multiple non-proprietary open formats including html, pdf~{ Specification submitted by Adobe to ISO to become a full open ISO specification
http://www.linux-watch.com/news/NS7542722606.html }~ and the ISO standard ODF.~{ ISO/IEC 26300:2006 }~ Whilst SiSU relies on software, the markup is uncomplicated and minimalistic which guarantees that future engines can be written to run against it. It is also easily converted to other formats, which means documents prepared in SiSU can be migrated to other document formats. Further security is provided by the fact that the software itself, SiSU is available under GPL3 a licence that guarantees that the source code will always be open, and free as in libre which means that that code base can be used updated and further developed as required under the terms of its license. Another challenge is to keep up with a moving target. SiSU permits new forms of output to be added as they become important, (Open Document Format text was added in 2006), and existing output to be updated (html has evolved and the related module has been updated repeatedly over the years, presumably when the World Wide Web Consortium (w3c) finalises html 5 which is currently under development, the html module will again be updated allowing all existing documents to be regenerated as html 5). - -The document formats are written to the file-system and available for indexing by independent indexing tools, whether off the web like Google and Yahoo or on the site like Lucene and Hyperestraier. - -SiSU also provides other features such as concordance files and document content certificates, and the working against an abstraction of document structure has further possibilities for the research and development of other document representations, the availability of objects is useful for example for topic maps and the commercial law thesaurus by Vikki Rogers and Al Krtizer, together with the flexibility of SiSU offers great possibilities. - -SiSU is primarily for published works, which can take advantage of the citation system to reliably reference its documents. 
SiSU works well in a complementary manner with such collaborative technologies as Wikis, which can take advantage of and be used to discuss the substance of content prepared in SiSU. - -http://www.jus.uio.no/sisu - -% SiSU is a way of preparing, publishing, managing and searching documents. - -1~sisu_how How does sisu work? - -SiSU markup is fairly minimalistic, it consists of: a (largely optional) document header, made up of information about the document (such as when it was published, who authored it, and granting what rights) and any processing instructions; and markup within the substantive text of the document, which is related to document structure and typeface. SiSU must be able to discern the structure of a document, (text headings and their levels in relation to each other), either from information provided in the document header or from markup within the text (or from a combination of both). Processing is done against an abstraction of the document comprising of information on the document's structure and its objects,[2] which the program serializes (providing the object numbers) and which are assigned hash sum values based on their content. This abstraction of information about document structure, objects, (and hash sums), provides considerable flexibility in representing documents different ways and for different purposes (e.g. search, document layout, publishing, content certification, concordance etc.), and makes it possible to take advantage of some of the strengths of established ways of representing documents, (or indeed to create new ones). - -1~sisu_feature_summary Summary of features - -_* sparse/minimal markup (clean utf-8 source texts). Documents are prepared in a single UTF-8 file using a minimalistic mnemonic syntax. Typical literature, documents like "War and Peace" require almost no markup, and most of the headers are optional. 
- -_* markup is easily readable/parsable by the human eye, (basic markup is simpler and more sparse than the most basic HTML), [this may also be converted to XML representations of the same input/source document]. - -_* markup defines document structure (this may be done once in a header pattern-match description, or for heading levels individually); basic text attributes (bold, italics, underscore, strike-through etc.) as required; and semantic information related to the document (header information, extended beyond the Dublin core and easily further extended as required); the headers may also contain processing instructions. SiSU markup is primarily an abstraction of document structure and document metadata to permit taking advantage of the basic strengths of existing alternative practical standard ways of representing documents [be that browser viewing, paper publication, sql search etc.] (html, xml, odf, latex, pdf, sql) - -_* for output produces reasonably elegant output of established industry and institutionally accepted open standard formats.[3] takes advantage of the different strengths of various standard formats for representing documents, amongst the output formats currently supported are: - -_1* html - both as a single scrollable text and a segmented document - -_1* xhtml - -_1* XML - both in sax and dom style xml structures for further development as required - -_1* ODF - open document format, the iso standard for document storage - -_1* LaTeX - used to generate pdf - -_1* pdf (via LaTeX) - -_1* sql - population of an sql database, (at the same object level that is used to cite text within a document) - -Also produces: concordance files; document content certificates (md5 or sha256 digests of headings, paragraphs, images etc.) and html manifests (and sitemaps of content). (b) takes advantage of the strengths implicit in these very different output types, (e.g. 
PDFs produced using typesetting of LaTeX, databases populated with documents at an individual object/paragraph level, making possible granular search (and related possibilities)) - -_* ensuring content can be cited in a meaningful way regardless of selected output format. Online publishing (and publishing in multiple document formats) lacks a useful way of citing text internally within documents (important to academics generally and to lawyers) as page numbers are meaningless across browsers and formats. sisu seeks to provide a common way of pinpointing the text within a document, (which can be utilized for citation and by search engines). The outputs share a common numbering system that is meaningful (to man and machine) across all digital outputs whether paper, screen, or database oriented, (pdf, HTML, xml, sqlite, postgresql), this numbering system can be used to reference content. - -_* Granular search within documents. SQL databases are populated at an object level (roughly headings, paragraphs, verse, tables) and become searchable with that degree of granularity, the output information provides the object/paragraph numbers which are relevant across all generated outputs; it is also possible to look at just the matching paragraphs of the documents in the database; [output indexing also works well with search indexing tools like hyperestraier]. - -_* long term maintainability of document collections in a world of changing formats, having a very sparsely marked-up source document base. There is a considerable degree of future-proofing, output representations are "upgradeable", and new document formats may be added. e.g. addition of odf (open document text) module in 2006 and html5 output sometime in future, without modification of existing prepared texts - -_* SQL search aside, documents are generated as required and static once generated.
- -_* documents produced are static files, and may be batch processed, this needs to be done only once but may be repeated for various reasons as desired (updated content, addition of new output formats, updated technology document presentations/representations) - -_* document source (plaintext utf-8) if shared on the net may be used as input and processed locally to produce the different document outputs - -_* document source may be bundled together (automatically) with associated documents (multiple language versions or master document with inclusions) and images and sent as a zip file called a sisupod, if shared on the net these too may be processed locally to produce the desired document outputs - -_* generated document outputs may automatically be posted to remote sites. - -_* for basic document generation, the only software dependency is Ruby, and a few standard Unix tools (this covers plaintext, HTML, XML, ODF, LaTeX). To use a database you of course need that, and to convert the LaTeX generated to pdf, a latex processor like tetex or texlive. - -_* as a developer's tool it is flexible and extensible - -Syntax highlighting for SiSU markup is available for a number of text editors. - -SiSU is less about document layout than about finding a way with little markup to be able to construct an abstract representation of a document that makes it possible to produce multiple representations of it which may be rather different from each other and used for different purposes, whether layout and publishing, or search of content - -i.e. to be able to take advantage from this minimal preparation starting point of some of the strengths of rather different established ways of representing documents for different purposes, whether for search (relational database, or indexed flat files generated for that purpose whether of complete documents, or say of files made up of objects), online viewing (e.g. html, xml, pdf), or paper publication (e.g. pdf)...
- -the solution arrived at is by extracting structural information about the document (about headings within the document) and by tracking objects (which are serialized and also given hash values) in the manner described. It makes possible representations that are quite different from those offered at present. For example objects could be saved individually and identified by their hashes, with an index of how the objects relate to each other to form a document. - diff --git a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_webrick.sst b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_webrick.sst index e9454cba..d74191c5 100644 --- a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_webrick.sst +++ b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_webrick.sst @@ -28,7 +28,7 @@ @bold: /Gnu|Debian|Ruby|SiSU/ -@man: 8; +@man: 7; name=sisu - documents: structuring, publishing in multiple formats, and search; synopsis=sisu -W diff --git a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_work_needed_and_wishlist.ssi b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_work_needed_and_wishlist.ssi index de9033e8..2a16ac4a 100644 --- a/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_work_needed_and_wishlist.ssi +++ b/data/doc/sisu/sisu_markup_samples/sisu_manual/sisu_work_needed_and_wishlist.ssi @@ -51,6 +51,10 @@ _* Amongst the most requested features is a way to represent and extract bibliog _* Integration of postgresql tsearch2 / gin indexing, (which currently needs to be done manually, and) which has been waiting for the integration of tsearch2 / gin into Postgresql main, which is supposed to occur in Postgresql 8.3 +_* Internationalisation always. SiSU is utf-8 and for those parts that are utf-8 friendly will work out of the box - html and postgresql for example work out of the box (and for example comfortably represent Chinese text), LaTeX and odf do not work out of the box, they need additional work for extended language sets. 
+ +_* Refinements and improvements to output representations, some are fairly mature, others (such as manpages and info files (and even ODF)) remain young. + 1~sisu_wishlist Wishlist SiSU provides a lot of "plumbing" and is readily usable as a tool by those comfortable with marking up documents with an editor. The syntax is fairly easy to learn, especially the subset required to start using SiSU effectively. -- cgit v1.2.3