# encoding: utf-8 =begin * Name: SiSU * Description: a framework for document structuring, publishing and search * Author: Ralph Amissah * Copyright: (C) 1997 - 2012, Ralph Amissah, All Rights Reserved. * License: GPL 3 or later: SiSU, a framework for document structuring, publishing and search Copyright (C) Ralph Amissah This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . If you have Internet connection, the latest version of the GPL should be available at these locations: * SiSU uses: * Standard SiSU markup syntax, * Standard SiSU meta-markup syntax, and the * Standard SiSU object citation numbering and system * Hompages: * Download: * Ralph Amissah ** Description: Default values (reset by skins) =end $latex_run=nil module SiSU_Viz require 'uri' require_relative 'sysenv' # sysenv.rb include SiSU_Env require_relative 'css' # css.rb include SiSU_Style class Skin def initialize @fonts='verdana, arial, georgia, tahoma, sans-serif, helvetica, times, roman' # 'verdana, arial, georgia, tahoma, sans-serif, helvetica, "times new roman", times, roman' @dir=SiSU_Env::InfoEnv.new @date=SiSU_Env::InfoDate.new #{@date.year} @v=SiSU_Env::InfoVersion.instance.get_version end #% glyph def glyph_bullet # • '• ' # [• flagged] end #% html def html_hardspace ' ' end #% php def php_persist end #% javascript #kxjs knxjs def js_home end def js_infobox end def js_knxjs end def js_head end def js_top end def js_sisu end def js_home end def js_sponsor end def js_books end 
#% javascript (continued)
# Placeholder hooks: each returns nil by default; the file header notes
# that these defaults are "reset by skins".
def js_journals; end
def js_conferences; end
def js_services; end
def js_catalogue; end
def js_doc; end
def js_toc; end
def js_seg; end
def js_mail; end
def js_manifest; end
def js_status; end
def js_next; end
def js_prev; end
def js_plaintext; end
def js_portrait; end
def js_landscape; end
def js_pdf; end
def js_epub; end
def js_odf; end
def js_concordance; end
def js_instruments; end
def js_external; end
def js_gopher; end
def js_ftp; end
def js_law; end
def js_disclaimer; end

# Semantic tag abbreviations.
# Calling semantic_tags (re)defines #default on the enclosing class and
# returns self, so it is meant to be chained: semantic_tags.default
# Returns (via #default) a Hash mapping abbreviation symbol => full tag name.
def semantic_tags
  def default
    {
      pub:   'publication',
      conv:  'convention', # listed once only; a repeated literal key draws a Ruby warning
      vol:   'volume',
      pg:    'page',
      cty:   'city',
      org:   'organization',
      uni:   'university',
      dept:  'department',
      fac:   'faculty',
      inst:  'institute',
      co:    'company',
      com:   'company',
      dt:    'date',
      y:     'year',
      m:     'month',
      d:     'day',
      ti:    'title',
      au:    'author',
      ed:    'editor', #editor?
      v:     'version', #edition
      n:     'name',
      fn:    'firstname',
      mn:    'middlename',
      ln:    'lastname',
      in:    'initials',
      qt:    'quote',
      ct:    'cite',
      ref:   'reference',
      ab:    'abreviation',
      def:   'define',
      desc:  'description',
      trans: 'translate',
    }
  end
  self
end

#% decorate
# '|'-separated alternation of semantic tag names, or nil when none apply.
def decorate_italics; 'title|article|book|journal'; end
def decorate_bold; end
def decorate_uppercase; 'surname'; end

#% semantic
# Each sem_* method returns the tag name as a String.
# The "#dc N" markers are carried over from the original source
# (presumably Dublin Core element numbers - confirm).
def sem_title; 'title'; end #dc 1
def sem_article; 'article'; end
def sem_book; 'book'; end
def sem_journal; 'journal'; end
# (contains: firstname, surname) #issues arise as contains surname etc.
def sem_fullname; 'fullname'; end
def sem_first; 'first'; end
def sem_surname; 'surname'; end
def sem_middle; 'middle'; end
# the following five ==fullname (contains: firstname, surname)
def sem_creator; 'creator'; end #dc 2
def sem_author; 'author'; end
def sem_editor; 'editor'; end
def sem_illustrator; 'illustrator'; end
def sem_translator; 'translator'; end
def sem_isbn; 'isbn'; end # 10 or 13
def sem_isbn_10; 'isbn10'; end
def sem_isbn_13; 'isbn13'; end
def sem_loc; 'loc'; end # library of congress
def sem_dewey; 'dewey'; end
def sem_pg; 'pg'; end # project gutenberg number
def sem_subject; 'subject'; end #dc 3
def sem_date; 'date'; end #dc 7
def sem_date_created; 'date_created'; end
def sem_date_issued; 'date_issued'; end
def sem_date_available; 'date_available'; end
def sem_date_valid; 'date_valid'; end
def sem_date_modified; 'date_modified'; end
def sem_type; 'type'; end #dc 8
def sem_description; 'description'; end #dc 4
def sem_publisher; 'publisher'; end #dc 5
def sem_contributor; 'contributor'; end #dc 6
def sem_format; 'format'; end #dc 9
def sem_identifier; 'identifier'; end #dc 10
def sem_source; 'source'; end #dc 11
def sem_language; 'language'; end #dc 12
# Previously returned 'source' (copy of sem_source), which made the
# relation and source tags indistinguishable.
def sem_relation; 'relation'; end #dc 13
def sem_coverage; 'coverage'; end #dc 14
def sem_rights; 'rights'; end #dc 15
def sem_copyright; 'copyright'; end
def sem_license; 'license'; end
def sem_prepared_by; 'prepared_by'; end
def sem_digitized_by; 'digitized_by'; end
def sem_keywords; 'keywords'; end
def sem_comments; 'comments'; end
def sem_abstract; 'abstract'; end

#% path
def path_stylesheet_home; %{ }; end

#% text #changed from txt to avoid naming conflicts #FOLLOW
def txt_generator; %{ }; end
def txt_generator_comment; %{ }; end
def txt_hp; ' SiSU'; end
def txt_hp_alias; 'SiSU'; end
def txt_home; 'SiSU'; end
def txt_signature; 'SiSU'; end # used in latex/pdf footer

#% url
# Parses +uri+ (String) with URI.parse and returns the resulting URI object;
# URI.parse raises URI::InvalidURIError on malformed input.
def url_urify(uri)
  URI.parse(uri)
end
def url_sisu; 'http://www.sisudoc.org/'; end
#% url (continued)
def url_sisudoc; 'http://www.sisudoc.org'; end
def url_footer_signature; 'http://www.sisudoc.org/'; end
def url_root; '/sisu'; end #watch
def url_root_http; 'http://www.sisudoc.org/'; end #watch
def url_home; 'http://www.sisudoc.org/'; end # used in pdf header
# used as stub... where there are subdirectories and is different from home
def url_site
  url_home #'http://www.sisudoc.org/' # used in pdf header
end
def url_txt; 'www.sisudoc.org/'; end
# The next three are used for html image display; the prefix comes from
# the Xx[:html_relative2] entry, which is defined outside this section.
def url_path_image_base; "#{Xx[:html_relative2]}_sisu/image"; end
def url_path_image; "#{Xx[:html_relative2]}_sisu/image"; end
def url_path_image_sys; "#{Xx[:html_relative2]}_sisu/image_sys"; end
def url_path_image_epub; './image'; end
def url_path_ebook_dir; './ebook'; end
def url_path_ebook_images; '.'; end
def url_promo; ''; end
def url_promo_home; ''; end

# Delimiters placed around urls, per output family (tex, xml, txt).
# Calling this (re)defines the six *_open/*_close methods on the enclosing
# class and returns self, so use it chained: url_decoration.xml_open
# NOTE: rel_decoration defines methods with the same names, so the pair in
# force is whichever of the two was called most recently.
def url_decoration
  def tex_open #'{\UseTextSymbol{OML}{<}}'
    Dx[:url_o]
  end
  def tex_close #'{\UseTextSymbol{OML}{>}}'
    Dx[:url_c]
  end
  def xml_open #'<'
    Dx[:url_o]
  end
  def xml_close #'>'
    Dx[:url_c]
  end
  def txt_open
    '<'
  end
  def txt_close
    '>'
  end
  self
end

# Same shape as url_decoration but reading the Dx[:rel_o] / Dx[:rel_c]
# entries; use chained: rel_decoration.xml_open
def rel_decoration
  def tex_open #'{\UseTextSymbol{OML}{<}}'
    Dx[:rel_o]
  end
  def tex_close #'{\UseTextSymbol{OML}{>}}'
    Dx[:rel_c]
  end
  def xml_open #'<'
    Dx[:rel_o]
  end
  def xml_close #'>'
    Dx[:rel_c]
  end
  def txt_open
    '<'
  end
  def txt_close
    '>'
  end
  self
end

#% color
# Every colour method is defined exactly once in this section. Note that
# some values carry embedded double quotes and others do not.
def color_white; '"#ffffff"'; end
def color_black; '#000000'; end
def color_shadow; '"4"'; end #hmmm
def color_blue_dark; '#000099'; end
def color_blue; 'blue'; end
def color_blue_base; '#b9d4dd'; end
def color_blue_ink; '#003399'; end
def color_blue_tinge; '#e3ecef'; end
def color_blue_grey; '#8faebf'; end
def color_blue_murky; '#437389'; end
def color_beige; '#f1e8de'; end
def color_subtleglow; '#dddccc'; end
def color_glow; '#fff0c3'; end
def color_rose; '#ffdec9'; end
def color_turquoise; '#1c869b'; end
def color_grey_pale; '#eeeeee'; end
def color_grey_medium; '#cccccc'; end
def color_grey; '#999999'; end
def color_yellow_light; '#fff3b6'; end
def color_yellow; '#ffde14'; end
def color_yellow_dark; '#ffcc00'; end
def color_green_light; '#b7d398'; end # #e2efd5 #b7d398 #b1c999 # '#aed19e'
def color_green; '#0a8400'; end
def color_green_dark; '#086800'; end
def color_ruby; '#a00000'; end
def color_maroon; '#800000'; end
# color_white already includes its own double quotes, so the next three
# return the value wrapped in a second pair of quotes.
# NOTE(review): the doubled quoting looks unintended - confirm against callers.
def color_paper; %{"#{color_white}"}; end
def color_surround; %{"#{color_white}"}; end
def color_band; %{"#{color_white}"}; end
def color_body; %{}; end
def color_font_face; "#{color_black}"; end #was font WATCH
def color_table1; 'ffffcc'; end
def color_table2; 'c0d0f0'; end
def color_band1; '"#ffffff"'; end
def color_band2; '"#ffffff"'; end

#% icon
# File names of the images used for buttons and navigation.
def icon_ico; 'rb7.ico'; end
def icon_sisu; 'sisu.png'; end
def icon_manifest; 'b_info.png'; end
def icon_doc; 'b_doc.png'; end
def icon_toc; 'b_toc.png'; end
def icon_wmp; 'b_wmp.png'; end
def icon_odf; 'b_odf.png'; end
def icon_epub; 'b_epub.png'; end
def icon_pdf; 'b_pdf.png'; end
def icon_pdf_portrait; 'b_pdf.png'; end
def icon_pdf_landscape; 'b_pdf.png'; end
def icon_status; 'b_status.png'; end
def icon_external; 'b_ext.png'; end
def icon_external_toc; 'b_ext_toc.png'; end
def icon_seg_toc; 'b_bluebell.png'; end
def icon_crosslink_toc; 'b_amber.png'; end
def icon_mail; 'b_mail.png'; end
def icon_para; 'b_para.png'; end
def icon_ftp; 'b_ftp.png'; end
def icon_gopher; 'b_gopher.png'; end
def icon_choice; 'b_choice.png'; end
def icon_new; 'b_new.png'; end
def icon_book; 'b_amber.png'; end # b_book.png
def icon_dot_clear; 'dot_clear.png'; end
def icon_dot_white; 'dot_white.png'; end
# Default dot is the white one.
def icon_dot
  icon_dot_white
end
def icon_amber; 'b_amber.png'; end
def icon_rose; 'b_rose.png'; end
def icon_bluebell; 'b_bluebell.png'; end
def icon_home_button; 'sisu.png'; end
def icon_home_banner; 'sisu.png'; end
def icon_site; 'b_home.png'; end
def icon_bluedot; 'blueband.png'; end
def icon_next; 'arrow_next_red.png'; end
def icon_previous; 'arrow_prev_red.png'; end
def icon_up; 'arrow_up_red.png'; end
#% font def font_fonts @fonts end def font_face %{face="#{font_fonts}"} end def font_lmtoc_face %{face="#{font_fonts}"} end def font_ebook_face %{face="#{font_fonts}"} end def font_face_lmtoc %{face="#{font_fonts}"} end def font_color 'color="#000000"' end def font_size 'size="4"' end def font_size_txt 'size="4"' end def font_size_txt_00 'size="3"' end def font_size_endnote 'size="3"' end def font_small 'size="3"' end def font_tiny 'size="2"' end #% markup def markup_italics_list #regular expression of words to be italised end def markup_bold_list #regular expression of words to be made bold 'SiSU' end def markup_make_italic if defined? italics_list \ and italics_list make={} if italics_list r=italics_list.dup x=case r when /\/i$/; 'i' else '' end r=r.gsub(/^\/(.+?)\/i?/,'\1'). gsub(/\(/,'(?:') # avoid need to escape use of brackets within regex provided m='\b(' + r + ')\b' make[:str] make[:regx]=if x =~/i/; /#{m}/i else /#{m}/ end else nil end end end def markup_make_bold if defined? bold_list \ and not bold_list.empty? make={} if bold_list r=bold_list.dup x=case r when /\/i$/; 'i' else '' end r.gsub(/^\/(.+?)\/i?/,'\1'). gsub(/\(/,'(?:') # avoid need to escape use of brackets within regex provided m='\b(' + r + ')\b' make[:str] make[:regx]=if x =~/i/; /#{m}/i else /#{m}/ end else nil end make end end #% paragraph def paragraph_txt %{

} end def paragraph_txt_00 %{

} end def paragraph_font_citation %{} end def paragraph_endnote %{

} end def paragraph_table %{

} end def paragraph_table_xml end def paragraph_tiny %{

} end def paragraph_small %{

} # keep but not used? end def paragraph_font_tiny %{} end def paragraph_font_small %{} end def paragraph_heading_1 %{

} end def paragraph_heading_1_center %{

} end #% table def table_close ' ' end def table_close_centered_table end def table_align_A end def table_align_B end def table_align_C end def table_width_1 '"100%"' end def table_width_2 '"99%"' end def table_width_3 '"94%"' end def table_width_4 '"90%"' end def table_width_txt '"94%"' end def table_width_txt_avgo '"100%"' end def table_width_txt_r '"96%"' end def table_cellpad_small_paper_margins '"6"' end def table_cellpad_paper_margins '"36"' end def table_cellpad_A '"0"' end def table_cellpad_B '"20"' end def table_cellpad_shadow '"4"' end def table_cellpad_band '"16"' end def table_cellpad_box '"20"' end def table_table_align_A '
' end def table_table_align_B '
' end def table_table_align_C ' ' end #% table_do def table_do_table_paper %{#{table_align_C}
\n

\n} end def table_do_table_surround %{
\n} end #% indent def indent_level_0 '"1%"' end def indent_level_1 '"4%"' end def indent_level_2 '"6%"' end def indent_level_3 '"8%"' end def indent_level_4 '"10%"' end #% margin def margin_num '

' end def margin_numless '' end def margin_num_css ' ' end def margin_num_header ' ' end def margin_txt_00_1 %{
} end def margin_txt_w1 %{
  } end def margin_txt_w2 %{
  } end def margin_txt_0 %{
} end def margin_txt_1 %{
} end def margin_txt_2 %{
} end def margin_txt_3 %{
} end def margin_css #unused, check '
' end #% png def png_ico %{ } end def png_sisu #check url path %{ SiSU } end def png_hp dir=SiSU_Env::InfoEnv.new #(@fns) %{ #{txt_home} } end def png_site %{@} end def png_homepage png_site end def png_nav %{Contents} end def png_manifest %{Document Manifest} end def png_doc %{Full Text} end def png_toc %{TOC linked} end def png_odf %{ODF/ODT} end def png_epub %{EPUB} end def png_pdf %{PDF} end def png_pdf_portrait %{PDF portrait} end def png_pdf_landscape %{PDF landscape} end def png_wmp %{Concordance} end def png_para %{Segment} end def png_status %{Membership status} end def png_mark %{*} end def png_doc_tiny %{Doc} end def png_toc_tiny %{TOC} end def png_status_tiny %{Status, Member States} end def png_ftp %{FTP} end def png_gopher %{Gopher} end def png_crosslink %{lateral hop} end def png_crosslink_ext %{lateral hop} end def png_home #dir=SiSU_Env::InfoEnv.new #(@fns) %{#{txt_home} -->} #%{#{txt_home} -->} end def png_home_button rel=@dir.path_rel_links.html_scroll_2 %{#{txt_home} -->} end def png_book %{Cameron May Books} end #% png_nav def png_nav_home end def png_nav_toc %{TOC} end def png_nav_doc end def png_nav_previous %{<< previous} end def png_nav_next %{next >>} end def png_nav_pre png_nav_previous end def png_nav_nxt png_nav_next end def epub_png_nav_previous %{<< previous} end def epub_png_nav_next %{next >>} end def epub_png_nav_up %{^up ^} end def epub_png_nav_pre epub_png_nav_previous end def epub_png_nav_nxt epub_png_nav_next end def epub_png_nav_toc epub_png_nav_up end def png_nav_pdf %{PDF} end def png_nav_pdf_portrait %{pdf portrait} end def png_nav_pdf_landscape %{pdf landscape} end def png_nav_dot_toc %{^} end def png_nav_dot_previous %{<} end def png_nav_dot_next %{>} end def png_nav_dot_pre png_nav_dot_previous end def png_nav_dot_nxt png_nav_dot_next end #% nav_txt def nav_txt_home %{ #{png_site}  } end def nav_txt_home_button %{ #{png_home_button}  } end def nav_txt_homepage %{  home  } end def nav_txt_toc_link %{   toc  } end def 
nav_txt_toc_link_verbose %{ #{png_toc}  segments' toc  } end def nav_txt_doc_link %{  scroll  } end def nav_txt_manifest #{png_manifest} document manifest %{ [ document manifest ] } end def nav_txt_concordance %{   A-Z  } end def nav_txt_previous '   << Previous   Full Text ' end def nav_txt_next %{   Next     >>   } end def nav_txt_plaintext %{   txt  } end def nav_txt_odf %{   odt  } end def nav_txt_pdfs %{ pdfs   } end def nav_txt_epub %{  epub  } end def nav_txt_pdf_portrait %{  pdf  } end def nav_txt_pdf_landscape %{  pdf  } end #% banner def banner_home %{
#{png_site}
an
( international | transnational )
commercial law & e-commerce
infrastructure monitor
} end def banner_home_guide %{
#{png_doc} LM toc #{png_doc} LM 20**
} end def banner_home_button_only %{ #{png_home_button} } end def banner_home_button #yellow_dark now white %{
#{png_home_button} #{table_close}} end def banner_home_and_index_buttons #yellow_dark now white %{
#{png_home}
 This text's sub- 
 Table of Contents 
  #{table_close}} end def banner_url_txt_sisu %{SiSU} end def banner_band #yellow_dark now white %{
#{png_home} #{table_close}} end def banner_credit_band %{
} end def banner_instrument_cover_band_scr '
' end def banner_instrument_cover_band_seg '
' end #% widget def widget_promo # Array used to build promo from list.yml and promo.yml # ['sisu_icon','sisu','sisu_search_libre','open_society','fsf','ruby'] end def widget_browsers <

If you have problems viewing pages on this site please update your browser:

#{table_close}

 Epiphany ® |

 Galeon ® |

I-Explorer ® |

Kazehakase ® |

Konqueror ® |

Mozilla Firefox ® |

Netscape ® |

Opera ® |

Safari ®

#{table_close} WOK end def widgets_open < WOK end def widget_pdfviewers < WOK end def widget_sisu_text < Output generated by #{@v[:project]} #{@v[:version]} #{@v[:date]} (#{@v[:date_stamp]})

WOK end def widget_sisu <
WOK end def widget_sisu_verbose < WOK end def widget_way_better < WOK end #% credits def credits_itl_cover_band %{

for console/text viewing:

elinks  |

links2  |

w3m

for lightweight gui (X) viewing try:

Dillo  |

 Epiphany ® |

 Galeon  |

links2 -g

#{table_close}

& for pdf viewing of this site we recommend stand-alone viewers
(rather than web browser plugins):

Acrobat Reader ®  |

Evince ®

 GhostView® , GV® & GSview®  |

 Xpdf ®

#{widget_sisu_text}

SiSU

Output generated by #{@v[:project]} #{@v[:version]} #{@v[:date]} (#{@v[:date_stamp]})
#{@v[:project]} Copyright © Ralph Amissah 1997, current #{@date.year_static}. All Rights Reserved.
#{@v[:project]} is software for document structuring, publishing and search,
www.sisudoc.org/ and www.sisudoc.org
w3 since October 3 1993 ralph@amissah.com

#{@v[:project]} using:
Standard SiSU markup syntax,
Standard SiSU meta-markup syntax, and the
Standard SiSU object citation numbering and system, (object/text positioning system)
Copyright © Ralph Amissah 1997, current #{@date.year_static}. All Rights Reserved.

GPLv3

#{@v[:project]} is released under GPLv3 or later, #{url_decoration.xml_open}http://www.gnu.org/licenses/gpl.html#{url_decoration.xml_close}

#{@v[:project]}, developed using Ruby on Debian/Gnu/Linux software infrastructure, with the usual GPL (or OSS) suspects.
Better - "performance, reliability, scalability, security & total cost of ownership" [not to mention flexibility & choice] use of and adherence to open standards (where practical and fair) and it is software libre.
Get With the Future Way Better!


\@
#{txt_home}
#{banner_url_txt_sisu}
#{table_close*4}} end def credits_sisu_manifest widget_sisu_text end def credits_sisu x=%{
#{widget_sisu} #{widget_way_better}
} '' end def credits_splash end def credits_sisu_epub x=%{

EPUB generated by #{@v[:project]} v#{@v[:version]}, GPL3

} '' end #% bottom def bottom_surround %{
#{table_close}
#{table_close} #{banner_band} #{table_close}
#{table_close} #{credits_splash} #{table_close} } end end class Home < Skin def initialize @v=SiSU_Env::InfoVersion.instance.get_version @dir=SiSU_Env::InfoEnv.new @date=SiSU_Env::InfoDate.new #{@date.year} end def redirect < SiSU SiSU informtion provided at www.sisudoc.org/sisu/SiSU

If your browser supports redirection, you will be escorted there shortly. WOK end def homepage < SiSU information Structuring Universe - Structured information, Serialized Units - software for electronic texts, documents, books, digital libraries in plaintext, html, XHTML, XML, ODF (OpenDocument), EPUB, LaTeX, PDF, SQL (PostgreSQL and SQLite), and for search

SiSU >>

SiSU information Structuring Universe

Structured information, Serialized Units    <www.sisudoc.org>   or   <www.jus.uio.no/sisu/>

software for electronic texts, document collections, books, digital libraries, and search,

with "atomic search" and text positioning system (shared text citation numbering: "ocn")

outputs include: plaintext, html, XHTML, XML, ODF (OpenDocument), EPUB, LaTeX, PDF, SQL (PostgreSQL and SQLite)

SiSU

---

SiSU Manual

---

What does SiSU do? Summary

---

Book Samples and Markup Examples

sorted by Author (sisu metadata)

sorted by Topic (sisu metadata)

---

Object Citation Numbering - ocn

(a text positioning system)

---

Search - "granular"

Of interest is the ease of streaming documents to a relational database, at an object (roughly paragraph) level and the potential for increased precision in the presentation of matches that results thereby. The ability to serialise html, LaTeX, XML, SQL, (whatever) is also inherent in / incidental to the design. For a description see the abandoned U.S. provisional patent application

---

Development, SCM - Git
<http://git.sisudoc.org>

Download alternatives

---

Changelog

---

License, SiSU is licensed under GPLv3 or later

Gnu / Linux / Unix

Note: the placement of SiSU documents on the Net predates the release of SiSU.

SiSU Short Description

SiSU is a comprehensive future-proofing electronic document management system. Built-in search capabilities allow you to search across multiple documents and highlight matches in an easy-to-follow format. Paragraph numbering system allows you to cite your electronic documents in a consistent manner across multiple file formats. Multiple format outputs allow you to display your documents in plain text, PDF (portrait and horizontal), OpenDocument format, HTML, or e-book reading format (EPUB). Word mapping allows you to easily create word indexes for your documents. Future-proofing flexibility allows you to quickly adapt your documents to newer output formats as needed. All these and many other features are achieved with little or no additional work on your documents - by marking up the documents with a super simplistic markup language, leaving the SiSU engine to handle the heavy-lifting processing.

Potential users of SiSU include individual authors who want to publish their books or articles electronically to reach a broad audience, web publishers who want to provide multiple channels of access to their electronic documents, or any organizations which centrally manage a medium or large set of electronic documents, especially governmental organizations which may prefer to keep their documents in easily accessible yet non-proprietary formats.

SiSU is an Open Source project initiated and led by Ralph Amissah <ralph.amissah@gmail.com> and can be contacted via mailing list <http://lists.sisudoc.org/listinfo/sisu> at <sisu@lists.sisudoc.org>. SiSU is licensed under the GNU General Public License.

For less markup than the most elementary HTML you can have more.

SiSU - Structured information, Serialized Units for electronic documents, is an information structuring, transforming, publishing and search framework with the following features:

(i) markup syntax: (a) simpler than html, (b) mnemonic, influenced by mail/messaging/wiki markup practices, (c) human readable, and easily writable,

(ii) (a) minimal markup requirement, (b) single file marked up for multiple outputs,

notes

* documents are prepared in a single UTF-8 file using a minimalistic mnemonic syntax. Typical literature, documents like "War and Peace" require almost no markup, and most of the headers are optional.

* markup is easily readable/parsed by the human eye, (basic markup is simpler and more sparse than the most basic html), [this may also be converted to XML representations of the same input/source document].

* markup defines document structure (this may be done once in a header pattern-match description, or for heading levels individually); basic text attributes (bold, italics, underscore, strike-through etc.) as required; and semantic information related to the document (header information, extended beyond the Dublin core and easily further extended as required); the headers may also contain processing instructions.

(iii) (a) multiple outputs primarily industry established and institutionally accepted open standard formats, include amongst others: plaintext (UTF-8); html; (structured) XML; ODF (Open Document text); EPUB; LaTeX; PDF (via LaTeX); SQL type databases (currently PostgreSQL and SQLite). Also produces: concordance files; document content certificates (md5 or sha256 digests of headings, paragraphs, images etc.) and html manifests (and sitemaps of content). (b) takes advantage of the strengths implicit in these very different output types, (e.g. PDFs produced using typesetting of LaTeX, databases populated with documents at an individual object/paragraph level, making possible granular search (and related possibilities))

(iv) outputs share a common numbering system (dubbed "object citation numbering" (ocn)) that is meaningful (to man and machine) across various digital outputs whether paper, screen, or database oriented, (PDF, html, XML, EPUB, sqlite, postgresql), this numbering system can be used to reference content.

(v) SQL databases are populated at an object level (roughly headings, paragraphs, verse, tables) and become searchable with that degree of granularity, the output information provides the object/paragraph numbers which are relevant across all generated outputs; it is also possible to look at just the matching paragraphs of the documents in the database; [output indexing also works well with search indexing tools like Hyper Estraier].

(vi) use of semantic meta-tags in headers permits the addition of semantic information on documents, (the available fields are easily extended)

(vii) creates organised directory/file structure for (file-system) output, easily mapped with its clearly defined structure, with all text objects numbered, you know in advance where in each document output type, a bit of text will be found (e.g. from an SQL search, you know where to go to find the prepared html output or PDF etc.)... there is more; easy directory management and document associations, the document preparation (sub-)directory may be used to determine output (sub-)directory, the skin used, and the SQL database used,

(viii) "Concordance file" wordmap, consisting of all the words in a document and their (text/ object) locations within the text, (and the possibility of adding vocabularies),

(ix) document content certification and comparison considerations: (a) the document and each object within it stamped with an md5 hash making it possible to easily check or guarantee that the substantive content of a document is unchanged, (b) version control, documents integrated with time based source control system, default RCS or CVS with use of $Id$ tag, which SiSU checks

(x) SiSU's minimalist markup makes for meaningful "diffing" of the substantive content of markup-files,

(xi) easily skinnable, document appearance on a project/site wide, directory wide, or document instance level easily controlled/changed,

(xii) in many cases a regular expression may be used (once in the document header) to define all or part of a document's structure, obviating or reducing the need to provide structural markup within the document,

(xiii) prepared files may be batch processed; documents produced are static files, so this needs to be done only once, but may be repeated for various reasons as desired (updated content, addition of new output formats, updated technology document presentations/representations)

(xiv) possible to pre-process, which permits: the easy creation of standard form documents, and templates/term-sheets, or; building of composite documents (master documents) from other sisu marked up documents, or marked up parts, i.e. import documents or parts of text into a main document should this be desired

there is a considerable degree of future-proofing, output representations are "upgradeable", and new document formats may be added.

(xv) there is a considerable degree of future-proofing, output representations are "upgradeable", and new document formats may be added: (a) modular, (thanks in no small part to Ruby) another output format required, write another module.... (b) easy to update output formats (eg html, XHTML, LaTeX/PDF produced can be updated in program and run against whole document set), (c) easy to add, modify, or have alternative syntax rules for input, should you need to,

(xvi) scalability, dependent on your file-system (ext3, Reiserfs, XFS, whatever) and on the relational database used (currently Postgresql and SQLite), and your hardware,

(xvii) only marked up files need be backed up, to secure the larger document set produced,

(xviii) document management,

(xix) Syntax highlighting for SiSU markup is available for a number of text editors.

(xx) remote operations: (a) run SiSU on a remote server, (having prepared sisu markup documents locally or on that server, i.e. this solution where sisu is installed on the remote server, would work whatever type of machine you chose to prepare your markup documents on), (b) generated document outputs may be posted by sisu to remote sites (using rsync/scp) (c) document source (plaintext utf-8) if shared on the net may be identified by its url and processed locally to produce the different document outputs.

(xxi) document source may be bundled together (automatically) with associated documents (multiple language versions or master document with inclusions) and images and sent as a zip file called a sisupod, if shared on the net these too may be processed locally to produce the desired document outputs, these may be downloaded, shared as email attachments, or processed by running sisu against them, either using a url or the filename.

(xxii) for basic document generation, the only software dependency is Ruby, and a few standard Unix tools (this covers plaintext, html, XML, ODF, EPUB, LaTeX). To use a database you of course need that, and to convert the LaTeX generated to PDF, a LaTeX processor like tetex or texlive.

as a developer's tool it is flexible and extensible


SiSU at <www.sisudoc.org> or <www.jus.uio.no/sisu/>


idx txt <search.sisudoc.org>

SiSU ("SiSU information Structuring Universe" or "Structured information, Serialized Units"),1 is a Unix command line oriented framework for document structuring, publishing and search. Featuring minimalistic markup, multiple standard outputs, a common citation system, and granular search.

Using markup applied to a document, SiSU can produce plain text, HTML, XHTML, XML, OpenDocument, LaTeX or PDF files, and populate an SQL database with objects2 (equating generally to paragraph-sized chunks) so searches may be performed and matches returned with that degree of granularity (e.g. your search criteria is met by these documents and at these locations within each document). Document output formats share a common object numbering system for locating content. This is particularly suitable for "published" works (finalized texts as opposed to works that are frequently changed or updated) for which it provides a fixed means of reference of content.

How it works

SiSU markup is fairly minimalistic, it consists of: a (largely optional) document header, made up of information about the document (such as when it was published, who authored it, and granting what rights) and any processing instructions; and markup within text which is related to document structure and typeface. SiSU must be able to discern the structure of a document, (text headings and their levels in relation to each other), either from information provided in the instruction header or from markup within the text (or from a combination of both). Processing is done against an abstraction of the document comprising of information on the document's structure and its objects,2 which the program serializes (providing the object numbers) and which are assigned hash sum values based on their content. This abstraction of information about document structure, objects, (and hash sums), provides considerable flexibility in representing documents different ways and for different purposes (e.g. search, document layout, publishing, content certification, concordance etc.), and makes it possible to take advantage of some of the strengths of established ways of representing documents, (or indeed to create new ones).

1. also chosen for the meaning of the Finnish term "sisu".

2 objects include: headings, paragraphs, verse, tables, images, but not footnotes/endnotes which are numbered separately and tied to the object from which they are referenced.

More information on SiSU provided at: <www.sisudoc.org/sisu/SiSU>

SiSU was developed in relation to legal documents, and is strong across a wide variety of texts (law, literature...(humanities, law and part of the social sciences)). SiSU handles images but is not suitable for formulae/ statistics, or for technical writing at this time.

SiSU has been developed and has been in use for several years. Requirements to cover a wide range of documents within its use domain have been explored.

<ralph@amissah.com>

<ralph.amissah@gmail.com>

<sisu@lists.sisudoc.org>

<http://lists.sisudoc.org/listinfo/sisu>

#{@date.year_static}

w3 since October 3 1993

WOK end def home_toc ' ' end end class Inserts end class TeX < Skin def initialize(papersize='') @papersize=papersize end def a4 def portrait def w 160 end def h 228 end def img_px 450 end self end def landscape def w 238 end def h 160 end def img_px 300 end self end self end def letter def portrait def w 166 end def h 212 end def img_px 468 end self end def landscape def w 226 end def h 166 end def img_px 290 end self end self end def legal def portrait def w 168 end def h 286 end def img_px 474 end self end def landscape def w 296 end def h 166 end def img_px 420 end self end self end def b5 def portrait def w 140 end def h 204 end def img_px 356 end self end def landscape def w 200 end def h 130 end def img_px 260 end self end self end def a5 def portrait def w 112 end def h 162 end def img_px 280 end self end def landscape def w 152 end def h 100 end def img_px 190 end self end self end def dimensions d=case @papersize when /a4/; a4 when /letter/; letter when /legal/; legal when /b5/; b5 when /a5/; a5 else a4 end end end end __END__