Configuration Guide

The configuration of PyWrapper will be done mainly using the ConfigTool that is under development right now.

If you feel brave enough, you can try editing the configuration XML files yourself. They are all in INSTALLDIR/config. Good luck!


Datasource files

Every datasource inside the datasources folder is named after the subfolder in that directory. Each datasource subfolder contains at least 2 configuration files, a datasource preference file and a datasource metadata file.

To create a new datasource create a new folder in the datasources directory and create the two files as described below. An xml schema for each of them comes with the installation and there is an example with a small training database in the examples folder.

datasource_pref.xml

The datasource preferences are all defined in this one xml file. The xml schema for it is part of your installation at INSTALLDIR/schemas/datasource_prefs.xsd.

The latest version is always available from the subversion repository: http://trac.pywrapper.org/pywrapper/browser/branches/stable/schemas/datasource_prefs.xsd

An example of this file looks like this:

<datasource xmlns='http://www.biocase.org/schemas/datasource_prefs/1.0'>
    <settings>
        <maxElementRepetitions>100</maxElementRepetitions>
        <maxElementLevels>10</maxElementLevels>
        <maxResponseTags>500</maxResponseTags>
	<viewMetadata recNumRefreshRate="1" lastUpdateSQL="select max(DateModified) from abcdmetadata"/>
	<logging rate="daily" state="simple"/>
    </settings>
  <dbConnection>
    <user>WebUser</user>
    <password/>
    <database>biocase_training</database>
    <IP>localhost</IP>
    <DBMS>mysql</DBMS>
  </dbConnection>
  <dbStructure>
    <textEncoding>latin_1</textEncoding>
	
    <tableAlias alias='collectors' tablename='collectors'>
      <primaryKey>
        <attribute dbType='int'>ID</attribute>
      </primaryKey>
      <foreignKey target='herbariumsheets'>
        <attribute dbType='int'>UnitID</attribute>
      </foreignKey>
    </tableAlias>
	
    <tableAlias alias='herbariumsheets' tablename='herbariumsheets'>
      <primaryKey>
        <attribute dbType='int'>UnitID</attribute>
      </primaryKey>
      <foreignKey target='metadata'>
        <attribute dbType='int'>MetadataID</attribute>
      </foreignKey>
    </tableAlias>
	
    <tableAlias alias='identifications' tablename='identifications'>
      <primaryKey>
        <attribute dbType='int'>ID</attribute>
      </primaryKey>
      <foreignKey target='herbariumsheets'>
        <attribute dbType='int'>UnitID</attribute>
      </foreignKey>
    </tableAlias>
	
    <tableAlias alias='images' tablename='images'>
      <primaryKey>
        <attribute dbType='int'>ID</attribute>
      </primaryKey>
      <foreignKey target='herbariumsheets'>
        <attribute dbType='int'>UnitID</attribute>
      </foreignKey>
    </tableAlias>
	
    <tableAlias alias='metadata' tablename='abcdMetadata'>
      <primaryKey>
        <attribute dbType='int'>MetadataID</attribute>
      </primaryKey>
    </tableAlias>
  </dbStructure>
  <dbMappings>
	<schema namespace="http://digir.net/schema/conceptual/darwin/2003/1.0" location="http://digir.net/schema/conceptual/darwin/2003/1.0/darwin2.xsd">
		<concept path="/DateLastModified">
			<mapping><dbattribute attribute="DateModified" tablealias="metadata" type="date" /></mapping>
		</concept>
		<concept path="/InstitutionCode">
			<mapping><dbattribute attribute="OwnerOrganizationAbbrev" tablealias="metadata" type="text" /></mapping>
		</concept>
		<concept path="/CollectionCode">
			<mapping><dbattribute attribute="DatasetTitle" tablealias="metadata" type="text" /></mapping>
		</concept>
		<concept mandatory="false" path="/CatalogNumber">
			<mapping><dbattribute attribute="UnitID" tablealias="herbariumsheets" type="int" /></mapping>
		</concept>
		<concept mandatory="false" searchable="true" path="/ScientificName">
			<mapping><dbattribute attribute="NameAuthorYearString" tablealias="identifications" type="text" /></mapping>
		</concept>
		<concept path="/BasisOfRecord">
			<mapping><dbattribute attribute="RecordBasis" tablealias="metadata" type="text" /></mapping>
		</concept>
		<concept path="/Genus">
			<mapping><dbattribute attribute="Genus" tablealias="identifications" type="text" /></mapping>
		</concept>
		<concept path="/Family">
			<mapping><dbattribute attribute="HigherTaxon" tablealias="identifications" type="text" /></mapping>
		</concept>
		<concept path="/IdentifiedBy">
			<mapping><dbattribute attribute="IdentificationAuthor" tablealias="identifications" type="text" /></mapping>
		</concept>
		<concept path="/TypeStatus">
			<mapping><dbattribute attribute="TypeStatus" tablealias="herbariumsheets" type="text" /></mapping>
		</concept>
		<concept path="/Collector">
			<mapping><dbattribute attribute="AgentText" tablealias="collectors" type="text" /></mapping>
		</concept>
		<concept path="/Locality">
			<mapping><dbattribute attribute="LocalityText" tablealias="herbariumsheets" type="text" /></mapping>
		</concept>
		<concept path="/Country">
			<mapping><dbattribute attribute="CountryName" tablealias="herbariumsheets" type="text" /></mapping>
		</concept>
		<concept path="/Longitude">
			<mapping><dbattribute attribute="LongitudeDecimal" tablealias="herbariumsheets" type="int" /></mapping>
		</concept>
		<concept path="/Latitude">
			<mapping><dbattribute attribute="LatitudeDecimal" tablealias="herbariumsheets" type="int" /></mapping>
		</concept>
		<concept path="/Notes">
			<mapping><dbattribute attribute="Notes" tablealias="herbariumsheets" type="text" /></mapping>
		</concept>
	</schema>
	
	<schema namespace="http://digir.net/schema/conceptual/darwin/extension/curatorial/1.0" location="http://digir.net/schema/conceptual/darwin/extension/curatorial/1.0/curatorialWithDiGIRv1.3.xsd">
		<concept path="/CatalogNumberNumeric"><mapping><dbattribute attribute="UnitID" tablealias="herbariumsheets" type="int"/></mapping></concept>
		<concept path="/IdentifiedBy"><mapping><dbattribute attribute="IdentificationAuthor" tablealias="identifications" type="text"/></mapping></concept>
		<concept path="/TypeStatus"><mapping><dbattribute attribute="TypeStatus" tablealias="herbariumsheets" type="text"/></mapping></concept>
	</schema>
  </dbMappings>  
 </datasource>

It is divided into 4 sections (Settings, DB Connection, DB Structure and DB Mappings):

Settings

<adminPassword>KarateKid</adminPassword>

You can specify a separate password for the configtool for every datasource besides the system wide admin password. The configtool is not yet working so this setting is currently not being used.

<maxElementRepetitions>100</maxElementRepetitions>

This setting is used to limit the size of the response: it is the maximum number of repetitions allowed for any repeatable element in responses. It can also be used as a reference for paging.

<maxElementLevels>10</maxElementLevels>

The maximum number of levels (element depth) allowed for responses.

<maxResponseTags>500</maxResponseTags>

The maximum number of tags that can be returned in responses.

<logging rate="daily" state="simple"/>

The logging of requests and errors can be influenced here. The rate at which new log files are created can be one of the following: daily, weekly, monthly. The state attribute tells the wrapper how detailed the logging should be. Select one of these: off, simple, detailed.

<viewMetadata refreshRate="1" lastUpdateSQL="select max(DateModified) from abcdmetadata"/>

The metadata of views used in capabilities and metadata responses is retrieved dynamically from the database. For performance reasons, the results of these queries are cached in files which need to be updated from time to time. The optional parameter refreshRate should be an integer giving the number of days after which the metadata should be gathered again. By default this is 1, i.e. every day. A value of 0 or smaller will result in an update for every query, which is not advisable. The optional parameter lastUpdateSQL should be the full SQL statement that is used to get the date of the last update of the entire database. If this SQL string is not supplied, this meta information is not used in responses.

DB Connection

<user>WebUser</user>

The username used to access the db server.

<password/>

The clear password used to access the db server.

<database>biocase_training</database>

The database name (or DSN for ODBC).

<IP>localhost</IP>

The IP address or host name (e.g. localhost) of the database server.

<DBMS>mysql</DBMS>

The kind of DBMS used. Currently only postgres and mysql are accepted, but modules for Oracle, SQL Server, Sybase, ODBC, Access and others will come soon.

DB Structure

<textEncoding>latin_1</textEncoding>

The character encoding used in the database to store strings. If your DBMS stores strings as unicode (as postgres can), this setting is irrelevant. Otherwise it can be any of the character encodings supported by Python.

<tableAlias alias='collectors' tablename='collectors'> ...

The structure of the database is defined now as a list of table aliases. You can declare multiple aliases for a single table in your db. This is needed when you want to refer to a table in different ways, depending upon which relation (or path in your ER diagram) you are referring to. Each table alias consists of the following parts: attribute alias: the unique name of this alias. You can choose any you like as long as it's unique within the alias list. attribute tablename: the name of the real table inside your db. <primaryKey>

<attribute dbType='int'>ID</attribute> ... A primary key with a list of table columns (attributes) used as a key. Compound keys made up of multiple columns are accepted. The dbType attribute specifies the datatype used in the db in a coarse way. It can be any of these: text, int, float, date

<foreignKey target='herbariumsheets'>

A foreign key pointing to another table alias specified in the target attribute. The foreign key consists of a list of table columns (attributes). Compound keys made up of multiple columns are accepted. <attribute dbType='int'>UnitID</attribute> ...

There are some restrictions to the declaration of a db structure. If you draw a graph of the relations, it should never contain circular relations. This will cause an error. Usually the solution is the declaration of multiple aliases for the same table which "cuts" the graph in half.

DB Mappings The bulk of the configuration is the mapping between local database columns and abstract concepts defined in conceptual schemas. This section therefore contains a list of schemas which contain lists of mappings between their concepts and local database columns.

<schema>

A conceptual schema identified by the namespace: attribute namespace: The target namespace of the schema, used as an identifier for the schema. attribute location: the location of the xml schema document in the form of a URI.

<concept path="/DateLastModified">

A single concept of a schema with an identifier given as path. It does not need to be an xpath, but for xml schemas used as the concept definition this is recommended. attribute searchable, optional: if set to false, this concept will not be searchable. Defaults to true. attribute mandatory, optional: if set to true, the concept is required to exist in every view. Otherwise an error is raised. Defaults to false.

<mapping>

For every concept there needs to be at least one mapping declaration. A single mapping sequence consists of one or more of the following mapping directives. If it's a list, the results should be concatenated in the order of the sequence - with the disadvantage that this concept will not be searchable anymore. <dbattribute attribute="DateModified" tablealias="metadata" type="date" /> Use values taken from a database column. Attributes tablealias and attribute specify the column while type is one of the 4 basic datatypes again: text, int, float, date. <literal value="Bombay" /> Use a fixed literal for the results. A literal cannot be searched, but is very handy for metadata or other small things you don't usually keep in your database. <totalMatchedRecords /> Returns the number of matched records for a given request.


metadata.xml

Each datasource contains a metadata.xml file. This file is used for the metadata response of a datasource and is defined by the TAPIR protocol schema.

An example of this file looks like this:

<metadata>
    <label lang="en-us">BioCASe training</label>
    <accesspoint>http://ww3.bgbm.org/biocase/datasources/training/pywrapper.py</accesspoint>
    <abstract lang="en-us">This testing database is used in BioCASE workshops and other events. Here you have some chinese characters: 与不並丘坵垌垔.
    This database was generously provided by the Herbarium at the Universidad of Barcelona CeDocBiv, but the data has been greatly modified so DO NOT CONSIDER IT AS REAL DATA.</abstract>
    <keywords lang="en-us">BioCASe test</keywords>
    <citation lang="en-us">Testing Citation</citation>
    <rights lang="en-us">You can distribute this database freely but only for testing purposes and training.</rights>
    <conceptualSchemas>
        <conceptualSchema namespace="http://www.tdwg.org/schemas/abcd/1.2"/>
        <conceptualSchema namespace="http://digir.net/schema/conceptual/darwin/2003/1.0"/>
    </conceptualSchemas>
    <views>
        <view name="example" dateLastUpdated="2004-11-19" numberOfRecords="951"/>
        <view name="example2" dateLastUpdated="2004-11-19" numberOfRecords="951"/>
        <view name="example3" dateLastUpdated="2004-11-19" numberOfRecords="951"/>
    </views>
     <relatedEntities>
        <entity>
            <identifier>http://ww3.bgbm.org/biocase/datasources/training/metadata.xml</identifier>
            <name lang="en-us">Botanic Garden &amp; Botanical Museum Berlin-Dahlem</name>
            <acronym>BGBM</acronym>
            <logoURL>http://www.biocase.org/images/bdi_logo.gif</logoURL>
            <role>provider</role>
            <role>host</role>
            <description lang="en-us">The Botanic Garden Berlin-Dahlem comprises an area of 126 acres and hence is one of the world's largest and most important gardens. About 22,000 different species of plants are cultivated here. In the 39 acres of the plant-geography section - one of the biggest of its kind in the world - you can travel all the way around the Northern Hemisphere, and in the 42 acre Arboretum and taxonomy section you can get an insight into the relationship among woody and herbaceous plants. The greenhouse complex consists of 16 houses open to the public and offers the possibility to travel through tropical and subtropical vegetation. These living collections, along with the preserved collections, form the basis for the continued expansion of scientific study within the plant kingdom, with the goal to deepen the knowledge necessary for the utilization and protection of plant diversity on earth.</description>
            <relatedInformation>http://www.bgbm.org/</relatedInformation>
            <contact type="technical">
                <name>Markus Döring</name>
                <title>sysadmin</title>
                <email>m.doering@bgbm.org</email>
                <phone>+49 555-5555-55</phone>
            </contact>
            <contact type="administrative">
                <name>Javier de la Torre</name>
                <title>head of room</title>
                <email>j.torre@bgbm.org</email>
                <phone>+49 555-5555-55</phone>
            </contact>
        </entity>
    </relatedEntities>
</metadata>