123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169 |
- <?xml version="1.0" encoding="UTF-8"?>
- <!--
- sitemap_gen.py example configuration script
- This file specifies a set of sample input parameters for the
- sitemap_gen.py client.
- You should copy this file into "config.xml" and modify it for
- your server.
- ********************************************************* -->
- <!-- ** MODIFY **
- The "site" node describes your basic web site.
- Required attributes:
- base_url - the top-level URL of the site being mapped
- store_into - the webserver path to the desired output file.
- This should end in '.xml' or '.xml.gz'
- (the script will create this file)
- Optional attributes:
- verbose - an integer from 0 (quiet) to 3 (noisy) for
- how much diagnostic output the script gives
- suppress_search_engine_notify="1"
- - disables notifying search engines about the new map
- (same as the "testing" command-line argument.)
- default_encoding
- - names a character encoding to use for URLs and
- file paths. (Example: "UTF-8")
- -->
- <site
- base_url="http://wiki.dolibarr.org/"
- store_into="sitemap-wiki-bing.xml.gz"
- verbose="1"
- >
- <!-- ********************************************************
- INPUTS
- All the various nodes in this section control where the script
- looks to find URLs.
- MODIFY or DELETE these entries as appropriate for your server.
- ********************************************************* -->
- <!-- ** MODIFY or DELETE **
- "url" nodes specify individual URLs to include in the map.
- Required attributes:
- href - the URL
- Optional attributes:
- lastmod - timestamp of last modification (ISO8601 format)
- changefreq - how often content at this URL is usually updated
- priority - value 0.0 to 1.0 of relative importance in your site
- -->
- <!--
- <url href="http://www.example.com/stats?q=name" />
- <url
- href="http://www.example.com/stats?q=age"
- lastmod="2004-11-14T01:00:00-07:00"
- changefreq="yearly"
- priority="0.3"
- />
- -->
- <!-- ** MODIFY or DELETE **
- "urllist" nodes name text files with lists of URLs.
- An example file "example_urllist.txt" is provided.
- Required attributes:
- path - path to the file
- Optional attributes:
- encoding - encoding of the file if not US-ASCII
- -->
- <urllist path="urllist-wiki.txt" encoding="UTF-8" />
- <!-- ** MODIFY or DELETE **
- "directory" nodes tell the script to walk the file system
- and include all files and directories in the Sitemap.
- Required attributes:
- path - path to begin walking from
- url - URL equivalent of that path
- Optional attributes:
- default_file - name of the index or default file for directory URLs
- -->
- <!--
- <directory path="/var/www/icons" url="http://www.example.com/images/" />
- <directory
- path="/var/www/docroot"
- url="http://www.example.com/"
- default_file="index.html"
- />
- -->
- <!-- ** MODIFY or DELETE **
- "accesslog" nodes tell the script to scan webserver log files to
- extract URLs on your site. Both Common Logfile Format (Apache's default
- logfile) and Extended Logfile Format (IIS's default logfile) can be read.
- Required attributes:
- path - path to the file
- Optional attributes:
- encoding - encoding of the file if not US-ASCII
- -->
- <!--
- <accesslog path="/etc/httpd/logs/access.log" encoding="UTF-8" />
- <accesslog path="/etc/httpd/logs/access.log.0" encoding="UTF-8" />
- <accesslog path="/etc/httpd/logs/access.log.1.gz" encoding="UTF-8" />
- -->
- <!-- ** MODIFY or DELETE **
- "sitemap" nodes tell the script to scan other Sitemap files. This can
- be useful to aggregate the results of multiple runs of this script into
- a single Sitemap.
- Required attributes:
- path - path to the file
- -->
- <!--
- <sitemap path="/var/www/docroot/subpath/sitemap.xml" />
- -->
- <!-- ********************************************************
- FILTERS
- Filters specify wild-card patterns that the script compares
- against all URLs it finds. Filters can be used to exclude
- certain URLs from your Sitemap, for instance if you have
- hidden content that you hope the search engines don't find.
- Filters can be either type="wildcard", which means standard
- path wildcards (* and ?) are used to compare against URLs,
- or type="regexp", which means regular expressions are used
- to compare.
- Filters are applied in the order specified in this file.
- An action="drop" filter causes exclusion of matching URLs.
- An action="pass" filter causes inclusion of matching URLs,
- shortcutting any other later filters that might also match.
- If no filter at all matches a URL, the URL will be included.
- Together you can build up fairly complex rules.
- The default action is "drop".
- The default type is "wildcard".
- You can MODIFY or DELETE these entries as appropriate for
- your site. However, unlike above, the example entries in
- this section are not contrived and may be useful to you as
- they are.
- ********************************************************* -->
- <!-- Exclude URLs that end with a '~' (IE: emacs backup files) -->
- <filter action="drop" type="wildcard" pattern="*~" />
- <!-- Exclude URLs within UNIX-style hidden files or directories -->
- <filter action="drop" type="regexp" pattern="/\.[^/]*" />
- <filter action="drop" type="regexp" pattern="title=" />
- </site>
|