<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE QMRF SYSTEM "/WEB-INF/xslt/qmrf.dtd">
<QMRF author="Joint Research Centre, European Commission" contact="Joint Research Centre, European Commission" date="July 2007" email="qsardb@jrc.it" name="(Q)SAR Model Reporting Format" schema_version="0.9" url="http://ecb.jrc.ec.europa.eu/qsar/" version="1.2">
<QMRF_chapters>
<QSAR_identifier chapter="1" help="" name="QSAR identifier">
<QSAR_title chapter="1.1" help="" name="QSAR identifier (title)">&lt;html&gt;&#13;
  &lt;head&gt;&#13;
    &#13;
  &lt;/head&gt;&#13;
  &lt;body&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      Nonlinear QSAR: artificial neural network for in vitro chromosomal &#13;
      aberration&#13;
    &lt;/p&gt;&#13;
  &lt;/body&gt;&#13;
&lt;/html&gt;&#13;
</QSAR_title>
<QSAR_models chapter="1.2" help="" name="Other related models">&lt;html&gt;&#13;
  &lt;head&gt;&#13;
    &#13;
  &lt;/head&gt;&#13;
  &lt;body&gt;&#13;
  &lt;/body&gt;&#13;
&lt;/html&gt;&#13;
</QSAR_models>
<QSAR_software chapter="1.3" help="" name="Software coding the model">
			
      















<software_ref idref="firstsoftware" catalog="software_catalog"/>
<software_ref idref="software_catalog_4" catalog="software_catalog"/>
</QSAR_software>
</QSAR_identifier>
<QSAR_General_information chapter="2" help="" name="General information">
<qmrf_date chapter="2.1" help="" name="Date of QMRF">&lt;html&gt;&#13;
  &lt;head&gt;&#13;
    &#13;
  &lt;/head&gt;&#13;
  &lt;body&gt;&#13;
    4.06.2010&#13;
  &lt;/body&gt;&#13;
&lt;/html&gt;&#13;
</qmrf_date>
<qmrf_authors chapter="2.2" help="" name="QMRF author(s) and contact details">
		
      
































































<author_ref idref="firstauthor" catalog="authors_catalog"/>
<author_ref idref="authors_catalog_3" catalog="authors_catalog"/>
<author_ref idref="authors_catalog_4" catalog="authors_catalog"/>
<author_ref idref="authors_catalog_5" catalog="authors_catalog"/>
<author_ref idref="authors_catalog_6" catalog="authors_catalog"/>
<author_ref idref="authors_catalog_7" catalog="authors_catalog"/>
<author_ref idref="authors_catalog_8" catalog="authors_catalog"/>
<author_ref idref="authors_catalog_9" catalog="authors_catalog"/>
<author_ref idref="authors_catalog_10" catalog="authors_catalog"/>
<author_ref idref="authors_catalog_11" catalog="authors_catalog"/>
<author_ref idref="authors_catalog_12" catalog="authors_catalog"/>
<author_ref idref="authors_catalog_13" catalog="authors_catalog"/>
</qmrf_authors>
<qmrf_date_revision chapter="2.3" help="" name="Date of QMRF update(s)">&lt;html&gt;&#13;
  &lt;head&gt;&#13;
    &#13;
  &lt;/head&gt;&#13;
  &lt;body&gt;&#13;
  &lt;/body&gt;&#13;
&lt;/html&gt;&#13;
</qmrf_date_revision>
<qmrf_revision chapter="2.4" help="" name="QMRF update(s)"/>
<model_authors chapter="2.5" help="" name="Model developer(s) and contact details">
		
      









<author_ref idref="modelauthor" catalog="authors_catalog"/>
</model_authors>
<model_date chapter="2.6" help="" name="Date of model development and/or publication">&lt;html&gt;&#13;
  &lt;head&gt;&#13;
    &#13;
  &lt;/head&gt;&#13;
  &lt;body&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      12.04.2010&#13;
    &lt;/p&gt;&#13;
  &lt;/body&gt;&#13;
&lt;/html&gt;&#13;
</model_date>
<references chapter="2.7" help="" name="Reference(s) to main scientific papers and/or software package">

      
























<publication_ref idref="publications_catalog_14" number="" catalog="publications_catalog"/>
</references>
<info_availability chapter="2.8" help="" name="Availability of information about the model">&lt;html&gt;&#13;
  &lt;head&gt;&#13;
    &#13;
  &lt;/head&gt;&#13;
  &lt;body&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      Training, selection and test sets are available. Model algorithm is &#13;
      available (snn file). &#13;
    &lt;/p&gt;&#13;
  &lt;/body&gt;&#13;
&lt;/html&gt;&#13;
</info_availability>
<related_models chapter="2.9" help="" name="Availability of another QMRF for exactly the same model">&lt;html&gt;&#13;
  &lt;head&gt;&#13;
    &#13;
  &lt;/head&gt;&#13;
  &lt;body&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      None to date.&#13;
    &lt;/p&gt;&#13;
  &lt;/body&gt;&#13;
&lt;/html&gt;&#13;
</related_models>
</QSAR_General_information>
<QSAR_Endpoint chapter="3" help="" name="Defining the endpoint - OECD Principle 1">
<model_species chapter="3.1" help="" name="Species">&lt;html&gt;&#13;
  &lt;head&gt;&#13;
    &#13;
  &lt;/head&gt;&#13;
  &lt;body&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      Chinese Hamster Lung Cells&#13;
    &lt;/p&gt;&#13;
  &lt;/body&gt;&#13;
&lt;/html&gt;&#13;
</model_species>
<model_endpoint chapter="3.2" help="" name="Endpoint">

      









<endpoint_ref idref="endpoints_catalog_4" catalog="endpoints_catalog"/>
</model_endpoint>
<endpoint_comments chapter="3.3" help="" name="Comment on endpoint">&lt;html&gt;&#13;
  &lt;head&gt;&#13;
    &#13;
  &lt;/head&gt;&#13;
  &lt;body&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      Chromosomal Aberration Index (indicated as CA: +1 and -1)&#13;
    &lt;/p&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      Description of the in vitro chromosome aberration test:&#13;
    &lt;/p&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      The test system and its purpose are described in OECD Guideline for the &#13;
      Testing of chemicals, No. 473 (1).&#13;
    &lt;/p&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      &amp;#8220;The purpose of the in vitro chromosome aberration test is to identify &#13;
      agents that cause structural chromosome aberrations in cultured &#13;
      mammalian cells. Structural aberrations may be of two types, chromosome &#13;
      or chromatid. With the majority of chemical mutagens, induced &#13;
      aberrations are of the chromatid type, but chromosome-type aberrations &#13;
      also occur. An increase in polyploidy may indicate that a chemical has &#13;
      the potential to induce numerical aberrations. However, this guideline &#13;
      is not designed to measure numerical aberrations and is not routinely &#13;
      used for that purpose. Chromosome mutations and related events are the &#13;
      cause of many human genetic diseases and there is substantial evidence &#13;
      that chromosome mutations and related events causing alterations in &#13;
      oncogenes and tumour suppressor genes of somatic cells are involved in &#13;
      cancer induction in humans and experimental animals.&amp;#8221;&#13;
    &lt;/p&gt;&#13;
  &lt;/body&gt;&#13;
&lt;/html&gt;&#13;
</endpoint_comments>
<endpoint_units chapter="3.4" help="" name="Endpoint units">&lt;html&gt;&#13;
  &lt;head&gt;&#13;
    &#13;
  &lt;/head&gt;&#13;
  &lt;body&gt;&#13;
    Unitless, binary property&#13;
  &lt;/body&gt;&#13;
&lt;/html&gt;&#13;
</endpoint_units>
<endpoint_variable chapter="3.5" help="" name="Dependent variable">&lt;html&gt;&#13;
  &lt;head&gt;&#13;
    &#13;
  &lt;/head&gt;&#13;
  &lt;body&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      Chromosome aberration (CA) values -1 (NEG) or 1 (POS). No preprocessing &#13;
      of the original data has been done for this model.&#13;
    &lt;/p&gt;&#13;
  &lt;/body&gt;&#13;
&lt;/html&gt;&#13;
</endpoint_variable>
<endpoint_protocol chapter="3.6" help="" name="Experimental protocol">&lt;html&gt;&#13;
  &lt;head&gt;&#13;
    &#13;
  &lt;/head&gt;&#13;
  &lt;body&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      All tests were performed using a Chinese Hamster Lung Cell (CHL) &#13;
      fibroblast cell line, which has been kept as a single cell sub-clone &#13;
      since 1973. This cell line has been used almost exclusively in Japan to &#13;
      test hundreds of chemicals over more than two decades, as opposed to the &#13;
      Chinese Hamster Ovary (CHO) cell lines that are more common in Europe &#13;
      and the United States. Much of the test information has been published &#13;
      in numerous scientific articles during the years over which it has been &#13;
      generated. An example is provided by Ishidate et al. (4).&#13;
    &lt;/p&gt;&#13;
  &lt;/body&gt;&#13;
&lt;/html&gt;&#13;
</endpoint_protocol>
<endpoint_data_quality chapter="3.7" help="" name="Endpoint data quality and variability">&lt;html&gt;&#13;
  &lt;head&gt;&#13;
    &#13;
  &lt;/head&gt;&#13;
  &lt;body&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      The test data used in this model were taken from a single source, the &#13;
      Data Book of Chromosomal Aberration Test In Vitro [ref 2, sect 9.2]. &#13;
      This book is written in Japanese, but all tables are in English and the &#13;
      authors were provided with English translations for everything except &#13;
      the Introduction. The Introduction is identical to that used in the &#13;
      previous version of the book, published in English by Dr. Motoi Ishidate &#13;
      [ref 3, sect 9.2], which was also available to the authors.&#13;
    &lt;/p&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      Test results for a total of 901 substances are presented in the Data &#13;
      Book [ref 2, sect 9.2]. The chemicals were chosen for a variety of &#13;
      reasons, including use in foods. A number fall into the class commonly &#13;
      referred to as UVCB&amp;#8217;s, or chemicals that cannot be represented by a &#13;
      complete structure diagram and specific molecular formula. These were &#13;
      excluded for the obvious reason that it is impossible to model a &#13;
      chemical for which a structure is not available. However, it was found &#13;
      that this is not always a totally unambiguous process, so the authors &#13;
      made the best judgement they could. Inorganic chemicals were also &#13;
      excluded, as the modeling platform used by the authors cannot deal with &#13;
      them. A very small number of chemicals were excluded because the true &#13;
      identity was not clear (inconsistencies between chemical name, CAS &#13;
      number and structure/molecular weight that we were unable to resolve). A &#13;
      few stereo-isomers with conflicting results were also removed as they &#13;
      cannot be distinguished by SMILES notation (a computer code for 2D &#13;
      structures).&#13;
    &lt;/p&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      A toxicological decision was made to include chemicals as being positive &#13;
      if they were active in inducing either aberrations or polyploidy. While &#13;
      the current test guideline does not specify testing for a length of &#13;
      time, which would allow polyploidy to be assessed, much of the CHL data &#13;
      does and the information was felt to be too valuable to lose (18 &#13;
      chemicals). Chemicals were also retained even if the test had not been &#13;
      performed both in the presence and absence of metabolic activation.&#13;
    &lt;/p&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      Beyond this, the judgement of the authors was used in their &#13;
      interpretation of the final test result. This included dropping 16 of 18 &#13;
      chemicals that the authors considered inconclusive in repeat tests (two &#13;
      were kept because while they were inconclusive for polyploidy, they were &#13;
      clearly positive for structural aberrations).&#13;
    &lt;/p&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      Seventy-eight chemicals were excluded because the authors considered &#13;
      them False Positive (only active at dose of more than 10 mM where &#13;
      effects could be due to osmotic pressure).&#13;
    &lt;/p&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      As the modeling system was not able to handle salts (e.g. sodium salts, &#13;
      hydrochlorides), further interpretation was necessary. In the majority &#13;
      of cases there was no conflict with regard to results of testing ionised &#13;
      or non-ionised forms. However, in certain cases there were. The authors &#13;
      decided that for some simple organic acids that were active but where &#13;
      the salt was clearly inactive, to consider these as being inactive in &#13;
      accordance with the advice given in the OECD Guidelines and Morita et &#13;
      al. (5), that particularly low pH may lead to false positive &#13;
      predictions. It is not known if this decision is right or wrong in &#13;
      relation to use of results of this in vitro system for predicting in &#13;
      vivo effects, but it will clearly affect the performance of the model.&#13;
    &lt;/p&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      A few decisions were made on the basis of additional data from the &#13;
      literature: vitamin B2 (Riboflavin, CAS 83-88-5) tested positive in &#13;
      insoluble form, but was negative in soluble form. The negative result &#13;
      was retained, as the mechanism for the insoluble compound appears to be &#13;
      physical [ref 6, sect 9.2]. After some consideration, saccharin (CAS &#13;
      81-07-2) and EDTA (CAS 60-00-4) were entered as negatives, in agreement &#13;
      with Ashby et al. [ref 7, sect 9.2], even though there was conflicting &#13;
      information for some of the salts.&#13;
    &lt;/p&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      Finally, about 40 chemicals having only equivocal results were excluded. &#13;
      This is also an arbitrary decision, but it was felt that equivocal &#13;
      results were not likely to lead to a better training set.&#13;
    &lt;/p&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      Thus, a total of 513 chemicals remained. Their identities and SMILES &#13;
      notations are available in Training_set.doc. There were 263 positive and &#13;
      250 negative substances in the training set, giving the nearly 50:50 &#13;
      split considered ideal for modeling purposes.&#13;
    &lt;/p&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      For external validation, data generated over a six-year period &#13;
      (1991-1996) was used for chromosomal aberration testing of high &#13;
      production volume (HPV) industrial chemicals that had been conducted &#13;
      using Chinese hamster lung (CHL/IU) cells according to the OECD HPV &#13;
      testing program and the national program in Japan [Kusakabe et al., ref &#13;
      8, sect 9.2].&#13;
    &lt;/p&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      Of a total of 98 substances, two were removed in the authors&amp;#8217; analyses: &#13;
      dicyclopentadiene (CAS 77-73-6), because it was already in the training &#13;
      set, and Pigment Green No. 7 (CAS 14832-14-5), a copper complex that &#13;
      cannot be modeled in the selected system. The 98 chemicals are available &#13;
      in Validation_set.doc. On further examination of the data set, it was &#13;
      noticed that one substance (4-(1-Methylpropyl)phenol, CAS 99-71-8) was &#13;
      actually a false positive (only active at very high concentration, and &#13;
      ultimately judged inactive following an in vitro micronucleus test). &#13;
      Eight additional chemicals were identified where the chromosomal &#13;
      aberrations are induced under non-physiological culture conditions &#13;
      (pH&amp;lt;6), which could be kept in mind when using the data.&#13;
    &lt;/p&gt;&#13;
  &lt;/body&gt;&#13;
&lt;/html&gt;&#13;
</endpoint_data_quality>
</QSAR_Endpoint>
<QSAR_Algorithm chapter="4" help="" name="Defining the algorithm - OECD Principle 2">
<algorithm_type chapter="4.1" help="" name="Type of model">&lt;html&gt;&#13;
  &lt;head&gt;&#13;
    &#13;
  &lt;/head&gt;&#13;
  &lt;body&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      Neural network&#13;
    &lt;/p&gt;&#13;
  &lt;/body&gt;&#13;
&lt;/html&gt;&#13;
</algorithm_type>
<algorithm_explicit chapter="4.2" help="" name="Explicit algorithm">
<algorithm_ref idref="algorithms_catalog_1" catalog="algorithms_catalog"/>
<equation>&lt;html&gt;&#13;
  &lt;head&gt;&#13;
    &#13;
  &lt;/head&gt;&#13;
  &lt;body&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      The algorithm is based on a neural network predictor with structure &#13;
      9-9-8-1. Available as snn file.&#13;
    &lt;/p&gt;&#13;
  &lt;/body&gt;&#13;
&lt;/html&gt;&#13;
</equation>
</algorithm_explicit>
<algorithms_descriptors chapter="4.3" help="" name="Descriptors in the model">
      
      




























































































<descriptor_ref idref="descriptors_catalog_13" catalog="descriptors_catalog"/>
<descriptor_ref idref="descriptors_catalog_14" catalog="descriptors_catalog"/>
<descriptor_ref idref="descriptors_catalog_15" catalog="descriptors_catalog"/>
<descriptor_ref idref="descriptors_catalog_16" catalog="descriptors_catalog"/>
<descriptor_ref idref="descriptors_catalog_17" catalog="descriptors_catalog"/>
<descriptor_ref idref="descriptors_catalog_18" catalog="descriptors_catalog"/>
<descriptor_ref idref="descriptors_catalog_19" catalog="descriptors_catalog"/>
<descriptor_ref idref="descriptors_catalog_20" catalog="descriptors_catalog"/>
<descriptor_ref idref="descriptors_catalog_27" catalog="descriptors_catalog"/>
</algorithms_descriptors>
<descriptors_selection chapter="4.4" help="" name="Descriptor selection">&lt;html&gt;&#13;
  &lt;head&gt;&#13;
    &#13;
  &lt;/head&gt;&#13;
  &lt;body&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      Initial pool of ~1000 descriptors. Stepwise (forward) descriptor &#13;
      selection was based on a set of statistical selection rules such as the F &#13;
      statistic and the p probability of the F distribution. The 9 descriptors &#13;
      with the highest F (lowest p) were selected from the whole pool (~1075). &#13;
      These 9 descriptors were used as inputs to the network. Twelve networks &#13;
      with different structures were tested in order to find the best ANN with &#13;
      lowest RMS (root-mean-squared error) and highest correct predictions &#13;
      (for training, selection and test sets). Then 1998 epochs were used to &#13;
      train the final network with architecture depicted in 4.2. Optimization &#13;
      of the weights was performed with Levenberg-Marquardt algorithm encoded &#13;
      in the backpropagation scheme using linear and hyperbolic activation &#13;
      functions. The cost function was the entropy function.&#13;
    &lt;/p&gt;&#13;
  &lt;/body&gt;&#13;
&lt;/html&gt;&#13;
</descriptors_selection>
<descriptors_generation chapter="4.5" help="" name="Algorithm and descriptor generation">&lt;html&gt;&#13;
  &lt;head&gt;&#13;
    &#13;
  &lt;/head&gt;&#13;
  &lt;body&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      All descriptors were generated using QSARModel on structures optimized &#13;
      with the AM1 semiempirical quantum mechanical model.&#13;
    &lt;/p&gt;&#13;
  &lt;/body&gt;&#13;
&lt;/html&gt;&#13;
</descriptors_generation>
<descriptors_generation_software chapter="4.6" help="" name="Software name and version for descriptor generation" options="">
				
      









<software_ref idref="software_catalog_2" catalog="software_catalog"/>
</descriptors_generation_software>
<descriptors_chemicals_ratio chapter="4.7" help="" name="Chemicals/Descriptors ratio">&lt;html&gt;&#13;
  &lt;head&gt;&#13;
    &#13;
  &lt;/head&gt;&#13;
  &lt;body&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      55.7 (501 chemicals / 9 descriptors)&#13;
    &lt;/p&gt;&#13;
  &lt;/body&gt;&#13;
&lt;/html&gt;&#13;
</descriptors_chemicals_ratio>
</QSAR_Algorithm>
<QSAR_Applicability_domain chapter="5" help="" name="Defining the applicability domain - OECD Principle 3">
<app_domain_description chapter="5.1" help="" name="Description of the applicability domain of the model">&lt;html&gt;&#13;
  &lt;head&gt;&#13;
    &#13;
  &lt;/head&gt;&#13;
  &lt;body&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      Applicability domain based on training set and by descriptor value range &#13;
      (between min and max values):&#13;
    &lt;/p&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      The model is suitable for compounds (including ethers, esters, amides, &#13;
      halides, aromatic, aliphatic functional groups etc) that have the &#13;
      descriptors in the following range augmented with the confidence in 5.2:&#13;
    &lt;/p&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      Desc ID&#13;
    &lt;/p&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      See 4.3 1 2 3 4 5 6 7 8 9&#13;
    &lt;/p&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      Min 0.000000 0.000000 1.25747 0.971429 0.00000 -10.0872 0.00000 -228.998 &#13;
      0.590701&#13;
    &lt;/p&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      Max 0.237228 0.978325 14.61591 2.900000 25.56523 0.0000 67.00000 791.387 &#13;
      0.930916&#13;
    &lt;/p&gt;&#13;
  &lt;/body&gt;&#13;
&lt;/html&gt;&#13;
</app_domain_description>
<app_domain_method chapter="5.2" help="" name="Method used to assess the applicability domain">&lt;html&gt;&#13;
  &lt;head&gt;&#13;
    &#13;
  &lt;/head&gt;&#13;
  &lt;body&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      Presence of functional groups in structures&#13;
    &lt;/p&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      Range of descriptor values in training set with &amp;#177;30% confidence&#13;
    &lt;/p&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      Descriptor values must fall between maximal and minimal descriptor &#13;
      values (see 5.1) of training set &amp;#177;30%.&#13;
    &lt;/p&gt;&#13;
  &lt;/body&gt;&#13;
&lt;/html&gt;&#13;
</app_domain_method>
<app_domain_software chapter="5.3" help="" name="Software name and version for applicability domain assessment">

      









<software_ref idref="software_catalog_3" catalog="software_catalog"/>
</app_domain_software>
<applicability_limits chapter="5.4" help="" name="Limits of applicability">&lt;html&gt;&#13;
  &lt;head&gt;&#13;
    &#13;
  &lt;/head&gt;&#13;
  &lt;body&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      See 5.2&#13;
    &lt;/p&gt;&#13;
  &lt;/body&gt;&#13;
&lt;/html&gt;&#13;
</applicability_limits>
</QSAR_Applicability_domain>
<QSAR_Robustness chapter="6" help="" name="Internal validation - OECD Principle 4">
<training_set_availability answer="Yes" chapter="6.1" help="" name="Availability of the training set"/>
<training_set_data cas="Yes" chapter="6.2" chemname="Yes" formula="No" help="" inchi="No" mol="Yes" name="Available information for the training set" smiles="No"/>
<training_set_descriptors answer="All" chapter="6.3" help="" name="Data for each descriptor variable for the training set"/>
<dependent_var_availability answer="All" chapter="6.4" help="" name="Data for the dependent variable for the training set"/>
<other_info chapter="6.5" help="" name="Other information about the training set">&lt;html&gt;&#13;
  &lt;head&gt;&#13;
    &#13;
  &lt;/head&gt;&#13;
  &lt;body&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      Data points: 501 (initial set was refined: salts and equivocal &#13;
      experimental values were removed). See also 6.7&#13;
    &lt;/p&gt;&#13;
  &lt;/body&gt;&#13;
&lt;/html&gt;&#13;
</other_info>
<preprocessing chapter="6.6" help="" name="Pre-processing of data before modelling">&lt;html&gt;&#13;
  &lt;head&gt;&#13;
    &#13;
  &lt;/head&gt;&#13;
  &lt;body&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      Standardization and normalization of the inputs by taking into account &#13;
      the mean and standard deviation. Structures that could not be properly &#13;
      optimized were discarded from the original set.&#13;
    &lt;/p&gt;&#13;
  &lt;/body&gt;&#13;
&lt;/html&gt;&#13;
</preprocessing>
<goodness_of_fit chapter="6.7" help="" name="Statistics for goodness-of-fit">&lt;html&gt;&#13;
  &lt;head&gt;&#13;
    &#13;
  &lt;/head&gt;&#13;
  &lt;body&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      Training negatives; Training positives; Selection negatives; Selection &#13;
      positives; Test negatives; Test positives&#13;
    &lt;/p&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      Total 242.0000 259.0000 19.00000 31.00000 23.00000 27.00000&#13;
    &lt;/p&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      Correct 233.0000 252.0000 13.00000 22.00000 13.00000 18.00000&#13;
    &lt;/p&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      Wrong 9.0000 7.0000 6.00000 9.00000 10.00000 9.00000&#13;
    &lt;/p&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      Correct (%) 96.2810 97.2973 68.42105 70.96774 56.52174 66.66667&#13;
    &lt;/p&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      Wrong (%) 3.7190 2.7027 31.57895 29.03226 43.47826 33.33333&#13;
    &lt;/p&gt;&#13;
  &lt;/body&gt;&#13;
&lt;/html&gt;&#13;
</goodness_of_fit>
<loo chapter="6.8" help="" name="Robustness - Statistics obtained by leave-one-out cross-validation">&lt;html&gt;&#13;
  &lt;head&gt;&#13;
    &#13;
  &lt;/head&gt;&#13;
  &lt;body&gt;&#13;
  &lt;/body&gt;&#13;
&lt;/html&gt;&#13;
</loo>
<lmo chapter="6.9" help="" name="Robustness - Statistics obtained by leave-many-out cross-validation">&lt;html&gt;&#13;
  &lt;head&gt;&#13;
    &#13;
  &lt;/head&gt;&#13;
  &lt;body&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      See 6.7&#13;
    &lt;/p&gt;&#13;
  &lt;/body&gt;&#13;
&lt;/html&gt;&#13;
</lmo>
<yscrambling chapter="6.10" help="" name="Robustness - Statistics obtained by Y-scrambling"/>
<bootstrap chapter="6.11" help="" name="Robustness - Statistics obtained by bootstrap">&lt;html&gt;&#13;
  &lt;head&gt;&#13;
&#13;
  &lt;/head&gt;&#13;
  &lt;body&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      &#13;
    &lt;/p&gt;&#13;
  &lt;/body&gt;&#13;
&lt;/html&gt;&#13;
</bootstrap>
<other_statistics chapter="6.12" help="" name="Robustness - Statistics obtained by other methods">&lt;html&gt;&#13;
  &lt;head&gt;&#13;
    &#13;
  &lt;/head&gt;&#13;
  &lt;body&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      See 6.7 for classification statistics&#13;
    &lt;/p&gt;&#13;
  &lt;/body&gt;&#13;
&lt;/html&gt;&#13;
</other_statistics>
</QSAR_Robustness>
<QSAR_Predictivity chapter="7" help="" name="External validation - OECD Principle 4">
<validation_set_availability answer="Yes" chapter="7.1" help="" name="Availability of the external validation set"/>
<validation_set_data cas="Yes" chapter="7.2" chemname="Yes" formula="No" help="" inchi="No" mol="Yes" name="Available information for the external validation set" smiles="No"/>
<validation_set_descriptors answer="All" chapter="7.3" help="" name="Data for each descriptor variable for the external validation set"/>
<validation_dependent_var_availability answer="All" chapter="7.4" help="" name="Data for the dependent variable for the external validation set"/>
<validation_other_info chapter="7.5" help="" name="Other information about the external validation set">&lt;html&gt;&#13;
  &lt;head&gt;&#13;
    &#13;
  &lt;/head&gt;&#13;
  &lt;body&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      The method used two randomly selected validation sets &amp;#8211; selection (50) &#13;
      and test (50; 23 negative and 27 positive) (see 7.9 for description)&#13;
    &lt;/p&gt;&#13;
  &lt;/body&gt;&#13;
&lt;/html&gt;&#13;
</validation_other_info>
<experimental_design chapter="7.6" help="" name="Experimental design of test set">&lt;html&gt;&#13;
  &lt;head&gt;&#13;
    &#13;
  &lt;/head&gt;&#13;
  &lt;body&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      Randomly selected 50 (for selection set) and 50 (test set) data points&#13;
    &lt;/p&gt;&#13;
  &lt;/body&gt;&#13;
&lt;/html&gt;&#13;
</experimental_design>
<validation_predictivity chapter="7.7" help="" name="Predictivity - Statistics obtained by external validation">&lt;html&gt;&#13;
  &lt;head&gt;&#13;
    &#13;
  &lt;/head&gt;&#13;
  &lt;body&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      NEG POS&#13;
    &lt;/p&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      Total 23.00000 27.00000&#13;
    &lt;/p&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      Correct 13.00000 18.00000&#13;
    &lt;/p&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      Wrong 10.00000 9.00000&#13;
    &lt;/p&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      Correct (%) 56.52174 66.66667&#13;
    &lt;/p&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      Wrong (%) 43.47826 33.33333&#13;
    &lt;/p&gt;&#13;
  &lt;/body&gt;&#13;
&lt;/html&gt;&#13;
</validation_predictivity>
<validation_assessment chapter="7.8" help="" name="Predictivity - Assessment of the external validation set">&lt;html&gt;&#13;
  &lt;head&gt;&#13;
    &#13;
  &lt;/head&gt;&#13;
  &lt;body&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      The descriptors for the test set are within the limits of applicability, see &#13;
      6.7 and 6.12&#13;
    &lt;/p&gt;&#13;
  &lt;/body&gt;&#13;
&lt;/html&gt;&#13;
</validation_assessment>
<validation_comments chapter="7.9" help="" name="Comments on the external validation of the model">&lt;html&gt;&#13;
  &lt;head&gt;&#13;
    &#13;
  &lt;/head&gt;&#13;
  &lt;body&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      Overall predictions for the selection set (used to stop the ANN training &#13;
      and not to over fit it) and the test set (used to test the external &#13;
      prediction of the net after training) are given in the classification &#13;
      matrix, see 6.7.&#13;
    &lt;/p&gt;&#13;
  &lt;/body&gt;&#13;
&lt;/html&gt;&#13;
</validation_comments>
</QSAR_Predictivity>
<QSAR_Interpretation chapter="8" help="" name="Providing a mechanistic interpretation - OECD Principle 5">
<mechanistic_basis chapter="8.1" help="" name="Mechanistic basis of the model">&lt;html&gt;&#13;
  &lt;head&gt;&#13;
    &#13;
  &lt;/head&gt;&#13;
  &lt;body&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      The mechanistic picture is difficult to analyze because of the nature of &#13;
      the ANN models. According to the descriptors used as inputs to the &#13;
      network, it can be concluded that the property is mainly related to the &#13;
      charged surfaces that may play an important role in defining the property &#13;
      values. For instance, the most significant descriptor (according to F) &#13;
      Square root of Partial Surface Area of H atoms leads to positive index &#13;
      of the chromosomal aberration when its values are lower.&#13;
    &lt;/p&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      In addition to the charged surfaces, hydrogen abilities of the compounds &#13;
      are also important in conjunction with the energy terms related to &#13;
      HOMO-LUMO and exchange interactions for the C-C bond.&#13;
    &lt;/p&gt;&#13;
  &lt;/body&gt;&#13;
&lt;/html&gt;&#13;
</mechanistic_basis>
<mechanistic_basis_comments chapter="8.2" help="" name="A priori or a posteriori mechanistic interpretation">&lt;html&gt;&#13;
  &lt;head&gt;&#13;
    &#13;
  &lt;/head&gt;&#13;
  &lt;body&gt;&#13;
    A posteriori relation between the CA and the charge distribution over &#13;
    certain areas in the molecule was observed [ref 7, sect 9.2].&#13;
  &lt;/body&gt;&#13;
&lt;/html&gt;&#13;
</mechanistic_basis_comments>
<mechanistic_basis_info chapter="8.3" help="" name="Other information about the mechanistic interpretation">&lt;html&gt;&#13;
  &lt;head&gt;&#13;
    &#13;
  &lt;/head&gt;&#13;
  &lt;body&gt;&#13;
  &lt;/body&gt;&#13;
&lt;/html&gt;&#13;
</mechanistic_basis_info>
</QSAR_Interpretation>
<QSAR_Miscelaneous chapter="9" help="" name="Miscellaneous information">
<comments chapter="9.1" help="" name="Comments">&lt;html&gt;&#13;
  &lt;head&gt;&#13;
    &#13;
  &lt;/head&gt;&#13;
  &lt;body&gt;&#13;
    &lt;p style="margin-top: 0"&gt;&#13;
      Supporting information for: Training set(s), Selection set(s), Test &#13;
      set(s). The 9-9-8-1.snn file (binary) includes the ANN model; in order to &#13;
      use it, the user must have Statistica 7 or higher with ANN modules.&#13;
    &lt;/p&gt;&#13;
  &lt;/body&gt;&#13;
&lt;/html&gt;&#13;
</comments>
<bibliography chapter="9.2" help="" name="Bibliography">
				
      

















































































<publication_ref idref="publications_catalog_19" number="" catalog="publications_catalog"/>
<publication_ref idref="publications_catalog_20" number="" catalog="publications_catalog"/>
<publication_ref idref="publications_catalog_21" number="" catalog="publications_catalog"/>
<publication_ref idref="publications_catalog_22" number="" catalog="publications_catalog"/>
<publication_ref idref="publications_catalog_23" number="" catalog="publications_catalog"/>
<publication_ref idref="publications_catalog_24" number="" catalog="publications_catalog"/>
<publication_ref idref="publications_catalog_25" number="" catalog="publications_catalog"/>
<publication_ref idref="publications_catalog_26" number="" catalog="publications_catalog"/>
<publication_ref idref="publications_catalog_16" number="" catalog="publications_catalog"/>
</bibliography>
<attachments chapter="9.3" name="Supporting information" help="">
<attachment_training_data>
<molecules description="Chromosomal_Aberration_trainingset_501" filetype="sdf" url="http://qsardb.jrc.ec.europa.eu:80/qmrf/download_attachment.jsp?name=qmrf311_Chromosomal_Aberration_trainingset_501.sdf"/>
</attachment_training_data>
<attachment_validation_data>
<molecules description="Chromosomal_Aberration_testset_50" filetype="sdf" url="http://qsardb.jrc.ec.europa.eu:80/qmrf/download_attachment.jsp?name=qmrf311_Chromosomal_Aberration_testset_50.sdf"/>
<molecules description="Chromosomal_Aberration_selectionset_50" filetype="sdf" url="http://qsardb.jrc.ec.europa.eu:80/qmrf/download_attachment.jsp?name=qmrf311_Chromosomal_Aberration_selectionset_50.sdf"/>
</attachment_validation_data>
<attachment_documents>
<document description="9-9-8-1" filetype="snn" url="http://qsardb.jrc.ec.europa.eu:80/qmrf/download_attachment.jsp?name=qmrf311_9-9-8-1.snn"/>
</attachment_documents>
</attachments>
</QSAR_Miscelaneous>
<QMRF_Summary chapter="10" help="" name="Summary (JRC Inventory)">
<QMRF_number chapter="10.1" help="" name="QMRF number">Q17-10-1-311</QMRF_number>
<date_publication chapter="10.2" help="" name="Publication date">2011/06/06</date_publication>
<keywords chapter="10.3" name="Keywords" help="">Molcode, artificial neural network, in vitro chromosome aberration, Chinese Hamster Lung cell</keywords>
<summary_comments chapter="10.4" name="Comments" help=""/>
</QMRF_Summary>
</QMRF_chapters>
<Catalogs>
<software_catalog>
<software contact="Turu 2, Tartu, 51014, Estonia" description="The software was used to calculate the molecular descriptors" id="firstsoftware" name="QSARModel 3.3.8" number="" url="http://www.molcode.com"/>
<software contact="" description="The  descriptors are based on structure optimized by mopac 6 with key words AM1 BOND, PRECISE, GNORM=0.01, PI, POLAR, ENPART, VECTOR" id="software_catalog_2" name="QSARModel 3.3.8 " number="" url="http://www.molcode.com"/>
<software contact="" description="" id="software_catalog_3" name="QSARModel 3.3.8 " number="" url="http://www.molcode.com"/>
<software contact="StatSoft Ltd." description="The software was used to build the ANN models" id="software_catalog_4" name="Statistica 7" number="" url="statsoft.com"/>
</software_catalog>
<algorithms_catalog>
<algorithm definition="Neural network" description="Standard Backpropagation Neural Network (Multilayer Perceptron) classification" id="algorithms_catalog_1" publication_ref=""/>
</algorithms_catalog>
<descriptors_catalog>
<descriptor description="" id="descriptors_catalog_13" name="Square root of Partial Surface Area of H atoms" publication_ref="" units=""/>
<descriptor description="" id="descriptors_catalog_14" name="Partial Surface Area of H atoms" publication_ref="" units=""/>
<descriptor description="" id="descriptors_catalog_15" name="HOMO - LUMO energy gap (AM1)" publication_ref="" units=""/>
<descriptor description="" id="descriptors_catalog_16" name="No. of occupied electronic levels (AM1) / # atoms" publication_ref="" units=""/>
<descriptor description="" id="descriptors_catalog_17" name="WFOSA Atomic charge (Zefirov) weighted FOSA" publication_ref="" units=""/>
<descriptor description="" id="descriptors_catalog_18" name="Highest exchange energy (AM1) for C - C bonds" publication_ref="" units=""/>
<descriptor description="" id="descriptors_catalog_19" name="Number of H atoms" publication_ref="" units=""/>
<descriptor description="" id="descriptors_catalog_20" name="DPSA1 Difference in CPSAs (PPSA1-PNSA1) (AM1)" publication_ref="" units=""/>
<descriptor description="" id="descriptors_catalog_27" name="Max Sigma-Sigma bond order (AM1)" publication_ref="" units=""/>
</descriptors_catalog>
<endpoints_catalog>
<endpoint group="4.Human health effects" id="endpoints_catalog_4" name="4.10.Mutagenicity " subgroup=""/>
</endpoints_catalog>
<publications_catalog>
<publication id="publications_catalog_19" title="OECD (1997). OECD Guidelines for the Testing of Chemicals No. 473: Genetic Toxicology: In Vitro Mammalian Cytogenetic Test. Organisation for Economic Cooperation and Development, Paris, France." url=""/>
<publication id="publications_catalog_20" title="Sofuni T (1998). Data Book of Chromosomal Aberration Test In Vitro, Revised Edition. Life-Science Information Center, Tokyo, Japan." url=""/>
<publication id="publications_catalog_21" title="Ishidate M (1988). Data Book of Chromosomal Aberration Test In Vitro, Revised Edition. Elsevier, Amsterdam, New York, Oxford." url=""/>
<publication id="publications_catalog_22" title="Ishidate M, Harnois MC &amp; Sofuni T (1988). A Comparative analysis of data on the clastogenicity of 951 chemicals tested in mammalian cell cultures. Mutation Research 195, 151-213." url=""/>
<publication id="publications_catalog_23" title="Morita T, Nagaki T, Fukuda I &amp; Okumura K (1992). Clastogenicity of low pH to various cultured mammalian cells. Mutation Research 268, 297-305." url=""/>
<publication id="publications_catalog_24" title="Kawaguchi Y, Hayashi H, Sato M &amp; Shindo Y (1997). Needle crystals of Vitamin B2 induce polyploidy in Chinese hamster lung (CHL/IU) cells. Mutation Research 373, 1-7." url=""/>
<publication id="publications_catalog_25" title="Ashby J &amp; Ishidate M Jr (1986). Clastogenicity in vitro of the Na, K, Ca and Mg salts of saccharin; and of magnesium chloride; consideration of significance. Mutation Research 163, 63-73." url=""/>
<publication id="publications_catalog_26" title="Kusakabe H, Yamakage K, Wakuri S, Sasaki K, Nakagawa Y, Watanabe M, Hayashi M, Sofuni T, Ono H &amp; Tanaka N (2002). Relevance of chemical structure and cytotoxicity to the induction of chromosome aberrations based on testing of 98 high production volume industrial chemicals. Mutation Research 517, 187-198." url=""/>
<publication id="publications_catalog_14" title="Karelson M, Karelson G, Tamm T, Tulp I, Jänes J, Tämm K, Lomaka A, Savchenko D &amp; Dobchev D (2009). QSAR study of pharmacological permeabilities. Arkivoc 2, 218-238." url=""/>
<publication id="publications_catalog_16" title="Niemelä J &amp; Wedebye E (2004). Evaluation of the Setubal principles for establishing the status of development and validation of (Q)SARs, Annex 4, A “global” MULTI-CASE model for in vitro chromosomal aberrations in mammalian cells. pp 113-133 in: OECD Environment Health and Safety Publications, Series on Testing and Assessment, no 49, Report from the expert group on (Quantitative) Structure-Activity Relationships ((Q)SARs) on the principles for the validation of (Q)SARs." url=""/>
</publications_catalog>
<authors_catalog>
<author affiliation="Molcode Ltd. " contact="Turu 2, Tartu, 51014, Estonia " email="models@molcode.com" id="firstauthor" name="Dimitar Dobchev" number="" url="http://www.molcode.com"/>
<author affiliation="Molcode Ltd" contact="Turu 2, Tartu, 51014, Estonia" email="models@molcode.com" id="modelauthor" name="Molcode model development team " number="" url="www.molcode.com"/>
<author affiliation="Molcode Ltd. " contact="Turu 2, Tartu, 51014, Estonia " email="models@molcode.com" id="authors_catalog_3" name="Tarmo Tamm" number="" url="http://www.molcode.com"/>
<author affiliation="Molcode Ltd. " contact="Turu 2, Tartu, 51014, Estonia " email="models@molcode.com" id="authors_catalog_4" name="Gunnar Karelson" number="" url="http://www.molcode.com"/>
<author affiliation="Molcode Ltd. " contact="Turu 2, Tartu, 51014, Estonia " email="models@molcode.com" id="authors_catalog_5" name="Indrek Tulp" number="" url="http://www.molcode.com"/>
<author affiliation="Molcode Ltd. " contact="Turu 2, Tartu, 51014, Estonia " email="models@molcode.com" id="authors_catalog_6" name="Dana Martin" number="" url="http://www.molcode.com"/>
<author affiliation="Molcode Ltd. " contact="Turu 2, Tartu, 51014, Estonia " email="models@molcode.com" id="authors_catalog_7" name="Kaido Tämm" number="" url="http://www.molcode.com"/>
<author affiliation="Molcode Ltd. " contact="Turu 2, Tartu, 51014, Estonia " email="models@molcode.com" id="authors_catalog_8" name="Deniss Savchenko" number="" url="http://www.molcode.com"/>
<author affiliation="Molcode Ltd. " contact="Turu 2, Tartu, 51014, Estonia " email="models@molcode.com" id="authors_catalog_9" name="Jaak Jänes" number="" url="http://www.molcode.com"/>
<author affiliation="Molcode Ltd. " contact="Turu 2, Tartu, 51014, Estonia " email="models@molcode.com" id="authors_catalog_10" name="Eneli Härk" number="" url="http://www.molcode.com"/>
<author affiliation="Molcode Ltd. " contact="Turu 2, Tartu, 51014, Estonia " email="models@molcode.com" id="authors_catalog_11" name="Andres Kreegipuu" number="" url="http://www.molcode.com"/>
<author affiliation="Molcode Ltd. " contact="Turu 2, Tartu, 51014, Estonia " email="models@molcode.com" id="authors_catalog_12" name="Mati Karelson" number="" url="http://www.molcode.com"/>
<author affiliation="Molcode Ltd. " contact="Turu 2, Tartu, 51014, Estonia " email="models@molcode.com" id="authors_catalog_13" name="Molcode model development team" number="" url="http://www.molcode.com"/>
</authors_catalog>
</Catalogs>
</QMRF>
<!--
	include_attachments
	Replaces <attachments/> in param.xml with attachments from database.
	expects:
	param.id - qmrf_documents.documents.idqmrf
	param.xml - the xml to be transformed
-->