Home > src > geckomat > change_model > getComplexData.m

getComplexData

PURPOSE ^

getComplexData

SYNOPSIS ^

function complexInfo = getComplexData(taxonomicID, modelAdapter)

DESCRIPTION ^

 getComplexData
   Download curated complex stoichiometries from the EMBL-EBI Complex
   Portal database. Writes data/ComplexPortal.json in the obj.params.path
   specified in the model adapter.

 Input:
   taxonomicID     taxonomic identifier for which complex data should be
                   downloaded. Only taxonomic identifiers allowed are
                   those included on Complex Portal:
                   https://www.ebi.ac.uk/complexportal/complex/organisms
                   If empty, the taxonomicID stored in the model adapter
                   parameters is used; if that is empty as well, no complex
                   data is downloaded. If 0 (zero), complex data from all
                   organisms in Complex Portal is downloaded.
   modelAdapter    a loaded model adapter (Optional, will otherwise use the
                   default model adapter).
 Output:
   complexInfo     structure with data downloaded from Complex Portal.
                   Contains the following fields:
                   - complexID: id of the complex on Complex Portal
                   - name: name of the complex on Complex Portal
                   - species: organism containing the complex
                   - geneName: names of the genes in the complex
                   - protID: Uniprot IDs of the proteins in the complex
                   - stochiometry: the complex stoichiometry given in the same
                     order as the genes and proteins in geneName and protID
                   - defined:  0 if Complex Portal has no defined stoichiometry
                               1 if defined subunit stoichiometry is given
                               2 if complex consists of sub-complexes, whose
                                 subunit stoichiometries are given
 Usage
   complexInfo = getComplexData(taxonomicID, modelAdapter);

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SOURCE CODE ^

function complexInfo = getComplexData(taxonomicID, modelAdapter)
% getComplexData
%   Download curated complex stoichiometries from the EMBL-EBI Complex
%   Portal database. Writes data/ComplexPortal.json in the obj.params.path
%   specified in the model adapter.
%
% Input:
%   taxonomicID     taxonomic identifier for which complex data should be
%                   downloaded. Only taxonomic identifiers allowed are
%                   those included on Complex Portal:
%                   https://www.ebi.ac.uk/complexportal/complex/organisms
%                   If empty, the taxonomicID stored in the model adapter
%                   parameters is used; if that is empty as well, no
%                   complex data is downloaded and complexInfo is returned
%                   empty. If 0 (zero), complex data from all organisms in
%                   Complex Portal is downloaded.
%   modelAdapter    a loaded model adapter (Optional, will otherwise use the
%                   default model adapter).
% Output:
%   complexInfo     structure with data downloaded from Complex Portal.
%                   Contains the following fields:
%                   - complexID: id of the complex on Complex Portal
%                   - name: name of the complex on Complex Portal
%                   - species: organism containing the complex
%                   - geneName: names of the genes in the complex
%                   - protID: Uniprot IDs of the proteins in the complex
%                   - stochiometry: the complex stoichiometry (row vector)
%                     given in the same order as the genes and proteins in
%                     geneName and protID
%                   - defined:  0 if Complex Portal has no defined stoichiometry
%                               1 if defined subunit stoichiometry is given
%                               2 if complex consists of sub-complexes, whose
%                                 subunit stoichiometries are given
%                   Rows whose details could not be retrieved are left
%                   empty in all fields.
% Usage
%   complexInfo = getComplexData(taxonomicID, modelAdapter);

if nargin < 2 || isempty(modelAdapter)
    modelAdapter = ModelAdapterManager.getDefault();
    if isempty(modelAdapter)
        error('Either send in a modelAdapter or set the default model adapter in the ModelAdapterManager.')
    end
end

if nargin<1 || isempty(taxonomicID)
    taxonomicID = modelAdapter.getParameters().complex.taxonomicID;
end

params = modelAdapter.params;

% Assign the output before any early return; otherwise MATLAB raises an
% "output argument not assigned" error when the caller requests a value.
complexInfo = [];
if isempty(taxonomicID) % Can be empty when gathered from model adapter
    printOrange('WARNING: No taxonomicID specified.');
    return
elseif isequal(taxonomicID, 0)
    % Zero means "all organisms": drop the species filter from the query
    taxonomicID = [];
end

webOptions = weboptions('Timeout', 30);
url1 = 'https://www.ebi.ac.uk/intact/complex-ws/search/*';
if ~isempty(taxonomicID)
    url1 = [url1 '?facets=species&filters=species:("' num2str(taxonomicID) '")'];
end
try
    data = webread(url1,webOptions);
catch ME
    if (strcmp(ME.identifier,'MATLAB:webservices:HTTP404StatusCodeError'))
        error('Cannot connect to the Complex Portal, perhaps the server is not responding');
    end
    % Any other failure (timeout, DNS, ...) would leave 'data' undefined
    % and crash below with an unrelated message, so propagate it instead.
    rethrow(ME);
end
if data.size == 0
    error('No data could be gathered from Complex Portal for the specified taxonomicID.')
end
complexData = cell(data.size,7);

url2 = 'https://www.ebi.ac.uk/intact/complex-ws/complex/';
progressbar('Retrieving information for complexes');
for i = 1:data.size
    progressbar(i/data.size) % Update progress bar
    complexID = data.elements(i,1).complexAC;
    try
        temp = webread([url2 complexID],webOptions);
    catch
        % Best effort: report the failure (whatever its cause) and leave
        % this row empty instead of aborting the whole download.
        printOrange(['WARNING: Cannot retrieve the information for ' complexID '.\n']);
        temp = [];
    end
    if isempty(temp)
        continue
    end

    complexData(i,1) = {temp.complexAc};
    complexData(i,2) = {temp.name};
    complexData(i,3) = {temp.species};

    % Use the protein participants when there are any. Some complexes are
    % reported as 'stable complex' only; then all participants are kept
    % and treated as sub-complexes that are expanded further below.
    idxIntType = find(strcmpi({temp.participants.interactorType}, 'protein'));
    if isempty(idxIntType)
        members = temp.participants;
        definedFlag = 2;
    else
        members = temp.participants(idxIntType);
        definedFlag = 1;
    end
    complexData(i,4) = {{members.name}};
    complexData(i,5) = {{members.identifier}};

    % Complex Portal reports a minimum and a maximum stoichiometry value;
    % only the minimum is stored. If any participant lacks a stoichiometry
    % the whole complex is filled with zeros and flagged as undefined.
    hasStoich = ~cellfun('isempty',{temp.participants.stochiometry});
    if ~isempty(hasStoich) && all(hasStoich)
        stochiometry = split({members.stochiometry}.', ',');
        if numel(members) == 1
            % For a single input string, split returns the parts as a
            % column (2x1) instead of one row per participant (1x2).
            stochiometry = stochiometry.';
        end
        values = str2double(erase(stochiometry(:,1),"minValue: ")).';
        complexData(i,6) = {values};
        complexData(i,7) = {definedFlag};
    else
        complexData(i,6) = {zeros(1,numel(members))};
        complexData(i,7) = {0};
    end
end
fprintf('\n');

% Expand complexes of complexes.
% Rows of failed downloads hold empty cells, so test every row separately
% (concatenating the column would drop the empties and shift the indices)
% and replace missing IDs by '' to keep ismember on character vectors.
isNested = cellfun(@(d) isequal(d,2), complexData(:,7));
complexIDs = complexData(:,1);
complexIDs(~cellfun(@ischar, complexIDs)) = {''};
for parentIdx = find(isNested).'
    subComplex  = complexData{parentIdx,5};
    subComplexS = complexData{parentIdx,6};
    % Look up each sub-complex in its own order so that subComplexS(j)
    % scales the matching sub-complex.
    [isKnown, subIdx] = ismember(subComplex, complexIDs);
    if ~any(isKnown)
        % None of the participants is a downloaded complex; keep the row
        % as it is rather than failing.
        continue
    end
    allGenes = {};
    allProts = {};
    allStoch = [];
    for j = find(isKnown(:)).'
        k = subIdx(j);
        allGenes = [allGenes, complexData{k,4}]; %#ok<AGROW>
        allProts = [allProts, complexData{k,5}]; %#ok<AGROW>
        allStoch = [allStoch, reshape(complexData{k,6},1,[]) * subComplexS(j)]; %#ok<AGROW>
    end
    % Merge subunits shared between sub-complexes, summing their counts.
    [allGenes,ia,ic] = unique(allGenes,'stable');
    complexData{parentIdx,4} = allGenes;
    complexData{parentIdx,5} = allProts(ia);
    % Stored as a row vector, like the stoichiometry of all other complexes
    complexData{parentIdx,6} = accumarray(ic(:), allStoch(:)).';
end

rowHeadings = {'complexID','name','species','geneName','protID','stochiometry','defined'};

complexInfo = cell2struct(complexData, rowHeadings, 2);

% Convert to JSON and write to file
jsontxt = jsonencode(cell2table(complexData, 'VariableNames', rowHeadings));
outFile = fullfile(params.path,'data','ComplexPortal.json');
[fid, errMsg] = fopen(outFile, 'w');
if fid == -1
    error('Cannot open %s for writing: %s', outFile, errMsg);
end
closeFile = onCleanup(@() fclose(fid)); % close the file even if writing fails
fprintf(fid, '%s', jsontxt);
clear closeFile
fprintf('Model-specific ComplexPortal database stored at %s\n',outFile);
end

Generated by m2html © 2005